diff --git a/misc/dreamcast/VertexTransform.S b/misc/dreamcast/VertexTransform.S index 908a05eab..4625f3e00 100644 --- a/misc/dreamcast/VertexTransform.S +++ b/misc/dreamcast/VertexTransform.S @@ -170,17 +170,12 @@ _DrawTexturedQuads: ! CLIPFLAGS TESTING and #15,FLG - cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible) - bt/s .T_NONE_VISIBLE ! if T goto NONE_VISIBLE - nop - bra .T_SOME_VISIBLE + cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible) + bf/s .T_LOOP_END ! if !T goto LOOP_END nop -.T_NONE_VISIBLE: - bra .T_LOOP_END ! jump to loop end after executing instruction in delay slot - add #-128, DST ! DST -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration - -.T_SOME_VISIBLE: +! No points visible case + add #-128, DST ! DST -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration .T_LOOP_END: dt CNT ! count--; T = count == 0 @@ -218,17 +213,12 @@ _DrawColouredQuads: ! CLIPFLAGS TESTING and #15,FLG - cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible) - bt/s .C_NONE_VISIBLE ! if T goto NONE_VISIBLE - nop - bra .C_SOME_VISIBLE + cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible) + bf/s .C_LOOP_END ! if !T goto LOOP_END nop -.C_NONE_VISIBLE: - bra .C_LOOP_END ! jump to loop end after executing instruction in delay slot - add #-128, DST ! dst -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad - -.C_SOME_VISIBLE: +! No points visible case + add #-128, DST ! dst -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad .C_LOOP_END: dt CNT ! count--; T = count == 0 @@ -237,4 +227,4 @@ _DrawColouredQuads: TransformEnd .size _DrawColouredQuads, .-_DrawColouredQuads -.type _DrawColouredQuads, %function \ No newline at end of file +.type _DrawColouredQuads, %function diff --git a/third_party/gldc/src/gldc.h b/third_party/gldc/src/gldc.h index 048ee8f95..f24df4899 100644 --- a/third_party/gldc/src/gldc.h +++ b/third_party/gldc/src/gldc.h @@ -9,13 +9,9 @@ typedef struct { /* Same 32 byte layout as pvr_vertex_t */ uint32_t flags; float x, y, z; - float u, v; + uint32_t u, v; // really floats, but stored as uint for better load/store codegen uint32_t bgra; - - /* In the pvr_vertex_t structure, this next 4 bytes is oargb - * but we're not using that for now, so having W here makes the code - * simpler */ - float w; + float w; // actually oargb, but repurposed since unused } __attribute__ ((aligned (32))) Vertex; typedef struct { diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c index f13043272..c5c3e58f1 100644 --- a/third_party/gldc/src/sh4.c +++ b/third_party/gldc/src/sh4.c @@ -21,21 +21,26 @@ static GL_FORCE_INLINE float _glFastInvert(float x) { return sh4_fsrra(x * x); } -#define PushVertex(vtx) \ - _glPerspectiveDivideVertex(vtx); \ - _glPushHeaderOrVertex(vtx); +static GL_FORCE_INLINE void PushVertex(Vertex* v) { + volatile Vertex* dst = (Vertex*)(sq); + float f = _glFastInvert(v->w); + // Convert to NDC (viewport already applied) + float x = v->x * f; + float y = v->y * f; -static GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) { - const float f = _glFastInvert(vertex->w); - - /* Convert to NDC (viewport already applied) */ - vertex->x = vertex->x * f; - vertex->y = vertex->y * f; - vertex->z = f; + dst->flags = v->flags; + dst->x = x; + dst->y = y; + dst->z = f; + dst->u = v->u; + dst->v = v->v; + dst->bgra = v->bgra; + __asm__("pref @%0" : : "r"(dst)); + dst++; } -static inline void _glPushHeaderOrVertex(Vertex* v) { - uint32_t* s = (uint32_t*) v; +static inline void PushCommand(Vertex* v) { + uint32_t* s = (uint32_t*)v; sq[0] = *(s++); sq[1] = *(s++); sq[2] = *(s++); @@ -304,7 +309,7 @@ void SceneListSubmit(Vertex* v3, int n) { case PVR_CMD_VERTEX: continue; default: - _glPushHeaderOrVertex(v3); + PushCommand(v3); continue; };