mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-01-22 09:01:57 -05:00
Dreamcast: Make TnL slightly more efficient
This commit is contained in:
parent
5a72b13822
commit
a970aea405
3 changed files with 29 additions and 38 deletions
|
@ -171,17 +171,12 @@ _DrawTexturedQuads:
|
||||||
! CLIPFLAGS TESTING
|
! CLIPFLAGS TESTING
|
||||||
and #15,FLG
|
and #15,FLG
|
||||||
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
||||||
bt/s .T_NONE_VISIBLE ! if T goto NONE_VISIBLE
|
bf/s .T_LOOP_END ! if !T goto LOOP_END
|
||||||
nop
|
|
||||||
bra .T_SOME_VISIBLE
|
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.T_NONE_VISIBLE:
|
! No points visible case
|
||||||
bra .T_LOOP_END ! jump to loop end after executing instruction in delay slot
|
|
||||||
add #-128, DST ! DST -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration
|
add #-128, DST ! DST -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration
|
||||||
|
|
||||||
.T_SOME_VISIBLE:
|
|
||||||
|
|
||||||
.T_LOOP_END:
|
.T_LOOP_END:
|
||||||
dt CNT ! count--; T = count == 0
|
dt CNT ! count--; T = count == 0
|
||||||
bf .T_TRANSFORM_QUAD ! if !T then goto T_TRANSFORM_QUAD
|
bf .T_TRANSFORM_QUAD ! if !T then goto T_TRANSFORM_QUAD
|
||||||
|
@ -219,17 +214,12 @@ _DrawColouredQuads:
|
||||||
! CLIPFLAGS TESTING
|
! CLIPFLAGS TESTING
|
||||||
and #15,FLG
|
and #15,FLG
|
||||||
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
||||||
bt/s .C_NONE_VISIBLE ! if T goto NONE_VISIBLE
|
bf/s .C_LOOP_END ! if !T goto LOOP_END
|
||||||
nop
|
|
||||||
bra .C_SOME_VISIBLE
|
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.C_NONE_VISIBLE:
|
! No points visible case
|
||||||
bra .C_LOOP_END ! jump to loop end after executing instruction in delay slot
|
|
||||||
add #-128, DST ! dst -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad
|
add #-128, DST ! dst -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad
|
||||||
|
|
||||||
.C_SOME_VISIBLE:
|
|
||||||
|
|
||||||
.C_LOOP_END:
|
.C_LOOP_END:
|
||||||
dt CNT ! count--; T = count == 0
|
dt CNT ! count--; T = count == 0
|
||||||
bf .C_TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
bf .C_TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
||||||
|
|
8
third_party/gldc/src/gldc.h
vendored
8
third_party/gldc/src/gldc.h
vendored
|
@ -9,13 +9,9 @@ typedef struct {
|
||||||
/* Same 32 byte layout as pvr_vertex_t */
|
/* Same 32 byte layout as pvr_vertex_t */
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
float x, y, z;
|
float x, y, z;
|
||||||
float u, v;
|
uint32_t u, v; // really floats, but stored as uint for better load/store codegen
|
||||||
uint32_t bgra;
|
uint32_t bgra;
|
||||||
|
float w; // actually oargb, but repurposed since unused
|
||||||
/* In the pvr_vertex_t structure, this next 4 bytes is oargb
|
|
||||||
* but we're not using that for now, so having W here makes the code
|
|
||||||
* simpler */
|
|
||||||
float w;
|
|
||||||
} __attribute__ ((aligned (32))) Vertex;
|
} __attribute__ ((aligned (32))) Vertex;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
31
third_party/gldc/src/sh4.c
vendored
31
third_party/gldc/src/sh4.c
vendored
|
@ -21,21 +21,26 @@ static GL_FORCE_INLINE float _glFastInvert(float x) {
|
||||||
return sh4_fsrra(x * x);
|
return sh4_fsrra(x * x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PushVertex(vtx) \
|
static GL_FORCE_INLINE void PushVertex(Vertex* v) {
|
||||||
_glPerspectiveDivideVertex(vtx); \
|
volatile Vertex* dst = (Vertex*)(sq);
|
||||||
_glPushHeaderOrVertex(vtx);
|
float f = _glFastInvert(v->w);
|
||||||
|
// Convert to NDC (viewport already applied)
|
||||||
|
float x = v->x * f;
|
||||||
|
float y = v->y * f;
|
||||||
|
|
||||||
static GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
|
dst->flags = v->flags;
|
||||||
const float f = _glFastInvert(vertex->w);
|
dst->x = x;
|
||||||
|
dst->y = y;
|
||||||
/* Convert to NDC (viewport already applied) */
|
dst->z = f;
|
||||||
vertex->x = vertex->x * f;
|
dst->u = v->u;
|
||||||
vertex->y = vertex->y * f;
|
dst->v = v->v;
|
||||||
vertex->z = f;
|
dst->bgra = v->bgra;
|
||||||
|
__asm__("pref @%0" : : "r"(dst));
|
||||||
|
dst++;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _glPushHeaderOrVertex(Vertex* v) {
|
static inline void PushCommand(Vertex* v) {
|
||||||
uint32_t* s = (uint32_t*) v;
|
uint32_t* s = (uint32_t*)v;
|
||||||
sq[0] = *(s++);
|
sq[0] = *(s++);
|
||||||
sq[1] = *(s++);
|
sq[1] = *(s++);
|
||||||
sq[2] = *(s++);
|
sq[2] = *(s++);
|
||||||
|
@ -304,7 +309,7 @@ void SceneListSubmit(Vertex* v3, int n) {
|
||||||
case PVR_CMD_VERTEX:
|
case PVR_CMD_VERTEX:
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
_glPushHeaderOrVertex(v3);
|
PushCommand(v3);
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue