mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-01-22 09:01:57 -05:00
Dreamcast: Make TnL slightly more efficient
This commit is contained in:
parent
5a72b13822
commit
a970aea405
3 changed files with 29 additions and 38 deletions
|
@ -171,17 +171,12 @@ _DrawTexturedQuads:
|
|||
! CLIPFLAGS TESTING
|
||||
and #15,FLG
|
||||
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
||||
bt/s .T_NONE_VISIBLE ! if T goto NONE_VISIBLE
|
||||
nop
|
||||
bra .T_SOME_VISIBLE
|
||||
bf/s .T_LOOP_END ! if !T goto LOOP_END
|
||||
nop
|
||||
|
||||
.T_NONE_VISIBLE:
|
||||
bra .T_LOOP_END ! jump to loop end after executing instruction in delay slot
|
||||
! No points visible case
|
||||
add #-128, DST ! DST -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration
|
||||
|
||||
.T_SOME_VISIBLE:
|
||||
|
||||
.T_LOOP_END:
|
||||
dt CNT ! count--; T = count == 0
|
||||
bf .T_TRANSFORM_QUAD ! if !T then goto T_TRANSFORM_QUAD
|
||||
|
@ -219,17 +214,12 @@ _DrawColouredQuads:
|
|||
! CLIPFLAGS TESTING
|
||||
and #15,FLG
|
||||
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
||||
bt/s .C_NONE_VISIBLE ! if T goto NONE_VISIBLE
|
||||
nop
|
||||
bra .C_SOME_VISIBLE
|
||||
bf/s .C_LOOP_END ! if !T goto LOOP_END
|
||||
nop
|
||||
|
||||
.C_NONE_VISIBLE:
|
||||
bra .C_LOOP_END ! jump to loop end after executing instruction in delay slot
|
||||
! No points visible case
|
||||
add #-128, DST ! dst -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad
|
||||
|
||||
.C_SOME_VISIBLE:
|
||||
|
||||
.C_LOOP_END:
|
||||
dt CNT ! count--; T = count == 0
|
||||
bf .C_TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
||||
|
|
8
third_party/gldc/src/gldc.h
vendored
8
third_party/gldc/src/gldc.h
vendored
|
@ -9,13 +9,9 @@ typedef struct {
|
|||
/* Same 32 byte layout as pvr_vertex_t */
|
||||
uint32_t flags;
|
||||
float x, y, z;
|
||||
float u, v;
|
||||
uint32_t u, v; // really floats, but stored as uint for better load/store codegen
|
||||
uint32_t bgra;
|
||||
|
||||
/* In the pvr_vertex_t structure, this next 4 bytes is oargb
|
||||
* but we're not using that for now, so having W here makes the code
|
||||
* simpler */
|
||||
float w;
|
||||
float w; // actually oargb, but repurposed since unused
|
||||
} __attribute__ ((aligned (32))) Vertex;
|
||||
|
||||
typedef struct {
|
||||
|
|
31
third_party/gldc/src/sh4.c
vendored
31
third_party/gldc/src/sh4.c
vendored
|
@ -21,21 +21,26 @@ static GL_FORCE_INLINE float _glFastInvert(float x) {
|
|||
return sh4_fsrra(x * x);
|
||||
}
|
||||
|
||||
#define PushVertex(vtx) \
|
||||
_glPerspectiveDivideVertex(vtx); \
|
||||
_glPushHeaderOrVertex(vtx);
|
||||
static GL_FORCE_INLINE void PushVertex(Vertex* v) {
|
||||
volatile Vertex* dst = (Vertex*)(sq);
|
||||
float f = _glFastInvert(v->w);
|
||||
// Convert to NDC (viewport already applied)
|
||||
float x = v->x * f;
|
||||
float y = v->y * f;
|
||||
|
||||
static GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
|
||||
const float f = _glFastInvert(vertex->w);
|
||||
|
||||
/* Convert to NDC (viewport already applied) */
|
||||
vertex->x = vertex->x * f;
|
||||
vertex->y = vertex->y * f;
|
||||
vertex->z = f;
|
||||
dst->flags = v->flags;
|
||||
dst->x = x;
|
||||
dst->y = y;
|
||||
dst->z = f;
|
||||
dst->u = v->u;
|
||||
dst->v = v->v;
|
||||
dst->bgra = v->bgra;
|
||||
__asm__("pref @%0" : : "r"(dst));
|
||||
dst++;
|
||||
}
|
||||
|
||||
static inline void _glPushHeaderOrVertex(Vertex* v) {
|
||||
uint32_t* s = (uint32_t*) v;
|
||||
static inline void PushCommand(Vertex* v) {
|
||||
uint32_t* s = (uint32_t*)v;
|
||||
sq[0] = *(s++);
|
||||
sq[1] = *(s++);
|
||||
sq[2] = *(s++);
|
||||
|
@ -304,7 +309,7 @@ void SceneListSubmit(Vertex* v3, int n) {
|
|||
case PVR_CMD_VERTEX:
|
||||
continue;
|
||||
default:
|
||||
_glPushHeaderOrVertex(v3);
|
||||
PushCommand(v3);
|
||||
continue;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue