mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-01-22 17:12:25 -05:00
Dreamcast: Slightly optimise performance by avoiding separate viewport transform
This commit is contained in:
parent
967d52ea6a
commit
11fb4a2b65
6 changed files with 27 additions and 143 deletions
|
@ -6,16 +6,6 @@
|
|||
! r13 = cur vertex
|
||||
! r14 = next vertex (prefetch)
|
||||
|
||||
!fr12 = VIEWPORT_HWIDTH
|
||||
!fr13 = VIEWPORT_HHEIGHT
|
||||
!fr14 = VIEWPORT_X_PLUS_HWIDTH
|
||||
!fr15 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
#define F_HW fr12
|
||||
#define F_HH fr13
|
||||
#define F_XP fr14
|
||||
#define F_YP fr15
|
||||
|
||||
#define R_VTX r10
|
||||
#define R_EOL r11
|
||||
#define REG_CMD_VTX r10
|
||||
|
@ -51,42 +41,8 @@
|
|||
add #32,r8 ! EX, SQ += 32
|
||||
.endm
|
||||
|
||||
|
||||
! Applies the perspective divide and viewport transform to a vertex, in place
|
||||
! CLOBBERS: fr0, fr4, fr5
|
||||
! INPUTS: R (vertex)
|
||||
! OUTPUTS: vertex x/y/z overwritten in place (z = 1/w); R is restored to its input value
|
||||
! TODO optimise greatly
|
||||
.macro ViewportTransform R
|
||||
! INVERSE W CALCULATION
|
||||
add #28, \R ! EX, \R = &vertex->w
|
||||
fmov.s @\R,fr0 ! LS, fr0 = vertex->w
|
||||
fmul fr0,fr0 ! FE, fr0 = fr0 * fr0
|
||||
add #-24, \R ! EX, \R = &vertex->x
|
||||
fsrra fr0 ! FE, fr0 = 1 / sqrt(fr0) -> 1 / vertex->w
|
||||
|
||||
! TRANSFORM X
|
||||
fmov.s @\R,fr4 ! LS, fr4 = vertex->x
|
||||
fmov F_XP,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
|
||||
fmul F_HW,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x
|
||||
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
|
||||
fmov.s fr5,@\R ! LS, vertex->x = fr5
|
||||
add #4, \R ! EX, \R = &vertex->y
|
||||
|
||||
! TRANSFORM Y
|
||||
fmov.s @\R,fr4 ! LS, fr4 = vertex->y
|
||||
fmov F_YP,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
fmul F_HH,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y
|
||||
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
|
||||
fmov.s fr5,@\R ! LS, vertex->y = fr5
|
||||
add #4, \R ! EX, \R = &vertex->z
|
||||
|
||||
! ASSIGN Z
|
||||
fmov.s fr0,@\R ! LS, vertex->z = fr0
|
||||
add #-12, \R ! EX, \R -= 12 (back to start of vertex)
|
||||
.endm
|
||||
|
||||
! Transforms then pushes a vertex to the store queue
|
||||
! note: Vertices are assumed to have already been viewport transformed
|
||||
! CLOBBERS: r2, fr0, fr4, fr5
|
||||
! INPUTS: R (vertex), r8 (SQ global)
|
||||
! OUTPUTS: R, r8 altered
|
||||
|
@ -109,24 +65,20 @@
|
|||
|
||||
! TRANSFORM X
|
||||
fmov.s @\R,fr4 ! LS, fr4 = SRC->x
|
||||
fmov F_XP,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
|
||||
fmul F_HW,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * SRC->x
|
||||
fmul fr0,fr4 ! FE, fr4 = invW * SRC->x
|
||||
mov.l @(20,\R),r2 ! LS, tmp = SRC->bgra
|
||||
mov.l r2,@(20,r8) ! LS, DST->bgra = tmp
|
||||
fmac fr0,fr4,fr5 ! FE, fr5 = invW * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
|
||||
add #4, \R ! EX, SRC += 4
|
||||
fmov.s fr5,@r8 ! LS, DST->x = fr5
|
||||
fmov.s fr4,@r8 ! LS, DST->x = fr4
|
||||
|
||||
! TRANSFORM Y
|
||||
fmov.s @\R,fr4 ! LS, fr4 = SRC->y
|
||||
fmov F_YP,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
fmul F_HH,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * SRC->y
|
||||
add #8, r8 ! EX, DST += 8
|
||||
fmul fr0,fr4 ! FE, fr4 = invW * SRC->y
|
||||
fmov.s fr0,@r8 ! LS, DST->z = invW
|
||||
fmac fr0,fr4,fr5 ! FE, fr5 = invW * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
|
||||
add #-4, r8 ! EX, DST -= 4
|
||||
add #-8, \R ! EX, src -= 8 (back to start of vertex)
|
||||
fmov.s fr5,@r8 ! LS, DST->y = fr5
|
||||
fmov.s fr4,@r8 ! LS, DST->y = fr4
|
||||
|
||||
add #-8,r8 ! EX, DST -= 8 (back to start of vertex)
|
||||
pref @r8 ! LS, Trigger SQ
|
||||
|
@ -561,17 +513,6 @@ _ProcessVertexList:
|
|||
mov.l r13,@-r15
|
||||
mov.l r14,@-r15
|
||||
sts.l pr,@-r15
|
||||
! STORE FPU REGISTERS
|
||||
fmov.s F_HW,@-r15
|
||||
fmov.s F_HH,@-r15
|
||||
fmov.s F_XP,@-r15
|
||||
fmov.s F_YP,@-r15
|
||||
! VIEWPORT SETUP
|
||||
mov.l .VP_1,r0 ! LS, &vp
|
||||
fmov.s @r0+,F_HW ! LS, vp.HWIDTH
|
||||
fmov.s @r0+,F_HH ! LS, vp.HHEIGHT
|
||||
fmov.s @r0+,F_XP ! LS, vp.X_PLUS_HWIDTH
|
||||
fmov.s @r0+,F_YP ! LS, vp.Y_PLUS_HHEIGHT
|
||||
! REGISTER SETUP
|
||||
mov r4,r14
|
||||
mov r4,r13
|
||||
|
@ -583,33 +524,11 @@ _ProcessVertexList:
|
|||
bra SUBMIT_LOOP
|
||||
add #-64,r15
|
||||
|
||||
! Handles a non-vertex command
|
||||
! Submits a PowerVR GPU command
|
||||
DO_CMD:
|
||||
mov r13,r4 ! r4 = CUR
|
||||
mov r1,r0 ! r0 = MASK
|
||||
cmp/eq #35,r0 ! T = MASK == 0x23
|
||||
bt.s 9f ! if (T) goto 9;
|
||||
nop
|
||||
! PowerVR GPU command
|
||||
PushVertex REG_V0
|
||||
bra NEXT_ITER
|
||||
nop
|
||||
! Viewport update command
|
||||
9:
|
||||
add #4,r4
|
||||
mov.l .VP_1,r2
|
||||
! Load VIEWPORT registers
|
||||
fmov.s @r4+,F_HW ! VIEWPORT_HWIDTH = src->x
|
||||
fmov.s @r4+,F_HH ! VIEWPORT_HHEIGHT = src->y
|
||||
fmov.s @r4+,F_XP ! VIEWPORT_X_PLUS_HWIDTH = src->z
|
||||
add #16,r2
|
||||
fmov.s @r4+,F_YP ! VIEWPORT_Y_PLUS_HHEIGHT = src->u
|
||||
! And store to vp global
|
||||
fmov.s F_YP,@-r2
|
||||
fmov.s F_XP,@-r2
|
||||
fmov.s F_HH,@-r2
|
||||
bra NEXT_ITER
|
||||
fmov.s F_HW,@-r2
|
||||
|
||||
SUBMIT_LOOP:
|
||||
mov.l @r13,r0 ! FLAGS = CUR->flags
|
||||
|
@ -645,18 +564,6 @@ NEXT_ITER:
|
|||
mov r14,r13 ! CUR = NEXT
|
||||
|
||||
add #64,r15
|
||||
! VIEWPORT SAVE
|
||||
mov.l .VP_1,r0
|
||||
add #16,r0
|
||||
fmov.s F_YP,@-r0
|
||||
fmov.s F_XP,@-r0
|
||||
fmov.s F_HH,@-r0
|
||||
fmov.s F_HW,@-r0
|
||||
! RESTORE FPU REGISTERS
|
||||
fmov.s @r15+,F_YP
|
||||
fmov.s @r15+,F_XP
|
||||
fmov.s @r15+,F_HH
|
||||
fmov.s @r15+,F_HW
|
||||
! RESTORE CPU REGISTERS
|
||||
lds.l @r15+,pr
|
||||
mov.l @r15+,r14
|
||||
|
|
|
@ -48,11 +48,6 @@
|
|||
!fr5 = y
|
||||
!fr6 = z
|
||||
!fr7 = w
|
||||
!fr8 = VIEWPORT_HWIDTH
|
||||
!fr9 = VIEWPORT_HHEIGHT
|
||||
!fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
!fv4 = XYZW
|
||||
|
||||
|
||||
|
|
|
@ -448,11 +448,21 @@ static matrix_t __attribute__((aligned(32))) _proj, _view;
|
|||
static float textureOffsetX, textureOffsetY;
|
||||
static int textureOffset;
|
||||
|
||||
static float vp_scaleX, vp_scaleY, vp_offsetX, vp_offsetY;
|
||||
static matrix_t __attribute__((aligned(32))) mat_vp;
|
||||
|
||||
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
||||
if (type == MATRIX_PROJ) memcpy(&_proj, matrix, sizeof(struct Matrix));
|
||||
if (type == MATRIX_VIEW) memcpy(&_view, matrix, sizeof(struct Matrix));
|
||||
|
||||
mat_load( &_proj);
|
||||
memcpy(&mat_vp, &Matrix_Identity, sizeof(struct Matrix));
|
||||
mat_vp[0][0] = vp_scaleX;
|
||||
mat_vp[1][1] = vp_scaleY;
|
||||
mat_vp[3][0] = vp_offsetX;
|
||||
mat_vp[3][1] = vp_offsetY;
|
||||
|
||||
mat_load(&mat_vp);
|
||||
mat_apply(&_proj);
|
||||
mat_apply(&_view);
|
||||
}
|
||||
|
||||
|
@ -645,13 +655,10 @@ static void PushCommand(void* cmd) {
|
|||
}
|
||||
|
||||
void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||
Vertex c;
|
||||
c.flags = PVR_CMD_USERCLIP | 0x23;
|
||||
c.x = w * 0.5f; // hwidth
|
||||
c.y = h * -0.5f; // hheight
|
||||
c.z = x + w * 0.5f; // x_plus_hwidth
|
||||
c.u = y + h * 0.5f; // y_plus_hheight
|
||||
PushCommand(&c);
|
||||
vp_scaleX = w * 0.5f; // hwidth
|
||||
vp_scaleY = h * -0.5f; // hheight
|
||||
vp_offsetX = x + w * 0.5f; // x_plus_hwidth
|
||||
vp_offsetY = y + h * 0.5f; // y_plus_hheight
|
||||
}
|
||||
|
||||
void Gfx_SetScissor(int x, int y, int w, int h) {
|
||||
|
|
9
third_party/gldc/src/gldc.h
vendored
9
third_party/gldc/src/gldc.h
vendored
|
@ -44,15 +44,6 @@ typedef struct {
|
|||
|
||||
#define GL_FORCE_INLINE static __attribute__((always_inline)) inline
|
||||
|
||||
typedef struct {
|
||||
float hwidth; /* width * 0.5f */
|
||||
float hheight; /* height * 0.5f */
|
||||
float x_plus_hwidth;
|
||||
float y_plus_hheight;
|
||||
} Viewport;
|
||||
|
||||
extern Viewport VIEWPORTS[3];
|
||||
|
||||
typedef struct {
|
||||
//0
|
||||
GLuint index;
|
||||
|
|
25
third_party/gldc/src/sh4.c
vendored
25
third_party/gldc/src/sh4.c
vendored
|
@ -8,7 +8,6 @@
|
|||
|
||||
#define SQ_BASE_ADDRESS (void*) 0xe0000000
|
||||
#define PREFETCH(addr) __builtin_prefetch((addr))
|
||||
Viewport vp;
|
||||
|
||||
GL_FORCE_INLINE float _glFastInvert(float x) {
|
||||
return MATH_fsrra(x * x);
|
||||
|
@ -17,10 +16,10 @@ GL_FORCE_INLINE float _glFastInvert(float x) {
|
|||
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
|
||||
const float f = _glFastInvert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->x = (vertex->x * f * vp.hwidth) + vp.x_plus_hwidth;
|
||||
vertex->y = (vertex->y * f * vp.hheight) + vp.y_plus_hheight;
|
||||
vertex->z = f;
|
||||
/* Convert to NDC (viewport already applied) */
|
||||
vertex->x = vertex->x * f;
|
||||
vertex->y = vertex->y * f;
|
||||
vertex->z = _glFastInvert(vertex->w);
|
||||
}
|
||||
|
||||
|
||||
|
@ -377,21 +376,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
|||
}
|
||||
}
|
||||
|
||||
static __attribute__((noinline)) void HandleCommand(Vertex* v) {
|
||||
if ((v->flags & 0xFF) != 0x23) {
|
||||
_glPushHeaderOrVertex(v);
|
||||
return;
|
||||
}
|
||||
|
||||
vp.hwidth = v->x;
|
||||
vp.hheight = v->y;
|
||||
vp.x_plus_hwidth = v->z;
|
||||
vp.y_plus_hheight = v->u;
|
||||
}
|
||||
|
||||
extern void ProcessVertexList(Vertex* v3, int n, void* sq_addr);
|
||||
void SceneListSubmit(Vertex* v3, int n, int type) {
|
||||
vp = VIEWPORTS[type];
|
||||
PVR_SET(SPAN_SORT_CFG, 0x0);
|
||||
|
||||
//Set PVR DMA registers
|
||||
|
@ -414,7 +400,7 @@ void SceneListSubmit(Vertex* v3, int n, int type) {
|
|||
case PVR_CMD_VERTEX:
|
||||
continue;
|
||||
default:
|
||||
HandleCommand(v3);
|
||||
_glPushHeaderOrVertex(v3);
|
||||
continue;
|
||||
};
|
||||
|
||||
|
@ -460,5 +446,4 @@ void SceneListSubmit(Vertex* v3, int n, int type) {
|
|||
}
|
||||
|
||||
_glFlushBuffer();
|
||||
VIEWPORTS[type] = vp;
|
||||
}
|
||||
|
|
1
third_party/gldc/src/state.c
vendored
1
third_party/gldc/src/state.c
vendored
|
@ -26,7 +26,6 @@ GLboolean AUTOSORT_ENABLED;
|
|||
AlignedVector OP_LIST;
|
||||
AlignedVector PT_LIST;
|
||||
AlignedVector TR_LIST;
|
||||
Viewport VIEWPORTS[3];
|
||||
|
||||
void glKosInit() {
|
||||
_glInitTextures();
|
||||
|
|
Loading…
Reference in a new issue