mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-01-22 17:12:25 -05:00
Dreamcast: Slightly optimise performance by avoiding separate viewport transform
This commit is contained in:
parent
967d52ea6a
commit
11fb4a2b65
6 changed files with 27 additions and 143 deletions
|
@ -6,16 +6,6 @@
|
||||||
! r13 = cur vertex
|
! r13 = cur vertex
|
||||||
! r14 = next vertex (prefetch)
|
! r14 = next vertex (prefetch)
|
||||||
|
|
||||||
!fr12 = VIEWPORT_HWIDTH
|
|
||||||
!fr13 = VIEWPORT_HHEIGHT
|
|
||||||
!fr14 = VIEWPORT_X_PLUS_HWIDTH
|
|
||||||
!fr15 = VIEWPORT_Y_PLUS_HHEIGHT
|
|
||||||
|
|
||||||
#define F_HW fr12
|
|
||||||
#define F_HH fr13
|
|
||||||
#define F_XP fr14
|
|
||||||
#define F_YP fr15
|
|
||||||
|
|
||||||
#define R_VTX r10
|
#define R_VTX r10
|
||||||
#define R_EOL r11
|
#define R_EOL r11
|
||||||
#define REG_CMD_VTX r10
|
#define REG_CMD_VTX r10
|
||||||
|
@ -51,42 +41,8 @@
|
||||||
add #32,r8 ! EX, SQ += 32
|
add #32,r8 ! EX, SQ += 32
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
|
||||||
! Pushes a vertex to the store queue
|
|
||||||
! CLOBBERS: fr0, fr4, fr5
|
|
||||||
! INPUTS: R (vertex)
|
|
||||||
! OUTPUTS:
|
|
||||||
! TODO optimise greatly
|
|
||||||
.macro ViewportTransform R
|
|
||||||
! INVERSE W CALCULATION
|
|
||||||
add #28, \R ! EX, \R = &vertex->w
|
|
||||||
fmov.s @\R,fr0 ! LS, fr0 = vertex->w
|
|
||||||
fmul fr0,fr0 ! FE, fr0 = fr0 * fr0
|
|
||||||
add #-24, \R ! EX, \R = &vertex->x
|
|
||||||
fsrra fr0 ! FE, fr0 = 1 / sqrt(fr0) -> 1 / vertex->w
|
|
||||||
|
|
||||||
! TRANSFORM X
|
|
||||||
fmov.s @\R,fr4 ! LS, fr4 = vertex->x
|
|
||||||
fmov F_XP,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
|
|
||||||
fmul F_HW,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x
|
|
||||||
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
|
|
||||||
fmov.s fr5,@\R ! LS, vertex->x = fr5
|
|
||||||
add #4, \R ! EX, \R = &vertex->y
|
|
||||||
|
|
||||||
! TRANSFORM Y
|
|
||||||
fmov.s @\R,fr4 ! LS, fr4 = vertex->y
|
|
||||||
fmov F_YP,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
|
|
||||||
fmul F_HH,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y
|
|
||||||
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
|
|
||||||
fmov.s fr5,@\R ! LS, vertex->y = fr5
|
|
||||||
add #4, \R ! EX, \R = &vertex->z
|
|
||||||
|
|
||||||
! ASSIGN Z
|
|
||||||
fmov.s fr0,@\R ! LS, vertex->z = fr0
|
|
||||||
add #-12, \R ! EX, \R -= 12 (back to start of vertex)
|
|
||||||
.endm
|
|
||||||
|
|
||||||
! Transforms then pushes a vertex to the store queue
|
! Transforms then pushes a vertex to the store queue
|
||||||
|
! note: Vertices are assumed as pre viewport transformed already
|
||||||
! CLOBBERS: r2, fr0, fr4, fr5
|
! CLOBBERS: r2, fr0, fr4, fr5
|
||||||
! INPUTS: R (vertex), r8 (SQ global)
|
! INPUTS: R (vertex), r8 (SQ global)
|
||||||
! OUTPUTS: R, r8 altered
|
! OUTPUTS: R, r8 altered
|
||||||
|
@ -109,24 +65,20 @@
|
||||||
|
|
||||||
! TRANSFORM X
|
! TRANSFORM X
|
||||||
fmov.s @\R,fr4 ! LS, fr4 = SRC->x
|
fmov.s @\R,fr4 ! LS, fr4 = SRC->x
|
||||||
fmov F_XP,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
|
fmul fr0,fr4 ! FE, fr4 = invW * SRC->x
|
||||||
fmul F_HW,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * SRC->x
|
|
||||||
mov.l @(20,\R),r2 ! LS, tmp = SRC->bgra
|
mov.l @(20,\R),r2 ! LS, tmp = SRC->bgra
|
||||||
mov.l r2,@(20,r8) ! LS, SRC->bgra = tmp
|
mov.l r2,@(20,r8) ! LS, SRC->bgra = tmp
|
||||||
fmac fr0,fr4,fr5 ! FE, fr5 = invW * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
|
|
||||||
add #4, \R ! EX, SRC += 4
|
add #4, \R ! EX, SRC += 4
|
||||||
fmov.s fr5,@r8 ! LS, DST->x = fr5
|
fmov.s fr4,@r8 ! LS, DST->x = fr4
|
||||||
|
|
||||||
! TRANSFORM Y
|
! TRANSFORM Y
|
||||||
fmov.s @\R,fr4 ! LS, fr4 = SRC->y
|
fmov.s @\R,fr4 ! LS, fr4 = SRC->y
|
||||||
fmov F_YP,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
|
|
||||||
fmul F_HH,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * SRC->y
|
|
||||||
add #8, r8 ! EX, DST += 8
|
add #8, r8 ! EX, DST += 8
|
||||||
|
fmul fr0,fr4 ! FE, fr4 = invW * SRC->y
|
||||||
fmov.s fr0,@r8 ! LS, DST->z = invW
|
fmov.s fr0,@r8 ! LS, DST->z = invW
|
||||||
fmac fr0,fr4,fr5 ! FE, fr5 = invW * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
|
|
||||||
add #-4, r8 ! EX, DST -= 4
|
add #-4, r8 ! EX, DST -= 4
|
||||||
add #-8, \R ! EX, src -= 8 (back to start of vertex)
|
add #-8, \R ! EX, src -= 8 (back to start of vertex)
|
||||||
fmov.s fr5,@r8 ! LS, DST->y = fr5
|
fmov.s fr4,@r8 ! LS, DST->y = fr4
|
||||||
|
|
||||||
add #-8,r8 ! EX, DST -= 8 (back to start of vertex)
|
add #-8,r8 ! EX, DST -= 8 (back to start of vertex)
|
||||||
pref @r8 ! LS, Trigger SQ
|
pref @r8 ! LS, Trigger SQ
|
||||||
|
@ -561,17 +513,6 @@ _ProcessVertexList:
|
||||||
mov.l r13,@-r15
|
mov.l r13,@-r15
|
||||||
mov.l r14,@-r15
|
mov.l r14,@-r15
|
||||||
sts.l pr,@-r15
|
sts.l pr,@-r15
|
||||||
! STORE FPU REGISTERS
|
|
||||||
fmov.s F_HW,@-r15
|
|
||||||
fmov.s F_HH,@-r15
|
|
||||||
fmov.s F_XP,@-r15
|
|
||||||
fmov.s F_YP,@-r15
|
|
||||||
! VIEWPORT SETUP
|
|
||||||
mov.l .VP_1,r0 ! LS, &vp
|
|
||||||
fmov.s @r0+,F_HW ! LS, vp.HWIDTH
|
|
||||||
fmov.s @r0+,F_HH ! LS, vp.HHEIGHT
|
|
||||||
fmov.s @r0+,F_XP ! LS, vp.X_PLUS_HWIDTH
|
|
||||||
fmov.s @r0+,F_YP ! LS, vp.Y_PLUS_HHEIGHT
|
|
||||||
! REGISTER SETUP
|
! REGISTER SETUP
|
||||||
mov r4,r14
|
mov r4,r14
|
||||||
mov r4,r13
|
mov r4,r13
|
||||||
|
@ -583,33 +524,11 @@ _ProcessVertexList:
|
||||||
bra SUBMIT_LOOP
|
bra SUBMIT_LOOP
|
||||||
add #-64,r15
|
add #-64,r15
|
||||||
|
|
||||||
! Handles a non-vertex command
|
! Submits a PowerVR GPU command
|
||||||
DO_CMD:
|
DO_CMD:
|
||||||
mov r13,r4 ! r4 = CUR
|
|
||||||
mov r1,r0 ! r0 = MASK
|
|
||||||
cmp/eq #35,r0 ! T = MASK == 0x23
|
|
||||||
bt.s 9f ! if (T) goto 9;
|
|
||||||
nop
|
|
||||||
! PowerVR GPU command
|
|
||||||
PushVertex REG_V0
|
PushVertex REG_V0
|
||||||
bra NEXT_ITER
|
bra NEXT_ITER
|
||||||
nop
|
nop
|
||||||
! Viewport update command
|
|
||||||
9:
|
|
||||||
add #4,r4
|
|
||||||
mov.l .VP_1,r2
|
|
||||||
! Load VIEWPORT registers
|
|
||||||
fmov.s @r4+,F_HW ! VIEWPORT_HWIDTH = src->x
|
|
||||||
fmov.s @r4+,F_HH ! VIEWPORT_HHEIGHT = src->y
|
|
||||||
fmov.s @r4+,F_XP ! VIEWPORT_X_PLUS_HWIDTH = src->z
|
|
||||||
add #16,r2
|
|
||||||
fmov.s @r4+,F_YP ! VIEWPORT_Y_PLUS_HHEIGHT = src->u
|
|
||||||
! And store to vp global
|
|
||||||
fmov.s F_YP,@-r2
|
|
||||||
fmov.s F_XP,@-r2
|
|
||||||
fmov.s F_HH,@-r2
|
|
||||||
bra NEXT_ITER
|
|
||||||
fmov.s F_HW,@-r2
|
|
||||||
|
|
||||||
SUBMIT_LOOP:
|
SUBMIT_LOOP:
|
||||||
mov.l @r13,r0 ! FLAGS = CUR->flags
|
mov.l @r13,r0 ! FLAGS = CUR->flags
|
||||||
|
@ -645,18 +564,6 @@ NEXT_ITER:
|
||||||
mov r14,r13 ! CUR = NEXT
|
mov r14,r13 ! CUR = NEXT
|
||||||
|
|
||||||
add #64,r15
|
add #64,r15
|
||||||
! VIEWPORT SAVE
|
|
||||||
mov.l .VP_1,r0
|
|
||||||
add #16,r0
|
|
||||||
fmov.s F_YP,@-r0
|
|
||||||
fmov.s F_XP,@-r0
|
|
||||||
fmov.s F_HH,@-r0
|
|
||||||
fmov.s F_HW,@-r0
|
|
||||||
! RESTORE FPU REGISTERS
|
|
||||||
fmov.s @r15+,F_YP
|
|
||||||
fmov.s @r15+,F_XP
|
|
||||||
fmov.s @r15+,F_HH
|
|
||||||
fmov.s @r15+,F_HW
|
|
||||||
! RESTORE CPU REGISTERS
|
! RESTORE CPU REGISTERS
|
||||||
lds.l @r15+,pr
|
lds.l @r15+,pr
|
||||||
mov.l @r15+,r14
|
mov.l @r15+,r14
|
||||||
|
|
|
@ -48,11 +48,6 @@
|
||||||
!fr5 = y
|
!fr5 = y
|
||||||
!fr6 = z
|
!fr6 = z
|
||||||
!fr7 = w
|
!fr7 = w
|
||||||
!fr8 = VIEWPORT_HWIDTH
|
|
||||||
!fr9 = VIEWPORT_HHEIGHT
|
|
||||||
!fr10 = VIEWPORT_X_PLUS_HWIDTH
|
|
||||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
|
||||||
|
|
||||||
!fv4 = XYZW
|
!fv4 = XYZW
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -448,11 +448,21 @@ static matrix_t __attribute__((aligned(32))) _proj, _view;
|
||||||
static float textureOffsetX, textureOffsetY;
|
static float textureOffsetX, textureOffsetY;
|
||||||
static int textureOffset;
|
static int textureOffset;
|
||||||
|
|
||||||
|
static float vp_scaleX, vp_scaleY, vp_offsetX, vp_offsetY;
|
||||||
|
static matrix_t __attribute__((aligned(32))) mat_vp;
|
||||||
|
|
||||||
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
||||||
if (type == MATRIX_PROJ) memcpy(&_proj, matrix, sizeof(struct Matrix));
|
if (type == MATRIX_PROJ) memcpy(&_proj, matrix, sizeof(struct Matrix));
|
||||||
if (type == MATRIX_VIEW) memcpy(&_view, matrix, sizeof(struct Matrix));
|
if (type == MATRIX_VIEW) memcpy(&_view, matrix, sizeof(struct Matrix));
|
||||||
|
|
||||||
mat_load( &_proj);
|
memcpy(&mat_vp, &Matrix_Identity, sizeof(struct Matrix));
|
||||||
|
mat_vp[0][0] = vp_scaleX;
|
||||||
|
mat_vp[1][1] = vp_scaleY;
|
||||||
|
mat_vp[3][0] = vp_offsetX;
|
||||||
|
mat_vp[3][1] = vp_offsetY;
|
||||||
|
|
||||||
|
mat_load(&mat_vp);
|
||||||
|
mat_apply(&_proj);
|
||||||
mat_apply(&_view);
|
mat_apply(&_view);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -645,13 +655,10 @@ static void PushCommand(void* cmd) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_SetViewport(int x, int y, int w, int h) {
|
void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||||
Vertex c;
|
vp_scaleX = w * 0.5f; // hwidth
|
||||||
c.flags = PVR_CMD_USERCLIP | 0x23;
|
vp_scaleY = h * -0.5f; // hheight
|
||||||
c.x = w * 0.5f; // hwidth
|
vp_offsetX = x + w * 0.5f; // x_plus_hwidth
|
||||||
c.y = h * -0.5f; // hheight
|
vp_offsetY = y + h * 0.5f; // y_plus_hheight
|
||||||
c.z = x + w * 0.5f; // x_plus_hwidth
|
|
||||||
c.u = y + h * 0.5f; // y_plus_hheight
|
|
||||||
PushCommand(&c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_SetScissor(int x, int y, int w, int h) {
|
void Gfx_SetScissor(int x, int y, int w, int h) {
|
||||||
|
|
9
third_party/gldc/src/gldc.h
vendored
9
third_party/gldc/src/gldc.h
vendored
|
@ -44,15 +44,6 @@ typedef struct {
|
||||||
|
|
||||||
#define GL_FORCE_INLINE static __attribute__((always_inline)) inline
|
#define GL_FORCE_INLINE static __attribute__((always_inline)) inline
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
float hwidth; /* width * 0.5f */
|
|
||||||
float hheight; /* height * 0.5f */
|
|
||||||
float x_plus_hwidth;
|
|
||||||
float y_plus_hheight;
|
|
||||||
} Viewport;
|
|
||||||
|
|
||||||
extern Viewport VIEWPORTS[3];
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
//0
|
//0
|
||||||
GLuint index;
|
GLuint index;
|
||||||
|
|
25
third_party/gldc/src/sh4.c
vendored
25
third_party/gldc/src/sh4.c
vendored
|
@ -8,7 +8,6 @@
|
||||||
|
|
||||||
#define SQ_BASE_ADDRESS (void*) 0xe0000000
|
#define SQ_BASE_ADDRESS (void*) 0xe0000000
|
||||||
#define PREFETCH(addr) __builtin_prefetch((addr))
|
#define PREFETCH(addr) __builtin_prefetch((addr))
|
||||||
Viewport vp;
|
|
||||||
|
|
||||||
GL_FORCE_INLINE float _glFastInvert(float x) {
|
GL_FORCE_INLINE float _glFastInvert(float x) {
|
||||||
return MATH_fsrra(x * x);
|
return MATH_fsrra(x * x);
|
||||||
|
@ -17,10 +16,10 @@ GL_FORCE_INLINE float _glFastInvert(float x) {
|
||||||
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
|
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
|
||||||
const float f = _glFastInvert(vertex->w);
|
const float f = _glFastInvert(vertex->w);
|
||||||
|
|
||||||
/* Convert to NDC and apply viewport */
|
/* Convert to NDC (viewport already applied) */
|
||||||
vertex->x = (vertex->x * f * vp.hwidth) + vp.x_plus_hwidth;
|
vertex->x = vertex->x * f;
|
||||||
vertex->y = (vertex->y * f * vp.hheight) + vp.y_plus_hheight;
|
vertex->y = vertex->y * f;
|
||||||
vertex->z = f;
|
vertex->z = _glFastInvert(vertex->w);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -377,21 +376,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static __attribute__((noinline)) void HandleCommand(Vertex* v) {
|
|
||||||
if ((v->flags & 0xFF) != 0x23) {
|
|
||||||
_glPushHeaderOrVertex(v);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
vp.hwidth = v->x;
|
|
||||||
vp.hheight = v->y;
|
|
||||||
vp.x_plus_hwidth = v->z;
|
|
||||||
vp.y_plus_hheight = v->u;
|
|
||||||
}
|
|
||||||
|
|
||||||
extern void ProcessVertexList(Vertex* v3, int n, void* sq_addr);
|
extern void ProcessVertexList(Vertex* v3, int n, void* sq_addr);
|
||||||
void SceneListSubmit(Vertex* v3, int n, int type) {
|
void SceneListSubmit(Vertex* v3, int n, int type) {
|
||||||
vp = VIEWPORTS[type];
|
|
||||||
PVR_SET(SPAN_SORT_CFG, 0x0);
|
PVR_SET(SPAN_SORT_CFG, 0x0);
|
||||||
|
|
||||||
//Set PVR DMA registers
|
//Set PVR DMA registers
|
||||||
|
@ -414,7 +400,7 @@ void SceneListSubmit(Vertex* v3, int n, int type) {
|
||||||
case PVR_CMD_VERTEX:
|
case PVR_CMD_VERTEX:
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
HandleCommand(v3);
|
_glPushHeaderOrVertex(v3);
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -460,5 +446,4 @@ void SceneListSubmit(Vertex* v3, int n, int type) {
|
||||||
}
|
}
|
||||||
|
|
||||||
_glFlushBuffer();
|
_glFlushBuffer();
|
||||||
VIEWPORTS[type] = vp;
|
|
||||||
}
|
}
|
||||||
|
|
1
third_party/gldc/src/state.c
vendored
1
third_party/gldc/src/state.c
vendored
|
@ -26,7 +26,6 @@ GLboolean AUTOSORT_ENABLED;
|
||||||
AlignedVector OP_LIST;
|
AlignedVector OP_LIST;
|
||||||
AlignedVector PT_LIST;
|
AlignedVector PT_LIST;
|
||||||
AlignedVector TR_LIST;
|
AlignedVector TR_LIST;
|
||||||
Viewport VIEWPORTS[3];
|
|
||||||
|
|
||||||
void glKosInit() {
|
void glKosInit() {
|
||||||
_glInitTextures();
|
_glInitTextures();
|
||||||
|
|
Loading…
Reference in a new issue