From c891f09b7e3c6de8c669f996bf66887d92dd3dc9 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sun, 7 Jul 2024 19:44:13 +1000 Subject: [PATCH] Dreamcast: Port clipped transforms to assembly, broken though --- misc/dreamcast/VertexClip.S | 192 +++++++------ misc/dreamcast/VertexClip2.S | 183 +++++++++++++ misc/dreamcast/VertexDraw.S | 516 ++++++++++++++++++++++++++++++++--- third_party/gldc/src/sh4.c | 50 ++-- 4 files changed, 794 insertions(+), 147 deletions(-) create mode 100644 misc/dreamcast/VertexClip2.S diff --git a/misc/dreamcast/VertexClip.S b/misc/dreamcast/VertexClip.S index 19b58ba4d..cf3bb7d30 100644 --- a/misc/dreamcast/VertexClip.S +++ b/misc/dreamcast/VertexClip.S @@ -1,19 +1,9 @@ -! r0 CLOBBERS -! r1 CLOBBERS -! r2 CLOBBERS -! r3 CLOBBERS -! r7 CLOBBERS -! r4 = v1 -! r5 = v2 -! r6 = OUT - -#define TM1 r1 -#define TM2 r3 -#define TM3 r7 - -#define IN1 r4 -#define IN2 r5 -#define OUT r6 +! r1 = v1, CLOBBERS +! r2 = v2, CLOBBERS +! r3 = OUT, CLOBBERS +! r4 = TMP 1, preserved +! r5 = TMP 2, preserved +! r0 = CL0, CLOBBERS ! FR0 = 0 ! FR1 = 0 @@ -28,6 +18,17 @@ ! FR10 = invT ! FR11 = t +#define TM1 r0 +#define TM2 r4 +#define CLO r5 + +#define IN1 r1 +#define IN2 r2 +#define OUT r3 + +#define CL1 r1 +#define CL2 r2 + ! Calculates the near plane intersection point between two points: ! float t = fabsf(v1->z) / fabsf(v2->z - v1->z) ! float invt = 1.0f - t; @@ -57,6 +58,8 @@ .align 4 .type _ClipLine,%function _ClipLine: + mov.l r4,@-r15 ! LS, push(r4) + mov.l r5,@-r15 ! LS, push(r5) mov IN1, TM1 ! MT, tmp = &v1 fldi0 fr4 ! LS, fr4 = 0 add #12, TM1 ! EX, tmp = &v1->z @@ -74,17 +77,17 @@ _ClipLine: fsrra fr11 ! FE, fr11 = 1 / abs(v2->z - v1->z) fabs fr2 ! LS, fr2 = abs(v1->z) fmul fr2,fr11 ! FE, fr11 = abs(v1->Z) / abs(v2->z - v1->z) --> t - add #4, IN1 ! EX, A = &v1->x + add #4, IN1 ! EX, v1 += 4 fldi1 fr10 ! LS, fr10 = 1 - add #4, IN2 ! EX, B = &v2->x - add #4, OUT ! EX, OUT = &OUT->x + add #4, IN2 ! EX, v2 += 4 + add #4, OUT ! 
EX, OUT += 4 fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT - fmov.s @IN1+, fr2 ! LS, A1 = v1->x - fmov.s @IN2+, fr3 ! LS, B1 = v2->x + fmov.s @IN1+, fr2 ! LS, A1 = v1->x, v1 += 4 + fmov.s @IN2+, fr3 ! LS, B1 = v2->x, v2 += 4 fipr fv8, fv0 ! FE, LERP(A1, B1) - fmov.s @IN1+, fr6 ! LS, A2 = v1->y - fmov.s @IN2+, fr7 ! LS, B2 = v2->y + fmov.s @IN1+, fr6 ! LS, A2 = v1->y, v1 += 4 + fmov.s @IN2+, fr7 ! LS, B2 = v2->y, v2 += 4 fmov.s fr3,@OUT ! LS, OUT->x = LERP add #4, OUT ! EX, OUT += 4 @@ -97,82 +100,89 @@ _ClipLine: fmov.s fr1,@OUT ! LS, OUT->z = 0 add #4, OUT ! EX, OUT += 4 - fmov.s @IN1+, fr2 ! LS, A1 = v1->u - fmov.s @IN2+, fr3 ! LS, B1 = v2->u + fmov.s @IN1+, fr2 ! LS, A1 = v1->u, v1 += 4 + fmov.s @IN2+, fr3 ! LS, B1 = v2->u, v2 += 4 fipr fv8, fv0 ! FE, LERP(A1, B1) - fmov.s @IN1+, fr6 ! LS, A2 = v1->v - fmov.s @IN2+, fr7 ! LS, B2 = v2->v + fmov.s @IN1+, fr6 ! LS, A2 = v1->v, v1 += 4 + fmov.s @IN2+, fr7 ! LS, B2 = v2->v, v2 += 4 fmov.s fr3,@OUT ! LS, OUT->u = LERP add #4, OUT ! EX, OUT += 4 fipr fv8, fv4 ! FE, LERP(A2, B2) + add #4, IN1 ! EX, v1 += 4 + add #4, IN2 ! EX, v2 += 4 + fmov.s @IN1,fr2 ! LS, A1 = v1->w + fmov.s @IN2,fr3 ! LS, B1 = v2->w fmov.s fr7,@OUT ! LS, OUT->v = LERP - add #4, OUT ! EX, OUT += 4 + add #8, OUT ! EX, OUT += 8 - mov.l @IN1+,r0 ! LS, ACOLOR = v1->bgra - extu.b r0,r1 ! EX, tmp = ACOLOR.b - lds r1,fpul ! CO, FPUL = tmp + fipr fv8, fv0 ! FE, LERP(A1, B1) + add #-4, IN1 ! EX, v1 -= 4 + add #-4, IN2 ! EX, v2 -= 4 + fmov.s fr3,@OUT ! LS, OUT->w = lerp + add #-4, OUT ! EX, OUT -= 4 + + mov.l @IN1+,CL1 ! LS, ACOLOR = v1->bgra + extu.b CL1,TM1 ! EX, val = ACOLOR.b + lds TM1,fpul ! CO, FPUL = val float fpul,fr2 ! EX, fr2 = float(FPUL) - mov.l @IN2+,r2 ! LS, BCOLOR = v2->bgra - extu.b r2,r3 ! EX, tmp = BCOLOR.b - lds r3,fpul ! CO, FPUL = tmp + mov.l @IN2+,CL2 ! LS, BCOLOR = v2->bgra + extu.b CL2,TM1 ! EX, val = BCOLOR.b + lds TM1,fpul ! CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + shlr8 CL1 ! 
EX, ACOLOR >>= 8 + ftrc fr3,fpul ! FE, FPUL = int(lerp) + shlr8 CL2 ! EX, BCOLOR >>= 8 + sts fpul,TM2 ! CO, tmp = FPUL + + extu.b CL1,TM1 ! EX, val = ACOLOR.g + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + extu.b CL2,TM1 ! EX, val = BCOLOR.g + lds TM1,fpul ! CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + shlr8 CL1 ! EX, ACOLOR >>= 8 + ftrc fr3,fpul ! FE, FPUL = int(lerp) + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + mov TM2,CLO ! MT, OUTCOLOR.b = tmp + shlr8 CL2 ! EX, BCOLOR >>= 8 + sts fpul,TM2 ! CO, tmp = FPUL + + extu.b CL1,TM1 ! EX, val = ACOLOR.b + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + extu.b CL2,TM1 ! EX, val = BCOLOR.b + lds TM1,fpul ! CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + shlr8 CL1 ! EX, ACOLOR >>= 8 + ftrc fr3,fpul ! FE, FPUL = int(lerp) + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + shll8 TM2 ! EX, tmp <<= 8 + or TM2,CLO ! EX, OUTCOLOR.r |= tmp + shlr8 CL2 ! EX, BCOLOR >>= 8 + sts fpul,TM2 ! CO, tmp = FPUL + + extu.b CL1,TM1 ! EX, val = ACOLOR.r + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + extu.b CL2,TM1 ! EX, val = BCOLOR.r + lds TM1,fpul ! CO, FPUL = val float fpul,fr3 ! EX, fr3 = float(FPUL) fipr fv8, fv0 ! FE, LERP(A1, B1) ftrc fr3,fpul ! FE, FPUL = int(lerp) - sts fpul,r3 ! CO, tmp = FPUL - extu.b r3,r3 ! EX, tmp = (uint8)tmp - mov r3, r7 ! MT, OUTCOLOR.b = tmp - - shlr8 r0 ! EX, ACOLOR >>= 8 - extu.b r0,r1 ! EX, tmp = ACOLOR.g - lds r1,fpul ! CO, FPUL = tmp - float fpul,fr2 ! EX, fr2 = float(FPUL) - shlr8 r2 ! EX, BCOLOR >>= 8 - extu.b r2,r3 ! EX, tmp = BCOLOR.g - lds r3,fpul ! CO, FPUL = tmp - float fpul,fr3 ! EX, fr3 = float(FPUL) - fipr fv8, fv0 ! FE, LERP(A1, B1) - ftrc fr3,fpul ! FE, FPUL = int(lerp) - sts fpul,r3 ! CO, tmp = FPUL - extu.b r3,r3 ! EX, tmp = (uint8)tmp - shll8 r3 ! EX, tmp <<= 8 - or r3,r7 ! EX, OUTCOLOR.r |= tmp - - shlr8 r0 ! 
EX, ACOLOR >>= 8 - extu.b r0,r1 ! EX, tmp = ACOLOR.b - lds r1,fpul ! CO, FPUL = tmp - float fpul,fr2 ! EX, fr2 = float(FPUL) - shlr8 r2 ! EX, BCOLOR >>= 8 - extu.b r2,r3 ! EX, tmp = BCOLOR.b - lds r3,fpul ! CO, FPUL = tmp - float fpul,fr3 ! EX, fr3 = float(FPUL) - fipr fv8, fv0 ! FE, LERP(A1, B1) - ftrc fr3,fpul ! FE, FPUL = int(lerp) - sts fpul,r3 ! CO, tmp = FPUL - extu.b r3,r3 ! EX, tmp = (uint8)tmp - shll16 r3 ! EX, tmp <<= 16 - or r3,r7 ! EX, OUTCOLOR.g |= tmp - - shlr8 r0 ! EX, ACOLOR >>= 8 - extu.b r0,r1 ! EX, tmp = ACOLOR.r - lds r1,fpul ! CO, FPUL = tmp - float fpul,fr2 ! EX, fr2 = float(FPUL) - shlr8 r2 ! EX, BCOLOR >>= 8 - extu.b r2,r3 ! EX, tmp = BCOLOR.r - lds r3,fpul ! CO, FPUL = tmp - float fpul,fr3 ! EX, fr3 = float(FPUL) - fipr fv8, fv0 ! FE, LERP(A1, B1) - ftrc fr3,fpul ! FE, FPUL = int(lerp) - sts fpul,r3 ! CO, tmp = FPUL - extu.b r3,r3 ! EX, tmp = (uint8)tmp - shll16 r3 ! EX, tmp <<= 16 - shll8 r3 ! EX, tmp <<= 8 - or r3,r7 ! EX, OUTCOLOR.a |= tmp - mov.l r7,@OUT ! LS, OUT->color = OUTCOLOR - - fmov.s @IN1+,fr2 ! LS, A1 = v1->w - fmov.s @IN2+,fr3 ! LS, B1 = v2->w - fipr fv8, fv0 ! FE, LERP(A1, B1) - add #4, OUT ! EX, OUT += 4 - rts ! CO, return after executing instruction in delay slot - fmov.s fr3,@OUT ! LS, OUT->w = lerp + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + shll16 TM2 ! EX, tmp <<= 16 + or TM2,CLO ! EX, OUTCOLOR.g |= tmp + sts fpul,TM2 ! CO, tmp = FPUL + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + shll16 TM2 ! EX, tmp <<= 16 + shll8 TM2 ! EX, tmp <<= 8 + or TM2,CLO ! EX, OUTCOLOR.a |= tmp + mov.l CLO,@OUT ! LS, OUT->color = OUTCOLOR + + mov.l @r15+,r5 ! LS, pop(r5) + rts ! CO, return after executing instruction in delay slot + mov.l @r15+,r4 ! LS, pop(r4) diff --git a/misc/dreamcast/VertexClip2.S b/misc/dreamcast/VertexClip2.S new file mode 100644 index 000000000..db6222774 --- /dev/null +++ b/misc/dreamcast/VertexClip2.S @@ -0,0 +1,183 @@ +! r1 CLOBBERS +! r3 CLOBBERS +! r7 CLOBBERS +! r4 = v1 +! r5 = v2 +! r6 = OUT + +! FR0 = 0 +! 
FR1 = 0 +! FR2 = A.1 +! FR3 = B.1 +! FR4 = 0 +! FR5 = 0 +! FR6 = A.2 +! FR7 = B.2 +! FR8 = 0 +! FR9 = 0 +! FR10 = invT +! FR11 = t + +#define TM1 r1 +#define TM2 r3 + +#define IN1 r4 +#define IN2 r5 +#define OUT r6 + +#define CL1 r4 +#define CL2 r5 +#define CLO r7 + +! Calculates the near plane intersection point between two points: +! float t = fabsf(v1->z) / fabsf(v2->z - v1->z) +! float invt = 1.0f - t; +! +! out->x = invt * v1->x + t * v2->x; +! out->y = invt * v1->y + t * v2->y; +! out->z = 0.0f; // clipped against near plane anyway (i.e. Z/W = 0 --> Z = 0) +! +! out->u = invt * v1->u + t * v2->u; +! out->v = invt * v1->v + t * v2->v; +! out->w = invt * v1->w + t * v2->w; +! +! out->b = invt * v1->b + t * v2->b; +! out->g = invt * v1->g + t * v2->g; +! out->r = invt * v1->r + t * v2->r; +! out->a = invt * v1->a + t * v2->a; +! To optimise these calculations, FIPR is used: +! FIPR = FVm.x*FVn.x + FVm.y*FVn.y + FVm.z*FVn.z + FVm.w*FVn.w --> FVn.w +! FIPR can be used to accomplish "vout->Q = invt * v1->Q + t * v2->Q" by: +! - assigning x/y components to 0 for both vectors +! - assigning invT and t to z/w of FVm vector +! - assigning v1 and v2 to z/w of FVn vector +! FIPR = 0*0 + 0*0 + invT*v1->Q + t*v2->Q --> FVn.w +! FIPR = invT*v1->Q + t*v2->Q --> FVn.w + +.global _ClipEdge +.align 4 +.type _ClipEdge,%function +_ClipEdge: + mov IN1, TM1 ! MT, tmp = &v1 + fldi0 fr4 ! LS, fr4 = 0 + add #12, TM1 ! EX, tmp = &v1->z + fmov.s @r1, fr2 ! LS, fr2 = v1->z + mov IN2, TM1 ! MT, tmp = &v2 + fldi0 fr5 ! LS, fr5 = 0 + add #12, TM1 ! EX, tmp = &v2->z + fmov.s @r1,fr11 ! LS, fr11 = v2->z + fsub fr2,fr11 ! FE, fr11 = v2->z - v1->z + fldi0 fr8 ! LS, fr8 = 0 + fmul fr11,fr11 ! FE, fr11 = (v2->z - v1->z) * (v2->z - v1->z) + fldi0 fr9 ! LS, fr9 = 0 + fldi0 fr0 ! LS, fr0 = 0 + fldi0 fr1 ! LS, fr1 = 0 + fsrra fr11 ! FE, fr11 = 1 / abs(v2->z - v1->z) + fabs fr2 ! LS, fr2 = abs(v1->z) + fmul fr2,fr11 ! FE, fr11 = abs(v1->z) / abs(v2->z - v1->z) --> t + add #4, IN1 !
EX, v1 += 4 + fldi1 fr10 ! LS, fr10 = 1 + add #4, IN2 ! EX, v2 += 4 + add #4, OUT ! EX, OUT += 4 + fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT + + fmov.s @IN1+, fr2 ! LS, A1 = v1->x, v1 += 4 + fmov.s @IN2+, fr3 ! LS, B1 = v2->x, v2 += 4 + fipr fv8, fv0 ! FE, LERP(A1, B1) + fmov.s @IN1+, fr6 ! LS, A2 = v1->y, v1 += 4 + fmov.s @IN2+, fr7 ! LS, B2 = v2->y, v2 += 4 + + fmov.s fr3,@OUT ! LS, OUT->x = LERP + add #4, OUT ! EX, OUT += 4 + fipr fv8, fv4 ! FE, LERP(A2, B2) + add #4, IN1 ! EX, v1 += 4 + add #4, IN2 ! EX, v2 += 4 + + fmov.s fr7,@OUT ! LS, OUT->y = LERP + add #4, OUT ! EX, OUT += 4 + fmov.s fr1,@OUT ! LS, OUT->z = 0 + add #4, OUT ! EX, OUT += 4 + + fmov.s @IN1+, fr2 ! LS, A1 = v1->u, v1 += 4 + fmov.s @IN2+, fr3 ! LS, B1 = v2->u, v2 += 4 + fipr fv8, fv0 ! FE, LERP(A1, B1) + fmov.s @IN1+, fr6 ! LS, A2 = v1->v, v1 += 4 + fmov.s @IN2+, fr7 ! LS, B2 = v2->v, v2 += 4 + + fmov.s fr3,@OUT ! LS, OUT->u = LERP + add #4, OUT ! EX, OUT += 4 + fipr fv8, fv4 ! FE, LERP(A2, B2) + add #4, IN1 ! EX, v1 += 4 + add #4, IN2 ! EX, v2 += 4 + fmov.s @IN1,fr2 ! LS, A1 = v1->w + fmov.s @IN2,fr3 ! LS, B1 = v2->w + fmov.s fr7,@OUT ! LS, OUT->v = LERP + add #8, OUT ! EX, OUT += 8 + + fipr fv8, fv0 ! FE, LERP(A1, B1) + add #-4, IN1 ! EX, v1 -= 4 + add #-4, IN2 ! EX, v2 -= 4 + fmov.s fr3,@OUT ! LS, OUT->w = lerp + add #-4, OUT ! EX, OUT -= 4 + + mov.l @IN1+,CL1 ! LS, ACOLOR = v1->bgra + extu.b CL1,TM1 ! EX, val = ACOLOR.b + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + mov.l @IN2+,CL2 ! LS, BCOLOR = v2->bgra + extu.b CL2,TM1 ! EX, val = BCOLOR.b + lds TM1,fpul ! CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + shlr8 CL1 ! EX, ACOLOR >>= 8 + ftrc fr3,fpul ! FE, FPUL = int(lerp) + shlr8 CL2 ! EX, BCOLOR >>= 8 + sts fpul,TM2 ! CO, tmp = FPUL + + extu.b CL1,TM1 ! EX, val = ACOLOR.g + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + extu.b CL2,TM1 ! EX, val = BCOLOR.g + lds TM1,fpul ! 
CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + shlr8 CL1 ! EX, ACOLOR >>= 8 + ftrc fr3,fpul ! FE, FPUL = int(lerp) + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + mov TM2,CLO ! MT, OUTCOLOR.b = tmp + shlr8 CL2 ! EX, BCOLOR >>= 8 + sts fpul,TM2 ! CO, tmp = FPUL + + extu.b CL1,TM1 ! EX, val = ACOLOR.r + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + extu.b CL2,TM1 ! EX, val = BCOLOR.r + lds TM1,fpul ! CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + shlr8 CL1 ! EX, ACOLOR >>= 8 + ftrc fr3,fpul ! FE, FPUL = int(lerp) + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + shll8 TM2 ! EX, tmp <<= 8 + or TM2,CLO ! EX, OUTCOLOR.g |= tmp + shlr8 CL2 ! EX, BCOLOR >>= 8 + sts fpul,TM2 ! CO, tmp = FPUL + + extu.b CL1,TM1 ! EX, val = ACOLOR.a + lds TM1,fpul ! CO, FPUL = val + float fpul,fr2 ! EX, fr2 = float(FPUL) + extu.b CL2,TM1 ! EX, val = BCOLOR.a + lds TM1,fpul ! CO, FPUL = val + float fpul,fr3 ! EX, fr3 = float(FPUL) + fipr fv8, fv0 ! FE, LERP(A1, B1) + ftrc fr3,fpul ! FE, FPUL = int(lerp) + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + shll16 TM2 ! EX, tmp <<= 16 + or TM2,CLO ! EX, OUTCOLOR.r |= tmp + sts fpul,TM2 ! CO, tmp = FPUL + extu.b TM2,TM2 ! EX, tmp = (uint8)tmp + shll16 TM2 ! EX, tmp <<= 16 + shll8 TM2 ! EX, tmp <<= 8 + or TM2,CLO ! EX, OUTCOLOR.a |= tmp + rts ! CO, return after executing instruction in delay slot + mov.l CLO,@OUT ! LS, OUT->color = OUTCOLOR diff --git a/misc/dreamcast/VertexDraw.S b/misc/dreamcast/VertexDraw.S index 4508afd2a..581db7a09 100644 --- a/misc/dreamcast/VertexDraw.S +++ b/misc/dreamcast/VertexDraw.S @@ -11,11 +11,6 @@ !fr10 = VIEWPORT_X_PLUS_HWIDTH !fr11 = VIEWPORT_Y_PLUS_HHEIGHT -#define REG_V0 r4 -#define REG_V1 r5 -#define REG_V2 r6 -#define REG_V3 r7 - .align 4 ! Pushes a vertex to the store queue @@ -79,37 +74,494 @@ add #-12, \R ! EX, \R -= 12 (back to start of vertex) .endm +! Transforms then pushes a vertex to the store queue +!
CLOBBERS: r3, fr0, fr4, fr5 +! INPUTS: R (vertex), r8 (SQ global) +! OUTPUTS: r8 altered +.macro TransformVertex R +! INVERSE W CALCULATION + add #28, \R ! EX, \R = &vertex->w + fmov.s @\R,fr0 ! LS, fr0 = vertex->w + fmul fr0,fr0 ! FE, fr0 = fr0 * fr0 + add #-24, \R ! EX, \R = &vertex->x + fsrra fr0 ! FE, fr0 = 1 / sqrt(fr0) -> 1 / vertex->w + +! TRANSFORM X + fmov.s @\R,fr4 ! LS, fr4 = vertex->x + fmov fr10,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH + fmul fr8,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x + fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth + fmov.s fr5,@\R ! LS, vertex->x = fr5 + add #4, \R ! EX, \R = &vertex->y + +! TRANSFORM Y + fmov.s @\R,fr4 ! LS, fr4 = vertex->y + fmov fr11,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT + fmul fr9,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y + fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight + fmov.s fr5,@\R ! LS, vertex->y = fr5 + add #4, \R ! EX, \R = &vertex->z + +! ASSIGN Z + fmov.s fr0,@\R ! LS, vertex->z = fr0 + add #-12, \R ! EX, \R -= 12 (back to start of vertex) + +! memcpy(r8, \R, 28) + mov.l @(0,\R), r3 + mov.l r3, @(0,r8) + mov.l @(4,\R), r3 + mov.l r3, @(4,r8) + mov.l @(8,\R), r3 + mov.l r3, @(8,r8) + mov.l @(12,\R),r3 + mov.l r3,@(12,r8) + mov.l @(16,\R),r3 + mov.l r3,@(16,r8) + mov.l @(20,\R),r3 + mov.l r3,@(20,r8) + mov.l @(24,\R),r3 + mov.l r3,@(24,r8) + pref @r8 ! LS, Trigger SQ + add #32,r8 ! EX, SQ += 32 +.endm + + +#define REG_CLIP1 r1 +#define REG_CLIP2 r2 + +#define REG_V0 r4 +#define REG_V1 r5 +#define REG_V2 r6 +#define REG_V3 r7 + +#define REG_CMD_VTX r10 +#define REG_CMD_EOL r11 +#define REG_CLIPFUNC r12 + +! 
Scratch vertices A/B live at r15+4 and r15+36 (the saved PR sits at @r15). REG_TMP must not be r3 as TransformVertex clobbers r3, so TMP_SET_x also copies the address into r3, the out parameter for ClipLine +#define REG_TMP r0 +#define TMP_SET_A \ + mov r15, REG_TMP; add #4, REG_TMP; mov REG_TMP, r3 + +#define TMP_SET_B \ + mov r15, REG_TMP; add #36, REG_TMP; mov REG_TMP, r3 + _Case_0_0_0_1: -_Case_0_0_1_0: -_Case_0_0_1_1: -_Case_0_1_0_0: -_Case_0_1_0_1: -_Case_0_1_1_0: -_Case_0_1_1_1: -_Case_1_0_0_0: -_Case_1_0_0_1: -_Case_1_0_1_0: -_Case_1_0_1_1: -_Case_1_1_0_0: -_Case_1_1_0_1: -_Case_1_1_1_0: - rts + rts; nop + ! v0 + ! / | + ! / | + ! .....A....B... + ! / | + ! v3--v2---v1 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V3, REG_CLIP1 + mov REG_V0, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC nop + TMP_SET_B + mov REG_V0, REG_CLIP1 + mov REG_V1, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TransformVertex REG_V0 + TMP_SET_B + TransformVertex REG_TMP + TMP_SET_A + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_0_0_1_0: + rts; nop + ! v1 + ! / | + ! / | + ! ....A.....B... + ! / | + ! v0--v3---v2 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V0, REG_CLIP1 + mov REG_V1, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V1, REG_CLIP1 ! B = clip(v1, v2) + mov REG_V2, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_A + TransformVertex REG_TMP + TransformVertex REG_V1 + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_0_1_0_0: + rts; nop + ! v2 + ! / | + ! / | + ! ....A.....B... + ! / | + ! v1--v0---v3 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V1, REG_CLIP1 + mov REG_V2, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V2, REG_CLIP1 + mov REG_V3, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_A + TransformVertex REG_TMP + TransformVertex REG_V2 + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_1_0_0_0: + rts; nop + ! v3 + ! / | + ! / | + ! ....A.....B... + ! / | + !
v2--v1---v0 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V2, REG_CLIP1 + mov REG_V3, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V3, REG_CLIP1 + mov REG_V0, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + TransformVertex REG_TMP + TMP_SET_A + TransformVertex REG_TMP + TransformVertex REG_V3 + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + + +_Case_0_0_1_1: + rts; nop + ! v0-----------v1 + ! \ | + ! ....B..........A... + ! \ | + ! v3-----v2 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V1, REG_CLIP1 + mov REG_V2, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V3, REG_CLIP1 + mov REG_V0, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TransformVertex REG_V1 + TMP_SET_A + TransformVertex REG_TMP + TransformVertex REG_V0 + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_1_0_0_1: + rts; nop + ! v3-----------v0 + ! \ | + ! ....B..........A... + ! \ | + ! v2-----v1 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V0, REG_CLIP1 + mov REG_V1, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V2, REG_CLIP1 + mov REG_V3, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_A + TransformVertex REG_TMP + TMP_SET_B + TransformVertex REG_TMP + TransformVertex REG_V0 + TransformVertex REG_V3 + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_0_1_1_0: + rts; nop + ! v1-----------v2 + ! \ | + ! ....B..........A... + ! \ | + ! v0-----v3 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V2, REG_CLIP1 + mov REG_V3, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V0, REG_CLIP1 + mov REG_V1, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TransformVertex REG_V1 + TransformVertex REG_V2 + TMP_SET_B + TransformVertex REG_TMP + TMP_SET_A + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_1_1_0_0: + rts; nop + ! v2-----------v3 + ! \ | + !
....B..........A... + ! \ | + ! v1-----v0 + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V3, REG_CLIP1 + mov REG_V0, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V1, REG_CLIP1 + mov REG_V2, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + TransformVertex REG_TMP + TransformVertex REG_V2 + TMP_SET_A + TransformVertex REG_TMP + TransformVertex REG_V3 + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_0_1_1_1: + rts; nop + ! --v1-- + ! v0-- --v2 + ! \ | + ! .....B.....A... + ! \ | + ! v3 + ! v1,v2,v0 v2,v0,A v0,A,B + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V2, REG_CLIP1 + mov REG_V3, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V3, REG_CLIP1 + mov REG_V0, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TransformVertex REG_V1 + TransformVertex REG_V2 + TransformVertex REG_V0 + TMP_SET_A + TransformVertex REG_TMP + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_1_0_1_1: + rts; nop + ! --v0-- + ! v3-- --v1 + ! \ | + ! .....B.....A... + ! \ | + ! v2 + ! v0,v1,v3 v1,v3,A v3,A,B + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V1, REG_CLIP1 + mov REG_V2, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V2, REG_CLIP1 + mov REG_V3, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + mov.l REG_CMD_VTX, @REG_V3 + + TransformVertex REG_V0 + TransformVertex REG_V1 + TransformVertex REG_V3 + TMP_SET_A + TransformVertex REG_TMP + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_1_1_0_1: + rts; nop + ! --v3-- + ! v2-- --v0 + ! \ | + ! .....B.....A... + ! \ | + ! v1 + !
v3,v0,v2 v0,v2,A v2,A,B + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V0, REG_CLIP1 + mov REG_V1, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V1, REG_CLIP1 + mov REG_V2, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + mov.l REG_CMD_VTX, @REG_V3 + + TransformVertex REG_V3 + TransformVertex REG_V0 + TransformVertex REG_V2 + TMP_SET_A + TransformVertex REG_TMP + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + +_Case_1_1_1_0: + rts; nop + ! --v2-- + ! v1-- --v3 + ! \ | + ! .....B.....A... + ! \ | + ! v0 + ! v2,v3,v1 v3,v1,A v1,A,B + sts.l pr,@-r15 + + TMP_SET_A + mov REG_V3, REG_CLIP1 + mov REG_V0, REG_CLIP2 + mov.l REG_CMD_VTX, @REG_TMP + jsr @REG_CLIPFUNC + nop + + TMP_SET_B + mov REG_V0, REG_CLIP1 + mov REG_V1, REG_CLIP2 + mov.l REG_CMD_EOL, @REG_TMP + jsr @REG_CLIPFUNC + mov.l REG_CMD_VTX, @REG_V3 + + TransformVertex REG_V2 + TransformVertex REG_V3 + TransformVertex REG_V1 + TMP_SET_A + TransformVertex REG_TMP + TMP_SET_B + TransformVertex REG_TMP + + lds.l @r15+,pr ! PR must be restored before rts, not in its delay slot + rts; nop + _Case_1_1_1_1: ! Triangle strip: {1,2,0} {2,0,3} - ViewportTransform REG_V1 - PushVertex REG_V1 - - ViewportTransform REG_V2 - PushVertex REG_V2 - - ViewportTransform REG_V0 - PushVertex REG_V0 - - ViewportTransform REG_V3 - PushVertex REG_V3 + TransformVertex REG_V1 + TransformVertex REG_V2 + TransformVertex REG_V0 + TransformVertex REG_V3 rts nop @@ -144,8 +596,9 @@ _ProcessVertexList: mov.l .PVR_EOL, r11 mov.l .PVR_VTX, r10 mov r5,r9 + mov r6,r8 bra SUBMIT_LOOP - mov r6,r8 + add #-64,r15 ! Handles a non-vertex command DO_CMD: @@ -208,6 +661,7 @@ NEXT_ITER: bf.s SUBMIT_LOOP mov r14,r13 ! CUR = NEXT + add #64,r15 !
VIEWPORT SAVE mov.l .VP_1,r0 add #16,r0 diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c index e19fa84c8..4c27eab27 100644 --- a/third_party/gldc/src/sh4.c +++ b/third_party/gldc/src/sh4.c @@ -46,7 +46,7 @@ static inline void _glPushHeaderOrVertex(Vertex* v) { sq += 8; } -extern void ClipLine(const Vertex* const v1, const Vertex* const v2, Vertex* vout); +extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout); #define SPAN_SORT_CFG 0x005F8030 static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; @@ -74,9 +74,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // .....A....B... // / | // v3--v2---v1 - ClipLine(v3, v0, a); + ClipEdge(v3, v0, a); a->flags = PVR_CMD_VERTEX_EOL; - ClipLine(v0, v1, b); + ClipEdge(v0, v1, b); b->flags = PVR_CMD_VERTEX; _glPerspectiveDivideVertex(v0); @@ -97,9 +97,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // ....A.....B... // / | // v0--v3---v2 - ClipLine(v0, v1, a); + ClipEdge(v0, v1, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v1, v2, b); + ClipEdge(v1, v2, b); b->flags = PVR_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(a); @@ -120,9 +120,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // / | // v1--v0---v3 - ClipLine(v1, v2, a); + ClipEdge(v1, v2, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v2, v3, b); + ClipEdge(v2, v3, b); b->flags = PVR_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(a); @@ -142,9 +142,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // ....A.....B... // / | // v2--v1---v0 - ClipLine(v2, v3, a); + ClipEdge(v2, v3, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v3, v0, b); + ClipEdge(v3, v0, b); b->flags = PVR_CMD_VERTEX; _glPerspectiveDivideVertex(b); @@ -164,9 +164,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // ....B..........A... 
// \ | // v3-----v2 - ClipLine(v1, v2, a); + ClipEdge(v1, v2, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v3, v0, b); + ClipEdge(v3, v0, b); b->flags = PVR_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(v1); @@ -189,9 +189,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // ....B..........A... // \ | // v2-----v1 - ClipLine(v0, v1, a); + ClipEdge(v0, v1, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v2, v3, b); + ClipEdge(v2, v3, b); b->flags = PVR_CMD_VERTEX; _glPerspectiveDivideVertex(a); @@ -213,9 +213,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // ....B..........A... // \ | // v0-----v3 - ClipLine(v2, v3, a); + ClipEdge(v2, v3, a); a->flags = PVR_CMD_VERTEX_EOL; - ClipLine(v0, v1, b); + ClipEdge(v0, v1, b); b->flags = PVR_CMD_VERTEX; _glPerspectiveDivideVertex(v1); @@ -238,9 +238,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // ....B..........A... // \ | // v1-----v0 - ClipLine(v3, v0, a); + ClipEdge(v3, v0, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v1, v2, b); + ClipEdge(v1, v2, b); b->flags = PVR_CMD_VERTEX; _glPerspectiveDivideVertex(b); @@ -264,9 +264,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // \ | // v3 // v1,v2,v0 v2,v0,A v0,A,B - ClipLine(v2, v3, a); + ClipEdge(v2, v3, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v3, v0, b); + ClipEdge(v3, v0, b); b->flags = PVR_CMD_VERTEX_EOL; _glPerspectiveDivideVertex(v1); @@ -293,9 +293,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // \ | // v2 // v0,v1,v3 v1,v3,A v3,A,B - ClipLine(v1, v2, a); + ClipEdge(v1, v2, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v2, v3, b); + ClipEdge(v2, v3, b); b->flags = PVR_CMD_VERTEX_EOL; v3->flags = PVR_CMD_VERTEX; @@ -323,9 +323,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // \ | // v1 // v3,v0,v2 v0,v2,A v2,A,B - ClipLine(v0, v1, a); + ClipEdge(v0, v1, a); 
a->flags = PVR_CMD_VERTEX; - ClipLine(v1, v2, b); + ClipEdge(v1, v2, b); b->flags = PVR_CMD_VERTEX_EOL; v3->flags = PVR_CMD_VERTEX; @@ -353,9 +353,9 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ // \ | // v0 // v2,v3,v1 v3,v1,A v1,A,B - ClipLine(v3, v0, a); + ClipEdge(v3, v0, a); a->flags = PVR_CMD_VERTEX; - ClipLine(v0, v1, b); + ClipEdge(v0, v1, b); b->flags = PVR_CMD_VERTEX_EOL; v3->flags = PVR_CMD_VERTEX;