From c1f4104dddb277b98b913cc92e10bca4cb24da8e Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Fri, 7 Jun 2024 17:47:38 +1000 Subject: [PATCH] SoftGPU: Add faster path for 2D triangles --- credits.txt | 9 +-- src/Graphics_SoftGPU.c | 147 ++++++++++++++++++++++++++++++++++++++--- src/String.h | 10 +-- 3 files changed, 147 insertions(+), 19 deletions(-) diff --git a/credits.txt b/credits.txt index b564f03da..8748539aa 100644 --- a/credits.txt +++ b/credits.txt @@ -4,13 +4,14 @@ * AndrewPH - Advice on how to improve ui of both client and launcher, multiple suggestions, and hosting the automatic build bot for ClassiCube. * 123DMWM - many suggestions, and assistance in identifying bugs and their causes. -* video_error - Allowing remote use of an OSX machine, pointing out many flaws in the plugin API. - The OSX port would not have been possible without you, thanks! +* video_error - Allowing remote use of a macOS machine, pointing out many flaws in the plugin API. + The macOS port would not have been possible without you, thanks! * Jerralish - reverse engineering and documenting the original classic map generation algorithm. * Cybertoon - Adding water animation, better metal step/dig sounds, identifying multiple flaws * Cheesse - multiple suggestions, testing ClassicalSharp on AMD graphics cards. -* Hemsindor - testing ClassicalSharp on OSX. -* headshotnoby - developing the Switch port +* Hemsindor - testing ClassicalSharp on macOS. +* headshotnoby - developing the Switch port. +* Beyond_5D - identifying many differences from original Classic And a big thanks to everyone else in the ClassiCube community (who I didn't mention here), who in the past have provided many suggestions and assisted in identifying bugs. 
diff --git a/src/Graphics_SoftGPU.c b/src/Graphics_SoftGPU.c index 7753940ca..2a4d79b28 100644 --- a/src/Graphics_SoftGPU.c +++ b/src/Graphics_SoftGPU.c @@ -286,7 +286,27 @@ typedef struct Vertex_ { PackedCol c; } Vertex; -static void TransformVertex(int index, Vertex* vertex) { +static void TransformVertex2D(int index, Vertex* vertex) { + // TODO: avoid the multiply, just add down in DrawTriangles + char* ptr = (char*)gfx_vertices + index * gfx_stride; + Vector3* pos = (Vector3*)ptr; + vertex->x = pos->x; + vertex->y = pos->y; + + if (gfx_format != VERTEX_FORMAT_TEXTURED) { + struct VertexColoured* v = (struct VertexColoured*)ptr; + vertex->u = 0.0f; + vertex->v = 0.0f; + vertex->c = v->Col; + } else { + struct VertexTextured* v = (struct VertexTextured*)ptr; + vertex->u = v->U; + vertex->v = v->V; + vertex->c = v->Col; + } +} + +static void TransformVertex3D(int index, Vertex* vertex) { // TODO: avoid the multiply, just add down in DrawTriangles char* ptr = (char*)gfx_vertices + index * gfx_stride; Vector3* pos = (Vector3*)ptr; @@ -305,6 +325,8 @@ static void TransformVertex(int index, Vertex* vertex) { if (gfx_format != VERTEX_FORMAT_TEXTURED) { struct VertexColoured* v = (struct VertexColoured*)ptr; + vertex->u = 0.0f; + vertex->v = 0.0f; vertex->c = v->Col; } else { struct VertexTextured* v = (struct VertexTextured*)ptr; @@ -322,7 +344,96 @@ static CC_INLINE int FastFloor(float value) { #define edgeFunction(ax,ay, bx,by, cx,cy) (((bx) - (ax)) * ((cy) - (ay)) - ((by) - (ay)) * ((cx) - (ax))) -static void DrawTriangle(Vertex* V0, Vertex* V1, Vertex* V2) { +static void DrawTriangle2D(Vertex* V0, Vertex* V1, Vertex* V2) { + int x0 = (int)V0->x, y0 = (int)V0->y; + int x1 = (int)V1->x, y1 = (int)V1->y; + int x2 = (int)V2->x, y2 = (int)V2->y; + int minX = min(x0, min(x1, x2)); + int minY = min(y0, min(y1, y2)); + int maxX = max(x0, max(x1, x2)); + int maxY = max(y0, max(y1, y2)); + + int area = edgeFunction(x0,y0, x1,y1, x2,y2); + // Reject triangles completely 
outside + if (maxX < 0 || minX > fb_maxX) return; + if (maxY < 0 || minY > fb_maxY) return; + + // Perform scissoring + minX = max(minX, 0); maxX = min(maxX, fb_maxX); + minY = max(minY, 0); maxY = min(maxY, fb_maxY); + float factor = 1.0f / area; + + float u0 = V0->u * curTexWidth, u1 = V1->u * curTexWidth, u2 = V2->u * curTexWidth; + float v0 = V0->v * curTexHeight, v1 = V1->v * curTexHeight, v2 = V2->v * curTexHeight; + PackedCol color = V0->c; + + // https://fgiesen.wordpress.com/2013/02/10/optimizing-the-basic-rasterizer/ + // Essentially these are the deltas of edge functions between X/Y and X/Y + 1 (i.e. one X/Y step) + int dx01 = y0 - y1, dy01 = x1 - x0; + int dx12 = y1 - y2, dy12 = x2 - x1; + int dx20 = y2 - y0, dy20 = x0 - x2; + + float bc0_start = edgeFunction(x1,y1, x2,y2, minX+0.5f,minY+0.5f); + float bc1_start = edgeFunction(x2,y2, x0,y0, minX+0.5f,minY+0.5f); + float bc2_start = edgeFunction(x0,y0, x1,y1, minX+0.5f,minY+0.5f); + + for (int y = minY; y <= maxY; y++, bc0_start += dy12, bc1_start += dy20, bc2_start += dy01) + { + float bc0 = bc0_start; + float bc1 = bc1_start; + float bc2 = bc2_start; + + for (int x = minX; x <= maxX; x++, bc0 += dx12, bc1 += dx20, bc2 += dx01) + { + float ic0 = bc0 * factor; + float ic1 = bc1 * factor; + float ic2 = bc2 * factor; + + if (ic0 < 0 || ic1 < 0 || ic2 < 0) continue; + int index = y * fb_width + x; + + int R, G, B, A; + if (gfx_format == VERTEX_FORMAT_TEXTURED) { + float u = ic0 * u0 + ic1 * u1 + ic2 * u2; + float v = ic0 * v0 + ic1 * v1 + ic2 * v2; + int texX = ((int)u) & texWidthMask; + int texY = ((int)v) & texHeightMask; + int texIndex = texY * curTexWidth + texX; + + BitmapCol tColor = curTexPixels[texIndex]; + int a1 = PackedCol_A(color), a2 = BitmapCol_A(tColor); + A = ( a1 * a2 ) >> 8; + int r1 = PackedCol_R(color), r2 = BitmapCol_R(tColor); + R = ( r1 * r2 ) >> 8; + int g1 = PackedCol_G(color), g2 = BitmapCol_G(tColor); + G = ( g1 * g2 ) >> 8; + int b1 = PackedCol_B(color), b2 = BitmapCol_B(tColor); 
+ B = ( b1 * b2 ) >> 8; + } else { + R = PackedCol_R(color); + G = PackedCol_G(color); + B = PackedCol_B(color); + A = PackedCol_A(color); + } + + if (gfx_alphaBlend) { + BitmapCol dst = colorBuffer[index]; + int dstR = BitmapCol_R(dst); + int dstG = BitmapCol_G(dst); + int dstB = BitmapCol_B(dst); + + R = (R * A + dstR * (255 - A)) >> 8; + G = (G * A + dstG * (255 - A)) >> 8; + B = (B * A + dstB * (255 - A)) >> 8; + } + if (gfx_alphaTest && A < 0x80) continue; + + colorBuffer[index] = BitmapCol_Make(R, G, B, 0xFF); + } + } +} + +static void DrawTriangle3D(Vertex* V0, Vertex* V1, Vertex* V2) { int x0 = (int)V0->x, y0 = (int)V0->y; int x1 = (int)V1->x, y1 = (int)V1->y; int x2 = (int)V2->x, y2 = (int)V2->y; @@ -436,16 +547,30 @@ void DrawQuads(int startVertex, int verticesCount) { Vertex vertices[4]; int j = startVertex; - // 4 vertices = 1 quad = 2 triangles - for (int i = 0; i < verticesCount / 4; i++, j += 4) - { - TransformVertex(j + 0, &vertices[0]); - TransformVertex(j + 1, &vertices[1]); - TransformVertex(j + 2, &vertices[2]); - TransformVertex(j + 3, &vertices[3]); + if (gfx_rendering2D) { + // 4 vertices = 1 quad = 2 triangles + for (int i = 0; i < verticesCount / 4; i++, j += 4) + { + TransformVertex2D(j + 0, &vertices[0]); + TransformVertex2D(j + 1, &vertices[1]); + TransformVertex2D(j + 2, &vertices[2]); + TransformVertex2D(j + 3, &vertices[3]); - DrawTriangle(&vertices[0], &vertices[2], &vertices[1]); - DrawTriangle(&vertices[2], &vertices[0], &vertices[3]); + DrawTriangle2D(&vertices[0], &vertices[2], &vertices[1]); + DrawTriangle2D(&vertices[2], &vertices[0], &vertices[3]); + } + } else { + // 4 vertices = 1 quad = 2 triangles + for (int i = 0; i < verticesCount / 4; i++, j += 4) + { + TransformVertex3D(j + 0, &vertices[0]); + TransformVertex3D(j + 1, &vertices[1]); + TransformVertex3D(j + 2, &vertices[2]); + TransformVertex3D(j + 3, &vertices[3]); + + DrawTriangle3D(&vertices[0], &vertices[2], &vertices[1]); + DrawTriangle3D(&vertices[2], 
&vertices[0], &vertices[3]); + } } } diff --git a/src/String.h b/src/String.h index cf7e0b3d4..7f1f2df7d 100644 --- a/src/String.h +++ b/src/String.h @@ -25,17 +25,19 @@ static CC_INLINE cc_string String_Init(STRING_REF char* buffer, int length, int CC_API int String_CalcLen(const char* raw, int capacity); /* Counts number of characters until a '\0' is found. */ int String_Length(const char* raw); -/* Constructs a string from a (maybe null terminated) buffer. */ -CC_NOINLINE cc_string String_FromRaw(STRING_REF char* buffer, int capacity); -/* Constructs a string from a null-terminated constant readonly buffer. */ -CC_API cc_string String_FromReadonly(STRING_REF const char* buffer); /* Constructs a string from a compile time string constant */ #define String_FromConst(text) { (char*)(text), (sizeof(text) - 1), (sizeof(text) - 1)} /* Constructs a string from a compile time array */ #define String_FromArray(buffer) { buffer, 0, sizeof(buffer)} + +/* Constructs a string from a (maybe null terminated) buffer. */ +CC_NOINLINE cc_string String_FromRaw(STRING_REF char* buffer, int capacity); +/* Constructs a string from a null-terminated constant readonly buffer. */ +CC_API cc_string String_FromReadonly(STRING_REF const char* buffer); /* Constructs a string from a compile time array, that may have arbitary actual length of data at runtime */ #define String_FromRawArray(buffer) String_FromRaw(buffer, sizeof(buffer)) + /* Constructs a string from a compile time array (leaving 1 byte of room for null terminator) */ #define String_NT_Array(buffer) { buffer, 0, (sizeof(buffer) - 1)} /* Initialises a string from a compile time array. */