diff options
Diffstat (limited to 'third_party/gldc/src/sh4.c')
-rw-r--r-- | third_party/gldc/src/sh4.c | 494 |
1 files changed, 494 insertions, 0 deletions
diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c new file mode 100644 index 0000000..0dff66f --- /dev/null +++ b/third_party/gldc/src/sh4.c @@ -0,0 +1,494 @@ +#include <math.h> +#include "sh4.h" +#include "sh4_math.h" + +#define CLIP_DEBUG 0 + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#define SQ_BASE_ADDRESS (void*) 0xe0000000 + +GL_FORCE_INLINE float _glFastInvert(float x) { + return MATH_fsrra(x * x); +} + +GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) { + TRACE(); + + const float f = _glFastInvert(vertex->w); + + /* Convert to NDC and apply viewport */ + vertex->xyz[0] = (vertex->xyz[0] * f * VIEWPORT.hwidth) + VIEWPORT.x_plus_hwidth; + vertex->xyz[1] = (vertex->xyz[1] * f * VIEWPORT.hheight) + VIEWPORT.y_plus_hheight; + + /* Orthographic projections need to use invZ otherwise we lose + the depth information. As w == 1, and clip-space range is -w to +w + we add 1.0 to the Z to bring it into range. We add a little extra to + avoid a divide by zero. + */ + if(vertex->w == 1.0f) { + vertex->xyz[2] = _glFastInvert(1.0001f + vertex->xyz[2]); + } else { + vertex->xyz[2] = f; + } +} + + +volatile uint32_t *sq = SQ_BASE_ADDRESS; + +static inline void _glFlushBuffer() { + TRACE(); + + /* Wait for both store queues to complete */ + sq = (uint32_t*) 0xe0000000; + sq[0] = sq[8] = 0; +} + +static inline void _glPushHeaderOrVertex(Vertex* v) { + TRACE(); + + uint32_t* s = (uint32_t*) v; + sq[0] = *(s++); + sq[1] = *(s++); + sq[2] = *(s++); + sq[3] = *(s++); + sq[4] = *(s++); + sq[5] = *(s++); + sq[6] = *(s++); + sq[7] = *(s++); + __asm__("pref @%0" : : "r"(sq)); + sq += 8; +} + +static void _glClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout) { + const float d0 = v1->w + v1->xyz[2]; + const float d1 = v2->w + v2->xyz[2]; + const float t = (fabs(d0) * MATH_fsrra((d1 - d0) * (d1 - d0))) + 0.000001f; + const float invt = 1.0f - t; + + vout->xyz[0] = invt * v1->xyz[0] + t * v2->xyz[0]; + vout->xyz[1] = invt * v1->xyz[1] + t * v2->xyz[1]; + vout->xyz[2] = invt * v1->xyz[2] + t * v2->xyz[2]; + + vout->uv[0] = invt * v1->uv[0] + t * v2->uv[0]; + vout->uv[1] = invt * v1->uv[1] + t * v2->uv[1]; + + vout->w = invt * v1->w + t * v2->w; + + vout->bgra[0] = invt * v1->bgra[0] + t * v2->bgra[0]; + vout->bgra[1] = invt * v1->bgra[1] + t * v2->bgra[1]; + vout->bgra[2] = invt * v1->bgra[2] + t * v2->bgra[2]; + vout->bgra[3] = invt * v1->bgra[3] + t * v2->bgra[3]; +} + +#define SPAN_SORT_CFG 0x005F8030 +static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884; +static volatile uint32_t* PVR_LMMODE1 = (uint32_t*) 0xA05F6888; +static volatile uint32_t* QACR = (uint32_t*) 0xFF000038; + +#define V0_VIS (1 << 0) +#define V1_VIS (1 << 1) +#define V2_VIS (1 << 2) +#define V3_VIS (1 << 3) + + +// https://casual-effects.com/research/McGuire2011Clipping/clip.glsl +static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_t visible_mask) { + Vertex __attribute__((aligned(32))) scratch[4]; + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + switch(visible_mask) { + case V0_VIS: + { + // v0 + // / | + // / | + // .....A....B... + // / | + // v3--v2---v1 + _glClipEdge(v3, v0, a); + a->flags = PVR_CMD_VERTEX_EOL; + _glClipEdge(v0, v1, b); + b->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + } + break; + case V1_VIS: + { + // v1 + // / | + // / | + // ....A.....B... + // / | + // v0--v3---v2 + _glClipEdge(v0, v1, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v1, v2, b); + b->flags = PVR_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + case V2_VIS: + { + // v2 + // / | + // / | + // ....A.....B... + // / | + // v1--v0---v3 + + _glClipEdge(v1, v2, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v2, v3, b); + b->flags = PVR_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + case V3_VIS: + { + // v3 + // / | + // / | + // ....A.....B... + // / | + // v2--v1---v0 + _glClipEdge(v2, v3, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v3, v0, b); + b->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + } + break; + case V0_VIS | V1_VIS: + { + // v0-----------v1 + // \ | + // ....B..........A... + // \ | + // v3-----v2 + _glClipEdge(v1, v2, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v3, v0, b); + b->flags = PVR_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + // case V0_VIS | V2_VIS: degenerate case that should never happen + case V0_VIS | V3_VIS: + { + // v3-----------v0 + // \ | + // ....B..........A... + // \ | + // v2-----v1 + _glClipEdge(v0, v1, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v2, v3, b); + b->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + } break; + case V1_VIS | V2_VIS: + { + // v1-----------v2 + // \ | + // ....B..........A... + // \ | + // v0-----v3 + _glClipEdge(v2, v3, a); + a->flags = PVR_CMD_VERTEX_EOL; + _glClipEdge(v0, v1, b); + b->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + } break; + // case V1_VIS | V3_VIS: degenerate case that should never happen + case V2_VIS | V3_VIS: + { + // v2-----------v3 + // \ | + // ....B..........A... + // \ | + // v1-----v0 + _glClipEdge(v3, v0, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v1, v2, b); + b->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + } break; + case V0_VIS | V1_VIS | V2_VIS: + { + // --v1-- + // v0-- --v2 + // \ | + // .....B.....A... + // \ | + // v3 + // v1,v2,v0 v2,v0,A v0,A,B + _glClipEdge(v2, v3, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v3, v0, b); + b->flags = PVR_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + case V0_VIS | V1_VIS | V3_VIS: + { + // --v0-- + // v3-- --v1 + // \ | + // .....B.....A... + // \ | + // v2 + // v0,v1,v3 v1,v3,A v3,A,B + _glClipEdge(v1, v2, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v2, v3, b); + b->flags = PVR_CMD_VERTEX_EOL; + v3->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + case V0_VIS | V2_VIS | V3_VIS: + { + // --v3-- + // v2-- --v0 + // \ | + // .....B.....A... + // \ | + // v1 + // v3,v0,v2 v0,v2,A v2,A,B + _glClipEdge(v0, v1, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v1, v2, b); + b->flags = PVR_CMD_VERTEX_EOL; + v3->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + case V1_VIS | V2_VIS | V3_VIS: + { + // --v2-- + // v1-- --v3 + // \ | + // .....B.....A... + // \ | + // v0 + // v2,v3,v1 v3,v1,A v1,A,B + _glClipEdge(v3, v0, a); + a->flags = PVR_CMD_VERTEX; + _glClipEdge(v0, v1, b); + b->flags = PVR_CMD_VERTEX_EOL; + v3->flags = PVR_CMD_VERTEX; + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } break; + } +} + +void SceneListSubmit(Vertex* v3, int n) { + TRACE(); + /* You need at least a header, and 3 vertices to render anything */ + if(n < 4) return; + + PVR_SET(SPAN_SORT_CFG, 0x0); + + //Set PVR DMA registers + *PVR_LMMODE0 = 0; + *PVR_LMMODE1 = 0; + + //Set QACR registers + QACR[1] = QACR[0] = 0x11; + +#if CLIP_DEBUG + Vertex* vertex = (Vertex*) src; + for(int i = 0; i < n; ++i) { + fprintf(stderr, "{%f, %f, %f, %f}, // %x (%x)\n", vertex[i].xyz[0], vertex[i].xyz[1], vertex[i].xyz[2], vertex[i].w, vertex[i].flags, &vertex[i]); + } + + fprintf(stderr, "----\n"); +#endif + uint8_t visible_mask = 0; + + sq = SQ_BASE_ADDRESS; + + for(int i = 0; i < n; ++i, ++v3) { + PREFETCH(v3 + 1); + switch(v3->flags & 0xFF000000) { + case PVR_CMD_VERTEX_EOL: + break; + case PVR_CMD_VERTEX: + continue; + default: + _glPushHeaderOrVertex(v3); + continue; + }; + + // Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}] + Vertex* const v0 = v3 - 3; + Vertex* const v1 = v3 - 2; + Vertex* const v2 = v3 - 1; + + visible_mask = v3->flags & 0xFF; + v3->flags &= ~0xFF; + + // Stats gathering found that when testing a 64x64x64 sized world, at most + // ~400-500 triangles needed clipping + // ~13% of the triangles in a frame needed clipping (percentage increased when less triangles overall) + // Based on this, the decision was made to optimise for rendering quads there + // were either entirely visible or entirely culled, at the expensive at making + // partially visible quads a bit slower due to needing to be split into two triangles first + // Performance measuring indicated that overall FPS improved from this change + // to switching to try to process 1 quad instead of 2 triangles though + + switch(visible_mask) { + case V0_VIS | V1_VIS | V2_VIS | V3_VIS: // All vertices visible + { + // Triangle strip: {1,2,0} {2,0,3} + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + } + break; + + default: // Some vertices visible + SubmitClipped(v0, v1, v2, v3, visible_mask); + break; + } + } + + _glFlushBuffer(); +} |