summary refs log tree commit diff
path: root/third_party/citro3d.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/citro3d.c')
-rw-r--r--third_party/citro3d.c1720
1 files changed, 1720 insertions, 0 deletions
diff --git a/third_party/citro3d.c b/third_party/citro3d.c
new file mode 100644
index 0000000..7aa0bad
--- /dev/null
+++ b/third_party/citro3d.c
@@ -0,0 +1,1720 @@
+/*
+Copyright (C) 2014-2018 fincs
+
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any
+damages arising from the use of this software.
+
+Permission is granted to anyone to use this software for any
+purpose, including commercial applications, and to alter it and
+redistribute it freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you
+   must not claim that you wrote the original software. If you use
+   this software in a product, an acknowledgment in the product
+   documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and
+   must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source
+   distribution.
+*/
+#include <stdbool.h>
+#include <stdint.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+typedef u32 C3D_IVec; ///< Integer uniform value held in one 32-bit word (element type of C3D_IVUnif)
+
+typedef union ///< Four floats, addressable by component name or by index
+{
+	struct
+	{
+		float w; ///< W-component (aliases c[0])
+		float z; ///< Z-component (aliases c[1])
+		float y; ///< Y-component (aliases c[2])
+		float x; ///< X-component (aliases c[3])
+	};
+	float c[4]; ///< Indexed access; note the reversed order: c[0] is w and c[3] is x
+} C3D_FVec;
+
+typedef union ///< 4x4 float matrix
+{
+	C3D_FVec r[4]; ///< Rows are vectors
+	float m[4*4]; ///< Raw access to the same 16 floats
+} C3D_Mtx;
+
+
+typedef struct ///< Vertex attribute layout built up by AttrInfo_AddLoader
+{
+	u32 flags[2]; ///< Packed per-attribute formats; sent as two words starting at GPUREG_ATTRIBBUFFERS_FORMAT_LOW
+	u64 permutation; ///< One 4-bit input register id per attribute, attribute n at bits n*4..n*4+3
+	int attrCount; ///< Number of attributes added so far (AttrInfo_AddLoader refuses more than 12)
+} C3D_AttrInfo;
+
+static void AttrInfo_Init(C3D_AttrInfo* info);
+static int  AttrInfo_AddLoader(C3D_AttrInfo* info, int regId, GPU_FORMATS format, int count);
+
+static C3D_AttrInfo* C3D_GetAttrInfo(void);
+
+
+
+
+typedef struct ///< Buffer configuration record; not used by any code visible in this part of the file (presumably vertex buffers — TODO confirm)
+{
+	u32 offset;
+	u32 flags[2];
+} C3D_BufCfg;
+
+
+typedef struct ///< Texture descriptor
+{
+	void* data; ///< Texel storage; VRAM or linear memory (C3D_TexDelete picks vramFree/linearFree by address)
+	GPU_TEXCOLOR fmt : 4; ///< Texel format, written to GPUREG_TEXUNITn_TYPE by C3Di_SetTex
+	size_t size : 28; ///< Level-0 size handed to C3D_TexCalcLevelSize (presumably bytes — it ends up as a memcpy length)
+
+	union
+	{
+		u32 dim; ///< Both dimensions as one register word
+		struct
+		{
+			u16 height;
+			u16 width;
+		};
+	};
+
+	u32 param; ///< Register word sent after dim by C3Di_SetTex
+	u32 border; ///< Register word sent first by C3Di_SetTex (GPUREG_TEXUNITn_BORDER_COLOR)
+	union
+	{
+		u32 lodParam; ///< LOD fields as one register word
+		struct
+		{
+			u16 lodBias;
+			u8 maxLevel; ///< Highest mipmap level; used for the total-size computation
+			u8 minLevel;
+		};
+	};
+} C3D_Tex;
+
+static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level);
+static void C3D_TexGenerateMipmap(C3D_Tex* tex, GPU_TEXFACE face);
+static void C3D_TexBind(int unitId, C3D_Tex* tex);
+static void C3D_TexFlush(C3D_Tex* tex);
+static void C3D_TexDelete(C3D_Tex* tex);
+
+/**
+ * Derives a mipmap level limit from the smaller texture dimension:
+ * floor(log2(min(width, height))) - 3.
+ * The smaller dimension must be non-zero, since __builtin_clz(0) is undefined.
+ */
+static inline int C3D_TexCalcMaxLevel(u32 width, u32 height)
+{
+	u32 smallest = height;
+	if (width < height)
+		smallest = width;
+
+	// Subtracting 3 avoids sizes smaller than 8
+	return (31-__builtin_clz(smallest)) - 3;
+}
+
+/**
+ * Size of mipmap level `level` given the level-0 size: each level is a
+ * quarter of the previous one, i.e. size / 4^level.
+ */
+static inline u32 C3D_TexCalcLevelSize(u32 size, int level)
+{
+	const int shift = 2*level;
+	return size >> shift;
+}
+
+/**
+ * Combined size of mipmap levels 0..maxLevel given the level-0 size.
+ *
+ * The level sizes form a geometric series with ratio r = 1/4:
+ *   S = s + s/4 + s/4^2 + ... + s/4^n
+ *     = s (1 - (1/4)^(n+1)) / (1 - 1/4)
+ *     = (4/3) (s - s/4^(n+1))
+ *     = (4/3) (s - (s >> (2n+2)))
+ */
+static inline u32 C3D_TexCalcTotalSize(u32 size, int maxLevel)
+{
+	// Size of the first level that is NOT part of the chain
+	const u32 beyondLast = C3D_TexCalcLevelSize(size, maxLevel+1);
+	const u32 remaining = size - beyondLast;
+	return remaining * 4 / 3;
+}
+
+/**
+ * Locates one mipmap level inside a texture image.
+ *
+ * @param tex   Texture whose size/maxLevel describe the mipmap chain.
+ * @param data  Start of the image (level 0).
+ * @param level Level to locate; a negative value selects the whole chain.
+ * @param size  Optional output: size of the selected level, or of the whole
+ *              chain when level is negative.
+ * @return data itself for level <= 0, otherwise data advanced past all
+ *         levels below `level`.
+ */
+static inline void* C3D_TexGetImagePtr(C3D_Tex* tex, void* data, int level, u32* size)
+{
+	if (size)
+	{
+		if (level >= 0)
+			*size = C3D_TexCalcLevelSize(tex->size, level);
+		else
+			*size = C3D_TexCalcTotalSize(tex->size, tex->maxLevel);
+	}
+
+	if (level <= 0)
+		return data;
+
+	u8* base = (u8*)data;
+	return base + C3D_TexCalcTotalSize(tex->size, level-1);
+}
+
+/** Same as C3D_TexGetImagePtr, applied to the texture's own data pointer. */
+static inline void* C3D_Tex2DGetImagePtr(C3D_Tex* tex, int level, u32* size)
+{
+	void* image = tex->data;
+	return C3D_TexGetImagePtr(tex, image, level, size);
+}
+
+/** Loads `data` into mipmap level 0 of the texture (face GPU_TEXFACE_2D). */
+static inline void C3D_TexUpload(C3D_Tex* tex, const void* data)
+{
+	const int baseLevel = 0;
+	C3D_TexLoadImage(tex, data, GPU_TEXFACE_2D, baseLevel);
+}
+
+
+
+
+
+
+
+
+
+static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset);
+static void C3D_CullFace(GPU_CULLMODE mode);
+static void C3D_StencilTest(void);
+static void C3D_StencilOp(void);
+static void C3D_EarlyDepthTest(bool enable, GPU_EARLYDEPTHFUNC function, u32 ref);
+static void C3D_DepthTest(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask);
+static void C3D_AlphaTest(bool enable, GPU_TESTFUNC function, int ref);
+static void C3D_AlphaBlend(GPU_BLENDEQUATION colorEq, GPU_BLENDEQUATION alphaEq, GPU_BLENDFACTOR srcClr, GPU_BLENDFACTOR dstClr, GPU_BLENDFACTOR srcAlpha, GPU_BLENDFACTOR dstAlpha);
+static void C3D_ColorLogicOp(GPU_LOGICOP op);
+static void C3D_FragOpMode(GPU_FRAGOPMODE mode);
+static void C3D_FragOpShadow(float scale, float bias);
+
+
+
+
+
+
+#define C3D_DEFAULT_CMDBUF_SIZE 0x40000
+
+enum ///< Element type selectors; not referenced by the code visible here (names suggest index element sizes — TODO confirm)
+{
+	C3D_UNSIGNED_BYTE = 0,
+	C3D_UNSIGNED_SHORT = 1,
+};
+
+static bool C3D_Init(size_t cmdBufSize);
+static void C3D_Fini(void);
+
+static void C3D_BindProgram(shaderProgram_s* program);
+
+static void C3D_SetViewport(u32 x, u32 y, u32 w, u32 h);
+static void C3D_SetScissor(GPU_SCISSORMODE mode, u32 left, u32 top, u32 right, u32 bottom);
+
+static void C3D_DrawElements(GPU_Primitive_t primitive, int count);
+
+// Immediate-mode vertex submission
+static void C3D_ImmDrawBegin(GPU_Primitive_t primitive);
+static void C3D_ImmSendAttrib(float x, float y, float z, float w);
+static void C3D_ImmDrawEnd(void);
+
+static inline void C3D_ImmDrawRestartPrim(void) ///< Emits the same GPUREG_RESTART_PRIMITIVE write the draw functions use to break off a triangle strip/fan
+{
+	GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1);
+}
+
+
+
+
+
+typedef struct ///< Fog lookup table in packed form
+{
+	u32 data[128]; ///< Per entry: 13-bit difference in bits 0-12, value in bits 13 and up (see FogLut_FromArray)
+} C3D_FogLut;
+
+/**
+ * Evaluates far*near / (depth*(far-near) + near).
+ *
+ * @param depth Depth value the expression is evaluated at.
+ * @param near  Near plane distance.
+ * @param far   Far plane distance.
+ */
+static inline float FogLut_CalcZ(float depth, float near, float far)
+{
+	const float denominator = depth*(far-near)+near;
+	return far*near/denominator;
+}
+
+static void FogLut_FromArray(C3D_FogLut* lut, const float data[256]);
+
+static void C3D_FogGasMode(GPU_FOGMODE fogMode, GPU_GASMODE gasMode, bool zFlip);
+static void C3D_FogColor(u32 color);
+static void C3D_FogLutBind(C3D_FogLut* lut);
+
+
+
+
+
+
+
+
+
+
+
+typedef struct ///< Color/depth render buffer pair plus its dimensions
+{
+	void* colorBuf; ///< Color buffer, or NULL when color output is disabled (see C3D_FrameBufColor)
+	void* depthBuf; ///< Depth buffer, or NULL when depth output is disabled (see C3D_FrameBufDepth)
+	u16 width;
+	u16 height;
+	GPU_COLORBUF colorFmt; ///< Also indexes colorFmtSizes
+	GPU_DEPTHBUF depthFmt; ///< Also indexes depthFmtSizes
+	bool block32; ///< Written to GPUREG_FRAMEBUFFER_BLOCK32 as 0/1
+	u8 colorMask : 4; ///< 0xF when a color buffer is attached, 0 otherwise
+	u8 depthMask : 4; ///< 0x3 for GPU_RB_DEPTH24_STENCIL8, 0x2 for other depth formats, 0 without a depth buffer
+} C3D_FrameBuf;
+
+// Flags for C3D_FrameBufClear
+typedef enum ///< Bit mask selecting which buffers C3D_FrameBufClear fills
+{
+	C3D_CLEAR_COLOR = BIT(0), ///< Fill the color buffer with clearColor
+	C3D_CLEAR_DEPTH = BIT(1), ///< Fill the depth buffer with clearDepth
+	C3D_CLEAR_ALL   = C3D_CLEAR_COLOR | C3D_CLEAR_DEPTH, ///< Fill both buffers in one GX_MemoryFill call
+} C3D_ClearBits;
+
+static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt);
+static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt);
+
+static C3D_FrameBuf* C3D_GetFrameBuf(void);
+static void C3D_SetFrameBuf(C3D_FrameBuf* fb);
+static void C3D_FrameBufClear(C3D_FrameBuf* fb, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth);
+static void C3D_FrameBufTransfer(C3D_FrameBuf* fb, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags);
+
+/** Stores the dimensions and the block32 setting in the framebuffer; the buffers themselves are untouched. */
+static inline void C3D_FrameBufAttrib(C3D_FrameBuf* fb, u16 width, u16 height, bool block32)
+{
+	fb->block32 = block32;
+	fb->height  = height;
+	fb->width   = width;
+}
+
+/**
+ * Attaches a color buffer to the framebuffer, or detaches it when buf is NULL.
+ * Detaching resets the format to GPU_RB_RGBA8 and the color mask to 0;
+ * attaching sets the mask to 0xF.
+ */
+static inline void C3D_FrameBufColor(C3D_FrameBuf* fb, void* buf, GPU_COLORBUF fmt)
+{
+	if (!buf)
+	{
+		fb->colorBuf  = NULL;
+		fb->colorFmt  = GPU_RB_RGBA8;
+		fb->colorMask = 0;
+		return;
+	}
+
+	fb->colorBuf  = buf;
+	fb->colorFmt  = fmt;
+	fb->colorMask = 0xF;
+}
+
+/**
+ * Attaches a depth buffer to the framebuffer, or detaches it when buf is NULL.
+ * Detaching resets the format to GPU_RB_DEPTH24 and the depth mask to 0;
+ * attaching sets the mask to 0x3 for GPU_RB_DEPTH24_STENCIL8 and 0x2 otherwise.
+ */
+static inline void C3D_FrameBufDepth(C3D_FrameBuf* fb, void* buf, GPU_DEPTHBUF fmt)
+{
+	if (!buf)
+	{
+		fb->depthBuf  = NULL;
+		fb->depthFmt  = GPU_RB_DEPTH24;
+		fb->depthMask = 0;
+		return;
+	}
+
+	fb->depthBuf = buf;
+	fb->depthFmt = fmt;
+	if (fmt == GPU_RB_DEPTH24_STENCIL8)
+		fb->depthMask = 0x3;
+	else
+		fb->depthMask = 0x2;
+}
+
+
+
+
+
+
+
+typedef struct C3D_RenderTarget_tag C3D_RenderTarget; ///< Framebuffer plus the screen it is transferred to at frame end
+
+struct C3D_RenderTarget_tag
+{
+	C3D_FrameBuf frameBuf; ///< Buffers allocated by C3D_RenderTargetCreate
+
+	bool used, linked; ///< used: set by C3D_FrameDrawOn, cleared by C3D_FrameEnd; linked: registered in linkedTarget[] by C3D_RenderTargetSetOutput
+	gfxScreen_t screen; ///< Output screen recorded by C3D_RenderTargetSetOutput
+	gfx3dSide_t side; ///< Output side recorded by C3D_RenderTargetSetOutput
+	u32 transferFlags; ///< Passed to C3D_FrameBufTransfer at frame end
+};
+
+// Flags for C3D_FrameBegin
+enum
+{
+	C3D_FRAME_SYNCDRAW = BIT(0), // Documented as "perform C3D_FrameSync before checking the GPU status"; NOTE(review): the C3D_FrameBegin in this file never tests this bit
+	C3D_FRAME_NONBLOCK = BIT(1), // C3D_FrameBegin returns false instead of waiting if the queue has not finished
+};
+
+static void C3D_FrameSync(void);
+
+static bool C3D_FrameBegin(u8 flags);
+static bool C3D_FrameDrawOn(C3D_RenderTarget* target);
+static void C3D_FrameSplit(u8 flags);
+static void C3D_FrameEnd(u8 flags);
+
+static void C3D_RenderTargetCreate(C3D_RenderTarget* target, int width, int height, GPU_COLORBUF colorFmt, GPU_DEPTHBUF depthFmt);
+static void C3D_RenderTargetDelete(C3D_RenderTarget* target);
+static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags);
+
+/** Clears the output slot that `target` was assigned to (its recorded screen/side) by linking NULL there. */
+static inline void C3D_RenderTargetDetachOutput(C3D_RenderTarget* target)
+{
+	const gfxScreen_t screen = target->screen;
+	const gfx3dSide_t side = target->side;
+	C3D_RenderTargetSetOutput(NULL, screen, side, 0);
+}
+
+/** Forwards to C3D_FrameBufClear on the target's own framebuffer. */
+static inline void C3D_RenderTargetClear(C3D_RenderTarget* target, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth)
+{
+	C3D_FrameBuf* fb = &target->frameBuf;
+	C3D_FrameBufClear(fb, clearBits, clearColor, clearDepth);
+}
+
+static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags);
+
+
+
+
+typedef struct ///< One texture combiner stage; C3Di_TexEnvBind sends the struct verbatim as consecutive 32-bit register words, so the field layout matters
+{
+	u16 srcRgb, srcAlpha; ///< Source selectors (built with GPU_TEVSOURCES)
+	union
+	{
+		u32 opAll; ///< Both operand fields as one word
+		struct { u32 opRgb:12, opAlpha:12; };
+	};
+	u16 funcRgb, funcAlpha; ///< Combine functions (e.g. GPU_REPLACE)
+	u32 color; ///< Constant color word
+	u16 scaleRgb, scaleAlpha; ///< Output scales (e.g. GPU_TEVSCALE_1)
+} C3D_TexEnv;
+
+/**
+ * Resets a combiner stage to its default: both channels take GPU_PREVIOUS
+ * with operand word 0, GPU_REPLACE function, 1x scale and an all-ones
+ * constant color.
+ */
+static inline void C3D_TexEnvInit(C3D_TexEnv* env)
+{
+	// RGB channel
+	env->srcRgb   = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
+	env->funcRgb  = GPU_REPLACE;
+	env->scaleRgb = GPU_TEVSCALE_1;
+
+	// Alpha channel
+	env->srcAlpha   = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
+	env->funcAlpha  = GPU_REPLACE;
+	env->scaleAlpha = GPU_TEVSCALE_1;
+
+	// Shared words
+	env->opAll = 0;
+	env->color = 0xFFFFFFFF;
+}
+
+
+/**
+ * Computes the 4x4 matrix product out = a * b.
+ *
+ * The product is accumulated in a local temporary and copied to `out` at the
+ * end, so `out` may alias `a` and/or `b` (e.g. Mtx_Multiply(&m, &m, &n)).
+ * Writing each element straight into `out` would overwrite elements of an
+ * aliased operand that later terms still need.
+ *
+ * @param out Receives the product.
+ * @param a   Left operand.
+ * @param b   Right operand.
+ */
+void Mtx_Multiply(C3D_Mtx* out, const C3D_Mtx* a, const C3D_Mtx* b)
+{
+	// http://www.wolframalpha.com/input/?i={{a,b,c,d},{e,f,g,h},{i,j,k,l},{m,n,o,p}}{{α,β,γ,δ},{ε,θ,ι,κ},{λ,μ,ν,ξ},{ο,π,ρ,σ}}
+	C3D_Mtx result;
+	int i, j;
+	for (j = 0; j < 4; ++j)
+		for (i = 0; i < 4; ++i)
+			result.r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i];
+
+	// Publish only once every element has been computed from the original inputs
+	*out = result;
+}
+
+
+
+
+#define C3D_FVUNIF_COUNT 96 // Number of float-vector uniform slots
+#define C3D_IVUNIF_COUNT 4 // Number of integer-vector uniform slots
+
+static C3D_FVec C3D_FVUnif[C3D_FVUNIF_COUNT]; // Staged float-vector uniform values
+static C3D_IVec C3D_IVUnif[C3D_IVUNIF_COUNT]; // Staged integer-vector uniform values
+static u16      C3D_BoolUnifs; // Staged boolean uniforms (presumably one bit each — TODO confirm)
+
+static bool C3D_FVUnifDirty[C3D_FVUNIF_COUNT]; // Per-slot "needs upload" marks, set by C3D_FVUnifWritePtr
+static bool C3D_IVUnifDirty[C3D_IVUNIF_COUNT];
+static bool C3D_BoolUnifsDirty;
+
+/**
+ * Marks `size` consecutive float-vector uniforms starting at `id` as dirty
+ * and returns a pointer to the first one for the caller to fill in.
+ * No range checking is done on id/size.
+ */
+static inline C3D_FVec* C3D_FVUnifWritePtr(int id, int size)
+{
+	bool* dirty = &C3D_FVUnifDirty[id];
+	int n;
+	for (n = 0; n < size; n ++)
+		dirty[n] = true;
+	return C3D_FVUnif + id;
+}
+
+/** Stages the four rows of `mtx` in uniforms id..id+3 and marks them dirty. */
+static inline void C3D_FVUnifMtx4x4(int id, const C3D_Mtx* mtx)
+{
+	C3D_FVec* dst = C3D_FVUnifWritePtr(id, 4);
+	const C3D_FVec* src = mtx->r;
+	int row;
+	for (row = 0; row < 4; row ++)
+		dst[row] = src[row]; // One whole row per struct copy
+}
+
+/** Stages a single float-vector uniform from its four components and marks it dirty. */
+static inline void C3D_FVUnifSet(int id, float x, float y, float z, float w)
+{
+	C3D_FVec* slot = C3D_FVUnifWritePtr(id, 1);
+	slot->w = w;
+	slot->z = z;
+	slot->y = y;
+	slot->x = x;
+}
+
+static void C3D_UpdateUniforms(void);
+
+
+
+
+
+
+typedef struct ///< Fragment/depth/blend state; parts are uploaded straight from memory, so adjacent fields must stay adjacent
+{
+	u32 fragOpMode; ///< Sent to GPUREG_COLOR_OPERATION; bits 8-15 are 0x01 after C3D_AlphaBlend and 0 after C3D_ColorLogicOp
+	u32 fragOpShadow; ///< Two packed 16-bit floats built by C3D_FragOpShadow
+	u32 zScale, zOffset; ///< 24-bit float encodings; uploaded together as two words starting at GPUREG_DEPTHMAP_SCALE
+	GPU_CULLMODE cullMode;
+	bool zBuffer, earlyDepth;
+	GPU_EARLYDEPTHFUNC earlyDepthFunc;
+	u32 earlyDepthRef;
+
+	u32 alphaTest; ///< First of four consecutive words uploaded starting at GPUREG_FRAGOP_ALPHA_TEST
+	u32 stencilMode, stencilOp; ///< Second and third word of that upload
+	u32 depthTest; ///< Fourth word of that upload; bits 4-6 hold the test function
+
+	u32 alphaBlend; ///< Packed blend equations and factors, sent to GPUREG_BLEND_FUNC
+	GPU_LOGICOP clrLogicOp; ///< Sent to GPUREG_LOGIC_OP
+} C3D_Effect;
+
+typedef struct ///< All library state; a single instance is reachable through C3Di_GetContext
+{
+	gxCmdQueue_s gxQueue; ///< GX command queue bound by C3Di_RenderQueueInit
+	u32* cmdBuf; ///< GPU command buffer installed by C3D_FrameBegin
+	size_t cmdBufSize; ///< Size passed to GPUCMD_SetBuffer together with cmdBuf
+
+	u32 flags; ///< Combination of C3DiF_* bits
+	shaderProgram_s* program;
+
+	C3D_AttrInfo attrInfo;
+	C3D_Effect effect;
+
+	u32 texConfig;
+	C3D_Tex* tex[3]; ///< Texture bound to each of the three units (see C3D_TexBind)
+
+	u32 texEnvBuf, texEnvBufClr; ///< texEnvBuf also carries the fog/gas mode bits set by C3D_FogGasMode
+	u32 fogClr; ///< Fog color set by C3D_FogColor
+	C3D_FogLut* fogLut; ///< Fog table set by C3D_FogLutBind
+
+	C3D_FrameBuf fb; ///< Current framebuffer; C3D_SetFrameBuf copies into it
+	u32 viewport[5];
+	u32 scissor[3];
+} C3D_Context;
+
+enum ///< Bits of C3D_Context::flags
+{
+	C3DiF_Active = BIT(0), // Several setters (fog, C3D_SetFrameBuf) do nothing unless this is set
+	C3DiF_DrawUsed = BIT(1), // Set at the end of C3D_DrawElements and C3D_ImmDrawEnd
+	C3DiF_AttrInfo = BIT(2), // Set by C3D_GetAttrInfo
+	C3DiF_Effect = BIT(4), // Set whenever an effect setter calls getEffect
+	C3DiF_FrameBuf = BIT(5), // Set by C3D_GetFrameBuf and C3D_SetFrameBuf
+	C3DiF_Viewport = BIT(6),
+	C3DiF_Scissor = BIT(7),
+	C3DiF_Program = BIT(8),
+	C3DiF_TexEnvBuf = BIT(9), // Set by C3D_FogGasMode and C3D_FogColor
+	C3DiF_VshCode = BIT(11),
+	C3DiF_GshCode = BIT(12),
+	C3DiF_TexStatus = BIT(14),
+	C3DiF_FogLut = BIT(17), // Set or cleared by C3D_FogLutBind
+	C3DiF_Gas = BIT(18),
+
+	C3DiF_Reset = BIT(19),
+
+#define C3DiF_Tex(n) BIT(23+(n)) // Per texture unit bit; set by C3D_TexBind
+	C3DiF_TexAll = 7 << 23, // The three C3DiF_Tex(n) bits together
+};
+
+// The single library-wide context instance
+static C3D_Context __C3D_Context;
+
+/** Returns the address of the single library-wide context. */
+static inline C3D_Context* C3Di_GetContext(void)
+{
+	C3D_Context* const ctx = &__C3D_Context;
+	return ctx;
+}
+
+/** True when `addr` lies inside [OS_VRAM_VADDR, OS_VRAM_VADDR + OS_VRAM_SIZE). */
+static inline bool addrIsVRAM(const void* addr)
+{
+	const u32 vaddr = (u32)addr;
+	if (vaddr < OS_VRAM_VADDR)
+		return false;
+	return vaddr < OS_VRAM_VADDR + OS_VRAM_SIZE;
+}
+
+/**
+ * Classifies an address by VRAM half: VRAM_ALLOC_A below the midpoint of the
+ * VRAM range, VRAM_ALLOC_B otherwise. The address is not checked to be in
+ * VRAM at all.
+ */
+static inline vramAllocPos addrGetVRAMBank(const void* addr)
+{
+	const u32 vaddr = (u32)addr;
+	if (vaddr < OS_VRAM_VADDR + OS_VRAM_SIZE/2)
+		return VRAM_ALLOC_A;
+	return VRAM_ALLOC_B;
+}
+
+static void C3Di_UpdateContext(void);
+static void C3Di_AttrInfoBind(C3D_AttrInfo* info);
+static void C3Di_FrameBufBind(C3D_FrameBuf* fb);
+static void C3Di_TexEnvBind(int id, C3D_TexEnv* env);
+static void C3Di_SetTex(int unit, C3D_Tex* tex);
+static void C3Di_EffectBind(C3D_Effect* effect);
+
+static void C3Di_DirtyUniforms(void);
+static void C3Di_LoadShaderUniforms(shaderInstance_s* si);
+static void C3Di_ClearShaderUniforms(GPU_SHADER_TYPE type);
+
+static bool C3Di_SplitFrame(u32** pBuf, u32* pSize);
+
+static void C3Di_RenderQueueInit(void);
+static void C3Di_RenderQueueExit(void);
+static void C3Di_RenderQueueWaitDone(void);
+static void C3Di_RenderQueueEnableVBlank(void);
+static void C3Di_RenderQueueDisableVBlank(void);
+
+
+
+
+
+
+
+
+
+/**
+ * Resets an attribute layout to "no attributes": everything zeroed except
+ * bits 16-27 of flags[1], which start out set (AttrInfo_AddLoader clears one
+ * of them per attribute added).
+ */
+static void AttrInfo_Init(C3D_AttrInfo* info)
+{
+	memset(info, 0, sizeof(C3D_AttrInfo));
+	info->flags[1] = 0xFFF << 16;
+}
+
+/**
+ * Appends one attribute to the layout.
+ *
+ * @param info   Layout to extend.
+ * @param regId  Input register for the attribute (expected to fit in 4 bits);
+ *               a negative value selects the attribute's own index.
+ * @param format Component format, packed with GPU_ATTRIBFMT.
+ * @param count  Component count, packed with GPU_ATTRIBFMT.
+ * @return Index of the new attribute, or -1 when 12 attributes already exist.
+ */
+static int AttrInfo_AddLoader(C3D_AttrInfo* info, int regId, GPU_FORMATS format, int count)
+{
+	if (info->attrCount == 12) return -1;
+	int id = info->attrCount++;
+	if (regId < 0) regId = id;
+	if (id < 8)
+		info->flags[0] |= GPU_ATTRIBFMT(id, count, format);
+	else
+		info->flags[1] |= GPU_ATTRIBFMT(id-8, count, format);
+
+	// Clear this attribute's bit in the 16-27 range and store the index of the
+	// last attribute in the top nibble. The shift is done in u32 because
+	// id << 28 does not fit in a signed int once id reaches 8.
+	info->flags[1] = (info->flags[1] &~ (0xF0000000 | BIT(id+16))) | ((u32)id << 28);
+
+	// The permutation is 64 bits wide and id*4 reaches 44, so the shift has to
+	// happen in u64: an int shift of 32 or more is undefined and would lose the
+	// register ids of attributes 8-11, and an int result with bit 31 set would
+	// be sign-extended over them.
+	info->permutation |= (u64)regId << (id*4);
+	return id;
+}
+
+/**
+ * Returns the context's attribute layout for modification and flags it
+ * (C3DiF_AttrInfo) as changed.
+ */
+static C3D_AttrInfo* C3D_GetAttrInfo(void)
+{
+	C3D_Context* const context = C3Di_GetContext();
+	C3D_AttrInfo* const info = &context->attrInfo;
+
+	context->flags |= C3DiF_AttrInfo;
+	return info;
+}
+
+/**
+ * Emits the GPU commands describing an attribute layout: the two format
+ * words, the attribute count minus one (to two registers), and the two
+ * permutation words.
+ */
+static void C3Di_AttrInfoBind(C3D_AttrInfo* info)
+{
+	// The hardware registers take "number of attributes - 1"
+	const int lastAttr = info->attrCount - 1;
+
+	GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFERS_FORMAT_LOW, (u32*)info->flags, sizeof(info->flags)/sizeof(u32));
+	GPUCMD_AddMaskedWrite(GPUREG_VSH_INPUTBUFFER_CONFIG, 0xB, 0xA0000000 | lastAttr);
+	GPUCMD_AddWrite(GPUREG_VSH_NUM_ATTR, lastAttr);
+	GPUCMD_AddIncrementalWrites(GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW, (u32*)&info->permutation, 2);
+}
+
+
+
+
+
+
+
+
+
+
+
+#define BUFFER_BASE_PADDR 0x18000000
+
+
+
+static void C3D_DrawElements(GPU_Primitive_t primitive, int count) // Calls C3Di_UpdateContext, then emits the register sequence for an element draw of `count` vertices
+{
+	C3Di_UpdateContext();
+
+	// Set primitive type (GPU_TRIANGLES is submitted as GPU_GEOMETRY_PRIM)
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 2, primitive != GPU_TRIANGLES ? primitive : GPU_GEOMETRY_PRIM);
+	// Start a new primitive (breaks off a triangle strip/fan)
+	GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1);
+	// Number of vertices
+	GPUCMD_AddWrite(GPUREG_NUMVERTICES, count);
+	// First vertex (always 0 here)
+	GPUCMD_AddWrite(GPUREG_VERTEX_OFFSET, 0);
+	// Enable triangle element drawing mode (done unconditionally in this version)
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 2, 0x100);
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 2, 0x100);
+	// Enable drawing mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 0);
+	// Trigger element drawing
+	GPUCMD_AddWrite(GPUREG_DRAWELEMENTS, 1);
+	// Go back to configuration mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 1);
+	// Disable triangle element drawing mode again (also unconditional)
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 2, 0);
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 2, 0);
+	// Clear the post-vertex cache
+	GPUCMD_AddWrite(GPUREG_VTX_FUNC, 1);
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0);
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0); // NOTE(review): same write issued twice; the reason is not visible here, so keep both
+
+	C3Di_GetContext()->flags |= C3DiF_DrawUsed;
+}
+
+
+
+
+
+
+
+
+/** Returns the context's effect state for modification and flags it (C3DiF_Effect) as changed. */
+static inline C3D_Effect* getEffect()
+{
+	C3D_Context* const context = C3Di_GetContext();
+	C3D_Effect* const effect = &context->effect;
+
+	context->flags |= C3DiF_Effect;
+	return effect;
+}
+
+/** Records the depth-map mode plus scale and offset; the two floats are stored in 24-bit float encoding. */
+static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset)
+{
+	C3D_Effect* fx = getEffect();
+	const u32 scale24 = f32tof24(zScale);
+	const u32 offset24 = f32tof24(zOffset);
+
+	fx->zBuffer = bIsZBuffer;
+	fx->zScale  = scale24;
+	fx->zOffset = offset24;
+}
+
+/** Records the face culling mode in the effect state. */
+static void C3D_CullFace(GPU_CULLMODE mode)
+{
+	getEffect()->cullMode = mode;
+}
+
+/**
+ * Sets the fixed stencil test configuration used by this build: test
+ * disabled (bit 0 clear), function GPU_ALWAYS in bits 4 and up, 0xFF in the
+ * top byte.
+ */
+static void C3D_StencilTest(void)
+{
+	C3D_Effect* e = getEffect();
+	// Shift in unsigned arithmetic: 0xFF << 24 does not fit in a signed int
+	e->stencilMode = ((u32)GPU_ALWAYS << 4) | ((u32)0xFF << 24);
+}
+
+/** Sets all three stencil operations (packed at bits 0, 4 and 8) to GPU_STENCIL_KEEP. */
+static void C3D_StencilOp(void)
+{
+	C3D_Effect* fx = getEffect();
+	fx->stencilOp = (GPU_STENCIL_KEEP << 8) | (GPU_STENCIL_KEEP << 4) | GPU_STENCIL_KEEP;
+}
+
+/** Records the early depth test switch, comparison function and reference value. */
+static void C3D_EarlyDepthTest(bool enable, GPU_EARLYDEPTHFUNC function, u32 ref)
+{
+	C3D_Effect* fx = getEffect();
+	fx->earlyDepthRef  = ref;
+	fx->earlyDepthFunc = function;
+	fx->earlyDepth     = enable;
+}
+
+/**
+ * Packs the depth test word: enable in bit 0, the low three bits of the
+ * function at bit 4, the write mask from bit 8 up.
+ */
+static void C3D_DepthTest(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask)
+{
+	C3D_Effect* fx = getEffect();
+	fx->depthTest = (enable ? 1 : 0) | ((function & 7) << 4) | (writemask << 8);
+}
+
+/**
+ * Packs the alpha test word: enable in bit 0, the low three bits of the
+ * function at bit 4, the reference value from bit 8 up.
+ */
+static void C3D_AlphaTest(bool enable, GPU_TESTFUNC function, int ref)
+{
+	C3D_Effect* fx = getEffect();
+	fx->alphaTest = (enable ? 1 : 0) | ((function & 7) << 4) | (ref << 8);
+}
+
+/**
+ * Packs the blend configuration word and switches the fragment operation
+ * to blending.
+ *
+ * Layout of the packed word: color equation at bit 0, alpha equation at
+ * bit 8, then the four factors at bits 16, 20, 24 and 28.
+ */
+static void C3D_AlphaBlend(GPU_BLENDEQUATION colorEq, GPU_BLENDEQUATION alphaEq, GPU_BLENDFACTOR srcClr, GPU_BLENDFACTOR dstClr, GPU_BLENDFACTOR srcAlpha, GPU_BLENDFACTOR dstAlpha)
+{
+	C3D_Effect* e = getEffect();
+	// Every field is widened to u32 before shifting: a 4-bit factor of 8 or
+	// more shifted to bit 28 does not fit in a signed int.
+	e->alphaBlend = (u32)colorEq | ((u32)alphaEq << 8) | ((u32)srcClr << 16) | ((u32)dstClr << 20) | ((u32)srcAlpha << 24) | ((u32)dstAlpha << 28);
+	// Bits 8-15 of fragOpMode = 0x01 (C3D_ColorLogicOp sets them back to 0)
+	e->fragOpMode &= ~0xFF00;
+	e->fragOpMode |= 0x0100;
+}
+
+/** Records the color logic operation and clears bits 8-15 of fragOpMode (which C3D_AlphaBlend sets to 0x01). */
+static void C3D_ColorLogicOp(GPU_LOGICOP op)
+{
+	C3D_Effect* fx = getEffect();
+	fx->clrLogicOp = op;
+	fx->fragOpMode &= ~0xFF00;
+}
+
+/**
+ * Replaces the low byte of fragOpMode with `mode` and bits 16-23 with the
+ * constant 0xE4; bits 8-15 are left as they were.
+ */
+static void C3D_FragOpMode(GPU_FRAGOPMODE mode)
+{
+	C3D_Effect* fx = getEffect();
+	fx->fragOpMode = (fx->fragOpMode & ~0xFF00FF) | (0xE40000 | mode);
+}
+
+/**
+ * Packs the shadow parameters as two 16-bit floats: (scale + bias) in the
+ * low half and -scale in the high half.
+ */
+static void C3D_FragOpShadow(float scale, float bias)
+{
+	C3D_Effect* fx = getEffect();
+	fx->fragOpShadow = (f32tof16(-scale)<<16) | f32tof16(scale+bias);
+}
+
+static void C3Di_EffectBind(C3D_Effect* e) // Emits the GPU commands for the whole effect state
+{
+	GPUCMD_AddWrite(GPUREG_DEPTHMAP_ENABLE, e->zBuffer ? 1 : 0);
+	GPUCMD_AddWrite(GPUREG_FACECULLING_CONFIG, e->cullMode & 0x3);
+	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHMAP_SCALE, (u32*)&e->zScale, 2); // zScale and zOffset, relying on their adjacency in C3D_Effect
+	GPUCMD_AddIncrementalWrites(GPUREG_FRAGOP_ALPHA_TEST, (u32*)&e->alphaTest, 4); // alphaTest, stencilMode, stencilOp, depthTest in struct order
+	GPUCMD_AddMaskedWrite(GPUREG_GAS_DELTAZ_DEPTH, 0x8, (u32)GPU_MAKEGASDEPTHFUNC((e->depthTest>>4)&7) << 24); // Derived from the depth test function (bits 4-6 of depthTest)
+	GPUCMD_AddWrite(GPUREG_BLEND_COLOR, 0); // Blend color is always 0 in this version
+	GPUCMD_AddWrite(GPUREG_BLEND_FUNC, e->alphaBlend);
+	GPUCMD_AddWrite(GPUREG_LOGIC_OP, e->clrLogicOp);
+	GPUCMD_AddMaskedWrite(GPUREG_COLOR_OPERATION, 7, e->fragOpMode);
+	GPUCMD_AddWrite(GPUREG_FRAGOP_SHADOW, e->fragOpShadow);
+	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_TEST1, 1, e->earlyDepth ? 1 : 0);
+	GPUCMD_AddWrite(GPUREG_EARLYDEPTH_TEST2, e->earlyDepth ? 1 : 0);
+	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_FUNC, 1, e->earlyDepthFunc);
+	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_DATA, 0x7, e->earlyDepthRef);
+}
+
+
+
+
+
+
+
+/**
+ * Packs a fog table from floats.
+ *
+ * data[0..127] are the values and data[128..255] the matching differences.
+ * Each value is scaled by 0x800 and limited to 0..0x7FF; each difference is
+ * scaled by 0x800, limited to -0x1000..0xFFF and kept as a 13-bit
+ * two's-complement field. The packed entry is difference | (value << 13).
+ */
+static void FogLut_FromArray(C3D_FogLut* lut, const float data[256])
+{
+	const float* values = data;
+	const float* diffs = data + 128;
+	int idx;
+
+	for (idx = 0; idx < 128; idx ++)
+	{
+		float value = values[idx];
+		float delta = diffs[idx];
+		u32 packedValue = 0;
+		u32 packedDelta = 0;
+
+		// Non-positive values stay 0
+		if (value > 0.0f)
+		{
+			value *= 0x800;
+			if (value < 0x800)
+				packedValue = (u32)value;
+			else
+				packedValue = 0x7FF;
+		}
+
+		if (delta != 0.0f)
+		{
+			delta *= 0x800;
+			if (delta < -0x1000)
+				delta = -0x1000;
+			else if (delta > 0xFFF)
+				delta = 0xFFF;
+			packedDelta = (s32)delta & 0x1FFF;
+		}
+
+		lut->data[idx] = packedDelta | (packedValue << 13);
+	}
+}
+
+/**
+ * Stores the fog mode (bits 0-2), gas mode (bit 3) and Z-flip (bit 16) in
+ * the context's texEnvBuf word and flags it as changed. Does nothing while
+ * the context is not active.
+ */
+static void C3D_FogGasMode(GPU_FOGMODE fogMode, GPU_GASMODE gasMode, bool zFlip)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	if (context->flags & C3DiF_Active)
+	{
+		context->flags |= C3DiF_TexEnvBuf;
+		context->texEnvBuf = (context->texEnvBuf & ~0x100FF)
+			| ((fogMode&7) | ((gasMode&1)<<3) | (zFlip ? BIT(16) : 0));
+	}
+}
+
+/** Stores the fog color and flags texEnvBuf as changed. Does nothing while the context is not active. */
+static void C3D_FogColor(u32 color)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	if (context->flags & C3DiF_Active)
+	{
+		context->fogClr = color;
+		context->flags |= C3DiF_TexEnvBuf;
+	}
+}
+
+/**
+ * Selects the fog table to use. A NULL table only clears the C3DiF_FogLut
+ * flag; the previously stored pointer is left in place. Does nothing while
+ * the context is not active.
+ */
+static void C3D_FogLutBind(C3D_FogLut* lut)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	if (!(context->flags & C3DiF_Active))
+		return;
+
+	if (!lut)
+	{
+		context->flags &= ~C3DiF_FogLut;
+		return;
+	}
+
+	context->fogLut = lut;
+	context->flags |= C3DiF_FogLut;
+}
+
+
+
+
+
+
+
+
+
+
+static const u8 colorFmtSizes[] = {2,1,0,0,0}; // Indexed by GPU_COLORBUF; bytes per pixel is 2 + entry
+static const u8 depthFmtSizes[] = {0,0,1,2}; // Indexed by GPU_DEPTHBUF; bytes per pixel is 2 + entry
+
+/** Size of a color buffer: width * height * (2 + colorFmtSizes[fmt]). `fmt` is not range-checked. */
+static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt)
+{
+	const u32 pixels = width*height;
+	const u32 bytesPerPixel = 2+colorFmtSizes[fmt];
+	return pixels*bytesPerPixel;
+}
+
+/** Size of a depth buffer: width * height * (2 + depthFmtSizes[fmt]). `fmt` is not range-checked. */
+static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt)
+{
+	const u32 pixels = width*height;
+	const u32 bytesPerPixel = 2+depthFmtSizes[fmt];
+	return pixels*bytesPerPixel;
+}
+
+/** Returns the context's framebuffer for modification and flags it (C3DiF_FrameBuf) as changed. */
+static C3D_FrameBuf* C3D_GetFrameBuf(void)
+{
+	C3D_Context* const context = C3Di_GetContext();
+	C3D_FrameBuf* const current = &context->fb;
+
+	context->flags |= C3DiF_FrameBuf;
+	return current;
+}
+
+/**
+ * Makes `fb` the current framebuffer by copying it into the context (the
+ * copy is skipped when `fb` already is the context's own framebuffer) and
+ * flags it as changed. Does nothing while the context is not active.
+ */
+static void C3D_SetFrameBuf(C3D_FrameBuf* fb)
+{
+	C3D_Context* context = C3Di_GetContext();
+	C3D_FrameBuf* current = &context->fb;
+
+	if (context->flags & C3DiF_Active)
+	{
+		if (fb != current)
+			memcpy(current, fb, sizeof(*fb));
+		context->flags |= C3DiF_FrameBuf;
+	}
+}
+
+static void C3Di_FrameBufBind(C3D_FrameBuf* fb) // Emits the GPU commands that make `fb` the render destination
+{
+	u32 param[4] = { 0, 0, 0, 0 };
+
+	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 1);
+
+	param[0] = osConvertVirtToPhys(fb->depthBuf) >> 3; // Buffer locations are sent as physical address / 8
+	param[1] = osConvertVirtToPhys(fb->colorBuf) >> 3;
+	param[2] = 0x01000000 | (((u32)(fb->height-1) & 0xFFF) << 12) | (fb->width & 0xFFF); // Dimensions word: width in bits 0-11, height-1 in bits 12-23
+	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, param, 3);
+
+	GPUCMD_AddWrite(GPUREG_RENDERBUF_DIM,       param[2]); // Same dimensions word again
+	GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT,  fb->depthFmt);
+	GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT,  colorFmtSizes[fb->colorFmt] | ((u32)fb->colorFmt << 16));
+	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_BLOCK32, fb->block32 ? 1 : 0);
+
+	// Enable or disable color/depth buffers (four consecutive registers: two get the color mask, two the depth mask)
+	param[0] = param[1] = fb->colorBuf ? fb->colorMask : 0;
+	param[2] = param[3] = fb->depthBuf ? fb->depthMask : 0;
+	GPUCMD_AddIncrementalWrites(GPUREG_COLORBUFFER_READ, param, 4);
+}
+
+/**
+ * Fills the color and/or depth buffer of a framebuffer with a constant.
+ *
+ * A buffer is filled only when its bit is set in clearBits AND the buffer is
+ * actually attached (non-NULL). When nothing qualifies, no fill is issued.
+ * Testing only C3D_CLEAR_COLOR and treating "not color" as "depth" would
+ * clear the depth buffer for clearBits == 0 and issue fills on NULL buffers.
+ *
+ * @param frameBuf   Framebuffer to clear.
+ * @param clearBits  Combination of C3D_CLEAR_COLOR / C3D_CLEAR_DEPTH.
+ * @param clearColor Fill value for the color buffer.
+ * @param clearDepth Fill value for the depth buffer.
+ */
+static void C3D_FrameBufClear(C3D_FrameBuf* frameBuf, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth)
+{
+	bool doColor = (clearBits & C3D_CLEAR_COLOR) && frameBuf->colorBuf;
+	bool doDepth = (clearBits & C3D_CLEAR_DEPTH) && frameBuf->depthBuf;
+	if (!doColor && !doDepth)
+		return;
+
+	u32 size = (u32)frameBuf->width * frameBuf->height;
+	u32 cfs = colorFmtSizes[frameBuf->colorFmt];
+	u32 dfs = depthFmtSizes[frameBuf->depthFmt];
+	// Bytes per pixel is 2 + table entry; the entry also goes into bits 8+ of the fill control word
+	void* colorBufEnd = (u8*)frameBuf->colorBuf + size*(2+cfs);
+	void* depthBufEnd = (u8*)frameBuf->depthBuf + size*(2+dfs);
+
+	if (doColor && doDepth)
+		GX_MemoryFill(
+			(u32*)frameBuf->colorBuf, clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8),
+			(u32*)frameBuf->depthBuf, clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8));
+	else if (doColor)
+		GX_MemoryFill(
+			(u32*)frameBuf->colorBuf, clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8),
+			NULL, 0, NULL, 0);
+	else
+		GX_MemoryFill(
+			(u32*)frameBuf->depthBuf, clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8),
+			NULL, 0, NULL, 0);
+}
+
+/**
+ * Queues a display transfer of the framebuffer's color buffer to the
+ * framebuffer of the given screen/side. The same dimensions (those of
+ * frameBuf) are used for input and output.
+ */
+static void C3D_FrameBufTransfer(C3D_FrameBuf* frameBuf, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags)
+{
+	u32* source = (u32*)frameBuf->colorBuf;
+	u32* destination = (u32*)gfxGetFramebuffer(screen, side, NULL, NULL);
+	const u32 dimensions = GX_BUFFER_DIM((u32)frameBuf->width, (u32)frameBuf->height);
+
+	GX_DisplayTransfer(source, dimensions, destination, dimensions, transferFlags);
+}
+
+
+
+
+
+
+
+static void C3D_ImmDrawBegin(GPU_Primitive_t primitive) // Calls C3Di_UpdateContext, then opens immediate-mode submission; vertices follow via C3D_ImmSendAttrib until C3D_ImmDrawEnd
+{
+	C3Di_UpdateContext();
+
+	// Set primitive type
+	GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 2, primitive);
+	// Start a new primitive (breaks off a triangle strip/fan)
+	GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1);
+	// Enable vertex submission mode
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 1, 1);
+	// Enable drawing mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 0);
+	// Begin immediate-mode vertex submission
+	GPUCMD_AddWrite(GPUREG_FIXEDATTRIB_INDEX, 0xF);
+}
+
+/** Stores the low 24 bits of `val` at p[0..2], least significant byte first. */
+static inline void write24(u8* p, u32 val)
+{
+	int byte;
+	for (byte = 0; byte < 3; byte ++)
+		p[byte] = val >> (8*byte);
+}
+
+static void C3D_ImmSendAttrib(float x, float y, float z, float w) // Submits one four-component attribute in immediate mode
+{
+	union
+	{
+		u32 packed[3]; // The same 12 bytes viewed as three register words
+		struct
+		{
+			u8 x[3]; // Each component is a 24-bit float stored in three bytes
+			u8 y[3];
+			u8 z[3];
+			u8 w[3];
+		};
+	} param;
+
+	// Convert the values to float24
+	write24(param.x, f32tof24(x));
+	write24(param.y, f32tof24(y));
+	write24(param.z, f32tof24(z));
+	write24(param.w, f32tof24(w));
+
+	// Reverse the packed words (swap first and last; the middle one stays)
+	u32 p = param.packed[0];
+	param.packed[0] = param.packed[2];
+	param.packed[2] = p;
+
+	// Send the attribute
+	GPUCMD_AddIncrementalWrites(GPUREG_FIXEDATTRIB_DATA0, param.packed, 3);
+}
+
+static void C3D_ImmDrawEnd(void) // Closes the submission opened by C3D_ImmDrawBegin and marks the frame as having drawn something
+{
+	// Go back to configuration mode
+	GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 1);
+	// Disable vertex submission mode
+	GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 1, 0);
+	// Clear the post-vertex cache
+	GPUCMD_AddWrite(GPUREG_VTX_FUNC, 1);
+
+	C3Di_GetContext()->flags |= C3DiF_DrawUsed;
+}
+
+
+
+static C3D_RenderTarget *linkedTarget[3]; // Output slots: 0 = top screen (side other than GFX_RIGHT), 1 = top screen GFX_RIGHT, 2 = bottom screen
+
+static bool inFrame, inSafeTransfer; // inFrame: between C3D_FrameBegin and C3D_FrameEnd; inSafeTransfer: set by C3Di_SafeTextureCopy, cleared in onQueueFinish
+static bool needSwapTop, needSwapBot, isTopStereo; // Set by C3D_FrameEnd, consumed by onQueueFinish
+static u32 vblankCounter[2]; // Incremented by onVBlank0 / onVBlank1
+
+static void C3Di_RenderTargetDestroy(C3D_RenderTarget* target);
+
+/** GSP event callback registered for GSPGPU_EVENT_VBlank0: counts the event. The argument is ignored. */
+static void onVBlank0(void* unused)
+{
+	(void)unused;
+	vblankCounter[0] += 1;
+}
+
+/** GSP event callback registered for GSPGPU_EVENT_VBlank1: counts the event. The argument is ignored. */
+static void onVBlank1(void* unused)
+{
+	(void)unused;
+	vblankCounter[1] += 1;
+}
+
+/**
+ * Queue callback installed by C3Di_RenderQueueInit.
+ *
+ * After a safe texture transfer it only clears the transfer flag (and stops
+ * and clears the queue when a frame is in progress). Otherwise it performs
+ * the buffer swaps requested by C3D_FrameEnd.
+ */
+static void onQueueFinish(gxCmdQueue_s* queue)
+{
+	if (!inSafeTransfer)
+	{
+		// End of a rendered frame: present whatever was requested
+		if (needSwapTop)
+		{
+			gfxScreenSwapBuffers(GFX_TOP, isTopStereo);
+			needSwapTop = false;
+		}
+		if (needSwapBot)
+		{
+			gfxScreenSwapBuffers(GFX_BOTTOM, false);
+			needSwapBot = false;
+		}
+		return;
+	}
+
+	inSafeTransfer = false;
+	if (!inFrame)
+		return;
+
+	gxCmdQueueStop(queue);
+	gxCmdQueueClear(queue);
+}
+
+/**
+ * Blocks until both VBlank counters have moved on from the values they had
+ * on entry, waiting on GSP events in between.
+ */
+static void C3D_FrameSync(void)
+{
+	const u32 first0 = vblankCounter[0];
+	const u32 first1 = vblankCounter[1];
+
+	for (;;)
+	{
+		gspWaitForAnyEvent();
+
+		const u32 now0 = vblankCounter[0];
+		const u32 now1 = vblankCounter[1];
+		if (now0 != first0 && now1 != first1)
+			break;
+	}
+}
+
+/**
+ * Waits (up to `timeout`, as interpreted by gxCmdQueueWait) for the
+ * context's command queue, then stops and clears it.
+ *
+ * @return false when the wait did not succeed (queue left untouched),
+ *         true otherwise.
+ */
+static bool C3Di_WaitAndClearQueue(s64 timeout)
+{
+	gxCmdQueue_s* const q = &C3Di_GetContext()->gxQueue;
+
+	const bool finished = gxCmdQueueWait(q, timeout);
+	if (finished)
+	{
+		gxCmdQueueStop(q);
+		gxCmdQueueClear(q);
+	}
+	return finished;
+}
+
+static void C3Di_RenderQueueEnableVBlank(void) // Registers the two counting callbacks for the VBlank0/VBlank1 GSP events
+{
+	gspSetEventCallback(GSPGPU_EVENT_VBlank0, onVBlank0, NULL, false);
+	gspSetEventCallback(GSPGPU_EVENT_VBlank1, onVBlank1, NULL, false);
+}
+
+static void C3Di_RenderQueueDisableVBlank(void) // Unregisters the VBlank0/VBlank1 callbacks by installing NULL
+{
+	gspSetEventCallback(GSPGPU_EVENT_VBlank0, NULL, NULL, false);
+	gspSetEventCallback(GSPGPU_EVENT_VBlank1, NULL, NULL, false);
+}
+
+/**
+ * Starts the render queue: enables VBlank counting, binds the context's GX
+ * queue, installs onQueueFinish as its callback and sets it running.
+ */
+static void C3Di_RenderQueueInit(void)
+{
+	gxCmdQueue_s* const queue = &C3Di_GetContext()->gxQueue;
+
+	C3Di_RenderQueueEnableVBlank();
+
+	GX_BindQueue(queue);
+	gxCmdQueueSetCallback(queue, onQueueFinish, NULL);
+	gxCmdQueueRun(queue);
+}
+
+/**
+ * Shuts the render queue down: waits for and clears outstanding work,
+ * removes the queue callback, unbinds the queue and disables VBlank counting.
+ */
+static void C3Di_RenderQueueExit(void)
+{
+	gxCmdQueue_s* const queue = &C3Di_GetContext()->gxQueue;
+
+	C3Di_WaitAndClearQueue(-1);
+	gxCmdQueueSetCallback(queue, NULL, NULL);
+	GX_BindQueue(NULL);
+
+	C3Di_RenderQueueDisableVBlank();
+}
+
+static void C3Di_RenderQueueWaitDone(void) // Waits with timeout -1 for the queue, then stops and clears it; the result is ignored
+{
+	C3Di_WaitAndClearQueue(-1);
+}
+
+/**
+ * Begins a frame.
+ *
+ * Waits for the previous queue contents to finish (or, with
+ * C3D_FRAME_NONBLOCK, only polls), then installs the context's command
+ * buffer. Other flag bits are not examined.
+ *
+ * @return false when a frame is already in progress or the queue is not
+ *         finished; true once the frame has been started.
+ */
+static bool C3D_FrameBegin(u8 flags)
+{
+	if (inFrame)
+		return false;
+
+	const s64 timeout = (flags & C3D_FRAME_NONBLOCK) ? 0 : -1;
+	if (!C3Di_WaitAndClearQueue(timeout))
+		return false;
+
+	C3D_Context* context = C3Di_GetContext();
+	inFrame = true;
+	GPUCMD_SetBuffer(context->cmdBuf, context->cmdBufSize, 0);
+	return true;
+}
+
+/**
+ * Directs rendering to `target` for the current frame: marks the target as
+ * used, makes its framebuffer current and sets the viewport to the full
+ * framebuffer.
+ *
+ * @return false (and no effect) outside a frame, true otherwise.
+ */
+static bool C3D_FrameDrawOn(C3D_RenderTarget* target)
+{
+	if (inFrame)
+	{
+		C3D_FrameBuf* fb = &target->frameBuf;
+
+		target->used = true;
+		C3D_SetFrameBuf(fb);
+		C3D_SetViewport(0, 0, fb->width, fb->height);
+		return true;
+	}
+	return false;
+}
+
+/**
+ * Submits the commands recorded so far in the current frame. Nothing
+ * happens outside a frame or when C3Di_SplitFrame reports nothing to submit.
+ * The size from C3Di_SplitFrame is multiplied by 4 before submission.
+ */
+static void C3D_FrameSplit(u8 flags)
+{
+	u32* commands;
+	u32 wordCount;
+
+	if (!inFrame)
+		return;
+	if (!C3Di_SplitFrame(&commands, &wordCount))
+		return;
+
+	GX_ProcessCommandList(commands, wordCount*4, flags);
+}
+
+static void C3D_FrameEnd(u8 flags) // Ends the frame: submits the remaining commands, queues the display transfers of used targets and starts the queue
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	if (!inFrame) return;
+
+	C3D_FrameSplit(flags);
+	GPUCMD_SetBuffer(NULL, 0, 0); // Detach the command buffer until the next C3D_FrameBegin
+	inFrame = false;
+
+	// Flush the entire linear heap unless the caller asked for the command list itself to be flushed
+	if (!(flags & GX_CMDLIST_FLUSH))
+	{
+		extern u32 __ctru_linear_heap;
+		extern u32 __ctru_linear_heap_size;
+		GSPGPU_FlushDataCache((void*)__ctru_linear_heap, __ctru_linear_heap_size);
+	}
+
+	C3D_RenderTarget* target;
+	isTopStereo = false;
+	needSwapTop = true; // Both swaps are requested every frame; onQueueFinish performs them
+	needSwapBot = true;
+
+	for (int i = 2; i >= 0; i --) // Slots are visited in the order bottom, top-right, top-left
+	{
+		target = linkedTarget[i];
+		if (!target || !target->used)
+			continue;
+
+		target->used = false;
+		C3D_FrameBufTransfer(&target->frameBuf, target->screen, target->side, target->transferFlags);
+
+		if (target->screen == GFX_TOP && target->side == GFX_RIGHT) isTopStereo = true;
+	}
+
+	gxCmdQueueRun(&ctx->gxQueue);
+}
+
+/**
+ * Initializes `target` with freshly allocated VRAM color and depth buffers.
+ *
+ * The target is zeroed first. The depth buffer is requested in the VRAM
+ * bank opposite to the color buffer's and, failing that, in the same bank.
+ * If either allocation fails, anything allocated is released and the target
+ * is left zeroed; no error is reported.
+ */
+void C3D_RenderTargetCreate(C3D_RenderTarget* target, int width, int height, GPU_COLORBUF colorFmt, GPU_DEPTHBUF depthFmt)
+{
+	const size_t colorBytes = C3D_CalcColorBufSize(width,height,colorFmt);
+	const size_t depthBytes = C3D_CalcDepthBufSize(width,height,depthFmt);
+	memset(target, 0, sizeof(C3D_RenderTarget));
+
+	void* color = vramAlloc(colorBytes);
+	if (!color)
+		return;
+
+	const vramAllocPos colorBank = addrGetVRAMBank(color);
+	void* depth = vramAllocAt(depthBytes, colorBank ^ VRAM_ALLOC_ANY); // Opposite bank first...
+	if (!depth)
+		depth = vramAllocAt(depthBytes, colorBank); // ...then the same bank
+	if (!depth)
+	{
+		vramFree(color);
+		return;
+	}
+
+	C3D_FrameBuf* fb = &target->frameBuf;
+	C3D_FrameBufAttrib(fb, width, height, false);
+	C3D_FrameBufColor(fb, color, colorFmt);
+	C3D_FrameBufDepth(fb, depth, depthFmt);
+}
+
+/** Releases the target's color and depth buffers with vramFree; the pointers in the target are not reset. */
+static void C3Di_RenderTargetDestroy(C3D_RenderTarget* target)
+{
+	C3D_FrameBuf* fb = &target->frameBuf;
+	vramFree(fb->colorBuf);
+	vramFree(fb->depthBuf);
+}
+
+/**
+ * Destroys a render target. Calling this inside a frame triggers
+ * svcBreak(USERBREAK_PANIC). A linked target is detached from its output
+ * first; an unlinked one waits for the queue to drain instead.
+ */
+static void C3D_RenderTargetDelete(C3D_RenderTarget* target)
+{
+	if (inFrame)
+		svcBreak(USERBREAK_PANIC); // Shouldn't happen.
+
+	if (!target->linked)
+		C3Di_WaitAndClearQueue(-1);
+	else
+		C3D_RenderTargetDetachOutput(target);
+
+	C3Di_RenderTargetDestroy(target);
+}
+
+/**
+ * Assigns `target` (or nothing, when NULL) to the output slot for the given
+ * screen/side: slot 2 for the bottom screen, slot 1 for GFX_RIGHT on the
+ * other screen, slot 0 otherwise.
+ *
+ * A target previously occupying the slot is unlinked, and outside a frame
+ * the queue is drained before the slot is reassigned.
+ */
+static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags)
+{
+	int slot;
+	if (screen==GFX_BOTTOM)
+		slot = 2;
+	else
+		slot = (side==GFX_RIGHT) ? 1 : 0;
+
+	C3D_RenderTarget* previous = linkedTarget[slot];
+	if (previous)
+	{
+		previous->linked = false;
+		if (!inFrame)
+			C3Di_WaitAndClearQueue(-1);
+	}
+
+	linkedTarget[slot] = target;
+	if (!target)
+		return;
+
+	target->linked = true;
+	target->transferFlags = transferFlags;
+	target->screen = screen;
+	target->side = side;
+}
+
+/**
+ * Texture copy for use outside a frame: drains the queue, flags the
+ * transfer as "safe" (so onQueueFinish does not swap buffers), queues the
+ * copy and starts the queue.
+ */
+static void C3Di_SafeTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
+{
+	gxCmdQueue_s* const queue = &C3Di_GetContext()->gxQueue;
+
+	C3Di_WaitAndClearQueue(-1);
+	inSafeTransfer = true;
+	GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
+	gxCmdQueueRun(queue);
+}
+
+/**
+ * Queues a texture copy. Inside a frame the pending commands are submitted
+ * first and the copy is queued behind them; outside a frame the copy is run
+ * through C3Di_SafeTextureCopy and waited for with gspWaitForPPF.
+ */
+static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
+{
+	if (!inFrame)
+	{
+		C3Di_SafeTextureCopy(inadr, indim, outadr, outdim, size, flags);
+		gspWaitForPPF();
+		return;
+	}
+
+	C3D_FrameSplit(0);
+	GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
+}
+
+
+
+
+
+
+
+/**
+ * Uploads one combiner stage. Stage registers are 8 apart starting at
+ * GPUREG_TEXENV0_SOURCE, with stages 4 and up displaced by two further
+ * slots. The struct is sent verbatim as consecutive words.
+ */
+static void C3Di_TexEnvBind(int id, C3D_TexEnv* env)
+{
+	const int slot = (id >= 4) ? id + 2 : id;
+	const u32 wordCount = sizeof(C3D_TexEnv)/sizeof(u32);
+
+	GPUCMD_AddIncrementalWrites(GPUREG_TEXENV0_SOURCE + slot*8, (u32*)env, wordCount);
+}
+
+
+
+
+
+/**
+ * Copies image data into one mipmap level of a texture (a negative level
+ * addresses the whole chain). Textures outside VRAM are written with
+ * memcpy; VRAM textures go through C3D_SyncTextureCopy. `face` is not used
+ * by this implementation.
+ */
+static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level)
+{
+	u32 bytes = 0;
+	void* destination = C3D_TexGetImagePtr(tex, tex->data, level, &bytes);
+
+	if (addrIsVRAM(destination))
+	{
+		C3D_SyncTextureCopy((u32*)data, 0, (u32*)destination, 0, bytes, 8);
+		return;
+	}
+
+	memcpy(destination, data, bytes);
+}
+
+/**
+ * Binds a texture to a texture unit and flags that unit as changed.
+ *
+ * @param unitId Texture unit, 0..2. Other values are ignored: the context
+ *               has exactly three texture slots, and an out-of-range id
+ *               would write outside ctx->tex and set an unrelated flag bit.
+ * @param tex    Texture to bind.
+ */
+static void C3D_TexBind(int unitId, C3D_Tex* tex)
+{
+	C3D_Context* ctx = C3Di_GetContext();
+	const int unitCount = (int)(sizeof(ctx->tex)/sizeof(ctx->tex[0]));
+
+	if (unitId < 0 || unitId >= unitCount)
+		return;
+
+	ctx->flags |= C3DiF_Tex(unitId);
+	ctx->tex[unitId] = tex;
+}
+
+/** Flushes the data cache over the texture's whole mipmap chain; textures in VRAM are skipped. */
+static void C3D_TexFlush(C3D_Tex* tex)
+{
+	if (addrIsVRAM(tex->data))
+		return;
+
+	const u32 totalBytes = C3D_TexCalcTotalSize(tex->size, tex->maxLevel);
+	GSPGPU_FlushDataCache(tex->data, totalBytes);
+}
+
+// Releases the texture's storage with the deallocator matching where it
+// lives: vramFree for VRAM addresses, linearFree otherwise.
+static void C3D_TexDelete(C3D_Tex* tex)
+{
+	void* storage = tex->data;
+
+	if (!addrIsVRAM(storage))
+	{
+		linearFree(storage);
+		return;
+	}
+
+	vramFree(storage);
+}
+
+// Emits the register writes describing `tex` for texture unit `unit` (0..2):
+// five incremental registers starting at the unit's BORDER_COLOR register,
+// followed by the unit's TYPE register. Any other unit value emits nothing.
+static void C3Di_SetTex(int unit, C3D_Tex* tex)
+{
+	u32 params[5];
+	params[0] = tex->border;
+	params[1] = tex->dim;
+	params[2] = tex->param;
+	params[3] = tex->lodParam;
+	// The address register takes the physical address shifted right by 3
+	params[4] = osConvertVirtToPhys(tex->data) >> 3;
+
+	if (unit == 0)
+	{
+		GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT0_BORDER_COLOR, params, 5);
+		GPUCMD_AddWrite(GPUREG_TEXUNIT0_TYPE, tex->fmt);
+	}
+	else if (unit == 1)
+	{
+		GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT1_BORDER_COLOR, params, 5);
+		GPUCMD_AddWrite(GPUREG_TEXUNIT1_TYPE, tex->fmt);
+	}
+	else if (unit == 2)
+	{
+		GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT2_BORDER_COLOR, params, 5);
+		GPUCMD_AddWrite(GPUREG_TEXUNIT2_TYPE, tex->fmt);
+	}
+}
+
+
+
+
+
+
+
+
+// Float uniform constants taken from the most recently loaded shader instance.
+// Filled in by C3Di_LoadShaderUniforms and consumed by C3D_UpdateUniforms.
+static struct
+{
+	bool dirty; ///< Set when the constants still have to be written to the GPU
+	int count; ///< Number of entries in `data`
+	float24Uniform_s* data; ///< Constant table (points at the shader instance's float24Uniforms)
+} C3Di_ShaderFVecData;
+
+// Per-slot flags consulted by C3Di_DirtyUniforms: a slot whose flag is set is
+// re-marked dirty there so that it gets uploaded again.
+static bool C3Di_FVUnifEverDirty[C3D_FVUNIF_COUNT];
+static bool C3Di_IVUnifEverDirty[C3D_IVUNIF_COUNT];
+
+// Emits GPU commands for every pending vertex-shader uniform change and
+// clears the corresponding dirty flags. Handled in order:
+//  1. float constants supplied by the shader (C3Di_ShaderFVecData)
+//  2. dirty float vector uniforms, uploaded in runs of consecutive slots
+//  3. dirty integer vector uniforms
+//  4. the boolean uniform word
+// Slots that get uploaded in steps 2 and 3 are remembered in the *EverDirty
+// arrays so that C3Di_DirtyUniforms can schedule them for re-upload later.
+static void C3D_UpdateUniforms(void)
+{
+	int i = 0;
+
+	// Update FVec uniforms that come from shader constants
+	if (C3Di_ShaderFVecData.dirty)
+	{
+		while (i < C3Di_ShaderFVecData.count)
+		{
+			float24Uniform_s* u = &C3Di_ShaderFVecData.data[i++];
+			GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)u, 4);
+			// The constant has just been written to this slot, so a pending
+			// upload of C3D_FVUnif for the same id is cancelled
+			C3D_FVUnifDirty[u->id] = false;
+		}
+		C3Di_ShaderFVecData.dirty = false;
+		i = 0;
+	}
+
+	// Update FVec uniforms
+	while (i < C3D_FVUNIF_COUNT)
+	{
+		if (!C3D_FVUnifDirty[i])
+		{
+			i ++;
+			continue;
+		}
+
+		// Find the number of consecutive dirty uniforms
+		int j;
+		for (j = i; j < C3D_FVUNIF_COUNT && C3D_FVUnifDirty[j]; j ++);
+
+		// Upload the uniforms
+		GPUCMD_AddWrite(GPUREG_VSH_FLOATUNIFORM_CONFIG, 0x80000000|i);
+		GPUCMD_AddWrites(GPUREG_VSH_FLOATUNIFORM_DATA, (u32*)&C3D_FVUnif[i], (j-i)*4);
+
+		// Clear the dirty flag
+		int k;
+		for (k = i; k < j; k ++)
+		{
+			C3D_FVUnifDirty[k] = false;
+			C3Di_FVUnifEverDirty[k] = true;
+		}
+
+		// Advance
+		i = j;
+	}
+
+	// Update IVec uniforms
+	for (i = 0; i < C3D_IVUNIF_COUNT; i ++)
+	{
+		if (!C3D_IVUnifDirty[i]) continue;
+
+		GPUCMD_AddWrite(GPUREG_VSH_INTUNIFORM_I0+i, C3D_IVUnif[i]);
+		C3D_IVUnifDirty[i] = false;
+		// Remember that this slot has been uploaded (mirrors the FVec path above);
+		// without this C3Di_DirtyUniforms could never re-mark integer uniforms
+		C3Di_IVUnifEverDirty[i] = true;
+	}
+
+	// Update bool uniforms
+	if (C3D_BoolUnifsDirty)
+	{
+		GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000 | C3D_BoolUnifs);
+		C3D_BoolUnifsDirty = false;
+	}
+}
+
+// Schedules uniform state for re-upload: the boolean word unconditionally,
+// the shader-supplied float constants when there are any, and every float or
+// integer slot whose EverDirty flag is set. Slots that are already dirty
+// stay dirty.
+static void C3Di_DirtyUniforms(void)
+{
+	int slot;
+
+	C3D_BoolUnifsDirty = true;
+
+	if (C3Di_ShaderFVecData.count != 0)
+		C3Di_ShaderFVecData.dirty = true;
+
+	for (slot = 0; slot < C3D_FVUNIF_COUNT; slot ++)
+	{
+		if (C3Di_FVUnifEverDirty[slot])
+			C3D_FVUnifDirty[slot] = true;
+	}
+
+	for (slot = 0; slot < C3D_IVUNIF_COUNT; slot ++)
+	{
+		if (C3Di_IVUnifEverDirty[slot])
+			C3D_IVUnifDirty[slot] = true;
+	}
+}
+
+// Imports the uniform defaults carried by shader instance `si`:
+//  - boolean uniforms covered by si->boolUniformMask are replaced
+//  - integer uniforms 0..3 selected by si->intUniformMask are copied and marked dirty
+//  - the float constant table is pointed at the instance's data and marked dirty
+// The boolean word is marked dirty regardless of the mask.
+static void C3Di_LoadShaderUniforms(shaderInstance_s* si)
+{
+	int slot;
+
+	if (si->boolUniformMask)
+	{
+		C3D_BoolUnifs &= ~si->boolUniformMask;
+		C3D_BoolUnifs |= si->boolUniforms;
+	}
+	C3D_BoolUnifsDirty = true;
+
+	// With an empty mask no bit test below can succeed, so the loop needs no outer guard
+	for (slot = 0; slot < 4; slot ++)
+	{
+		if (!(si->intUniformMask & BIT(slot)))
+			continue;
+
+		C3D_IVUnif[slot] = si->intUniforms[slot];
+		C3D_IVUnifDirty[slot] = true;
+	}
+
+	C3Di_ShaderFVecData.data  = si->float24Uniforms;
+	C3Di_ShaderFVecData.count = si->numFloat24Uniforms;
+	C3Di_ShaderFVecData.dirty = true;
+}
+
+
+
+
+
+
+
+
+// Cookie handed to aptHook/aptUnhook for the hook below
+static aptHookCookie hookCookie;
+
+// APT hook callback.
+//  - APTHOOK_ONSUSPEND: waits for the render queue to finish and disables
+//    its VBlank handling.
+//  - APTHOOK_ONRESTORE: re-enables VBlank handling and raises the context
+//    and uniform dirty flags so the state is emitted again.
+// All other hook types are ignored. `param` is not used.
+static void C3Di_AptEventHook(APT_HookType hookType, void* param)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	if (hookType == APTHOOK_ONSUSPEND)
+	{
+		C3Di_RenderQueueWaitDone();
+		C3Di_RenderQueueDisableVBlank();
+	}
+	else if (hookType == APTHOOK_ONRESTORE)
+	{
+		C3Di_RenderQueueEnableVBlank();
+
+		context->flags |= C3DiF_AttrInfo | C3DiF_Effect | C3DiF_FrameBuf
+			| C3DiF_Viewport | C3DiF_Scissor | C3DiF_Program | C3DiF_VshCode | C3DiF_GshCode
+			| C3DiF_TexAll | C3DiF_TexEnvBuf | C3DiF_Gas | C3DiF_Reset;
+
+		C3Di_DirtyUniforms();
+
+		// The fog LUT only needs re-uploading when one is actually set
+		if (context->fogLut)
+			context->flags |= C3DiF_FogLut;
+	}
+}
+
+// Initializes the global context.
+//   cmdBufSize: requested command buffer size in bytes (rounded up to a
+//               multiple of 0x10 before allocation)
+// Returns false when the context is already active or when either allocation
+// fails; returns true once the default state, render queue and APT hook have
+// been set up.
+static bool C3D_Init(size_t cmdBufSize)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	// Refuse to initialize twice
+	if (context->flags & C3DiF_Active)
+		return false;
+
+	// Command buffer: size is stored in words, allocation is done in bytes
+	cmdBufSize = (cmdBufSize + 0xF) & ~(size_t)0xF;
+	context->cmdBufSize = cmdBufSize/4;
+	context->cmdBuf = (u32*)linearAlloc(cmdBufSize);
+	if (context->cmdBuf == NULL)
+		return false;
+
+	// GX command queue storage
+	context->gxQueue.maxEntries = 32;
+	context->gxQueue.entries = (gxCmdEntry_s*)malloc(context->gxQueue.maxEntries*sizeof(gxCmdEntry_s));
+	if (context->gxQueue.entries == NULL)
+	{
+		linearFree(context->cmdBuf);
+		return false;
+	}
+
+	context->flags = C3DiF_Active | C3DiF_TexEnvBuf | C3DiF_Effect | C3DiF_TexStatus | C3DiF_TexAll | C3DiF_Reset;
+
+	// TODO: replace with direct struct access
+	C3D_DepthMap(true, -1.0f, 0.0f);
+	C3D_CullFace(GPU_CULL_BACK_CCW);
+	C3D_StencilTest();
+	C3D_StencilOp();
+	C3D_EarlyDepthTest(false, GPU_EARLYDEPTH_GREATER, 0);
+	C3D_DepthTest(true, GPU_GREATER, GPU_WRITE_ALL);
+	C3D_AlphaTest(false, GPU_ALWAYS, 0x00);
+	C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
+	C3D_FragOpMode(GPU_FRAGOPMODE_GL);
+	C3D_FragOpShadow(0.0, 1.0);
+
+	// Texture, texenv buffer and fog defaults
+	context->texConfig    = BIT(12);
+	context->texEnvBuf    = 0;
+	context->texEnvBufClr = 0xFFFFFFFF;
+	context->fogClr       = 0;
+	context->fogLut       = NULL;
+
+	// No textures bound on any of the three units
+	context->tex[0] = NULL;
+	context->tex[1] = NULL;
+	context->tex[2] = NULL;
+
+	C3Di_RenderQueueInit();
+	aptHook(&hookCookie, C3Di_AptEventHook, NULL);
+
+	return true;
+}
+
+// Stores a new viewport in the context and marks viewport and scissor state
+// dirty. The scissor mode is reset to GPU_SCISSOR_DISABLE as a side effect.
+//   x, y: viewport origin (packed as y in the high half-word, x in the low one)
+//   w, h: viewport size; encoded as half-size (f24) and 2/size (f31, shifted left by one)
+static void C3D_SetViewport(u32 x, u32 y, u32 w, u32 h)
+{
+	C3D_Context* context = C3Di_GetContext();
+	float width  = (float)w;
+	float height = (float)h;
+
+	context->flags |= C3DiF_Viewport | C3DiF_Scissor;
+
+	context->viewport[0] = f32tof24(width / 2.0f);
+	context->viewport[1] = f32tof31(2.0f / width) << 1;
+	context->viewport[2] = f32tof24(height / 2.0f);
+	context->viewport[3] = f32tof31(2.0f / height) << 1;
+	context->viewport[4] = (y << 16) | (x & 0xFFFF);
+
+	context->scissor[0] = GPU_SCISSOR_DISABLE;
+}
+
+// Stores the scissor mode and marks scissor state dirty. Unless the mode is
+// GPU_SCISSOR_DISABLE, the rectangle is stored too: the first corner as
+// given, the second corner with 1 subtracted from each coordinate.
+static void C3D_SetScissor(GPU_SCISSORMODE mode, u32 left, u32 top, u32 right, u32 bottom)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	context->flags |= C3DiF_Scissor;
+	context->scissor[0] = mode;
+
+	if (mode != GPU_SCISSOR_DISABLE)
+	{
+		context->scissor[1] = (top << 16) | (left & 0xFFFF);
+		context->scissor[2] = ((bottom-1) << 16) | ((right-1) & 0xFFFF);
+	}
+}
+
+// Emits GPU commands for every piece of context state whose dirty bit is set
+// in ctx->flags, clearing each bit as its state is written, and finishes by
+// uploading pending uniforms via C3D_UpdateUniforms. The sections run in a
+// fixed order: framebuffer, viewport, scissor, shader program, attribute
+// info, effect, textures, texture unit status, texenv buffer/fog colour,
+// fog LUT, and the one-off reset block.
+static void C3Di_UpdateContext(void)
+{
+	int i;
+	C3D_Context* ctx = C3Di_GetContext();
+
+	if (ctx->flags & C3DiF_FrameBuf)
+	{
+		ctx->flags &= ~C3DiF_FrameBuf;
+		// If anything was drawn since the last flush, flush before rebinding
+		if (ctx->flags & C3DiF_DrawUsed)
+		{
+			ctx->flags &= ~C3DiF_DrawUsed;
+			GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 1);
+			GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1);
+		}
+		C3Di_FrameBufBind(&ctx->fb);
+	}
+
+	if (ctx->flags & C3DiF_Viewport)
+	{
+		ctx->flags &= ~C3DiF_Viewport;
+		// viewport[0..3] go out as one incremental write, viewport[4] (packed x/y) separately
+		GPUCMD_AddIncrementalWrites(GPUREG_VIEWPORT_WIDTH, ctx->viewport, 4);
+		GPUCMD_AddWrite(GPUREG_VIEWPORT_XY, ctx->viewport[4]);
+	}
+
+	if (ctx->flags & C3DiF_Scissor)
+	{
+		ctx->flags &= ~C3DiF_Scissor;
+		GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, ctx->scissor, 3);
+	}
+
+	if (ctx->flags & C3DiF_Program)
+	{
+		// The VshCode/GshCode flags are passed on as the two boolean arguments
+		shaderProgramConfigure(ctx->program, (ctx->flags & C3DiF_VshCode) != 0, (ctx->flags & C3DiF_GshCode) != 0);
+		ctx->flags &= ~(C3DiF_Program | C3DiF_VshCode | C3DiF_GshCode);
+	}
+
+	if (ctx->flags & C3DiF_AttrInfo)
+	{
+		ctx->flags &= ~C3DiF_AttrInfo;
+		C3Di_AttrInfoBind(&ctx->attrInfo);
+	}
+
+	if (ctx->flags & C3DiF_Effect)
+	{
+		ctx->flags &= ~C3DiF_Effect;
+		C3Di_EffectBind(&ctx->effect);
+	}
+
+	if (ctx->flags & C3DiF_TexAll)
+	{
+		// Bitmask of units that currently have a texture bound
+		u32 units = 0;
+		for (i = 0; i < 3; i ++)
+		{
+			if (ctx->tex[i])
+			{
+				units |= BIT(i);
+				// Only re-emit the registers of units whose own dirty bit is set
+				if (ctx->flags & C3DiF_Tex(i))
+					C3Di_SetTex(i, ctx->tex[i]);
+			}
+		}
+
+		// Enable texture units and clear texture cache
+		ctx->texConfig &= ~7;
+		ctx->texConfig |= units | BIT(16);
+		ctx->flags &= ~C3DiF_TexAll;
+		// Force the texture status section below to run
+		ctx->flags |= C3DiF_TexStatus;
+	}
+
+	if (ctx->flags & C3DiF_TexStatus)
+	{
+		ctx->flags &= ~C3DiF_TexStatus;
+		GPUCMD_AddMaskedWrite(GPUREG_TEXUNIT_CONFIG, 0xB, ctx->texConfig);
+		// Clear texture cache if requested *after* configuring texture units
+		if (ctx->texConfig & BIT(16))
+		{
+			ctx->texConfig &= ~BIT(16);
+			GPUCMD_AddMaskedWrite(GPUREG_TEXUNIT_CONFIG, 0x4, BIT(16));
+		}
+		GPUCMD_AddWrite(GPUREG_TEXUNIT0_SHADOW, BIT(0));
+	}
+
+	if (ctx->flags & C3DiF_TexEnvBuf)
+	{
+		ctx->flags &= ~C3DiF_TexEnvBuf;
+		GPUCMD_AddMaskedWrite(GPUREG_TEXENV_UPDATE_BUFFER, 0x7, ctx->texEnvBuf);
+		GPUCMD_AddWrite(GPUREG_TEXENV_BUFFER_COLOR, ctx->texEnvBufClr);
+		GPUCMD_AddWrite(GPUREG_FOG_COLOR, ctx->fogClr);
+	}
+
+	// The fog LUT flag stays pending while the fog mode (low 3 bits of texEnvBuf) is GPU_NO_FOG
+	if ((ctx->flags & C3DiF_FogLut) && (ctx->texEnvBuf&7) != GPU_NO_FOG)
+	{
+		ctx->flags &= ~C3DiF_FogLut;
+		if (ctx->fogLut)
+		{
+			GPUCMD_AddWrite(GPUREG_FOG_LUT_INDEX, 0);
+			GPUCMD_AddWrites(GPUREG_FOG_LUT_DATA0, ctx->fogLut->data, 128);
+		}
+	}
+
+	if (ctx->flags & C3DiF_Reset)
+	{
+		// Reset texture environment
+		C3D_TexEnv texEnv;
+		C3D_TexEnvInit(&texEnv);
+		for (i = 0; i < 6; i++)
+		{
+			C3Di_TexEnvBind(i, &texEnv);
+		}
+
+		// Reset lighting
+		GPUCMD_AddWrite(GPUREG_LIGHTING_ENABLE0, false);
+		GPUCMD_AddWrite(GPUREG_LIGHTING_ENABLE1,  true);
+
+		// Reset attribute buffer info (all 12 buffer configs zeroed)
+		C3D_BufCfg buffers[12] = { 0 };
+		GPUCMD_AddWrite(GPUREG_ATTRIBBUFFERS_LOC, BUFFER_BASE_PADDR >> 3);
+		GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFER0_OFFSET, (u32*)buffers, 12 * 3);
+
+		ctx->flags &= ~C3DiF_Reset;
+	}
+
+	C3D_UpdateUniforms();
+}
+
+// Splits the GPU command buffer, returning the finished part through
+// `pBuf`/`pSize`. Returns false without touching them when the command
+// buffer offset is zero; returns true otherwise. If a draw happened since
+// the last flush, framebuffer flush/invalidate and early-depth clear
+// commands are appended before the split.
+static bool C3Di_SplitFrame(u32** pBuf, u32* pSize)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	// Nothing was drawn
+	if (gpuCmdBufOffset == 0)
+		return false;
+
+	bool drawPending = (context->flags & C3DiF_DrawUsed) != 0;
+	if (drawPending)
+	{
+		context->flags &= ~C3DiF_DrawUsed;
+		GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 1);
+		GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 1);
+		GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1);
+	}
+
+	GPUCMD_Split(pBuf, pSize);
+	return true;
+}
+
+// Tears down what C3D_Init set up: removes the APT hook, shuts down the
+// render queue, frees the GX queue entries and the command buffer, and
+// clears all context flags. Does nothing when the context is not active.
+static void C3D_Fini(void)
+{
+	C3D_Context* context = C3Di_GetContext();
+
+	if (context->flags & C3DiF_Active)
+	{
+		aptUnhook(&hookCookie);
+		C3Di_RenderQueueExit();
+
+		free(context->gxQueue.entries);
+		linearFree(context->cmdBuf);
+
+		context->flags = 0;
+	}
+}
+
+// Makes `program` the current shader program. When it differs from the
+// previous one, the program and attribute info are marked dirty; shader code
+// is additionally marked for reload when there was no previous program or
+// when the vertex shader's DVLP differs. The vertex shader's uniform
+// defaults are loaded on every call, even if the program is unchanged.
+static void C3D_BindProgram(shaderProgram_s* program)
+{
+	C3D_Context* context = C3Di_GetContext();
+	shaderProgram_s* previous = context->program;
+
+	if (previous != program)
+	{
+		context->program = program;
+		context->flags |= C3DiF_Program | C3DiF_AttrInfo;
+
+		// Short-circuit keeps `previous` from being dereferenced when it is NULL
+		if (!previous || previous->vertexShader->dvle->dvlp != program->vertexShader->dvle->dvlp)
+			context->flags |= C3DiF_VshCode | C3DiF_GshCode;
+	}
+
+	C3Di_LoadShaderUniforms(program->vertexShader);
+}