Adds the GPU mechanism to clip drawing to a per-cell screen-space convex
region via gl_ClipDistance, consumed by the mesh + terrain vertex shaders.
This is the MECHANISM only — every instance defaults to slot 0 (no-clip /
pass-all) and terrain to count 0, so the running game renders IDENTICALLY to
pre-U.3 (verified: offline launch compiles both shaders and reaches steady
state; no GL errors). U.4 populates real clip data from portal visibility.
Binding contract (define once, both sides obey):
- mesh_modern.vert: SSBO binding=2 CellClip[] (shared per-frame regions, slot 0
reserved no-clip) + SSBO binding=3 uint[] per-instance slot, indexed by the
IDENTICAL gl_BaseInstanceARB+gl_InstanceID used for binding=0. binding=0/1
untouched.
- terrain_modern.vert: UBO binding=2 TerrainClip { int count; vec4 planes[8]; }
for the single OutsideView region (UBO namespace; SceneLighting is UBO
binding=1, so binding=2 is free and does not collide with the mesh SSBO
binding=2). count 0 = ungated.
- Both redeclare out gl_PerVertex { vec4 gl_Position; float gl_ClipDistance[8]; }
and set unused planes (i >= count) to +1.0 so they pass everything.
CellClip std430 layout (144 bytes/slot): count@0, 3 pad uints@4/8/12,
planes[8]@16 (vec4 stride 16). Terrain UBO std140: count@0 (padded to 16),
planes[8]@16 → 144 bytes. Verified by ClipFrameLayoutTests (8 new tests).
Pieces:
- ClipFrame: per-frame container + uploader for the SHARED clip data (binding=2
SSBO + terrain UBO). NoClip() = slot 0 + terrain count 0. AppendSlot /
SetTerrainClip pack std430/std140 bytes for U.4. UploadShared binds both.
- WbDrawDispatcher + EnvCellRenderer: each owns its binding=3 zero buffer
(all-zeros sized to its instance count → slot 0), re-binds binding=2 from the
shared ClipFrame id (or an internal no-clip fallback if unwired) before MDI.
gl_ClipDistance is per-vertex, so the single glMultiDrawElementsIndirect per
group is preserved — no draw splitting.
- TerrainModernRenderer: binds the terrain clip UBO (shared or no-clip fallback)
before its draw.
- GameWindow: glEnable(GL_CLIP_DISTANCE0..7) once at init (unused planes pass-all
so always-on avoids per-draw thrash); per frame builds ClipFrame.NoClip(),
UploadShared, and hands the buffer ids to the three renderers (tiny diff; U.4
swaps NoClip() for the real portal-visibility frame).
Gate: dotnet build green; App suite 134/134; offline launch confirms both
shaders compile + link with no GL errors.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
131 lines
5.6 KiB
GLSL
131 lines
5.6 KiB
GLSL
#version 430 core
|
||
#extension GL_ARB_shader_draw_parameters : require
|
||
|
||
// Per-vertex mesh attributes. aPosition and aNormal are transformed by the
// per-instance model matrix in main(); aTexCoord is forwarded unchanged.
layout(location = 0) in vec3 aPosition;
layout(location = 1) in vec3 aNormal;
layout(location = 2) in vec2 aTexCoord;
|
||
|
||
// One element of the per-instance SSBO. Today it carries only the model
// matrix that main() applies to the vertex position and normal.
struct InstanceData {
    mat4 transform;

    // Reserved for the Phase B.4 follow-up (selection-blink retail-faithful
    // highlight). To add `vec4 highlightColor;`:
    //   1. extend the stride here,
    //   2. increase the _instanceSsbo upload size in WbDrawDispatcher,
    //   3. add a flat varying out,
    //   4. consume it in mesh_modern.frag.
};
|
||
|
||
// One element of the per-draw batch SSBO; main() forwards the texture fields
// to the fragment stage as flat varyings.
struct BatchData {
    // Bindless handle for a sampler2DArray.
    uvec2 textureHandle;

    // Layer index (always 0 for per-instance composites).
    uint textureLayer;

    // Reserved. The N.5 dispatcher owns all blend state (glBlendFunc per
    // pass). If a future phase wants a shader-side per-batch additive flag
    // (Decision 2 fallback), encode it here as bit 0.
    uint flags;
};
|
||
|
||
// SSBO binding=0: per-instance records, indexed in main() by
// gl_BaseInstanceARB + gl_InstanceID.
layout(std430, binding = 0) readonly buffer InstanceBuffer {
    InstanceData Instances[];
};
|
||
|
||
// binding=1 here is the SSBO namespace — distinct from the UBO namespace.
|
||
// SceneLighting UBO also uses binding=1 in the fragment shader; GL keeps
|
||
// GL_SHADER_STORAGE_BUFFER and GL_UNIFORM_BUFFER binding tables separate.
|
||
// Task 10 dispatcher binds:
|
||
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, instanceSsbo)
|
||
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, batchSsbo)
|
||
// Existing SceneLightingUboBinding handles the UBO side.
|
||
// SSBO binding=1: per-draw batch records, indexed in main() by
// uDrawIDOffset + gl_DrawIDARB.
layout(std430, binding = 1) readonly buffer BatchBuffer {
    BatchData Batches[];
};
|
||
|
||
// === Phase U.3: per-cell screen-space clip gate (gl_ClipDistance) =============
|
||
// Two SSBOs add the clip mechanism without disturbing binding=0/1 above.
|
||
//
|
||
// binding=2 — SHARED per-frame clip regions, one CellClip per "slot". Uploaded
|
||
// ONCE per frame by ClipFrame.UploadShared (shared across WbDrawDispatcher +
|
||
// EnvCellRenderer). Slot 0 is RESERVED = no-clip (count 0 ⇒ every plane passes).
|
||
//
|
||
// binding=3 — PER-RENDERER per-instance slot index, parallel to the binding=0
|
||
// instance buffer and indexed by the IDENTICAL per-instance index
|
||
// (gl_BaseInstanceARB + gl_InstanceID). instanceClipSlot[i] selects which
|
||
// CellClip region instance i is clipped against. Default all-zeros in U.3 ⇒
|
||
// every instance maps to slot 0 ⇒ no clipping ⇒ identical render to pre-U.3.
|
||
//
|
||
// CellClip std430 layout (144 bytes/slot): a uint count + 3 pad uints (16 bytes)
|
||
// then vec4 planes[8] (8 × 16 = 128 bytes). vec4 array stride is 16 under std430.
|
||
// ClipFrame on the CPU side lays out the bytes to match exactly (verified by
|
||
// ClipFrameLayoutTests). A clip-space vertex is INSIDE iff dot(plane, gl_Position)
|
||
// >= 0 for every active plane (see ClipPlaneSet for the plane convention).
|
||
// One clip region ("slot"). Under std430 this is 144 bytes: a 16-byte header
// (count + three pad uints) followed by eight vec4 planes at a 16-byte stride.
struct CellClip {
    // Number of active planes; planes at index >= count are ignored.
    uint count;

    // Explicit padding so planes[] starts at byte offset 16.
    uint _p0;
    uint _p1;
    uint _p2;

    // Clip planes, evaluated in main() as dot(plane, gl_Position).
    vec4 planes[8];
};
|
||
// SSBO binding=2: clip regions, one CellClip per slot. main() selects the
// slot for each instance via instanceClipSlot[].
layout(std430, binding = 2) readonly buffer ClipRegionBuf {
    CellClip clipRegions[];
};
|
||
// SSBO binding=3: per-instance index into clipRegions[]. main() reads it with
// the same gl_BaseInstanceARB + gl_InstanceID index used for Instances[].
layout(std430, binding = 3) readonly buffer ClipSlotBuf {
    uint instanceClipSlot[];
};
|
||
|
||
// Core profile: redeclare gl_PerVertex so writing gl_ClipDistance[] is legal
|
||
// alongside gl_Position. The array is sized 8 to match the CellClip plane budget
|
||
// and the GL guarantee (GL_MAX_CLIP_DISTANCES >= 8). The host enables
|
||
// GL_CLIP_DISTANCE0..7 once at startup; unused planes are set to +1.0 below so
|
||
// they pass everything (no clipping) when the slot's count < 8.
|
||
// Redeclared built-in output block: exposes gl_Position plus an explicitly
// sized gl_ClipDistance[8], one entry per CellClip plane. main() writes all
// eight entries for every vertex.
out gl_PerVertex {
    vec4  gl_Position;
    float gl_ClipDistance[8];
};
|
||
|
||
// Matrix applied to the world-space vertex position to produce gl_Position.
uniform mat4 uViewProjection;
|
||
|
||
// Phase Post-A.5 (ISSUE #52, 2026-05-10): per-pass base index into Batches[].
//
// Why it exists: gl_DrawIDARB restarts at 0 for every
// glMultiDrawElementsIndirect call. The transparent pass starts later in the
// indirect buffer, so without an offset it fetched
// Batches[0..transparentCount) rather than its own section at
// Batches[opaqueCount..end).
//
// Observed symptom: the lifestone crystal (a transparent draw) read the FIRST
// OPAQUE batch's TextureHandle every frame. As the camera moved, the opaque
// front-to-back sort changed which group sat at BatchData[0], so the
// lifestone's texture flickered to whatever came first — frequently the
// player character's body parts.
//
// Contract: WbDrawDispatcher.Draw sets this to 0 before the opaque MDI call
// and to _opaqueDrawCount before the transparent MDI call, matching
// WorldBuilder's uDrawIDOffset pattern in BaseObjectRenderManager.cs line 845.
uniform int uDrawIDOffset;
|
||
|
||
// Interpolated outputs to the fragment stage.
out vec3 vNormal;    // normalized, rotated by the instance's model matrix
out vec2 vTexCoord;  // aTexCoord passed through
out vec3 vWorldPos;  // world-space vertex position

// Per-draw texture selection copied from Batches[]; flat (not interpolated).
flat out uvec2 vTextureHandle;
flat out uint  vTextureLayer;
|
||
|
||
// Vertex entry point.
//
// 1. Fetches this instance's model matrix and projects the vertex.
// 2. Writes all eight gl_ClipDistance entries from the instance's clip slot.
// 3. Emits the lighting/texturing varyings and the per-draw texture selection.
void main() {
    // One index addresses every per-instance buffer (Instances[] and
    // instanceClipSlot[]), so the clip slot travels with the instance through
    // the MDI BaseInstance offsets.
    int instanceIndex = gl_BaseInstanceARB + gl_InstanceID;
    mat4 model = Instances[instanceIndex].transform;

    vec4 worldPos = model * vec4(aPosition, 1.0);
    gl_Position = uViewProjection * worldPos;

    // Phase U.3: per-instance clip gate. A slot whose count is 0 (slot 0, the
    // U.3 default) leaves every distance at +1.0, so nothing is clipped.
    uint slot = instanceClipSlot[instanceIndex];

    // Clamp the plane count to the array size. Both planes[] and
    // gl_ClipDistance[] hold exactly 8 entries, and indexing past either with
    // a malformed count is undefined behaviour in GLSL.
    uint activePlanes = min(clipRegions[slot].count, 8u);

    // Fixed 8-iteration loop: active planes get their signed distance
    // (>= 0 means inside), unused planes get +1.0 so they pass everything.
    for (uint i = 0u; i < 8u; ++i) {
        gl_ClipDistance[i] = (i < activePlanes)
            ? dot(clipRegions[slot].planes[i], gl_Position)
            : 1.0;
    }

    vWorldPos = worldPos.xyz;
    vNormal   = normalize(mat3(model) * aNormal);
    vTexCoord = aTexCoord;

    // gl_DrawIDARB restarts at 0 for each MDI call; uDrawIDOffset rebases it
    // into this pass's section of Batches[].
    BatchData b = Batches[uDrawIDOffset + gl_DrawIDARB];
    vTextureHandle = b.textureHandle;
    vTextureLayer  = b.textureLayer;
}
|