feat(render): Phase U.3 — GPU clip-plane gate (gl_ClipDistance), no-clip default

Adds the GPU mechanism to clip drawing to a per-cell screen-space convex
region via gl_ClipDistance, consumed by the mesh + terrain vertex shaders.
This is the MECHANISM only — every instance defaults to slot 0 (no-clip /
pass-all) and terrain to count 0, so the running game renders IDENTICALLY to
pre-U.3 (verified: offline launch compiles both shaders and reaches steady
state; no GL errors). U.4 populates real clip data from portal visibility.

Binding contract (define once, both sides obey):
- mesh_modern.vert: SSBO binding=2 CellClip[] (shared per-frame regions, slot 0
  reserved no-clip) + SSBO binding=3 uint[] per-instance slot, indexed by the
  IDENTICAL gl_BaseInstanceARB+gl_InstanceID used for binding=0. binding=0/1
  untouched.
- terrain_modern.vert: UBO binding=2 TerrainClip { int count; vec4 planes[8]; }
  for the single OutsideView region (UBO namespace; SceneLighting is UBO
  binding=1, so binding=2 is free and does not collide with the mesh SSBO
  binding=2). count 0 = ungated.
- Both redeclare out gl_PerVertex { vec4 gl_Position; float gl_ClipDistance[8]; }
  and set unused planes (i >= count) to +1.0 so they pass everything.

CellClip std430 layout (144 bytes/slot): count@0, 3 pad uints@4/8/12,
planes[8]@16 (vec4 stride 16). Terrain UBO std140: count@0 (padded to 16),
planes[8]@16 → 144 bytes. Verified by ClipFrameLayoutTests (8 new tests).

Pieces:
- ClipFrame: per-frame container + uploader for the SHARED clip data (binding=2
  SSBO + terrain UBO). NoClip() = slot 0 + terrain count 0. AppendSlot /
  SetTerrainClip pack std430/std140 bytes for U.4. UploadShared binds both.
- WbDrawDispatcher + EnvCellRenderer: each owns its binding=3 zero buffer
  (all-zeros sized to its instance count → slot 0), re-binds binding=2 from the
  shared ClipFrame id (or an internal no-clip fallback if unwired) before MDI.
  gl_ClipDistance is per-vertex, so the single glMultiDrawElementsIndirect per
  group is preserved — no draw splitting.
- TerrainModernRenderer: binds the terrain clip UBO (shared or no-clip fallback)
  before its draw.
- GameWindow: glEnable(GL_CLIP_DISTANCE0..7) once at init (unused planes pass-all
  so always-on avoids per-draw thrash); per frame builds ClipFrame.NoClip(),
  UploadShared, and hands the buffer ids to the three renderers (tiny diff; U.4
  swaps NoClip() for the real portal-visibility frame).

Gate: dotnet build green; App suite 134/134; offline launch confirms both
shaders compile + link with no GL errors.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-30 17:27:30 +02:00
parent 0b125830fe
commit bf2e559369
8 changed files with 797 additions and 1 deletions

View file

@ -125,6 +125,21 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
// GL buffer id uploaded as the binding=1 SSBO carrying the per-draw BatchData
// records for the frame.
private uint _batchSsbo;
// GL buffer id bound to the DrawIndirectBuffer target; receives the
// DrawElementsIndirectCommand array each frame.
private uint _indirectBuffer;
// Phase U.3: per-instance clip-slot SSBO (binding=3), parallel to
// _instanceSsbo. One uint per instance selecting its CellClip slot. In U.3
// this is ALL ZEROS (every instance → slot 0 → no-clip), so the render is
// identical to pre-U.3. U.4 populates real slot indices.
private uint _clipSlotSsbo;
// CPU-side scratch uploaded into _clipSlotSsbo. Starts at 256 entries and is
// replaced by a larger array when a frame needs more instances than it holds.
private uint[] _clipSlotData = new uint[256];
// Phase U.3: the SHARED per-cell clip-region SSBO (binding=2), owned by the
// GameWindow-level ClipFrame and handed to us via SetClipRegionSsbo. When 0
// (not yet wired), we bind our OWN fallback no-clip region buffer below so the
// shader never reads an unbound SSBO. The fallback holds exactly slot 0
// (count 0 = pass-all), matching ClipFrame.NoClip's slot 0.
private uint _sharedClipRegionSsbo;
// Stays 0 until BindClipRegionBinding2 first needs it (created lazily there);
// deleted on dispose only when it was actually created.
private uint _fallbackClipRegionSsbo;
// Per-frame scratch arrays — Tasks 9-10 fully wire these.
private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
private BatchData[] _batchData = new BatchData[256];
@ -255,8 +270,19 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_instanceSsbo = _gl.GenBuffer();
_batchSsbo = _gl.GenBuffer();
_indirectBuffer = _gl.GenBuffer();
_clipSlotSsbo = _gl.GenBuffer(); // Phase U.3 binding=3
}
/// <summary>
/// Phase U.3: records the id of the SHARED per-cell clip-region SSBO
/// (binding=2) produced by <see cref="ClipFrame.UploadShared"/>. The id is
/// only stored here; the dispatcher binds it to binding=2 itself during its
/// draw pass, so another consumer that re-pointed binding=2 in the meantime
/// cannot leave it aimed at a different buffer.
/// </summary>
/// <param name="sharedClipRegionSsbo">
/// GL buffer id of the shared clip-region SSBO, or 0 to make the dispatcher
/// use its internal no-clip fallback buffer instead.
/// </param>
public void SetClipRegionSsbo(uint sharedClipRegionSsbo)
{
    _sharedClipRegionSsbo = sharedClipRegionSsbo;
}
public static Matrix4x4 ComposePartWorldMatrix(
Matrix4x4 entityWorld,
Matrix4x4 animOverride,
@ -975,13 +1001,25 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_transparentDrawCount,
totalTriangles);
// ── Phase 5: upload three buffers ───────────────────────────────────
// ── Phase 5: upload four buffers ────────────────────────────────────
fixed (float* ip = _instanceData)
UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
fixed (BatchData* bp = _batchData)
UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
// Phase U.3: per-instance clip-slot buffer (binding=3), one uint per
// instance, laid out parallel to _instanceData so the shader's
// instanceClipSlot[instanceIndex] tracks the same instance as
// Instances[instanceIndex]. ALL ZEROS in U.3 ⇒ slot 0 ⇒ no-clip. Grow +
// zero the scratch as needed (a freshly allocated array is already zero-filled;
// a reused array is re-zeroed below so stale U.4 slot indices can't leak).
if (_clipSlotData.Length < totalInstances)
_clipSlotData = new uint[totalInstances + 256];
Array.Clear(_clipSlotData, 0, totalInstances);
fixed (uint* sp = _clipSlotData)
UploadSsbo(_clipSlotSsbo, 3, sp, totalInstances * sizeof(uint));
fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
{
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
@ -989,6 +1027,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
(nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
}
// Phase U.3: bind the SHARED per-cell clip-region SSBO (binding=2). The
// GameWindow-level ClipFrame already uploaded + bound it this frame; we
// re-bind defensively in case another consumer touched binding=2 since.
// When no shared id is set (0), bind our own no-clip fallback so the
// shader never reads an unbound SSBO at binding=2.
BindClipRegionBinding2();
// ── Phase 6: bind global VAO once ───────────────────────────────────
_gl.BindVertexArray(anyVao);
@ -1228,6 +1273,36 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer, binding, ssbo);
}
/// <summary>
/// Phase U.3: points SSBO binding=2 (<see cref="ClipFrame.MeshClipSsboBinding"/>)
/// at a per-cell clip-region buffer. The shared <see cref="ClipFrame"/> buffer
/// supplied through <see cref="SetClipRegionSsbo"/> wins when its id is non-zero.
/// Otherwise a private fallback buffer is used; it is created on first need and
/// filled with one all-zero CellClip slot (count 0 = pass-all, the same as slot 0
/// of <see cref="ClipFrame.NoClip"/>), so the shader never reads an unbound SSBO.
/// </summary>
private unsafe void BindClipRegionBinding2()
{
    uint regionBuffer = _sharedClipRegionSsbo;

    if (regionBuffer == 0)
    {
        // Nothing wired from the outside: use (and, the first time, build) the fallback.
        if (_fallbackClipRegionSsbo == 0)
        {
            _fallbackClipRegionSsbo = _gl.GenBuffer();

            // A single CellClip slot of zero bytes: count 0 ⇒ every plane passes.
            var slotBytes = ClipFrame.CellClipStrideBytes;
            byte* zeroedSlot = stackalloc byte[slotBytes];
            for (int offset = 0; offset < slotBytes; offset++)
            {
                zeroedSlot[offset] = 0;
            }

            _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, _fallbackClipRegionSsbo);
            _gl.BufferData(BufferTargetARB.ShaderStorageBuffer,
                (nuint)slotBytes, zeroedSlot, BufferUsageARB.DynamicDraw);
        }

        regionBuffer = _fallbackClipRegionSsbo;
    }

    _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
        ClipFrame.MeshClipSsboBinding, regionBuffer);
}
private void MaybeFlushDiag()
{
long now = Environment.TickCount64;
@ -1517,6 +1592,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.DeleteBuffer(_instanceSsbo);
_gl.DeleteBuffer(_batchSsbo);
_gl.DeleteBuffer(_indirectBuffer);
if (_clipSlotSsbo != 0) _gl.DeleteBuffer(_clipSlotSsbo); // Phase U.3
if (_fallbackClipRegionSsbo != 0) _gl.DeleteBuffer(_fallbackClipRegionSsbo); // Phase U.3
if (_gpuQueriesInitialized)
{
for (int i = 0; i < GpuQueryRingDepth; i++)