feat(render): Phase U.3 — GPU clip-plane gate (gl_ClipDistance), no-clip default

Adds the GPU mechanism to clip drawing to a per-cell screen-space convex
region via gl_ClipDistance, consumed by the mesh + terrain vertex shaders.
This is the MECHANISM only — every instance defaults to slot 0 (no-clip /
pass-all) and terrain to count 0, so the running game renders IDENTICALLY to
pre-U.3 (verified: offline launch compiles both shaders and reaches steady
state; no GL errors). U.4 populates real clip data from portal visibility.

Binding contract (define once, both sides obey):
- mesh_modern.vert: SSBO binding=2 CellClip[] (shared per-frame regions, slot 0
  reserved no-clip) + SSBO binding=3 uint[] per-instance slot, indexed by the
  IDENTICAL gl_BaseInstanceARB+gl_InstanceID used for binding=0. binding=0/1
  untouched.
- terrain_modern.vert: UBO binding=2 TerrainClip { int count; vec4 planes[8]; }
  for the single OutsideView region (UBO namespace; SceneLighting is UBO
  binding=1, so binding=2 is free and does not collide with the mesh SSBO
  binding=2). count 0 = ungated.
- Both redeclare out gl_PerVertex { vec4 gl_Position; float gl_ClipDistance[8]; }
  and set unused planes (i >= count) to +1.0 so they pass everything.

CellClip std430 layout (144 bytes/slot): count@0, 3 pad uints@4/8/12,
planes[8]@16 (vec4 stride 16). Terrain UBO std140: count@0 (padded to 16),
planes[8]@16 → 144 bytes. Verified by ClipFrameLayoutTests (8 new tests).

Pieces:
- ClipFrame: per-frame container + uploader for the SHARED clip data (binding=2
  SSBO + terrain UBO). NoClip() = slot 0 + terrain count 0. AppendSlot /
  SetTerrainClip pack std430/std140 bytes for U.4. UploadShared binds both.
- WbDrawDispatcher + EnvCellRenderer: each owns its binding=3 zero buffer
  (all-zeros sized to its instance count → slot 0), re-binds binding=2 from the
  shared ClipFrame id (or an internal no-clip fallback if unwired) before MDI.
  gl_ClipDistance is per-vertex, so the single glMultiDrawElementsIndirect per
  group is preserved — no draw splitting.
- TerrainModernRenderer: binds the terrain clip UBO (shared or no-clip fallback)
  before its draw.
- GameWindow: glEnable(GL_CLIP_DISTANCE0..7) once at init (unused planes pass-all
  so always-on avoids per-draw thrash); per frame builds ClipFrame.NoClip(),
  UploadShared, and hands the buffer ids to the three renderers (tiny diff; U.4
  swaps NoClip() for the real portal-visibility frame).

Gate: dotnet build green; App suite 134/134; offline launch confirms both
shaders compile + link with no GL errors.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-30 17:27:30 +02:00
parent 0b125830fe
commit bf2e559369
8 changed files with 797 additions and 1 deletions

View file

@ -54,6 +54,13 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
private uint _indirectBuffer;
private int _indirectCapacity;
// Phase U.3: terrain clip UBO (binding=2, terrain_modern.vert TerrainClip).
// The shared one is created + uploaded by the GameWindow-level ClipFrame and
// handed in via SetClipUbo. When 0, we bind a lazily-created no-clip fallback
// (count 0 = ungated) so the shader never reads an unbound UBO at binding=2.
// Buffer id most recently passed to SetClipUbo; 0 means no shared UBO is wired.
private uint _sharedClipUbo;
// Buffer id of the zero-filled fallback UBO; stays 0 until the first draw that
// needs it, and is deleted together with the renderer's other GL buffers.
private uint _fallbackClipUbo;
// Cached uvec2-handle uniform locations (matrix uniforms are set by name via Shader.SetMatrix4).
private int _uTerrainHandleLoc;
private int _uAlphaHandleLoc;
@ -93,6 +100,14 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
_indirectBuffer = _gl.GenBuffer();
}
/// <summary>
/// Phase U.3: stores the id of the SHARED terrain-clip UBO produced by
/// <see cref="ClipFrame.UploadShared"/>. The stored id is attached to
/// binding=2 right before the terrain draw. A value of 0 means "no shared
/// UBO": the renderer then binds its own internal no-clip buffer
/// (count 0 = ungated terrain).
/// </summary>
/// <param name="sharedClipUbo">GL buffer id of the shared clip UBO, or 0 to use the fallback.</param>
public void SetClipUbo(uint sharedClipUbo)
{
    _sharedClipUbo = sharedClipUbo;
}
/// <summary>
/// Two-tier streaming entry point. Accepts a prebuilt mesh from
/// <see cref="LandblockStreamResult.Loaded.MeshData"/> built on the worker
@ -258,6 +273,10 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
_gl.ProgramUniform2(_shader.Program, _uAlphaHandleLoc,
(uint)(alphaHandle & 0xFFFFFFFFu), (uint)(alphaHandle >> 32));
// Phase U.3: bind the terrain clip UBO (binding=2). Shared ClipFrame UBO
// when wired, else the no-clip fallback (count 0 = ungated terrain).
BindClipUboBinding2();
_gl.BindVertexArray(_globalVao);
_gl.MemoryBarrier(MemoryBarrierMask.CommandBarrierBit);
_gl.MultiDrawElementsIndirect(
@ -275,12 +294,42 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
_gl.DeleteBuffer(_globalVbo);
_gl.DeleteBuffer(_globalEbo);
_gl.DeleteBuffer(_indirectBuffer);
if (_fallbackClipUbo != 0) { _gl.DeleteBuffer(_fallbackClipUbo); _fallbackClipUbo = 0; } // Phase U.3
}
// ----------------------------------------------------------------
// Private helpers
// ----------------------------------------------------------------
/// <summary>
/// Phase U.3: attaches a terrain clip UBO to uniform-buffer binding point
/// <see cref="ClipFrame.TerrainClipUboBinding"/>. The buffer supplied through
/// <see cref="SetClipUbo"/> is used whenever it is non-zero. Otherwise a
/// fallback buffer of <see cref="ClipFrame.TerrainUboBytes"/> zero bytes is
/// created on first use and bound instead, so the binding point is never left
/// without a buffer (all-zero contents mean count 0 = ungated terrain).
/// </summary>
private void BindClipUboBinding2()
{
    uint clipUbo = _sharedClipUbo;

    if (clipUbo == 0)
    {
        if (_fallbackClipUbo == 0)
        {
            // No shared UBO wired and no fallback yet: build the zero-filled
            // fallback once and keep its id for every later draw.
            byte* zeroed = stackalloc byte[ClipFrame.TerrainUboBytes];
            int offset = 0;
            while (offset < ClipFrame.TerrainUboBytes)
            {
                zeroed[offset] = 0;
                offset++;
            }

            _fallbackClipUbo = _gl.GenBuffer();
            _gl.BindBuffer(BufferTargetARB.UniformBuffer, _fallbackClipUbo);
            _gl.BufferData(
                BufferTargetARB.UniformBuffer,
                (nuint)ClipFrame.TerrainUboBytes,
                zeroed,
                BufferUsageARB.DynamicDraw);
        }

        clipUbo = _fallbackClipUbo;
    }

    // Single bind site: either the shared id or the fallback chosen above.
    _gl.BindBufferBase(
        BufferTargetARB.UniformBuffer,
        ClipFrame.TerrainClipUboBinding,
        clipUbo);
}
private void AllocateGpuBuffers(int capacitySlots)
{
nuint vboBytes = (nuint)(capacitySlots * VertsPerLandblock * VertexSize);