feat(render): Phase U.3 — GPU clip-plane gate (gl_ClipDistance), no-clip default
Adds the GPU mechanism to clip drawing to a per-cell screen-space convex
region via gl_ClipDistance, consumed by the mesh + terrain vertex shaders.
This is the MECHANISM only — every instance defaults to slot 0 (no-clip /
pass-all) and terrain to count 0, so the running game renders IDENTICALLY to
pre-U.3 (verified: offline launch compiles both shaders and reaches steady
state; no GL errors). U.4 populates real clip data from portal visibility.
Binding contract (define once, both sides obey):
- mesh_modern.vert: SSBO binding=2 CellClip[] (shared per-frame regions, slot 0
reserved no-clip) + SSBO binding=3 uint[] per-instance slot, indexed by the
IDENTICAL gl_BaseInstanceARB+gl_InstanceID used for binding=0. binding=0/1
untouched.
- terrain_modern.vert: UBO binding=2 TerrainClip { int count; vec4 planes[8]; }
for the single OutsideView region (UBO namespace; SceneLighting is UBO
binding=1, so binding=2 is free and does not collide with the mesh SSBO
binding=2). count 0 = ungated.
- Both redeclare out gl_PerVertex { vec4 gl_Position; float gl_ClipDistance[8]; }
and set unused planes (i >= count) to +1.0 so they pass everything.
CellClip std430 layout (144 bytes/slot): count@0, 3 pad uints@4/8/12,
planes[8]@16 (vec4 stride 16). Terrain UBO std140: count@0 (padded to 16),
planes[8]@16 → 144 bytes. Verified by ClipFrameLayoutTests (8 new tests).
Pieces:
- ClipFrame: per-frame container + uploader for the SHARED clip data (binding=2
SSBO + terrain UBO). NoClip() = slot 0 + terrain count 0. AppendSlot /
SetTerrainClip pack std430/std140 bytes for U.4. UploadShared binds both.
- WbDrawDispatcher + EnvCellRenderer: each owns its binding=3 zero buffer
(all-zeros sized to its instance count → slot 0), re-binds binding=2 from the
shared ClipFrame id (or an internal no-clip fallback if unwired) before MDI.
gl_ClipDistance is per-vertex, so the single glMultiDrawElementsIndirect per
group is preserved — no draw splitting.
- TerrainModernRenderer: binds the terrain clip UBO (shared or no-clip fallback)
before its draw.
- GameWindow: glEnable(GL_CLIP_DISTANCE0..7) once at init (unused planes pass-all
so always-on avoids per-draw thrash); per frame builds ClipFrame.NoClip(),
UploadShared, and hands the buffer ids to the three renderers (tiny diff; U.4
swaps NoClip() for the real portal-visibility frame).
Gate: dotnet build green; App suite 134/134; offline launch confirms both
shaders compile + link with no GL errors.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0b125830fe
commit
bf2e559369
8 changed files with 797 additions and 1 deletions
|
|
@ -74,6 +74,19 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
// packed transform array instead of the 80-byte CPU struct.
|
||||
private Matrix4x4[] _gpuInstanceTransforms = Array.Empty<Matrix4x4>();
|
||||
|
||||
// Phase U.3: per-instance clip-slot SSBO (binding=3), parallel to
|
||||
// _modernInstanceBuffer. One uint per instance selecting its CellClip slot,
|
||||
// indexed by the same BaseInstance + gl_InstanceID the shader uses for
|
||||
// binding=0. ALL ZEROS in U.3 ⇒ slot 0 ⇒ no-clip. U.4 populates real slots.
|
||||
private uint _clipSlotBuffer;
|
||||
private uint[] _clipSlotData = Array.Empty<uint>();
|
||||
|
||||
// Phase U.3: SHARED per-cell clip-region SSBO (binding=2) handed in via
|
||||
// SetClipRegionSsbo (the GameWindow-level ClipFrame buffer). When 0, we bind
|
||||
// our own one-slot no-clip fallback so the shader never reads an unbound SSBO.
|
||||
private uint _sharedClipRegionSsbo;
|
||||
private uint _fallbackClipRegionSsbo;
|
||||
|
||||
// Reusable scratch arrays — avoid per-frame allocation.
|
||||
// WB BaseObjectRenderManager.cs:58-59: private DrawElementsIndirectCommand[] _commands = Array.Empty<...>()
|
||||
private DrawElementsIndirectCommand[] _commands = Array.Empty<DrawElementsIndirectCommand>();
|
||||
|
|
@ -204,10 +217,26 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
_gl.BufferData(GLEnum.ShaderStorageBuffer,
|
||||
(nuint)(_modernBatchCapacity * sizeof(ModernBatchData)), null, GLEnum.DynamicDraw);
|
||||
|
||||
// Phase U.3: per-instance clip-slot SSBO (binding=3), sized to the
|
||||
// instance capacity. Uploaded all-zeros each frame in RenderModernMDIInternal.
|
||||
_gl.GenBuffers(1, out _clipSlotBuffer);
|
||||
_gl.BindBuffer(GLEnum.ShaderStorageBuffer, _clipSlotBuffer);
|
||||
_gl.BufferData(GLEnum.ShaderStorageBuffer,
|
||||
(nuint)(_modernInstanceCapacity * sizeof(uint)), null, GLEnum.DynamicDraw);
|
||||
|
||||
_gl.BindBuffer(GLEnum.ShaderStorageBuffer, 0);
|
||||
_gl.BindBuffer(GLEnum.DrawIndirectBuffer, 0);
|
||||
}
|
||||
|
||||
/// <summary>
/// Phase U.3: records the id of the SHARED per-cell clip-region SSBO
/// (binding=2) created by <see cref="ClipFrame.UploadShared"/>. The stored id
/// is what <see cref="BindClipRegionBinding2"/> binds right before the MDI;
/// a value of 0 makes that method bind the internal one-slot no-clip
/// fallback buffer instead.
/// </summary>
/// <param name="sharedClipRegionSsbo">
/// GL buffer id of the shared clip-region SSBO, or 0 to use the fallback.
/// </param>
public void SetClipRegionSsbo(uint sharedClipRegionSsbo)
{
    _sharedClipRegionSsbo = sharedClipRegionSsbo;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GetEnvCellGeomId
|
||||
// Verbatim copy of WB EnvCellRenderManager.cs:94-103.
|
||||
|
|
@ -947,6 +976,13 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
_gl.BindBuffer(GLEnum.ShaderStorageBuffer, _modernInstanceBuffer);
|
||||
_gl.BufferData(GLEnum.ShaderStorageBuffer,
|
||||
(nuint)(_modernInstanceCapacity * sizeof(Matrix4x4)), null, GLEnum.DynamicDraw);
|
||||
|
||||
// Phase U.3: keep the clip-slot buffer (binding=3) sized to the
|
||||
// instance buffer so instanceClipSlot[BaseInstance + gl_InstanceID]
|
||||
// is always in range.
|
||||
_gl.BindBuffer(GLEnum.ShaderStorageBuffer, _clipSlotBuffer);
|
||||
_gl.BufferData(GLEnum.ShaderStorageBuffer,
|
||||
(nuint)(_modernInstanceCapacity * sizeof(uint)), null, GLEnum.DynamicDraw);
|
||||
}
|
||||
|
||||
// WB BaseObjectRenderManager.cs:761-762: grow scratch arrays.
|
||||
|
|
@ -1011,6 +1047,23 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
(nuint)(totalDraws * sizeof(ModernBatchData)), ptr);
|
||||
}
|
||||
|
||||
// Phase U.3: upload the per-instance clip-slot buffer (binding=3), all
|
||||
// zeros ⇒ every instance maps to slot 0 ⇒ no-clip. Re-zero the reused head
|
||||
// each frame so stale U.4 slot indices can't leak. Sized to
|
||||
// uniqueInstanceCount; the buffer was already grown above with the
|
||||
// instance buffer when capacity increased.
|
||||
if (_clipSlotData.Length < uniqueInstanceCount)
|
||||
_clipSlotData = new uint[Math.Max(_clipSlotData.Length * 2, uniqueInstanceCount)];
|
||||
Array.Clear(_clipSlotData, 0, uniqueInstanceCount);
|
||||
_gl.BindBuffer(GLEnum.ShaderStorageBuffer, _clipSlotBuffer);
|
||||
_gl.BufferData(GLEnum.ShaderStorageBuffer,
|
||||
(nuint)(uniqueInstanceCount * sizeof(uint)), null, GLEnum.DynamicDraw);
|
||||
fixed (uint* ptr = _clipSlotData)
|
||||
{
|
||||
_gl.BufferSubData(GLEnum.ShaderStorageBuffer, 0,
|
||||
(nuint)(uniqueInstanceCount * sizeof(uint)), ptr);
|
||||
}
|
||||
|
||||
// WB BaseObjectRenderManager.cs:807-818: bind VAO + SSBOs + barrier.
|
||||
var globalVao = _meshManager.GlobalBuffer?.VAO ?? 0u;
|
||||
if (globalVao == 0) return;
|
||||
|
|
@ -1022,6 +1075,10 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
|
||||
_gl.BindBufferBase(GLEnum.ShaderStorageBuffer, 0, _modernInstanceBuffer);
|
||||
_gl.BindBufferBase(GLEnum.ShaderStorageBuffer, 1, _modernBatchBuffer);
|
||||
// Phase U.3: per-instance clip slots (binding=3) + shared clip regions
|
||||
// (binding=2, via the GameWindow ClipFrame or our no-clip fallback).
|
||||
_gl.BindBufferBase(GLEnum.ShaderStorageBuffer, 3, _clipSlotBuffer);
|
||||
BindClipRegionBinding2();
|
||||
_gl.BindBuffer(GLEnum.DrawIndirectBuffer, _mdiCommandBuffer);
|
||||
|
||||
_gl.MemoryBarrier(MemoryBarrierMask.ShaderStorageBarrierBit | MemoryBarrierMask.CommandBarrierBit);
|
||||
|
|
@ -1095,6 +1152,41 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// BindClipRegionBinding2 (Phase U.3)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// <summary>
/// Attaches a per-cell clip-region SSBO to binding=2
/// (<c>ClipFrame.MeshClipSsboBinding</c>). The shared <see cref="ClipFrame"/>
/// buffer supplied through <see cref="SetClipRegionSsbo"/> wins when it is
/// non-zero. Otherwise a one-slot fallback buffer is created on first use,
/// filled with zeros (count 0 = pass-all), and bound, so the shader never
/// reads an unbound SSBO.
/// </summary>
private void BindClipRegionBinding2()
{
    uint regionBuffer = _sharedClipRegionSsbo;

    if (regionBuffer == 0)
    {
        // No shared buffer has been handed in: make sure the fallback exists.
        if (_fallbackClipRegionSsbo == 0)
        {
            _gl.GenBuffers(1, out _fallbackClipRegionSsbo);
            _gl.BindBuffer(GLEnum.ShaderStorageBuffer, _fallbackClipRegionSsbo);

            // A fresh managed array is all zeros, which is exactly one CellClip
            // slot with count 0 ⇒ the shader passes every plane.
            byte[] zeroSlot = new byte[AcDream.App.Rendering.ClipFrame.CellClipStrideBytes];
            fixed (byte* zeroPtr = zeroSlot)
            {
                _gl.BufferData(GLEnum.ShaderStorageBuffer,
                    (nuint)AcDream.App.Rendering.ClipFrame.CellClipStrideBytes, zeroPtr, GLEnum.DynamicDraw);
            }
        }

        regionBuffer = _fallbackClipRegionSsbo;
    }

    // Single bind point for both the shared and the fallback case.
    _gl.BindBufferBase(GLEnum.ShaderStorageBuffer,
        AcDream.App.Rendering.ClipFrame.MeshClipSsboBinding, regionBuffer);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// List pool (GetPooledList)
|
||||
// Copied from WB ObjectRenderManagerBase (pattern).
|
||||
|
|
@ -1194,5 +1286,7 @@ public sealed unsafe class EnvCellRenderer : IDisposable
|
|||
if (_mdiCommandBuffer != 0) { _gl.DeleteBuffer(_mdiCommandBuffer); _mdiCommandBuffer = 0; }
|
||||
if (_modernInstanceBuffer != 0){ _gl.DeleteBuffer(_modernInstanceBuffer); _modernInstanceBuffer = 0; }
|
||||
if (_modernBatchBuffer != 0) { _gl.DeleteBuffer(_modernBatchBuffer); _modernBatchBuffer = 0; }
|
||||
if (_clipSlotBuffer != 0) { _gl.DeleteBuffer(_clipSlotBuffer); _clipSlotBuffer = 0; } // Phase U.3
|
||||
if (_fallbackClipRegionSsbo != 0) { _gl.DeleteBuffer(_fallbackClipRegionSsbo); _fallbackClipRegionSsbo = 0; } // Phase U.3
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -125,6 +125,21 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
private uint _batchSsbo;
|
||||
private uint _indirectBuffer;
|
||||
|
||||
// Phase U.3: per-instance clip-slot SSBO (binding=3), parallel to
|
||||
// _instanceSsbo. One uint per instance selecting its CellClip slot. In U.3
|
||||
// this is ALL ZEROS (every instance → slot 0 → no-clip), so the render is
|
||||
// identical to pre-U.3. U.4 populates real slot indices.
|
||||
private uint _clipSlotSsbo;
|
||||
private uint[] _clipSlotData = new uint[256];
|
||||
|
||||
// Phase U.3: the SHARED per-cell clip-region SSBO (binding=2), owned by the
|
||||
// GameWindow-level ClipFrame and handed to us via SetClipRegionSsbo. When 0
|
||||
// (not yet wired), we bind our OWN fallback no-clip region buffer below so the
|
||||
// shader never reads an unbound SSBO. The fallback holds exactly slot 0
|
||||
// (count 0 = pass-all), matching ClipFrame.NoClip's slot 0.
|
||||
private uint _sharedClipRegionSsbo;
|
||||
private uint _fallbackClipRegionSsbo;
|
||||
|
||||
// Per-frame scratch arrays — Tasks 9-10 fully wire these.
|
||||
private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
|
||||
private BatchData[] _batchData = new BatchData[256];
|
||||
|
|
@ -255,8 +270,19 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_instanceSsbo = _gl.GenBuffer();
|
||||
_batchSsbo = _gl.GenBuffer();
|
||||
_indirectBuffer = _gl.GenBuffer();
|
||||
_clipSlotSsbo = _gl.GenBuffer(); // Phase U.3 binding=3
|
||||
}
|
||||
|
||||
/// <summary>
/// Phase U.3: records the id of the SHARED per-cell clip-region SSBO
/// (binding=2) that <see cref="ClipFrame.UploadShared"/> created. The
/// dispatcher re-binds the stored id to binding=2 immediately before each
/// MDI, so a consumer that touched binding=2 in between cannot leave it
/// pointing elsewhere. A value of 0 selects the internal no-clip fallback
/// region buffer.
/// </summary>
/// <param name="sharedClipRegionSsbo">
/// GL buffer id of the shared clip-region SSBO, or 0 to use the fallback.
/// </param>
public void SetClipRegionSsbo(uint sharedClipRegionSsbo)
{
    _sharedClipRegionSsbo = sharedClipRegionSsbo;
}
|
||||
|
||||
public static Matrix4x4 ComposePartWorldMatrix(
|
||||
Matrix4x4 entityWorld,
|
||||
Matrix4x4 animOverride,
|
||||
|
|
@ -975,13 +1001,25 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_transparentDrawCount,
|
||||
totalTriangles);
|
||||
|
||||
// ── Phase 5: upload three buffers ───────────────────────────────────
|
||||
// ── Phase 5: upload four buffers ────────────────────────────────────
|
||||
fixed (float* ip = _instanceData)
|
||||
UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
|
||||
|
||||
fixed (BatchData* bp = _batchData)
|
||||
UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
|
||||
|
||||
// Phase U.3: per-instance clip-slot buffer (binding=3), one uint per
|
||||
// instance, laid out parallel to _instanceData so the shader's
|
||||
// instanceClipSlot[instanceIndex] tracks the same instance as
|
||||
// Instances[instanceIndex]. ALL ZEROS in U.3 ⇒ slot 0 ⇒ no-clip. Grow +
|
||||
// zero the scratch as needed (a freshly allocated array is zero-filled; the
|
||||
// reused head is re-zeroed below so stale U.4 slot indices can't leak).
|
||||
if (_clipSlotData.Length < totalInstances)
|
||||
_clipSlotData = new uint[totalInstances + 256];
|
||||
Array.Clear(_clipSlotData, 0, totalInstances);
|
||||
fixed (uint* sp = _clipSlotData)
|
||||
UploadSsbo(_clipSlotSsbo, 3, sp, totalInstances * sizeof(uint));
|
||||
|
||||
fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
|
||||
{
|
||||
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
|
||||
|
|
@ -989,6 +1027,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
(nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
|
||||
}
|
||||
|
||||
// Phase U.3: bind the SHARED per-cell clip-region SSBO (binding=2). The
|
||||
// GameWindow-level ClipFrame already uploaded + bound it this frame; we
|
||||
// re-bind defensively in case another consumer touched binding=2 since.
|
||||
// When no shared id is set (0), bind our own no-clip fallback so the
|
||||
// shader never reads an unbound SSBO at binding=2.
|
||||
BindClipRegionBinding2();
|
||||
|
||||
// ── Phase 6: bind global VAO once ───────────────────────────────────
|
||||
_gl.BindVertexArray(anyVao);
|
||||
|
||||
|
|
@ -1228,6 +1273,36 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer, binding, ssbo);
|
||||
}
|
||||
|
||||
/// <summary>
/// Phase U.3: attaches a per-cell clip-region SSBO to binding=2
/// (<see cref="ClipFrame.MeshClipSsboBinding"/>). The shared
/// <see cref="ClipFrame"/> buffer set via <see cref="SetClipRegionSsbo"/> is
/// used when non-zero. Otherwise a one-slot fallback buffer is created on
/// first use and bound, so the shader never reads an unbound SSBO. The
/// fallback's single slot is all zeros, i.e. count 0 (pass-all), matching
/// <see cref="ClipFrame.NoClip"/>'s slot 0.
/// </summary>
private unsafe void BindClipRegionBinding2()
{
    // Only build the fallback when it is actually needed and not yet created.
    if (_sharedClipRegionSsbo == 0 && _fallbackClipRegionSsbo == 0)
    {
        _fallbackClipRegionSsbo = _gl.GenBuffer();

        // One CellClip slot, explicitly zeroed: count 0 ⇒ shader passes every plane.
        byte* zeroSlot = stackalloc byte[ClipFrame.CellClipStrideBytes];
        for (int offset = 0; offset < ClipFrame.CellClipStrideBytes; offset++)
        {
            zeroSlot[offset] = 0;
        }

        _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, _fallbackClipRegionSsbo);
        _gl.BufferData(BufferTargetARB.ShaderStorageBuffer,
            (nuint)ClipFrame.CellClipStrideBytes, zeroSlot, BufferUsageARB.DynamicDraw);
    }

    // Shared buffer takes precedence; fall back to the internal no-clip slot.
    uint regionBuffer = _sharedClipRegionSsbo != 0
        ? _sharedClipRegionSsbo
        : _fallbackClipRegionSsbo;

    _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
        ClipFrame.MeshClipSsboBinding, regionBuffer);
}
|
||||
|
||||
private void MaybeFlushDiag()
|
||||
{
|
||||
long now = Environment.TickCount64;
|
||||
|
|
@ -1517,6 +1592,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.DeleteBuffer(_instanceSsbo);
|
||||
_gl.DeleteBuffer(_batchSsbo);
|
||||
_gl.DeleteBuffer(_indirectBuffer);
|
||||
if (_clipSlotSsbo != 0) _gl.DeleteBuffer(_clipSlotSsbo); // Phase U.3
|
||||
if (_fallbackClipRegionSsbo != 0) _gl.DeleteBuffer(_fallbackClipRegionSsbo); // Phase U.3
|
||||
if (_gpuQueriesInitialized)
|
||||
{
|
||||
for (int i = 0; i < GpuQueryRingDepth; i++)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue