phase(N.5) Task 10: glMultiDrawElementsIndirect dispatch — visual verified
Replaces WbDrawDispatcher's per-group glDrawElementsInstancedBaseVertexBaseInstance loop with two glMultiDrawElementsIndirect calls (opaque + transparent).

Per-frame uploads three GPU buffers (two SSBOs plus the indirect command buffer):
- _instanceSsbo @ binding=0 (mat4 per instance, indexed by gl_BaseInstanceARB + gl_InstanceID)
- _batchSsbo @ binding=1 (BatchData per group, indexed by gl_DrawIDARB)
- _indirectBuffer (DrawElementsIndirectCommand[] — opaque first, transparent second)

GameWindow swaps the shader load to mesh_modern when _bindlessSupport is non-null. Capability detection + shader load now run in the right order (capability detection before TextureCache + before Shader).

Deletes the obsolete DrawGroup stub, EnsureInstanceAttribs, _instanceBuffer, _patchedVaos. ClassifyBatches + ResolveTexture already migrated in Task 8 to use ulong bindless handles.

BuildIndirectArrays (Task 9) wired in: _opaqueDraws + _translucentDraws are flattened into IndirectGroupInput[], laid out via the helper into contiguous indirect commands + parallel BatchData[]. opaqueByteOffset=0, transparentByteOffset = opaqueCount × DrawCommandStride.

Visual verification (USER GATE) PASS: Holtburg courtyard renders identically to N.4 — terrain, scenery, characters, NPCs all visible without artifacts. [N.5] modern path capabilities present + mesh_modern shader loaded log lines confirm the boot path. [WB-DIAG] hot-path counters show healthy entity/draw activity.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b163c53622
commit
f533414edf
2 changed files with 123 additions and 95 deletions
|
|
@ -970,9 +970,9 @@ public sealed class GameWindow : IDisposable
|
|||
Path.Combine(shadersDir, "terrain.vert"),
|
||||
Path.Combine(shadersDir, "terrain.frag"));
|
||||
|
||||
_meshShader = new Shader(_gl,
|
||||
Path.Combine(shadersDir, "mesh_instanced.vert"),
|
||||
Path.Combine(shadersDir, "mesh_instanced.frag"));
|
||||
// The mesh shader is no longer created here: Task 10 (N.5) defers its selection
|
||||
// until after capability detection (the _bindlessSupport check ahead of TextureCache
|
||||
// creation) — mesh_modern if bindless + ARB_shader_draw_parameters exist, else mesh_instanced.
|
||||
|
||||
// Phase G.1/G.2: shared scene-lighting UBO. Stays bound at
|
||||
// binding=1 for the lifetime of the process — every shader that
|
||||
|
|
@ -1447,6 +1447,23 @@ public sealed class GameWindow : IDisposable
|
|||
}
|
||||
}
|
||||
|
||||
// N.5 Task 10: load mesh_modern when both extensions are present;
|
||||
// fall back to mesh_instanced otherwise. Must be after capability
|
||||
// detection so _bindlessSupport is known.
|
||||
if (_bindlessSupport is not null)
|
||||
{
|
||||
_meshShader = new Shader(_gl,
|
||||
Path.Combine(shadersDir, "mesh_modern.vert"),
|
||||
Path.Combine(shadersDir, "mesh_modern.frag"));
|
||||
Console.WriteLine("[N.5] mesh_modern shader loaded");
|
||||
}
|
||||
else
|
||||
{
|
||||
_meshShader = new Shader(_gl,
|
||||
Path.Combine(shadersDir, "mesh_instanced.vert"),
|
||||
Path.Combine(shadersDir, "mesh_instanced.frag"));
|
||||
}
|
||||
|
||||
_textureCache = new TextureCache(_gl, _dats, _bindlessSupport);
|
||||
// Two persistent GL sampler objects (Repeat + ClampToEdge) so
|
||||
// the sky pass can pick wrap mode per submesh without mutating
|
||||
|
|
|
|||
|
|
@ -32,18 +32,19 @@ namespace AcDream.App.Rendering.Wb;
|
|||
/// </para>
|
||||
///
|
||||
/// <para>
|
||||
/// <b>GL strategy:</b> GROUPED instanced drawing. All visible (entity, batch)
|
||||
/// pairs are bucketed by <see cref="GroupKey"/>; within a group a single
|
||||
/// <c>glDrawElementsInstancedBaseVertexBaseInstance</c> renders all instances.
|
||||
/// All matrices for the frame land in one shared instance VBO via a single
|
||||
/// <c>BufferData</c> upload. This drops draw calls from O(entities×batches)
|
||||
/// to O(unique GfxObj×batch×texture) — typically two orders of magnitude fewer.
|
||||
/// <b>GL strategy (N.5):</b> <c>glMultiDrawElementsIndirect</c> with SSBOs.
|
||||
/// All visible (entity, batch) pairs are bucketed by <see cref="GroupKey"/>;
|
||||
/// each group becomes one <c>DrawElementsIndirectCommand</c>. Three GPU buffers
|
||||
/// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch
|
||||
/// metadata/texture handles (SSBO binding 1), and the indirect draw commands.
|
||||
/// Two <c>glMultiDrawElementsIndirect</c> calls cover the opaque and transparent
|
||||
/// passes respectively — one GL call per pass regardless of group count.
|
||||
/// </para>
|
||||
///
|
||||
/// <para>
|
||||
/// <b>Shader:</b> reuses <c>mesh_instanced</c> (vert locations 0-2 = Position/
|
||||
/// Normal/UV from WB's <c>VertexPositionNormalTexture</c>; locations 3-6 = instance
|
||||
/// matrix from our VBO). WB's 32-byte vertex stride is compatible.
|
||||
/// <b>Shader:</b> <c>mesh_modern</c> when bindless + ARB_shader_draw_parameters
|
||||
/// are available (N.5 path). Falls back to <c>mesh_instanced</c> when the GPU
|
||||
/// lacks those extensions.
|
||||
/// </para>
|
||||
///
|
||||
/// <para>
|
||||
|
|
@ -74,11 +75,9 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
private BatchData[] _batchData = new BatchData[256];
|
||||
private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];
|
||||
|
||||
#pragma warning disable CS0169 // Tasks 9-10 wire these counters
|
||||
private int _opaqueDrawCount;
|
||||
private int _transparentDrawCount;
|
||||
private int _transparentByteOffset;
|
||||
#pragma warning restore CS0169
|
||||
|
||||
// std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
|
||||
// at offset 8, uint Flags at offset 12. Total 16 bytes.
|
||||
|
|
@ -94,13 +93,10 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
public uint Flags;
|
||||
}
|
||||
|
||||
private readonly HashSet<uint> _patchedVaos = new();
|
||||
|
||||
// Per-frame scratch — reused across frames to avoid per-frame allocation.
|
||||
private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
|
||||
private readonly List<InstanceGroup> _opaqueDraws = new();
|
||||
private readonly List<InstanceGroup> _translucentDraws = new();
|
||||
private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink
|
||||
|
||||
// Per-entity-cull AABB radius. Conservative — covers most entities; large
|
||||
// outliers (long banners, tall columns) are still landblock-culled.
|
||||
|
|
@ -275,8 +271,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
return;
|
||||
}
|
||||
|
||||
// ── Phase 2: lay matrices out contiguously, assign per-group offsets,
|
||||
// split into opaque/translucent + compute sort keys ─────────
|
||||
// ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ──
|
||||
int totalInstances = 0;
|
||||
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
|
||||
if (totalInstances == 0)
|
||||
|
|
@ -286,8 +281,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
}
|
||||
|
||||
int needed = totalInstances * 16;
|
||||
if (_instanceBuffer.Length < needed)
|
||||
_instanceBuffer = new float[needed + 256 * 16]; // headroom
|
||||
if (_instanceData.Length < needed)
|
||||
_instanceData = new float[needed + 256 * 16];
|
||||
|
||||
_opaqueDraws.Clear();
|
||||
_translucentDraws.Clear();
|
||||
|
|
@ -304,17 +299,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
// position for front-to-back sort (perf #2). Cheap heuristic; works
|
||||
// well when instances of one group are spatially coherent
|
||||
// (typical for trees in one landblock area, NPCs at one spawn).
|
||||
var firstM = grp.Matrices[0];
|
||||
var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43);
|
||||
var first = grp.Matrices[0];
|
||||
var grpPos = new Vector3(first.M41, first.M42, first.M43);
|
||||
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
|
||||
|
||||
for (int i = 0; i < grp.Matrices.Count; i++)
|
||||
{
|
||||
WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]);
|
||||
WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]);
|
||||
cursor++;
|
||||
}
|
||||
|
||||
if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
|
||||
if (IsOpaque(grp.Translucency))
|
||||
_opaqueDraws.Add(grp);
|
||||
else
|
||||
_translucentDraws.Add(grp);
|
||||
|
|
@ -326,82 +321,115 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
// Foundry interior).
|
||||
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));
|
||||
|
||||
// ── Phase 3: one upload of all matrices ─────────────────────────────
|
||||
// NOTE: _instanceSsbo is temporarily bound as ArrayBuffer for compile
|
||||
// compatibility. Tasks 9-10 rewrite this to BindBufferBase(SSBO) +
|
||||
// glMultiDrawElementsIndirect.
|
||||
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo);
|
||||
fixed (float* p = _instanceBuffer)
|
||||
_gl.BufferData(BufferTargetARB.ArrayBuffer,
|
||||
(nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw);
|
||||
// ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent),
|
||||
// fill via BuildIndirectArrays ──────────────────────────────────
|
||||
int totalDraws = _opaqueDraws.Count + _translucentDraws.Count;
|
||||
if (_batchData.Length < totalDraws)
|
||||
_batchData = new BatchData[totalDraws + 64];
|
||||
if (_indirectCommands.Length < totalDraws)
|
||||
_indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64];
|
||||
|
||||
// ── Phase 4: bind VAO once (modern rendering shares one global VAO) ──
|
||||
EnsureInstanceAttribs(anyVao);
|
||||
var groupInputs = new List<IndirectGroupInput>(totalDraws);
|
||||
foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g));
|
||||
foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g));
|
||||
|
||||
// BuildIndirectArrays fills the public mirror type BatchDataPublic, so it writes into a temporary
|
||||
// array that is copied field-by-field into _batchData (private BatchData) below. Layout equality is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test).
|
||||
var batchPublic = new BatchDataPublic[totalDraws];
|
||||
var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic);
|
||||
|
||||
// Copy back into _batchData
|
||||
for (int i = 0; i < totalDraws; i++)
|
||||
{
|
||||
_batchData[i] = new BatchData
|
||||
{
|
||||
TextureHandle = batchPublic[i].TextureHandle,
|
||||
TextureLayer = batchPublic[i].TextureLayer,
|
||||
Flags = batchPublic[i].Flags,
|
||||
};
|
||||
}
|
||||
_opaqueDrawCount = layout.OpaqueCount;
|
||||
_transparentDrawCount = layout.TransparentCount;
|
||||
_transparentByteOffset = layout.TransparentByteOffset;
|
||||
|
||||
// ── Phase 5: upload three buffers ───────────────────────────────────
|
||||
fixed (float* ip = _instanceData)
|
||||
UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
|
||||
|
||||
fixed (BatchData* bp = _batchData)
|
||||
UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
|
||||
|
||||
fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
|
||||
{
|
||||
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
|
||||
_gl.BufferData(BufferTargetARB.DrawIndirectBuffer,
|
||||
(nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
|
||||
}
|
||||
|
||||
// ── Phase 6: bind global VAO once ───────────────────────────────────
|
||||
_gl.BindVertexArray(anyVao);
|
||||
|
||||
// ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ───────────
|
||||
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
|
||||
_gl.Disable(EnableCap.CullFace);
|
||||
|
||||
foreach (var grp in _opaqueDraws)
|
||||
// ── Phase 7: opaque pass ─────────────────────────────────────────────
|
||||
if (_opaqueDrawCount > 0)
|
||||
{
|
||||
_shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
|
||||
DrawGroup(grp);
|
||||
_gl.Disable(EnableCap.Blend);
|
||||
_gl.DepthMask(true);
|
||||
_shader.SetInt("uRenderPass", 0);
|
||||
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
|
||||
_gl.MultiDrawElementsIndirect(
|
||||
PrimitiveType.Triangles,
|
||||
DrawElementsType.UnsignedShort,
|
||||
(void*)0,
|
||||
(uint)_opaqueDrawCount,
|
||||
(uint)DrawCommandStride);
|
||||
}
|
||||
|
||||
// ── Phase 6: translucent pass ───────────────────────────────────────
|
||||
_gl.Enable(EnableCap.Blend);
|
||||
_gl.DepthMask(false);
|
||||
|
||||
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
|
||||
// ── Phase 8: transparent pass ────────────────────────────────────────
|
||||
if (_transparentDrawCount > 0)
|
||||
{
|
||||
_gl.Disable(EnableCap.CullFace);
|
||||
}
|
||||
else
|
||||
{
|
||||
_gl.Enable(EnableCap.CullFace);
|
||||
_gl.CullFace(TriangleFace.Back);
|
||||
_gl.FrontFace(FrontFaceDirection.Ccw);
|
||||
_gl.Enable(EnableCap.Blend);
|
||||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
||||
_gl.DepthMask(false);
|
||||
_shader.SetInt("uRenderPass", 1);
|
||||
_gl.MultiDrawElementsIndirect(
|
||||
PrimitiveType.Triangles,
|
||||
DrawElementsType.UnsignedShort,
|
||||
(void*)_transparentByteOffset,
|
||||
(uint)_transparentDrawCount,
|
||||
(uint)DrawCommandStride);
|
||||
_gl.DepthMask(true);
|
||||
_gl.Disable(EnableCap.Blend);
|
||||
}
|
||||
|
||||
foreach (var grp in _translucentDraws)
|
||||
{
|
||||
switch (grp.Translucency)
|
||||
{
|
||||
case TranslucencyKind.Additive:
|
||||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
|
||||
break;
|
||||
case TranslucencyKind.InvAlpha:
|
||||
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
|
||||
break;
|
||||
default:
|
||||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
||||
break;
|
||||
}
|
||||
|
||||
_shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
|
||||
DrawGroup(grp);
|
||||
}
|
||||
|
||||
_gl.DepthMask(true);
|
||||
_gl.Disable(EnableCap.Blend);
|
||||
_gl.Disable(EnableCap.CullFace);
|
||||
_gl.BindVertexArray(0);
|
||||
|
||||
if (diag)
|
||||
{
|
||||
_drawsIssued += _opaqueDraws.Count + _translucentDraws.Count;
|
||||
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
|
||||
_instancesIssued += totalInstances;
|
||||
MaybeFlushDiag();
|
||||
}
|
||||
}
|
||||
|
||||
private void DrawGroup(InstanceGroup grp)
|
||||
private static IndirectGroupInput ToInput(InstanceGroup g) => new(
|
||||
IndexCount: g.IndexCount,
|
||||
FirstIndex: g.FirstIndex,
|
||||
BaseVertex: g.BaseVertex,
|
||||
InstanceCount: g.InstanceCount,
|
||||
FirstInstance: g.FirstInstance,
|
||||
TextureHandle: g.BindlessTextureHandle,
|
||||
TextureLayer: g.TextureLayer,
|
||||
Translucency: g.Translucency);
|
||||
|
||||
private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount)
|
||||
{
|
||||
throw new NotImplementedException(
|
||||
"DrawGroup is being removed in Task 10 — the dispatcher rewrites Draw() " +
|
||||
"to use glMultiDrawElementsIndirect instead of per-group draws. " +
|
||||
"If this throws at runtime, Task 10 hasn't landed yet.");
|
||||
_gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, ssbo);
|
||||
_gl.BufferData(BufferTargetARB.ShaderStorageBuffer, (nuint)byteCount, data, BufferUsageARB.DynamicDraw);
|
||||
_gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer, binding, ssbo);
|
||||
}
|
||||
|
||||
private void MaybeFlushDiag()
|
||||
|
|
@ -495,23 +523,6 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
}
|
||||
}
|
||||
|
||||
private void EnsureInstanceAttribs(uint vao)
|
||||
{
|
||||
if (!_patchedVaos.Add(vao)) return;
|
||||
|
||||
_gl.BindVertexArray(vao);
|
||||
// NOTE: temporarily binding _instanceSsbo as ArrayBuffer for compile
|
||||
// compatibility. Tasks 9-10 replace with BindBufferBase(SSBO).
|
||||
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo);
|
||||
for (uint row = 0; row < 4; row++)
|
||||
{
|
||||
uint loc = 3 + row;
|
||||
_gl.EnableVertexAttribArray(loc);
|
||||
_gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
|
||||
_gl.VertexAttribDivisor(loc, 1);
|
||||
}
|
||||
}
|
||||
|
||||
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
|
||||
{
|
||||
buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue