From f533414edf75af655a99d530fa9182cbcf1d48f2 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 8 May 2026 20:51:49 +0200 Subject: [PATCH] =?UTF-8?q?phase(N.5)=20Task=2010:=20glMultiDrawElementsIn?= =?UTF-8?q?direct=20dispatch=20=E2=80=94=20visual=20verified?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces WbDrawDispatcher's per-group glDrawElementsInstancedBaseVertexBaseInstance loop with two glMultiDrawElementsIndirect calls (opaque + transparent). Uploads three buffers per frame (two SSBOs plus the indirect command buffer): - _instanceSsbo @ binding=0 (mat4 per instance, indexed by gl_BaseInstanceARB + gl_InstanceID) - _batchSsbo @ binding=1 (BatchData per group, indexed by gl_DrawIDARB) - _indirectBuffer (DrawElementsIndirectCommand[] — opaque first, transparent second) GameWindow swaps the shader load to mesh_modern when _bindlessSupport is non-null. Capability detection + shader load now run in the right order (capability before TextureCache + before Shader). Deletes the obsolete DrawGroup stub, EnsureInstanceAttribs, _instanceBuffer, _patchedVaos. ClassifyBatches + ResolveTexture already migrated in Task 8 to use ulong bindless handles. BuildIndirectArrays (Task 9) wired in: _opaqueDraws + _translucentDraws are flattened into IndirectGroupInput[], laid out via the helper into contiguous indirect commands + parallel BatchData[]. opaqueByteOffset=0, transparentByteOffset = opaqueCount × DrawCommandStride. Visual verification (USER GATE) PASS: Holtburg courtyard renders identically to N.4 — terrain, scenery, characters, NPCs all visible without artifacts. [N.5] modern path capabilities present + mesh_modern shader loaded log lines confirm the boot path. [WB-DIAG] hot-path counters show healthy entity/draw activity. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/AcDream.App/Rendering/GameWindow.cs | 23 ++- .../Rendering/Wb/WbDrawDispatcher.cs | 195 +++++++++--------- 2 files changed, 123 insertions(+), 95 deletions(-) diff --git a/src/AcDream.App/Rendering/GameWindow.cs b/src/AcDream.App/Rendering/GameWindow.cs index d6321c9..cf8404c 100644 --- a/src/AcDream.App/Rendering/GameWindow.cs +++ b/src/AcDream.App/Rendering/GameWindow.cs @@ -970,9 +970,9 @@ public sealed class GameWindow : IDisposable Path.Combine(shadersDir, "terrain.vert"), Path.Combine(shadersDir, "terrain.frag")); - _meshShader = new Shader(_gl, - Path.Combine(shadersDir, "mesh_instanced.vert"), - Path.Combine(shadersDir, "mesh_instanced.frag")); + // mesh_instanced is the default; Task 10 (N.5) moves the final shader + // selection to after capability detection so mesh_modern can be chosen + // when bindless + ARB_shader_draw_parameters are available. See below. // Phase G.1/G.2: shared scene-lighting UBO. Stays bound at // binding=1 for the lifetime of the process — every shader that @@ -1447,6 +1447,23 @@ public sealed class GameWindow : IDisposable } } + // N.5 Task 10: load mesh_modern when both extensions are present; + // fall back to mesh_instanced otherwise. Must be after capability + // detection so _bindlessSupport is known. 
+ if (_bindlessSupport is not null) + { + _meshShader = new Shader(_gl, + Path.Combine(shadersDir, "mesh_modern.vert"), + Path.Combine(shadersDir, "mesh_modern.frag")); + Console.WriteLine("[N.5] mesh_modern shader loaded"); + } + else + { + _meshShader = new Shader(_gl, + Path.Combine(shadersDir, "mesh_instanced.vert"), + Path.Combine(shadersDir, "mesh_instanced.frag")); + } + _textureCache = new TextureCache(_gl, _dats, _bindlessSupport); // Two persistent GL sampler objects (Repeat + ClampToEdge) so // the sky pass can pick wrap mode per submesh without mutating diff --git a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs index 6d33293..3fe6f13 100644 --- a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs +++ b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs @@ -32,18 +32,19 @@ namespace AcDream.App.Rendering.Wb; /// /// /// -/// GL strategy: GROUPED instanced drawing. All visible (entity, batch) -/// pairs are bucketed by ; within a group a single -/// glDrawElementsInstancedBaseVertexBaseInstance renders all instances. -/// All matrices for the frame land in one shared instance VBO via a single -/// BufferData upload. This drops draw calls from O(entities×batches) -/// to O(unique GfxObj×batch×texture) — typically two orders of magnitude fewer. +/// GL strategy (N.5): glMultiDrawElementsIndirect with SSBOs. +/// All visible (entity, batch) pairs are bucketed by ; +/// each group becomes one DrawElementsIndirectCommand. Three GPU buffers +/// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch +/// metadata/texture handles (SSBO binding 1), and the indirect draw commands. +/// Two glMultiDrawElementsIndirect calls cover the opaque and transparent +/// passes respectively — one GL call per pass regardless of group count. 
/// /// /// -/// Shader: reuses mesh_instanced (vert locations 0-2 = Position/ -/// Normal/UV from WB's VertexPositionNormalTexture; locations 3-6 = instance -/// matrix from our VBO). WB's 32-byte vertex stride is compatible. +/// Shader: mesh_modern when bindless + ARB_shader_draw_parameters +/// are available (N.5 path). Falls back to mesh_instanced when the GPU +/// lacks those extensions. /// /// /// @@ -74,11 +75,9 @@ public sealed unsafe class WbDrawDispatcher : IDisposable private BatchData[] _batchData = new BatchData[256]; private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256]; -#pragma warning disable CS0169 // Tasks 9-10 wire these counters private int _opaqueDrawCount; private int _transparentDrawCount; private int _transparentByteOffset; -#pragma warning restore CS0169 // std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer // at offset 8, uint Flags at offset 12. Total 16 bytes. @@ -94,13 +93,10 @@ public sealed unsafe class WbDrawDispatcher : IDisposable public uint Flags; } - private readonly HashSet _patchedVaos = new(); - // Per-frame scratch — reused across frames to avoid per-frame allocation. private readonly Dictionary _groups = new(); private readonly List _opaqueDraws = new(); private readonly List _translucentDraws = new(); - private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink // Per-entity-cull AABB radius. Conservative — covers most entities; large // outliers (long banners, tall columns) are still landblock-culled. 
@@ -275,8 +271,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable return; } - // ── Phase 2: lay matrices out contiguously, assign per-group offsets, - // split into opaque/translucent + compute sort keys ───────── + // ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ── int totalInstances = 0; foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; if (totalInstances == 0) @@ -286,8 +281,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } int needed = totalInstances * 16; - if (_instanceBuffer.Length < needed) - _instanceBuffer = new float[needed + 256 * 16]; // headroom + if (_instanceData.Length < needed) + _instanceData = new float[needed + 256 * 16]; _opaqueDraws.Clear(); _translucentDraws.Clear(); @@ -304,17 +299,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable // position for front-to-back sort (perf #2). Cheap heuristic; works // well when instances of one group are spatially coherent // (typical for trees in one landblock area, NPCs at one spawn). - var firstM = grp.Matrices[0]; - var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43); + var first = grp.Matrices[0]; + var grpPos = new Vector3(first.M41, first.M42, first.M43); grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos); for (int i = 0; i < grp.Matrices.Count; i++) { - WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]); + WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]); cursor++; } - if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap) + if (IsOpaque(grp.Translucency)) _opaqueDraws.Add(grp); else _translucentDraws.Add(grp); @@ -326,82 +321,115 @@ public sealed unsafe class WbDrawDispatcher : IDisposable // Foundry interior). 
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance)); - // ── Phase 3: one upload of all matrices ───────────────────────────── - // NOTE: _instanceSsbo is temporarily bound as ArrayBuffer for compile - // compatibility. Tasks 9-10 rewrite this to BindBufferBase(SSBO) + - // glMultiDrawElementsIndirect. - _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo); - fixed (float* p = _instanceBuffer) - _gl.BufferData(BufferTargetARB.ArrayBuffer, - (nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw); + // ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent), + // fill via BuildIndirectArrays ────────────────────────────────── + int totalDraws = _opaqueDraws.Count + _translucentDraws.Count; + if (_batchData.Length < totalDraws) + _batchData = new BatchData[totalDraws + 64]; + if (_indirectCommands.Length < totalDraws) + _indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64]; - // ── Phase 4: bind VAO once (modern rendering shares one global VAO) ── - EnsureInstanceAttribs(anyVao); + var groupInputs = new List(totalDraws); + foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g)); + foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g)); + + // Cast _batchData (private BatchData) to public-mirror BatchDataPublic for BuildIndirectArrays. + // Layout is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test). 
+ var batchPublic = new BatchDataPublic[totalDraws]; + var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic); + + // Copy back into _batchData + for (int i = 0; i < totalDraws; i++) + { + _batchData[i] = new BatchData + { + TextureHandle = batchPublic[i].TextureHandle, + TextureLayer = batchPublic[i].TextureLayer, + Flags = batchPublic[i].Flags, + }; + } + _opaqueDrawCount = layout.OpaqueCount; + _transparentDrawCount = layout.TransparentCount; + _transparentByteOffset = layout.TransparentByteOffset; + + // ── Phase 5: upload three buffers ─────────────────────────────────── + fixed (float* ip = _instanceData) + UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float)); + + fixed (BatchData* bp = _batchData) + UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData)); + + fixed (DrawElementsIndirectCommand* cp = _indirectCommands) + { + _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer); + _gl.BufferData(BufferTargetARB.DrawIndirectBuffer, + (nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw); + } + + // ── Phase 6: bind global VAO once ─────────────────────────────────── _gl.BindVertexArray(anyVao); - // ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ─────────── if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal)) _gl.Disable(EnableCap.CullFace); - foreach (var grp in _opaqueDraws) + // ── Phase 7: opaque pass ───────────────────────────────────────────── + if (_opaqueDrawCount > 0) { - _shader.SetInt("uTranslucencyKind", (int)grp.Translucency); - DrawGroup(grp); + _gl.Disable(EnableCap.Blend); + _gl.DepthMask(true); + _shader.SetInt("uRenderPass", 0); + _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer); + _gl.MultiDrawElementsIndirect( + PrimitiveType.Triangles, + DrawElementsType.UnsignedShort, + (void*)0, + (uint)_opaqueDrawCount, + (uint)DrawCommandStride); } - // ── Phase 6: translucent 
pass ─────────────────────────────────────── - _gl.Enable(EnableCap.Blend); - _gl.DepthMask(false); - - if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal)) + // ── Phase 8: transparent pass ──────────────────────────────────────── + if (_transparentDrawCount > 0) { - _gl.Disable(EnableCap.CullFace); - } - else - { - _gl.Enable(EnableCap.CullFace); - _gl.CullFace(TriangleFace.Back); - _gl.FrontFace(FrontFaceDirection.Ccw); + _gl.Enable(EnableCap.Blend); + _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha); + _gl.DepthMask(false); + _shader.SetInt("uRenderPass", 1); + _gl.MultiDrawElementsIndirect( + PrimitiveType.Triangles, + DrawElementsType.UnsignedShort, + (void*)_transparentByteOffset, + (uint)_transparentDrawCount, + (uint)DrawCommandStride); + _gl.DepthMask(true); + _gl.Disable(EnableCap.Blend); } - foreach (var grp in _translucentDraws) - { - switch (grp.Translucency) - { - case TranslucencyKind.Additive: - _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One); - break; - case TranslucencyKind.InvAlpha: - _gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha); - break; - default: - _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha); - break; - } - - _shader.SetInt("uTranslucencyKind", (int)grp.Translucency); - DrawGroup(grp); - } - - _gl.DepthMask(true); - _gl.Disable(EnableCap.Blend); _gl.Disable(EnableCap.CullFace); _gl.BindVertexArray(0); if (diag) { - _drawsIssued += _opaqueDraws.Count + _translucentDraws.Count; + _drawsIssued += _opaqueDrawCount + _transparentDrawCount; _instancesIssued += totalInstances; MaybeFlushDiag(); } } - private void DrawGroup(InstanceGroup grp) + private static IndirectGroupInput ToInput(InstanceGroup g) => new( + IndexCount: g.IndexCount, + FirstIndex: g.FirstIndex, + BaseVertex: g.BaseVertex, + InstanceCount: g.InstanceCount, + FirstInstance: g.FirstInstance, + TextureHandle: g.BindlessTextureHandle, + 
TextureLayer: g.TextureLayer, + Translucency: g.Translucency); + + private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount) { - throw new NotImplementedException( - "DrawGroup is being removed in Task 10 — the dispatcher rewrites Draw() " + - "to use glMultiDrawElementsIndirect instead of per-group draws. " + - "If this throws at runtime, Task 10 hasn't landed yet."); + _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, ssbo); + _gl.BufferData(BufferTargetARB.ShaderStorageBuffer, (nuint)byteCount, data, BufferUsageARB.DynamicDraw); + _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer, binding, ssbo); } private void MaybeFlushDiag() @@ -495,23 +523,6 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } } - private void EnsureInstanceAttribs(uint vao) - { - if (!_patchedVaos.Add(vao)) return; - - _gl.BindVertexArray(vao); - // NOTE: temporarily binding _instanceSsbo as ArrayBuffer for compile - // compatibility. Tasks 9-10 replace with BindBufferBase(SSBO). - _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo); - for (uint row = 0; row < 4; row++) - { - uint loc = 3 + row; - _gl.EnableVertexAttribArray(loc); - _gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16)); - _gl.VertexAttribDivisor(loc, 1); - } - } - private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m) { buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;