diff --git a/src/AcDream.App/Rendering/GameWindow.cs b/src/AcDream.App/Rendering/GameWindow.cs
index d6321c9..cf8404c 100644
--- a/src/AcDream.App/Rendering/GameWindow.cs
+++ b/src/AcDream.App/Rendering/GameWindow.cs
@@ -970,9 +970,9 @@ public sealed class GameWindow : IDisposable
Path.Combine(shadersDir, "terrain.vert"),
Path.Combine(shadersDir, "terrain.frag"));
- _meshShader = new Shader(_gl,
- Path.Combine(shadersDir, "mesh_instanced.vert"),
- Path.Combine(shadersDir, "mesh_instanced.frag"));
+ // _meshShader is no longer created here. N.5 Task 10 defers the choice
+ // until after capability detection (see the _bindlessSupport check further
+ // down): mesh_modern when it is available, mesh_instanced as the fallback.
// Phase G.1/G.2: shared scene-lighting UBO. Stays bound at
// binding=1 for the lifetime of the process — every shader that
@@ -1447,6 +1447,23 @@ public sealed class GameWindow : IDisposable
}
}
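+ // N.5 Task 10: load mesh_modern when _bindlessSupport is non-null, i.e.
+ // bindless textures + ARB_shader_draw_parameters were detected (assumed:
+ // confirm that detection gates on both); otherwise fall back to
+ // mesh_instanced. Must run after capability detection so
+ // _bindlessSupport is known.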
+ if (_bindlessSupport is not null)
+ {
+ _meshShader = new Shader(_gl,
+ Path.Combine(shadersDir, "mesh_modern.vert"),
+ Path.Combine(shadersDir, "mesh_modern.frag"));
+ Console.WriteLine("[N.5] mesh_modern shader loaded");
+ }
+ else
+ {
+ _meshShader = new Shader(_gl,
+ Path.Combine(shadersDir, "mesh_instanced.vert"),
+ Path.Combine(shadersDir, "mesh_instanced.frag"));
+ }
+
_textureCache = new TextureCache(_gl, _dats, _bindlessSupport);
// Two persistent GL sampler objects (Repeat + ClampToEdge) so
// the sky pass can pick wrap mode per submesh without mutating
diff --git a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs
index 6d33293..3fe6f13 100644
--- a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs
+++ b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs
@@ -32,18 +32,19 @@ namespace AcDream.App.Rendering.Wb;
///
///
///
-/// GL strategy: GROUPED instanced drawing. All visible (entity, batch)
-/// pairs are bucketed by ; within a group a single
-/// glDrawElementsInstancedBaseVertexBaseInstance renders all instances.
-/// All matrices for the frame land in one shared instance VBO via a single
-/// BufferData upload. This drops draw calls from O(entities×batches)
-/// to O(unique GfxObj×batch×texture) — typically two orders of magnitude fewer.
+/// GL strategy (N.5): glMultiDrawElementsIndirect with SSBOs.
+/// All visible (entity, batch) pairs are bucketed by ;
+/// each group becomes one DrawElementsIndirectCommand. Three GPU buffers
+/// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch
+/// metadata/texture handles (SSBO binding 1), and the indirect draw commands.
+/// Two glMultiDrawElementsIndirect calls cover the opaque and transparent
+/// passes respectively — one GL call per pass regardless of group count.
///
///
///
-/// Shader: reuses mesh_instanced (vert locations 0-2 = Position/
-/// Normal/UV from WB's VertexPositionNormalTexture; locations 3-6 = instance
-/// matrix from our VBO). WB's 32-byte vertex stride is compatible.
+/// Shader: mesh_modern when bindless + ARB_shader_draw_parameters
+/// are available (N.5 path). Falls back to mesh_instanced when the GPU
+/// lacks those extensions.
///
///
///
@@ -74,11 +75,9 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
private BatchData[] _batchData = new BatchData[256];
private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];
-#pragma warning disable CS0169 // Tasks 9-10 wire these counters
private int _opaqueDrawCount;
private int _transparentDrawCount;
private int _transparentByteOffset;
-#pragma warning restore CS0169
// std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
// at offset 8, uint Flags at offset 12. Total 16 bytes.
@@ -94,13 +93,10 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
public uint Flags;
}
- private readonly HashSet _patchedVaos = new();
-
// Per-frame scratch — reused across frames to avoid per-frame allocation.
private readonly Dictionary _groups = new();
private readonly List _opaqueDraws = new();
private readonly List _translucentDraws = new();
- private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
@@ -275,8 +271,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
return;
}
- // ── Phase 2: lay matrices out contiguously, assign per-group offsets,
- // split into opaque/translucent + compute sort keys ─────────
+ // ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ──
int totalInstances = 0;
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
if (totalInstances == 0)
@@ -286,8 +281,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
}
int needed = totalInstances * 16;
- if (_instanceBuffer.Length < needed)
- _instanceBuffer = new float[needed + 256 * 16]; // headroom
+ if (_instanceData.Length < needed)
+ _instanceData = new float[needed + 256 * 16];
_opaqueDraws.Clear();
_translucentDraws.Clear();
@@ -304,17 +299,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
// position for front-to-back sort (perf #2). Cheap heuristic; works
// well when instances of one group are spatially coherent
// (typical for trees in one landblock area, NPCs at one spawn).
- var firstM = grp.Matrices[0];
- var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43);
+ var first = grp.Matrices[0];
+ var grpPos = new Vector3(first.M41, first.M42, first.M43);
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
for (int i = 0; i < grp.Matrices.Count; i++)
{
- WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]);
+ WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]);
cursor++;
}
- if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
+ if (IsOpaque(grp.Translucency))
_opaqueDraws.Add(grp);
else
_translucentDraws.Add(grp);
@@ -326,82 +321,115 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
// Foundry interior).
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));
- // ── Phase 3: one upload of all matrices ─────────────────────────────
- // NOTE: _instanceSsbo is temporarily bound as ArrayBuffer for compile
- // compatibility. Tasks 9-10 rewrite this to BindBufferBase(SSBO) +
- // glMultiDrawElementsIndirect.
- _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo);
- fixed (float* p = _instanceBuffer)
- _gl.BufferData(BufferTargetARB.ArrayBuffer,
- (nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw);
+ // ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent),
+ // fill via BuildIndirectArrays ──────────────────────────────────
+ int totalDraws = _opaqueDraws.Count + _translucentDraws.Count;
+ if (_batchData.Length < totalDraws)
+ _batchData = new BatchData[totalDraws + 64];
+ if (_indirectCommands.Length < totalDraws)
+ _indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64];
- // ── Phase 4: bind VAO once (modern rendering shares one global VAO) ──
- EnsureInstanceAttribs(anyVao);
+ var groupInputs = new List(totalDraws);
+ foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g));
+ foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g));
+
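+ // BuildIndirectArrays writes BatchDataPublic (the public mirror of the private BatchData),
+ // so it fills a temporary array that is copied field by field into _batchData below.
+ // Layout is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test).
+ // NOTE(review): batchPublic (and groupInputs above) are allocated on every call;
+ // consider reusable scratch buffers like the other per-frame collections.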
+ var batchPublic = new BatchDataPublic[totalDraws];
+ var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic);
+
+ // Copy back into _batchData
+ for (int i = 0; i < totalDraws; i++)
+ {
+ _batchData[i] = new BatchData
+ {
+ TextureHandle = batchPublic[i].TextureHandle,
+ TextureLayer = batchPublic[i].TextureLayer,
+ Flags = batchPublic[i].Flags,
+ };
+ }
+ _opaqueDrawCount = layout.OpaqueCount;
+ _transparentDrawCount = layout.TransparentCount;
+ _transparentByteOffset = layout.TransparentByteOffset;
+
+ // ── Phase 5: upload three buffers ───────────────────────────────────
+ fixed (float* ip = _instanceData)
+ UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
+
+ fixed (BatchData* bp = _batchData)
+ UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
+
+ fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
+ {
+ _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
+ _gl.BufferData(BufferTargetARB.DrawIndirectBuffer,
+ (nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
+ }
+
+ // ── Phase 6: bind global VAO once ───────────────────────────────────
_gl.BindVertexArray(anyVao);
- // ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ───────────
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
_gl.Disable(EnableCap.CullFace);
- foreach (var grp in _opaqueDraws)
+ // ── Phase 7: opaque pass ─────────────────────────────────────────────
+ if (_opaqueDrawCount > 0)
{
- _shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
- DrawGroup(grp);
+ _gl.Disable(EnableCap.Blend);
+ _gl.DepthMask(true);
+ _shader.SetInt("uRenderPass", 0);
+ _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
+ _gl.MultiDrawElementsIndirect(
+ PrimitiveType.Triangles,
+ DrawElementsType.UnsignedShort,
+ (void*)0,
+ (uint)_opaqueDrawCount,
+ (uint)DrawCommandStride);
}
- // ── Phase 6: translucent pass ───────────────────────────────────────
- _gl.Enable(EnableCap.Blend);
- _gl.DepthMask(false);
-
- if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
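+ // ── Phase 8: transparent pass ────────────────────────────────────────
+ // NOTE(review): a single BlendFunc(SrcAlpha, OneMinusSrcAlpha) now covers every
+ // translucent group. The per-group loop this replaces used (SrcAlpha, One) for
+ // Additive and (OneMinusSrcAlpha, SrcAlpha) for InvAlpha, and enabled back-face
+ // culling (CCW) for this pass unless ACDREAM_NO_CULL=1. Confirm the new path
+ // handles those cases elsewhere, or split the pass per blend mode.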
+ if (_transparentDrawCount > 0)
{
- _gl.Disable(EnableCap.CullFace);
- }
- else
- {
- _gl.Enable(EnableCap.CullFace);
- _gl.CullFace(TriangleFace.Back);
- _gl.FrontFace(FrontFaceDirection.Ccw);
+ _gl.Enable(EnableCap.Blend);
+ _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
+ _gl.DepthMask(false);
+ _shader.SetInt("uRenderPass", 1);
+ _gl.MultiDrawElementsIndirect(
+ PrimitiveType.Triangles,
+ DrawElementsType.UnsignedShort,
+ (void*)_transparentByteOffset,
+ (uint)_transparentDrawCount,
+ (uint)DrawCommandStride);
+ _gl.DepthMask(true);
+ _gl.Disable(EnableCap.Blend);
}
- foreach (var grp in _translucentDraws)
- {
- switch (grp.Translucency)
- {
- case TranslucencyKind.Additive:
- _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
- break;
- case TranslucencyKind.InvAlpha:
- _gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
- break;
- default:
- _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
- break;
- }
-
- _shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
- DrawGroup(grp);
- }
-
- _gl.DepthMask(true);
- _gl.Disable(EnableCap.Blend);
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
if (diag)
{
- _drawsIssued += _opaqueDraws.Count + _translucentDraws.Count;
+ _drawsIssued += _opaqueDrawCount + _transparentDrawCount;
_instancesIssued += totalInstances;
MaybeFlushDiag();
}
}
- private void DrawGroup(InstanceGroup grp)
+ /// <summary>
+ /// Copies the draw parameters of one <c>InstanceGroup</c> into a new
+ /// <c>IndirectGroupInput</c>, the element type of the list that is passed
+ /// to BuildIndirectArrays.
+ /// </summary>
+ /// <param name="g">Group whose fields are copied; it is only read here.</param>
+ /// <returns>
+ /// A new input carrying the group's index range, base vertex, instance
+ /// range, texture handle/layer and translucency.
+ /// </returns>
+ private static IndirectGroupInput ToInput(InstanceGroup g) => new(
+ IndexCount: g.IndexCount,
+ FirstIndex: g.FirstIndex,
+ BaseVertex: g.BaseVertex,
+ InstanceCount: g.InstanceCount,
+ FirstInstance: g.FirstInstance,
+ // The input's TextureHandle is sourced from the group's BindlessTextureHandle.
+ TextureHandle: g.BindlessTextureHandle,
+ TextureLayer: g.TextureLayer,
+ Translucency: g.Translucency);
+
+ /// <summary>
+ /// Uploads <paramref name="byteCount"/> bytes from <paramref name="data"/> into
+ /// the buffer <paramref name="ssbo"/> through the ShaderStorageBuffer target and
+ /// attaches it to the indexed binding point <paramref name="binding"/>.
+ /// </summary>
+ /// <param name="ssbo">GL buffer object name to fill.</param>
+ /// <param name="binding">Shader-storage binding index passed to BindBufferBase.</param>
+ /// <param name="data">Pointer to the source bytes; the visible callers pass a pointer pinned with <c>fixed</c>.</param>
+ /// <param name="byteCount">Number of bytes to upload.</param>
+ private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount)
{
- throw new NotImplementedException(
- "DrawGroup is being removed in Task 10 — the dispatcher rewrites Draw() " +
- "to use glMultiDrawElementsIndirect instead of per-group draws. " +
- "If this throws at runtime, Task 10 hasn't landed yet.");
+ _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, ssbo);
+ // BufferData re-specifies the buffer's data store with exactly byteCount bytes (DynamicDraw usage hint).
+ _gl.BufferData(BufferTargetARB.ShaderStorageBuffer, (nuint)byteCount, data, BufferUsageARB.DynamicDraw);
+ // Attach the buffer to the indexed SSBO binding point given by `binding`.
+ _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer, binding, ssbo);
}
private void MaybeFlushDiag()
@@ -495,23 +523,6 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
}
}
- private void EnsureInstanceAttribs(uint vao)
- {
- if (!_patchedVaos.Add(vao)) return;
-
- _gl.BindVertexArray(vao);
- // NOTE: temporarily binding _instanceSsbo as ArrayBuffer for compile
- // compatibility. Tasks 9-10 replace with BindBufferBase(SSBO).
- _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo);
- for (uint row = 0; row < 4; row++)
- {
- uint loc = 3 + row;
- _gl.EnableVertexAttribArray(loc);
- _gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
- _gl.VertexAttribDivisor(loc, 1);
- }
- }
-
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;