diff --git a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs index 3388887..9728e77 100644 --- a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs +++ b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs @@ -10,9 +10,10 @@ using Silk.NET.OpenGL; namespace AcDream.App.Rendering.Wb; /// -/// Draws entities using WB's (VAO/VBO per GfxObj, -/// per-batch IBO) with acdream's for texture resolution -/// and for translucency classification. +/// Draws entities using WB's (a single global +/// VAO/VBO/IBO under modern rendering) with acdream's +/// for texture resolution and for +/// translucency classification. /// /// /// Atlas-tier entities (ServerGuid == 0): mesh data comes from WB's @@ -24,15 +25,18 @@ namespace AcDream.App.Rendering.Wb; /// /// Per-instance-tier entities (ServerGuid != 0): mesh data also from /// WB, but textures resolve through with palette and -/// surface overrides applied. Part overrides and hidden-parts from -/// control which GfxObj renders per part. +/// surface overrides applied. is currently +/// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges + +/// GfxObjDegradeResolver (Issue #47 close-detail mesh) into MeshRefs. /// /// /// -/// GL strategy: per-entity single-instance drawing. Each draw call uploads -/// one model matrix to the instance VBO, binds WB's VAO (with instance attribute -/// slots patched on first use), binds the batch's IBO, and calls DrawElements with -/// instance count 1. True instancing grouping deferred to N.6. +/// GL strategy: GROUPED instanced drawing. All visible (entity, batch) +/// pairs are bucketed by ; within a group a single +/// glDrawElementsInstancedBaseVertexBaseInstance renders all instances. +/// All matrices for the frame land in one shared instance VBO via a single +/// BufferData upload. This drops draw calls from O(entities×batches) +/// to O(unique GfxObj×batch×texture) — typically two orders of magnitude fewer. 
/// /// /// @@ -40,6 +44,14 @@ namespace AcDream.App.Rendering.Wb; /// Normal/UV from WB's VertexPositionNormalTexture; locations 3-6 = instance /// matrix from our VBO). WB's 32-byte vertex stride is compatible. /// +/// +/// +/// Modern rendering assumption: WB's _useModernRendering path (GL +/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses +/// FirstIndex + BaseVertex per batch. The dispatcher honors those +/// offsets via DrawElementsInstancedBaseVertex(BaseInstance). The legacy +/// per-mesh-VAO path is NOT covered: one VAO is bound once for all groups. +/// /// public sealed unsafe class WbDrawDispatcher : IDisposable { @@ -50,9 +62,12 @@ public sealed unsafe class WbDrawDispatcher : IDisposable private readonly EntitySpawnAdapter _entitySpawnAdapter; private readonly uint _instanceVbo; - private readonly float[] _matrixBuf = new float[16]; private readonly HashSet _patchedVaos = new(); + // Per-frame scratch — reused across frames to avoid per-frame allocation. + private readonly Dictionary _groups = new(); + private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink + private bool _disposed; // Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1. 
@@ -60,6 +75,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable private int _entitiesDrawn; private int _meshesMissing; private int _drawsIssued; + private int _instancesIssued; private long _lastLogTick; public WbDrawDispatcher( @@ -82,8 +98,6 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _entitySpawnAdapter = entitySpawnAdapter; _instanceVbo = _gl.GenBuffer(); - _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo); - _gl.BufferData(BufferTargetARB.ArrayBuffer, 64, null, BufferUsageARB.DynamicDraw); } public static Matrix4x4 ComposePartWorldMatrix( @@ -104,13 +118,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable var vp = camera.View * camera.Projection; _shader.SetMatrix4("uViewProjection", vp); - var metaTable = _meshAdapter.MetadataTable; bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal); - // Collect visible entities into opaque and translucent lists for two-pass rendering. - // We walk entities once and classify each (entity, meshRef, batch) triple. - var opaqueDraws = new List(); - var translucentDraws = new List(); + // ── Phase 1: clear groups, walk entities, build groups ────────────── + foreach (var grp in _groups.Values) grp.Matrices.Clear(); + + var metaTable = _meshAdapter.MetadataTable; + uint anyVao = 0; foreach (var entry in landblockEntries) { @@ -138,22 +152,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable Matrix4x4.CreateFromQuaternion(entity.Rotation) * Matrix4x4.CreateTranslation(entity.Position); - bool isPerInstance = entity.ServerGuid != 0; - AnimatedEntityState? animState = isPerInstance - ? 
_entitySpawnAdapter.GetState(entity.ServerGuid) - : null; - bool drewAny = false; for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++) { - if (animState is not null && animState.IsPartHidden(partIdx)) - continue; - + // Note: GameWindow's spawn path already applies + // AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix — + // close-detail mesh swap for humanoids) to MeshRefs. We + // trust MeshRefs as the source of truth here. AnimatedEntityState's + // overrides become relevant only for hot-swap (0xF625 + // ObjDescEvent) which today rebuilds MeshRefs anyway. var meshRef = entity.MeshRefs[partIdx]; - ulong gfxObjId = meshRef.GfxObjId; - if (animState is not null) - gfxObjId = animState.ResolvePartGfxObj(partIdx, gfxObjId); var renderData = _meshAdapter.TryGetRenderData(gfxObjId); if (renderData is null) @@ -162,10 +171,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable continue; } drewAny = true; + if (anyVao == 0) anyVao = renderData.VAO; - // For Setup objects, WB stores sub-parts in SetupParts. For - // single GfxObjs, SetupParts is empty and the render data - // itself contains the batches. 
if (renderData.IsSetup && renderData.SetupParts.Count > 0) { foreach (var (partGfxObjId, partTransform) in renderData.SetupParts) @@ -176,16 +183,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable var model = ComposePartWorldMatrix( entityWorld, meshRef.PartTransform, partTransform); - ClassifyBatches(partData, partGfxObjId, model, - entity, meshRef, metaTable, opaqueDraws, translucentDraws); + ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, metaTable); } } else { var model = meshRef.PartTransform * entityWorld; - - ClassifyBatches(renderData, gfxObjId, model, - entity, meshRef, metaTable, opaqueDraws, translucentDraws); + ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, metaTable); } } @@ -193,17 +197,71 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } } - // ── Pass 1: Opaque + ClipMap ───────────────────────────────────────── + // Nothing visible — skip the GL pass entirely. + if (anyVao == 0) + { + if (diag) MaybeFlushDiag(); + return; + } + + // ── Phase 2: lay matrices out contiguously, assign per-group offsets ── + int totalInstances = 0; + foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; + if (totalInstances == 0) + { + if (diag) MaybeFlushDiag(); + return; + } + + int needed = totalInstances * 16; + if (_instanceBuffer.Length < needed) + _instanceBuffer = new float[needed + 256 * 16]; // headroom + + int cursor = 0; + int opaqueGroups = 0, translucentGroups = 0; + foreach (var grp in _groups.Values) + { + if (grp.Matrices.Count == 0) continue; + + grp.FirstInstance = cursor; + grp.InstanceCount = grp.Matrices.Count; + for (int i = 0; i < grp.Matrices.Count; i++) + { + WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]); + cursor++; + } + + if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap) + opaqueGroups++; + else + translucentGroups++; + } + + // ── Phase 3: one upload of all matrices ───────────────────────────── + 
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo); + fixed (float* p = _instanceBuffer) + _gl.BufferData(BufferTargetARB.ArrayBuffer, + (nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw); + + // ── Phase 4: bind VAO once (modern rendering shares one global VAO) ── + EnsureInstanceAttribs(anyVao); + _gl.BindVertexArray(anyVao); + + // ── Phase 5: opaque + ClipMap pass ────────────────────────────────── if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal)) _gl.Disable(EnableCap.CullFace); - foreach (var item in opaqueDraws) + foreach (var grp in _groups.Values) { - _shader.SetInt("uTranslucencyKind", (int)item.Translucency); - UploadMatrixAndDraw(item); + if (grp.Matrices.Count == 0) continue; + if (grp.Translucency != TranslucencyKind.Opaque && grp.Translucency != TranslucencyKind.ClipMap) + continue; + + _shader.SetInt("uTranslucencyKind", (int)grp.Translucency); + DrawGroup(grp); } - // ── Pass 2: Translucent ────────────────────────────────────────────── + // ── Phase 6: translucent pass ─────────────────────────────────────── _gl.Enable(EnableCap.Blend); _gl.DepthMask(false); @@ -218,9 +276,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.FrontFace(FrontFaceDirection.Ccw); } - foreach (var item in translucentDraws) + foreach (var grp in _groups.Values) { - switch (item.Translucency) + if (grp.Matrices.Count == 0) continue; + if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap) + continue; + + switch (grp.Translucency) { case TranslucencyKind.Additive: _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One); @@ -233,8 +295,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable break; } - _shader.SetInt("uTranslucencyKind", (int)item.Translucency); - UploadMatrixAndDraw(item); + _shader.SetInt("uTranslucencyKind", (int)grp.Translucency); + DrawGroup(grp); } _gl.DepthMask(true); @@ -244,15 +306,40 @@ public 
sealed unsafe class WbDrawDispatcher : IDisposable if (diag) { - _drawsIssued += opaqueDraws.Count + translucentDraws.Count; - long now = Environment.TickCount64; - if (now - _lastLogTick > 5000) - { - Console.WriteLine( - $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued}"); - _entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = 0; - _lastLogTick = now; - } + _drawsIssued += opaqueGroups + translucentGroups; + _instancesIssued += totalInstances; + MaybeFlushDiag(); + } + } + + private void DrawGroup(InstanceGroup grp) + { + _gl.ActiveTexture(TextureUnit.Texture0); + _gl.BindTexture(TextureTarget.Texture2D, grp.TextureHandle); + _gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, grp.Ibo); + + // BaseInstance offsets the per-instance attribute fetches into our + // shared instance VBO so each group reads its own slice. Requires + // GL_ARB_base_instance (GL 4.2+); WB requires 4.3 so this is available. + _gl.DrawElementsInstancedBaseVertexBaseInstance( + PrimitiveType.Triangles, + (uint)grp.IndexCount, + DrawElementsType.UnsignedShort, + (void*)(grp.FirstIndex * sizeof(ushort)), + (uint)grp.InstanceCount, + grp.BaseVertex, + (uint)grp.FirstInstance); + } + + private void MaybeFlushDiag() + { + long now = Environment.TickCount64; + if (now - _lastLogTick > 5000) + { + Console.WriteLine( + $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}"); + _entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0; + _lastLogTick = now; } } @@ -262,9 +349,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable Matrix4x4 model, WorldEntity entity, MeshRef meshRef, - AcSurfaceMetadataTable metaTable, - List opaqueDraws, - List translucentDraws) + AcSurfaceMetadataTable metaTable) { for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++) { 
@@ -277,7 +362,6 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } else { - // Fallback: derive from WB batch flags. translucency = batch.IsAdditive ? TranslucencyKind.Additive : batch.IsTransparent ? TranslucencyKind.AlphaBlend : TranslucencyKind.Opaque; @@ -286,20 +370,24 @@ public sealed unsafe class WbDrawDispatcher : IDisposable uint texHandle = ResolveTexture(entity, meshRef, batch); if (texHandle == 0) continue; - var item = new DrawItem - { - Vao = renderData.VAO, - Ibo = batch.IBO, - IndexCount = batch.IndexCount, - Model = model, - TextureHandle = texHandle, - Translucency = translucency, - }; + var key = new GroupKey( + batch.IBO, batch.FirstIndex, (int)batch.BaseVertex, + batch.IndexCount, texHandle, translucency); - if (translucency == TranslucencyKind.Opaque || translucency == TranslucencyKind.ClipMap) - opaqueDraws.Add(item); - else - translucentDraws.Add(item); + if (!_groups.TryGetValue(key, out var grp)) + { + grp = new InstanceGroup + { + Ibo = batch.IBO, + FirstIndex = batch.FirstIndex, + BaseVertex = (int)batch.BaseVertex, + IndexCount = batch.IndexCount, + TextureHandle = texHandle, + Translucency = translucency, + }; + _groups[key] = grp; + } + grp.Matrices.Add(model); } } @@ -345,31 +433,6 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } } - private void UploadMatrixAndDraw(in DrawItem item) - { - WriteMatrix(_matrixBuf, 0, item.Model); - - _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo); - fixed (float* p = _matrixBuf) - _gl.BufferSubData(BufferTargetARB.ArrayBuffer, 0, 64, p); - - EnsureInstanceAttribs(item.Vao); - _gl.BindVertexArray(item.Vao); - - // Re-point instance attributes to offset 0 (single matrix). 
- _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo); - for (uint row = 0; row < 4; row++) - _gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16)); - - _gl.ActiveTexture(TextureUnit.Texture0); - _gl.BindTexture(TextureTarget.Texture2D, item.TextureHandle); - - _gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, item.Ibo); - _gl.DrawElementsInstanced(PrimitiveType.Triangles, - (uint)item.IndexCount, DrawElementsType.UnsignedShort, - (void*)0, 1); - } - private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m) { buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14; @@ -385,13 +448,24 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.DeleteBuffer(_instanceVbo); } - private struct DrawItem + private readonly record struct GroupKey( + uint Ibo, + uint FirstIndex, + int BaseVertex, + int IndexCount, + uint TextureHandle, + TranslucencyKind Translucency); + + private sealed class InstanceGroup { - public uint Vao; public uint Ibo; + public uint FirstIndex; + public int BaseVertex; public int IndexCount; - public Matrix4x4 Model; public uint TextureHandle; public TranslucencyKind Translucency; + public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes) + public int InstanceCount; + public readonly List Matrices = new(); } }