From 573526dae5adc6ce4bb1a0effc869e5e70212524 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 8 May 2026 17:51:03 +0200 Subject: [PATCH] =?UTF-8?q?phase(N.4):=20WbDrawDispatcher=20perf=20pass=20?= =?UTF-8?q?=E2=80=94=20sort,=20cull,=20hash=20memoization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four small wins on top of the grouped-instanced refactor. 1. Drop unused animState lookup. It was a side-effect-free _entitySpawnAdapter.GetState call issued for each per-instance entity, made redundant by the Issue #47 fix that trusts MeshRefs. 2. Front-to-back sort opaque groups. Squared distance from camera to each group's first-instance translation; ascending sort. Lets the GPU's depth test reject fragments behind closer geometry — real win on dense scenes (Holtburg courtyard, Foundry interior). 3. Per-entity AABB frustum cull. Tests an AABB with a 5 m half-extent around each entity's position against the view frustum before walking parts. Skips work for entities outside the frustum even when their landblock is partially visible. Animated entities (other characters, NPCs, monsters) bypass — they always need per-frame work for animation regardless. Conservative radius covers typical entity bounds; large outliers stay landblock-culled. 4. Memoize palette hash per entity. TextureCache.HashPaletteOverride is now internal; new GetOrUploadWithPaletteOverride overload takes a precomputed hash. The dispatcher computes it ONCE per entity and reuses across every (part, batch) lookup, avoiding the per-batch FNV-1a fold over SubPalettes. Trees / scenery without palette overrides skip entirely (palHash stays 0). Visual output unchanged; FPS up further, especially in dense scenes. 
Co-Authored-By: Claude Opus 4.6 --- src/AcDream.App/Rendering/TextureCache.cs | 22 ++++- .../Rendering/Wb/WbDrawDispatcher.cs | 92 ++++++++++++++----- 2 files changed, 89 insertions(+), 25 deletions(-) diff --git a/src/AcDream.App/Rendering/TextureCache.cs b/src/AcDream.App/Rendering/TextureCache.cs index 76dca7f..6d10200 100644 --- a/src/AcDream.App/Rendering/TextureCache.cs +++ b/src/AcDream.App/Rendering/TextureCache.cs @@ -123,10 +123,23 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab uint surfaceId, uint? overrideOrigTextureId, PaletteOverride paletteOverride) + => GetOrUploadWithPaletteOverride(surfaceId, overrideOrigTextureId, paletteOverride, + HashPaletteOverride(paletteOverride)); + + /// + /// Overload that accepts a precomputed palette hash. Lets callers (e.g. + /// the WB draw dispatcher) compute the hash ONCE per entity and reuse + /// it across every (part, batch) lookup, avoiding the per-batch + /// FNV-1a fold over SubPalettes. The hash is used directly as part of the cache key, so it must be the value HashPaletteOverride returns for the same override. + /// + public uint GetOrUploadWithPaletteOverride( + uint surfaceId, + uint? overrideOrigTextureId, + PaletteOverride paletteOverride, + ulong precomputedPaletteHash) { - ulong hash = HashPaletteOverride(paletteOverride); uint origTexKey = overrideOrigTextureId ?? 0; - var key = (surfaceId, origTexKey, hash); + var key = (surfaceId, origTexKey, precomputedPaletteHash); if (_handlesByPalette.TryGetValue(key, out var h)) return h; @@ -138,9 +151,10 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab /// /// Cheap 64-bit hash over a palette override's identity so two - /// entities with the same palette setup share a decode. + /// entities with the same palette setup share a decode. Internal so + /// the WB dispatcher can compute it once per entity. /// - private static ulong HashPaletteOverride(PaletteOverride p) + internal static ulong HashPaletteOverride(PaletteOverride p) { // Not cryptographic — just needs to distinguish override setups // for caching. 
Start with base palette id, fold in each entry. diff --git a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs index 9728e77..4644f71 100644 --- a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs +++ b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs @@ -66,8 +66,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable // Per-frame scratch — reused across frames to avoid per-frame allocation. private readonly Dictionary _groups = new(); + private readonly List _opaqueDraws = new(); + private readonly List _translucentDraws = new(); private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink + // Per-entity-cull AABB radius. Conservative — covers most entities; large + // outliers (long banners, tall columns) are still landblock-culled. + private const float PerEntityCullRadius = 5.0f; + private bool _disposed; // Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1. @@ -120,6 +126,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal); + // Camera world-space position for front-to-back sort (perf #2). The view + // matrix is the inverse of the camera's world transform, so the world + // translation lives in the inverse's translation row. + Vector3 camPos = Vector3.Zero; + if (Matrix4x4.Invert(camera.View, out var invView)) + camPos = invView.Translation; + // ── Phase 1: clear groups, walk entities, build groups ────────────── foreach (var grp in _groups.Values) grp.Matrices.Clear(); @@ -146,12 +159,34 @@ public sealed unsafe class WbDrawDispatcher : IDisposable && !visibleCellIds.Contains(entity.ParentCellId.Value)) continue; + // Per-entity AABB frustum cull (perf #3). Skips work for distant + // entities even when their landblock is visible. 
Animated + // entities bypass — they're tracked at landblock level + need + // per-frame work for animation regardless. Conservative 5m + // radius covers typical entity bounds. + if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId) + { + var p = entity.Position; + var aMin = new Vector3(p.X - PerEntityCullRadius, p.Y - PerEntityCullRadius, p.Z - PerEntityCullRadius); + var aMax = new Vector3(p.X + PerEntityCullRadius, p.Y + PerEntityCullRadius, p.Z + PerEntityCullRadius); + if (!FrustumCuller.IsAabbVisible(frustum.Value, aMin, aMax)) + continue; + } + if (diag) _entitiesSeen++; var entityWorld = Matrix4x4.CreateFromQuaternion(entity.Rotation) * Matrix4x4.CreateTranslation(entity.Position); + // Compute palette-override hash ONCE per entity (perf #4). + // Reused across every (part, batch) lookup so the FNV-1a fold + // over SubPalettes runs once instead of N times. Zero when the + // entity has no palette override (trees, scenery). + ulong palHash = 0; + if (entity.PaletteOverride is not null) + palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride); + bool drewAny = false; for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++) { @@ -183,13 +218,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable var model = ComposePartWorldMatrix( entityWorld, meshRef.PartTransform, partTransform); - ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, metaTable); + ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable); } } else { var model = meshRef.PartTransform * entityWorld; - ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, metaTable); + ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable); } } @@ -204,7 +239,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable return; } - // ── Phase 2: lay matrices out contiguously, assign per-group offsets ── + // ── Phase 2: lay matrices out contiguously, assign per-group offsets, 
+ // split into opaque/translucent + compute sort keys ───────── int totalInstances = 0; foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; if (totalInstances == 0) @@ -217,14 +253,25 @@ public sealed unsafe class WbDrawDispatcher : IDisposable if (_instanceBuffer.Length < needed) _instanceBuffer = new float[needed + 256 * 16]; // headroom + _opaqueDraws.Clear(); + _translucentDraws.Clear(); + int cursor = 0; - int opaqueGroups = 0, translucentGroups = 0; foreach (var grp in _groups.Values) { if (grp.Matrices.Count == 0) continue; grp.FirstInstance = cursor; grp.InstanceCount = grp.Matrices.Count; + + // Use the first instance's translation as the group's representative + // position for front-to-back sort (perf #2). Cheap heuristic; works + // well when instances of one group are spatially coherent + // (typical for trees in one landblock area, NPCs at one spawn). + var firstM = grp.Matrices[0]; + var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43); + grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos); + for (int i = 0; i < grp.Matrices.Count; i++) { WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]); @@ -232,11 +279,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap) - opaqueGroups++; + _opaqueDraws.Add(grp); else - translucentGroups++; + _translucentDraws.Add(grp); } + // Front-to-back sort for opaque pass: nearer groups draw first so the + // depth test rejects fragments hidden behind them, reducing fragment + // shader cost from overdraw on dense scenes (Holtburg courtyard, + // Foundry interior). 
+ _opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance)); + // ── Phase 3: one upload of all matrices ───────────────────────────── _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo); fixed (float* p = _instanceBuffer) @@ -247,16 +300,12 @@ public sealed unsafe class WbDrawDispatcher : IDisposable EnsureInstanceAttribs(anyVao); _gl.BindVertexArray(anyVao); - // ── Phase 5: opaque + ClipMap pass ────────────────────────────────── + // ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ─────────── if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal)) _gl.Disable(EnableCap.CullFace); - foreach (var grp in _groups.Values) + foreach (var grp in _opaqueDraws) { - if (grp.Matrices.Count == 0) continue; - if (grp.Translucency != TranslucencyKind.Opaque && grp.Translucency != TranslucencyKind.ClipMap) - continue; - _shader.SetInt("uTranslucencyKind", (int)grp.Translucency); DrawGroup(grp); } @@ -276,12 +325,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.FrontFace(FrontFaceDirection.Ccw); } - foreach (var grp in _groups.Values) + foreach (var grp in _translucentDraws) { - if (grp.Matrices.Count == 0) continue; - if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap) - continue; - switch (grp.Translucency) { case TranslucencyKind.Additive: @@ -306,7 +351,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable if (diag) { - _drawsIssued += opaqueGroups + translucentGroups; + _drawsIssued += _opaqueDraws.Count + _translucentDraws.Count; _instancesIssued += totalInstances; MaybeFlushDiag(); } @@ -349,6 +394,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable Matrix4x4 model, WorldEntity entity, MeshRef meshRef, + ulong palHash, AcSurfaceMetadataTable metaTable) { for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++) @@ -367,7 +413,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable : 
TranslucencyKind.Opaque; } - uint texHandle = ResolveTexture(entity, meshRef, batch); + uint texHandle = ResolveTexture(entity, meshRef, batch, palHash); if (texHandle == 0) continue; var key = new GroupKey( @@ -391,7 +437,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable } } - private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch) + private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash) { // WB stores the surface id on batch.Key.SurfaceId (TextureKey struct); // batch.SurfaceId is unset (zero) for batches built by ObjectMeshManager. @@ -405,8 +451,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable if (entity.PaletteOverride is not null) { + // perf #4: pass the entity-precomputed palette hash so TextureCache + // can skip its internal HashPaletteOverride for repeat lookups + // within the same character. return _textures.GetOrUploadWithPaletteOverride( - surfaceId, origTexOverride, entity.PaletteOverride); + surfaceId, origTexOverride, entity.PaletteOverride, palHash); } else if (hasOrigTexOverride) { @@ -466,6 +515,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable public TranslucencyKind Translucency; public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes) public int InstanceCount; + public float SortDistance; // squared distance from camera to first instance, for opaque sort public readonly List Matrices = new(); } }