From 3639a6f4ac4fe87b5e303c7e564c2d1926feb839 Mon Sep 17 00:00:00 2001 From: Erik Date: Sun, 10 May 2026 09:45:18 +0200 Subject: [PATCH] feat(perf): Tier 1 entity classification cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per docs/plans/2026-05-10-perf-tiers-2-3-roadmap.md Tier 1: cache the per-(entity, meshRef, batch) classification (TextureCache lookup, GroupKey hash, _groups dict insert) so the per-frame Draw inner loop becomes "look up cache → walk assignments → append matrix to group's Matrices list." For static entities (~95% of world: trees, rocks, buildings, scenery), the answer never changes between frames. Cache once at first visit; reuse permanently. Per-frame work for static entities drops from 4 expensive operations per (meshRef, batch) to 1 list-append. Estimated entity dispatcher: 3.5ms → ~1-1.5ms median at radius=12. This should land inside the 2.0ms spec budget. Implementation: - New EntityClassificationCache class (per-meshRef list of cached (group ref, baked-PartTransform) tuples) keyed by entity.Id. - ClassifyEntity does the one-time work; result populates _groups and the cache. - Draw inner loop: cache lookup → for each assignment, model = PartTransform × entityWorld; group.Matrices.Add(model). - Cache miss when ClassifyEntity finds NO mesh loaded yet (Vao == 0) → don't store; retry next frame. This keeps an empty classification from being cached permanently while the mesh is still streaming in. Caveat: an entity with only some of its meshes loaded IS cached; the parts that were missing at classify time stay skipped until InvalidateEntity is called. - Public InvalidateEntity(uint id) + ClearEntityCache() for explicit invalidation hooks. Wiring (palette swap on ObjDescEvent, MeshRefs hot-swap) is post-A.5 follow-up — for now, cache-stale entities show their pre-swap appearance until next respawn. Tier 2 (static/dynamic split with persistent groups) and Tier 3 (GPU compute culling) are tracked in the roadmap doc. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Rendering/Wb/WbDrawDispatcher.cs | 251 ++++++++++++++---- 1 file changed, 206 insertions(+), 45 deletions(-) diff --git a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs index 6cd34f0..e8292b3 100644 --- a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs +++ b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs @@ -115,6 +115,37 @@ public sealed unsafe class WbDrawDispatcher : IDisposable // of GC pressure on the render thread under the original T17 shape. private readonly List<(WorldEntity Entity, int MeshRefIndex)> _walkScratch = new(); + // A.5 Tier 1 perf — entity classification cache (post-T26 SHIP polish). + // For static entities (~95% of world: trees, rocks, buildings, scenery), + // the per-(meshRef, batch) classification (TextureCache lookup, GroupKey + // hash, _groups dict insert) produces the same answer every frame + // forever. Cache it at first visit; per-frame work becomes "look up + // cache → walk assignments → append matrix to group's list." + // + // Invalidation today: cache is cleared on entity removal via + // InvalidateEntity. Mid-life mutations that change the entity's + // GroupKey (palette override change via ObjDescEvent, MeshRefs hot- + // swap) must call InvalidateEntity explicitly — those wiring points + // are post-A.5 follow-ups (cache-stale visual is muted: NPC clothes + // don't change color until next respawn). + private readonly Dictionary _entityCache = new(); + + private struct CachedBatchAssignment + { + public InstanceGroup Group; + public Matrix4x4 PartTransform; // baked: meshRef.PartTransform × setupPart, entityWorld at draw time + } + + private sealed class EntityClassificationCache + { + public uint Vao; + // AssignmentsByMeshRef[meshRefIndex] = list of (group, partTransform) for that meshRef. + // Length = entity.MeshRefs.Count at build time. 
+ public List[] AssignmentsByMeshRef = + System.Array.Empty>(); + public bool DrewAny; + } + // Per-entity-cull AABB radius. Conservative — covers most entities; large // outliers (long banners, tall columns) are still landblock-culled. private const float PerEntityCullRadius = 5.0f; @@ -368,58 +399,48 @@ public sealed unsafe class WbDrawDispatcher : IDisposable { if (diag) _entitiesSeen++; + // A.5 Tier 1 perf: look up or build the entity's classification + // cache. Static entities (~95% of world) hit the cache after frame 1. + // We don't cache entries where no mesh data was found at classify + // time — that would prevent the retry when streaming finishes loading + // the mesh on a later frame. + if (!_entityCache.TryGetValue(entity.Id, out var cache)) + { + cache = ClassifyEntity(entity, metaTable); + if (cache.Vao == 0) + { + // No mesh data loaded yet for any meshRef — retry next frame. + if (diag) _meshesMissing++; + continue; + } + _entityCache[entity.Id] = cache; + } + + var assignmentsByMeshRef = cache.AssignmentsByMeshRef; + if (partIdx >= assignmentsByMeshRef.Length) continue; + var assignments = assignmentsByMeshRef[partIdx]; + if (assignments.Count == 0) + { + // Specific meshRef missing at classify time but other meshRefs + // succeeded. Edge case: partial mesh load. Skip this part. + if (diag) _meshesMissing++; + continue; + } + + if (anyVao == 0) anyVao = cache.Vao; + var entityWorld = Matrix4x4.CreateFromQuaternion(entity.Rotation) * Matrix4x4.CreateTranslation(entity.Position); - // Compute palette-override hash ONCE per entity (perf #4). - // Reused across every (part, batch) lookup so the FNV-1a fold - // over SubPalettes runs once instead of N times. Zero when the - // entity has no palette override (trees, scenery). 
- ulong palHash = 0; - if (entity.PaletteOverride is not null) - palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride); - - // Note: GameWindow's spawn path already applies - // AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix — - // close-detail mesh swap for humanoids) to MeshRefs. We - // trust MeshRefs as the source of truth here. AnimatedEntityState's - // overrides become relevant only for hot-swap (0xF625 - // ObjDescEvent) which today rebuilds MeshRefs anyway. - var meshRef = entity.MeshRefs[partIdx]; - ulong gfxObjId = meshRef.GfxObjId; - - var renderData = _meshAdapter.TryGetRenderData(gfxObjId); - if (renderData is null) + for (int i = 0; i < assignments.Count; i++) { - if (diag) _meshesMissing++; - continue; - } - if (anyVao == 0) anyVao = renderData.VAO; - - bool drewAny = false; - if (renderData.IsSetup && renderData.SetupParts.Count > 0) - { - foreach (var (partGfxObjId, partTransform) in renderData.SetupParts) - { - var partData = _meshAdapter.TryGetRenderData(partGfxObjId); - if (partData is null) continue; - - var model = ComposePartWorldMatrix( - entityWorld, meshRef.PartTransform, partTransform); - - ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable); - drewAny = true; - } - } - else - { - var model = meshRef.PartTransform * entityWorld; - ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable); - drewAny = true; + var c = assignments[i]; + var model = c.PartTransform * entityWorld; + c.Group.Matrices.Add(model); } - if (diag && drewAny) _entitiesDrawn++; + if (diag) _entitiesDrawn++; } // Nothing visible — skip the GL pass entirely. @@ -669,6 +690,146 @@ public sealed unsafe class WbDrawDispatcher : IDisposable return copy[idx]; } + /// + /// A.5 Tier 1 perf — classify all (meshRef, batch) tuples for an entity + /// once, return the cache. 
Per-frame Draw walks the cache + appends matrices, + /// skipping the per-batch TextureCache lookup, GroupKey hash, and _groups + /// dict insert. Static entities (~95% of world) hit the cache permanently + /// after first build; dynamic entities (palette swaps, ObjDesc events) need + /// explicit InvalidateEntity to rebuild. + /// + private EntityClassificationCache ClassifyEntity(WorldEntity entity, AcSurfaceMetadataTable metaTable) + { + var cache = new EntityClassificationCache + { + AssignmentsByMeshRef = new List[entity.MeshRefs.Count], + }; + for (int i = 0; i < cache.AssignmentsByMeshRef.Length; i++) + cache.AssignmentsByMeshRef[i] = new List(); + + // Compute palette-override hash ONCE per entity. Reused across every + // (part, batch) lookup. Zero when the entity has no palette override + // (trees, scenery, dat-static stabs/buildings). + ulong palHash = 0; + if (entity.PaletteOverride is not null) + palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride); + + for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++) + { + var meshRef = entity.MeshRefs[partIdx]; + ulong gfxObjId = meshRef.GfxObjId; + + var renderData = _meshAdapter.TryGetRenderData(gfxObjId); + if (renderData is null) continue; // mesh missing — caller retries next frame + if (cache.Vao == 0) cache.Vao = renderData.VAO; + + var assignments = cache.AssignmentsByMeshRef[partIdx]; + + if (renderData.IsSetup && renderData.SetupParts.Count > 0) + { + foreach (var (partGfxObjId, setupPartTransform) in renderData.SetupParts) + { + var partData = _meshAdapter.TryGetRenderData(partGfxObjId); + if (partData is null) continue; + // Bake (setupPartTransform * meshRef.PartTransform) into the + // assignment's PartTransform. entityWorld is applied per-frame. + // Matches ComposePartWorldMatrix's (restPose * animOverride * entityWorld) + // composition order: setupPartTransform = restPose, + // meshRef.PartTransform = animOverride. 
+ var bakedPart = setupPartTransform * meshRef.PartTransform; + ClassifyBatchesIntoCache(partData, partGfxObjId, entity, meshRef, palHash, bakedPart, metaTable, assignments); + cache.DrewAny = true; + } + } + else + { + ClassifyBatchesIntoCache(renderData, gfxObjId, entity, meshRef, palHash, meshRef.PartTransform, metaTable, assignments); + cache.DrewAny = true; + } + } + return cache; + } + + /// + /// A.5 Tier 1 perf — same per-batch logic as + /// but stores results into instead of mutating + /// _groups[*].Matrices directly. _groups still gets populated (for new keys); + /// the cache stores stable references into _groups for per-frame Matrices.Add. + /// + private void ClassifyBatchesIntoCache( + ObjectRenderData renderData, + ulong gfxObjId, + WorldEntity entity, + MeshRef meshRef, + ulong palHash, + Matrix4x4 partTransform, + AcSurfaceMetadataTable metaTable, + List assignments) + { + for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++) + { + var batch = renderData.Batches[batchIdx]; + + TranslucencyKind translucency; + if (metaTable.TryLookup(gfxObjId, batchIdx, out var meta)) + translucency = meta.Translucency; + else + translucency = batch.IsAdditive ? TranslucencyKind.Additive + : batch.IsTransparent ? 
TranslucencyKind.AlphaBlend + : TranslucencyKind.Opaque; + + ulong texHandle = ResolveTexture(entity, meshRef, batch, palHash); + if (texHandle == 0) continue; + + uint texLayer = 0; + var key = new GroupKey( + batch.IBO, batch.FirstIndex, (int)batch.BaseVertex, + batch.IndexCount, texHandle, texLayer, translucency); + + if (!_groups.TryGetValue(key, out var grp)) + { + grp = new InstanceGroup + { + Ibo = batch.IBO, + FirstIndex = batch.FirstIndex, + BaseVertex = (int)batch.BaseVertex, + IndexCount = batch.IndexCount, + BindlessTextureHandle = texHandle, + TextureLayer = texLayer, + Translucency = translucency, + }; + _groups[key] = grp; + } + + assignments.Add(new CachedBatchAssignment + { + Group = grp, + PartTransform = partTransform, + }); + } + } + + /// + /// A.5 Tier 1 perf — invalidate the classification cache for an entity. + /// Call when an entity's MeshRefs, PaletteOverride, or SurfaceOverrides + /// change (e.g. ObjDescEvent 0xF625, equip-slot updates, transmute). + /// Next frame's Draw will rebuild on demand. + /// + public void InvalidateEntity(uint entityId) + { + _entityCache.Remove(entityId); + } + + /// + /// A.5 Tier 1 perf — clear the entire entity classification cache. + /// Call on world reset (post-character-load, region change). The next + /// frame's Draw will rebuild on demand. + /// + public void ClearEntityCache() + { + _entityCache.Clear(); + } + private void ClassifyBatches( ObjectRenderData renderData, ulong gfxObjId,