feat(perf): Tier 1 entity classification cache

Per docs/plans/2026-05-10-perf-tiers-2-3-roadmap.md Tier 1: cache the
per-(entity, meshRef, batch) classification (TextureCache lookup,
GroupKey hash, _groups dict insert) so the per-frame Draw inner loop
becomes "look up cache → walk assignments → append matrix to group's
Matrices list."

For static entities (~95% of world: trees, rocks, buildings, scenery),
the answer never changes between frames. Cache once at first visit;
reuse permanently. Per-frame work for static drops from 4 expensive
operations per (meshRef, batch) to 1 list-append.

Estimated entity dispatcher: 3.5ms → ~1-1.5ms median at radius=12.
Should land inside the 2.0ms spec budget.

Implementation:
- New EntityClassificationCache class (per-meshRef list of cached
  (group ref, baked-PartTransform) tuples) keyed by entity.Id.
- ClassifyEntity does the one-time work; result populates _groups and
  the cache.
- Draw inner loop: cache lookup → for each assignment, model =
  PartTransform × entityWorld; group.Matrices.Add(model).
- Cache miss when ClassifyEntity finds NO mesh loaded yet (Vao == 0)
  → don't store; retry next frame. Avoids cache thrash during the
  streaming-in window.
- Public InvalidateEntity(uint id) + ClearEntityCache() for explicit
  invalidation hooks. Wiring (palette swap on ObjDescEvent, MeshRefs
  hot-swap) is post-A.5 follow-up — for now, cache-stale entities
  show their pre-swap appearance until next respawn.

Tier 2 (static/dynamic split with persistent groups) and Tier 3 (GPU
compute culling) tracked in the roadmap doc.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-10 09:45:18 +02:00
parent 462f9d6377
commit 3639a6f4ac

View file

@ -115,6 +115,37 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
// of GC pressure on the render thread under the original T17 shape.
private readonly List<(WorldEntity Entity, int MeshRefIndex)> _walkScratch = new();
// A.5 Tier 1 perf — entity classification cache (post-T26 SHIP polish).
// For static entities (~95% of world: trees, rocks, buildings, scenery),
// the per-(meshRef, batch) classification (TextureCache lookup, GroupKey
// hash, _groups dict insert) produces the same answer every frame
// forever. Cache it at first visit; per-frame work becomes "look up
// cache → walk assignments → append matrix to group's list."
//
// Invalidation today: cache is cleared on entity removal via
// InvalidateEntity. Mid-life mutations that change the entity's
// GroupKey (palette override change via ObjDescEvent, MeshRefs hot-
// swap) must call InvalidateEntity explicitly — those wiring points
// are post-A.5 follow-ups (the stale-cache effect is purely cosmetic: e.g.
// NPC clothes don't change color until next respawn).
private readonly Dictionary<uint, EntityClassificationCache> _entityCache = new();
// One cached (group, transform) pair produced by classifying a single batch.
// The per-frame Draw loop multiplies PartTransform by the entity's world
// matrix and appends the result to Group.Matrices.
private struct CachedBatchAssignment
{
// Instance group (also registered in _groups) that receives this batch's per-frame model matrix.
public InstanceGroup Group;
// Baked at classify time: setupPartTransform * meshRef.PartTransform for Setup
// parts, or meshRef.PartTransform alone for a plain meshRef.
public Matrix4x4 PartTransform; // entityWorld is multiplied in at draw time (PartTransform * entityWorld)
}
// Per-entity classification result. Instances are stored in _entityCache keyed
// by entity.Id and reused by Draw on every subsequent frame.
private sealed class EntityClassificationCache
{
// A VAO captured from the entity's render data at classify time. Draw treats 0
// as "no mesh data loaded yet": the cache is then not stored and the entity is
// reclassified on a later frame.
public uint Vao;
// AssignmentsByMeshRef[meshRefIndex] = list of (group, partTransform) for that meshRef.
// Length = entity.MeshRefs.Count at build time. A meshRef whose render data was
// missing at classify time keeps an empty list.
public List<CachedBatchAssignment>[] AssignmentsByMeshRef =
System.Array.Empty<List<CachedBatchAssignment>>();
// Set by ClassifyEntity once at least one meshRef (or Setup part) had render
// data and was handed to ClassifyBatchesIntoCache.
public bool DrewAny;
}
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
private const float PerEntityCullRadius = 5.0f;
@ -368,58 +399,48 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
{
if (diag) _entitiesSeen++;
// A.5 Tier 1 perf: look up or build the entity's classification
// cache. Static entities (~95% of world) hit the cache after frame 1.
// We don't cache entries where no mesh data was found at classify
// time — that would prevent the retry when streaming finishes loading
// the mesh on a later frame.
if (!_entityCache.TryGetValue(entity.Id, out var cache))
{
cache = ClassifyEntity(entity, metaTable);
if (cache.Vao == 0)
{
// No mesh data loaded yet for any meshRef — retry next frame.
if (diag) _meshesMissing++;
continue;
}
_entityCache[entity.Id] = cache;
}
var assignmentsByMeshRef = cache.AssignmentsByMeshRef;
if (partIdx >= assignmentsByMeshRef.Length) continue;
var assignments = assignmentsByMeshRef[partIdx];
if (assignments.Count == 0)
{
// Specific meshRef missing at classify time but other meshRefs
// succeeded. Edge case: partial mesh load. Skip this part.
if (diag) _meshesMissing++;
continue;
}
if (anyVao == 0) anyVao = cache.Vao;
var entityWorld =
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position);
// Compute palette-override hash ONCE per entity (perf #4).
// Reused across every (part, batch) lookup so the FNV-1a fold
// over SubPalettes runs once instead of N times. Zero when the
// entity has no palette override (trees, scenery).
ulong palHash = 0;
if (entity.PaletteOverride is not null)
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
// Note: GameWindow's spawn path already applies
// AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix —
// close-detail mesh swap for humanoids) to MeshRefs. We
// trust MeshRefs as the source of truth here. AnimatedEntityState's
// overrides become relevant only for hot-swap (0xF625
// ObjDescEvent) which today rebuilds MeshRefs anyway.
var meshRef = entity.MeshRefs[partIdx];
ulong gfxObjId = meshRef.GfxObjId;
var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
if (renderData is null)
for (int i = 0; i < assignments.Count; i++)
{
if (diag) _meshesMissing++;
continue;
}
if (anyVao == 0) anyVao = renderData.VAO;
bool drewAny = false;
if (renderData.IsSetup && renderData.SetupParts.Count > 0)
{
foreach (var (partGfxObjId, partTransform) in renderData.SetupParts)
{
var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
if (partData is null) continue;
var model = ComposePartWorldMatrix(
entityWorld, meshRef.PartTransform, partTransform);
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable);
drewAny = true;
}
}
else
{
var model = meshRef.PartTransform * entityWorld;
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable);
drewAny = true;
var c = assignments[i];
var model = c.PartTransform * entityWorld;
c.Group.Matrices.Add(model);
}
if (diag && drewAny) _entitiesDrawn++;
if (diag) _entitiesDrawn++;
}
// Nothing visible — skip the GL pass entirely.
@ -669,6 +690,146 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
return copy[idx];
}
/// <summary>
/// A.5 Tier 1 perf — classify all (meshRef, batch) tuples for an entity
/// once and return the resulting cache. Per-frame Draw walks the cache and
/// appends matrices, skipping the per-batch TextureCache lookup, GroupKey hash,
/// and _groups dict insert. Entities whose MeshRefs / palette change later need
/// an explicit <see cref="InvalidateEntity"/> to be rebuilt.
/// </summary>
/// <remarks>
/// The caller treats <c>Vao == 0</c> on the returned cache as "no mesh data
/// loaded yet" and retries on a later frame instead of storing the cache.
/// Mesh refs, Setup parts, or batches whose data is unavailable at classify time
/// are simply absent from the returned assignments; once the cache is stored
/// they stay absent until the entity is invalidated.
/// </remarks>
/// <param name="entity">Entity whose MeshRefs and PaletteOverride are classified.</param>
/// <param name="metaTable">Surface metadata table forwarded to the per-batch classification.</param>
/// <returns>A freshly built cache with one assignment list per entry in <c>entity.MeshRefs</c>.</returns>
private EntityClassificationCache ClassifyEntity(WorldEntity entity, AcSurfaceMetadataTable metaTable)
{
    int meshRefCount = entity.MeshRefs.Count;
    var cache = new EntityClassificationCache
    {
        AssignmentsByMeshRef = new List<CachedBatchAssignment>[meshRefCount],
    };
    for (int i = 0; i < meshRefCount; i++)
    {
        cache.AssignmentsByMeshRef[i] = new List<CachedBatchAssignment>();
    }

    // Compute palette-override hash ONCE per entity. Reused across every
    // (part, batch) lookup. Zero when the entity has no palette override
    // (trees, scenery, dat-static stabs/buildings).
    ulong palHash = 0;
    if (entity.PaletteOverride is not null)
    {
        palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
    }

    for (int partIdx = 0; partIdx < meshRefCount; partIdx++)
    {
        var meshRef = entity.MeshRefs[partIdx];
        ulong gfxObjId = meshRef.GfxObjId;
        var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
        if (renderData is null)
        {
            continue; // mesh missing — this meshRef keeps an empty assignment list
        }

        if (cache.Vao == 0)
        {
            cache.Vao = renderData.VAO;
        }

        var assignments = cache.AssignmentsByMeshRef[partIdx];
        if (renderData.IsSetup && renderData.SetupParts.Count > 0)
        {
            foreach (var (partGfxObjId, setupPartTransform) in renderData.SetupParts)
            {
                var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
                if (partData is null)
                {
                    continue;
                }

                // The caller uses Vao == 0 as its "nothing loaded" sentinel. If the
                // Setup container's own render data reported no VAO, fall back to the
                // first loaded part so an entity whose parts ARE classified is not
                // discarded and reclassified every frame.
                // NOTE(review): assumes any loaded part's VAO is acceptable as the
                // dispatcher's "any VAO" — confirm against the mesh adapter.
                if (cache.Vao == 0)
                {
                    cache.Vao = partData.VAO;
                }

                // Bake (setupPartTransform * meshRef.PartTransform) into the
                // assignment's PartTransform. entityWorld is applied per-frame.
                // Per the original author's note this matches ComposePartWorldMatrix's
                // (restPose * animOverride * entityWorld) composition order:
                // setupPartTransform = restPose, meshRef.PartTransform = animOverride.
                var bakedPart = setupPartTransform * meshRef.PartTransform;
                ClassifyBatchesIntoCache(partData, partGfxObjId, entity, meshRef, palHash, bakedPart, metaTable, assignments);
                cache.DrewAny = true;
            }
        }
        else
        {
            ClassifyBatchesIntoCache(renderData, gfxObjId, entity, meshRef, palHash, meshRef.PartTransform, metaTable, assignments);
            cache.DrewAny = true;
        }
    }

    return cache;
}
/// <summary>
/// A.5 Tier 1 perf — cache-building variant of <see cref="ClassifyBatches"/>.
/// For every batch in <paramref name="renderData"/> it resolves the translucency
/// kind and texture handle, finds (or creates and registers) the matching entry
/// in _groups, and records a (group, partTransform) pair in
/// <paramref name="assignments"/>. No matrices are appended here; the cached
/// group references are used by the per-frame Matrices.Add in Draw.
/// </summary>
private void ClassifyBatchesIntoCache(
    ObjectRenderData renderData,
    ulong gfxObjId,
    WorldEntity entity,
    MeshRef meshRef,
    ulong palHash,
    Matrix4x4 partTransform,
    AcSurfaceMetadataTable metaTable,
    List<CachedBatchAssignment> assignments)
{
    for (int slot = 0; slot < renderData.Batches.Count; slot++)
    {
        var batch = renderData.Batches[slot];

        // Prefer the metadata table's answer; otherwise derive the kind from the batch flags.
        TranslucencyKind blendKind;
        if (!metaTable.TryLookup(gfxObjId, slot, out var surfaceMeta))
        {
            if (batch.IsAdditive)
            {
                blendKind = TranslucencyKind.Additive;
            }
            else if (batch.IsTransparent)
            {
                blendKind = TranslucencyKind.AlphaBlend;
            }
            else
            {
                blendKind = TranslucencyKind.Opaque;
            }
        }
        else
        {
            blendKind = surfaceMeta.Translucency;
        }

        // A zero handle means no texture was resolved for this batch; it gets no assignment.
        ulong textureHandle = ResolveTexture(entity, meshRef, batch, palHash);
        if (textureHandle == 0)
        {
            continue;
        }

        uint textureLayer = 0;
        var groupKey = new GroupKey(
            batch.IBO,
            batch.FirstIndex,
            (int)batch.BaseVertex,
            batch.IndexCount,
            textureHandle,
            textureLayer,
            blendKind);

        if (!_groups.TryGetValue(groupKey, out var group))
        {
            // First time this key is seen: create the group and register it.
            group = new InstanceGroup
            {
                Ibo = batch.IBO,
                FirstIndex = batch.FirstIndex,
                BaseVertex = (int)batch.BaseVertex,
                IndexCount = batch.IndexCount,
                BindlessTextureHandle = textureHandle,
                TextureLayer = textureLayer,
                Translucency = blendKind,
            };
            _groups[groupKey] = group;
        }

        var entry = new CachedBatchAssignment
        {
            Group = group,
            PartTransform = partTransform,
        };
        assignments.Add(entry);
    }
}
/// <summary>
/// A.5 Tier 1 perf — drop the cached classification for a single entity.
/// Intended to be called whenever an entity's MeshRefs, PaletteOverride, or
/// SurfaceOverrides change (e.g. ObjDescEvent 0xF625, equip-slot updates,
/// transmute). Draw rebuilds the entry the next time it visits the entity.
/// Removing an id that is not cached is a no-op.
/// </summary>
/// <param name="entityId">Id of the entity whose cache entry is removed.</param>
public void InvalidateEntity(uint entityId) => _entityCache.Remove(entityId);
/// <summary>
/// A.5 Tier 1 perf — drop every cached entity classification at once.
/// Intended for world resets (post-character-load, region change). Draw
/// rebuilds entries on demand as entities are visited again.
/// </summary>
public void ClearEntityCache() => _entityCache.Clear();
private void ClassifyBatches(
ObjectRenderData renderData,
ulong gfxObjId,