phase(N.4): WbDrawDispatcher perf pass — sort, cull, hash memoization

Four small wins on top of the grouped-instanced refactor.

1. Drop unused animState lookup. It was a side-effect-free
   _entitySpawnAdapter.GetState call made for every per-instance
   entity, made redundant by the Issue #47 fix that trusts MeshRefs.

2. Front-to-back sort opaque groups. Squared distance from camera to
   each group's first-instance translation; ascending sort. Lets the
   GPU's depth test reject fragments behind closer geometry — real
   win on dense scenes (Holtburg courtyard, Foundry interior).

3. Per-entity AABB frustum cull. Each entity's position is expanded
   by 5 m on every axis into an AABB and tested against the frustum
   before walking parts. Skips work for off-screen entities even when
   their landblock is partially visible. Animated entities (other
   characters, NPCs, monsters) bypass the test — they always need
   per-frame work for animation regardless. The conservative 5 m
   extent covers typical entity bounds; large outliers stay
   landblock-culled.

4. Memoize palette hash per entity. TextureCache.HashPaletteOverride
   is now internal; new GetOrUploadWithPaletteOverride overload takes
   a precomputed hash. The dispatcher computes it ONCE per entity and
   reuses across every (part, batch) lookup, avoiding the per-batch
   FNV-1a fold over SubPalettes. Trees / scenery without palette
   overrides skip entirely (palHash stays 0).

Visual output unchanged; FPS up further, especially in dense scenes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-08 17:51:03 +02:00
parent 7b41efc281
commit 573526dae5
2 changed files with 89 additions and 25 deletions

View file

@ -123,10 +123,23 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
uint surfaceId, uint surfaceId,
uint? overrideOrigTextureId, uint? overrideOrigTextureId,
PaletteOverride paletteOverride) PaletteOverride paletteOverride)
=> GetOrUploadWithPaletteOverride(surfaceId, overrideOrigTextureId, paletteOverride,
HashPaletteOverride(paletteOverride));
/// <summary>
/// Overload that accepts a precomputed palette hash. Lets callers (e.g.
/// the WB draw dispatcher) compute the hash ONCE per entity and reuse
/// it across every (part, batch) lookup, avoiding the per-batch
/// FNV-1a fold over <see cref="PaletteOverride.SubPalettes"/>.
/// </summary>
public uint GetOrUploadWithPaletteOverride(
uint surfaceId,
uint? overrideOrigTextureId,
PaletteOverride paletteOverride,
ulong precomputedPaletteHash)
{ {
ulong hash = HashPaletteOverride(paletteOverride);
uint origTexKey = overrideOrigTextureId ?? 0; uint origTexKey = overrideOrigTextureId ?? 0;
var key = (surfaceId, origTexKey, hash); var key = (surfaceId, origTexKey, precomputedPaletteHash);
if (_handlesByPalette.TryGetValue(key, out var h)) if (_handlesByPalette.TryGetValue(key, out var h))
return h; return h;
@ -138,9 +151,10 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
/// <summary> /// <summary>
/// Cheap 64-bit hash over a palette override's identity so two /// Cheap 64-bit hash over a palette override's identity so two
/// entities with the same palette setup share a decode. /// entities with the same palette setup share a decode. Internal so
/// the WB dispatcher can compute it once per entity.
/// </summary> /// </summary>
private static ulong HashPaletteOverride(PaletteOverride p) internal static ulong HashPaletteOverride(PaletteOverride p)
{ {
// Not cryptographic — just needs to distinguish override setups // Not cryptographic — just needs to distinguish override setups
// for caching. Start with base palette id, fold in each entry. // for caching. Start with base palette id, fold in each entry.

View file

@ -66,8 +66,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
// Per-frame scratch — reused across frames to avoid per-frame allocation. // Per-frame scratch — reused across frames to avoid per-frame allocation.
private readonly Dictionary<GroupKey, InstanceGroup> _groups = new(); private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
private readonly List<InstanceGroup> _opaqueDraws = new();
private readonly List<InstanceGroup> _translucentDraws = new();
private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
private const float PerEntityCullRadius = 5.0f;
private bool _disposed; private bool _disposed;
// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1. // Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
@ -120,6 +126,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal); bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
// Camera world-space position for front-to-back sort (perf #2). The view
// matrix is the inverse of the camera's world transform, so the world
// translation lives in the inverse's translation row.
Vector3 camPos = Vector3.Zero;
if (Matrix4x4.Invert(camera.View, out var invView))
camPos = invView.Translation;
// ── Phase 1: clear groups, walk entities, build groups ────────────── // ── Phase 1: clear groups, walk entities, build groups ──────────────
foreach (var grp in _groups.Values) grp.Matrices.Clear(); foreach (var grp in _groups.Values) grp.Matrices.Clear();
@ -146,12 +159,34 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
&& !visibleCellIds.Contains(entity.ParentCellId.Value)) && !visibleCellIds.Contains(entity.ParentCellId.Value))
continue; continue;
// Per-entity AABB frustum cull (perf #3). Skips work for distant
// entities even when their landblock is visible. Animated
// entities bypass — they're tracked at landblock level + need
// per-frame work for animation regardless. Conservative 5m
// radius covers typical entity bounds.
if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
{
var p = entity.Position;
var aMin = new Vector3(p.X - PerEntityCullRadius, p.Y - PerEntityCullRadius, p.Z - PerEntityCullRadius);
var aMax = new Vector3(p.X + PerEntityCullRadius, p.Y + PerEntityCullRadius, p.Z + PerEntityCullRadius);
if (!FrustumCuller.IsAabbVisible(frustum.Value, aMin, aMax))
continue;
}
if (diag) _entitiesSeen++; if (diag) _entitiesSeen++;
var entityWorld = var entityWorld =
Matrix4x4.CreateFromQuaternion(entity.Rotation) * Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position); Matrix4x4.CreateTranslation(entity.Position);
// Compute palette-override hash ONCE per entity (perf #4).
// Reused across every (part, batch) lookup so the FNV-1a fold
// over SubPalettes runs once instead of N times. Zero when the
// entity has no palette override (trees, scenery).
ulong palHash = 0;
if (entity.PaletteOverride is not null)
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
bool drewAny = false; bool drewAny = false;
for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++) for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++)
{ {
@ -183,13 +218,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
var model = ComposePartWorldMatrix( var model = ComposePartWorldMatrix(
entityWorld, meshRef.PartTransform, partTransform); entityWorld, meshRef.PartTransform, partTransform);
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, metaTable); ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable);
} }
} }
else else
{ {
var model = meshRef.PartTransform * entityWorld; var model = meshRef.PartTransform * entityWorld;
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, metaTable); ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable);
} }
} }
@ -204,7 +239,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
return; return;
} }
// ── Phase 2: lay matrices out contiguously, assign per-group offsets ── // ── Phase 2: lay matrices out contiguously, assign per-group offsets,
// split into opaque/translucent + compute sort keys ─────────
int totalInstances = 0; int totalInstances = 0;
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
if (totalInstances == 0) if (totalInstances == 0)
@ -217,14 +253,25 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
if (_instanceBuffer.Length < needed) if (_instanceBuffer.Length < needed)
_instanceBuffer = new float[needed + 256 * 16]; // headroom _instanceBuffer = new float[needed + 256 * 16]; // headroom
_opaqueDraws.Clear();
_translucentDraws.Clear();
int cursor = 0; int cursor = 0;
int opaqueGroups = 0, translucentGroups = 0;
foreach (var grp in _groups.Values) foreach (var grp in _groups.Values)
{ {
if (grp.Matrices.Count == 0) continue; if (grp.Matrices.Count == 0) continue;
grp.FirstInstance = cursor; grp.FirstInstance = cursor;
grp.InstanceCount = grp.Matrices.Count; grp.InstanceCount = grp.Matrices.Count;
// Use the first instance's translation as the group's representative
// position for front-to-back sort (perf #2). Cheap heuristic; works
// well when instances of one group are spatially coherent
// (typical for trees in one landblock area, NPCs at one spawn).
var firstM = grp.Matrices[0];
var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43);
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
for (int i = 0; i < grp.Matrices.Count; i++) for (int i = 0; i < grp.Matrices.Count; i++)
{ {
WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]); WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]);
@ -232,11 +279,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
} }
if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap) if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
opaqueGroups++; _opaqueDraws.Add(grp);
else else
translucentGroups++; _translucentDraws.Add(grp);
} }
// Front-to-back sort for opaque pass: nearer groups draw first so the
// depth test rejects fragments hidden behind them, reducing fragment
// shader cost from overdraw on dense scenes (Holtburg courtyard,
// Foundry interior).
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));
// ── Phase 3: one upload of all matrices ───────────────────────────── // ── Phase 3: one upload of all matrices ─────────────────────────────
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo); _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
fixed (float* p = _instanceBuffer) fixed (float* p = _instanceBuffer)
@ -247,16 +300,12 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
EnsureInstanceAttribs(anyVao); EnsureInstanceAttribs(anyVao);
_gl.BindVertexArray(anyVao); _gl.BindVertexArray(anyVao);
// ── Phase 5: opaque + ClipMap pass ────────────────────────────────── // ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ───────────
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal)) if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
_gl.Disable(EnableCap.CullFace); _gl.Disable(EnableCap.CullFace);
foreach (var grp in _groups.Values) foreach (var grp in _opaqueDraws)
{ {
if (grp.Matrices.Count == 0) continue;
if (grp.Translucency != TranslucencyKind.Opaque && grp.Translucency != TranslucencyKind.ClipMap)
continue;
_shader.SetInt("uTranslucencyKind", (int)grp.Translucency); _shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
DrawGroup(grp); DrawGroup(grp);
} }
@ -276,12 +325,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.FrontFace(FrontFaceDirection.Ccw); _gl.FrontFace(FrontFaceDirection.Ccw);
} }
foreach (var grp in _groups.Values) foreach (var grp in _translucentDraws)
{ {
if (grp.Matrices.Count == 0) continue;
if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
continue;
switch (grp.Translucency) switch (grp.Translucency)
{ {
case TranslucencyKind.Additive: case TranslucencyKind.Additive:
@ -306,7 +351,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
if (diag) if (diag)
{ {
_drawsIssued += opaqueGroups + translucentGroups; _drawsIssued += _opaqueDraws.Count + _translucentDraws.Count;
_instancesIssued += totalInstances; _instancesIssued += totalInstances;
MaybeFlushDiag(); MaybeFlushDiag();
} }
@ -349,6 +394,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
Matrix4x4 model, Matrix4x4 model,
WorldEntity entity, WorldEntity entity,
MeshRef meshRef, MeshRef meshRef,
ulong palHash,
AcSurfaceMetadataTable metaTable) AcSurfaceMetadataTable metaTable)
{ {
for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++) for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
@ -367,7 +413,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
: TranslucencyKind.Opaque; : TranslucencyKind.Opaque;
} }
uint texHandle = ResolveTexture(entity, meshRef, batch); uint texHandle = ResolveTexture(entity, meshRef, batch, palHash);
if (texHandle == 0) continue; if (texHandle == 0) continue;
var key = new GroupKey( var key = new GroupKey(
@ -391,7 +437,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
} }
} }
private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch) private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
{ {
// WB stores the surface id on batch.Key.SurfaceId (TextureKey struct); // WB stores the surface id on batch.Key.SurfaceId (TextureKey struct);
// batch.SurfaceId is unset (zero) for batches built by ObjectMeshManager. // batch.SurfaceId is unset (zero) for batches built by ObjectMeshManager.
@ -405,8 +451,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
if (entity.PaletteOverride is not null) if (entity.PaletteOverride is not null)
{ {
// perf #4: pass the entity-precomputed palette hash so TextureCache
// can skip its internal HashPaletteOverride for repeat lookups
// within the same character.
return _textures.GetOrUploadWithPaletteOverride( return _textures.GetOrUploadWithPaletteOverride(
surfaceId, origTexOverride, entity.PaletteOverride); surfaceId, origTexOverride, entity.PaletteOverride, palHash);
} }
else if (hasOrigTexOverride) else if (hasOrigTexOverride)
{ {
@ -466,6 +515,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
public TranslucencyKind Translucency; public TranslucencyKind Translucency;
public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes) public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
public int InstanceCount; public int InstanceCount;
public float SortDistance; // squared distance from camera to first instance, for opaque sort
public readonly List<Matrix4x4> Matrices = new(); public readonly List<Matrix4x4> Matrices = new();
} }
} }