phase(N.4): WbDrawDispatcher perf pass — sort, cull, hash memoization
Four small wins on top of the grouped-instanced refactor. 1. Drop the unused animState lookup. It was a side-effect-free _entitySpawnAdapter.GetState call made for every per-instance entity, made redundant by the Issue #47 fix that trusts MeshRefs. 2. Sort opaque groups front to back. The sort key is the squared distance from the camera to each group's first-instance translation, sorted ascending. This lets the GPU's depth test reject fragments behind closer geometry — a real win on dense scenes (Holtburg courtyard, Foundry interior). 3. Per-entity AABB frustum cull. Each entity is tested against the frustum with an AABB extending 5 m from its position on every axis before its parts are walked. This skips work for distant entities even when their landblock is partially visible. Animated entities (other characters, NPCs, monsters) bypass the cull — they always need per-frame work for animation regardless. The conservative radius covers typical entity bounds; large outliers stay landblock-culled. 4. Memoize the palette hash per entity. TextureCache.HashPaletteOverride is now internal; a new GetOrUploadWithPaletteOverride overload takes a precomputed hash. The dispatcher computes the hash ONCE per entity and reuses it across every (part, batch) lookup, avoiding the per-batch FNV-1a fold over SubPalettes. Trees / scenery without palette overrides skip the hash entirely (palHash stays 0). Visual output is unchanged; FPS is up further, especially in dense scenes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7b41efc281
commit
573526dae5
2 changed files with 89 additions and 25 deletions
|
|
@ -66,8 +66,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
|
||||
// Per-frame scratch — reused across frames to avoid per-frame allocation.
|
||||
private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
|
||||
private readonly List<InstanceGroup> _opaqueDraws = new();
|
||||
private readonly List<InstanceGroup> _translucentDraws = new();
|
||||
private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink
|
||||
|
||||
// Per-entity-cull AABB radius. Conservative — covers most entities; large
|
||||
// outliers (long banners, tall columns) are still landblock-culled.
|
||||
private const float PerEntityCullRadius = 5.0f;
|
||||
|
||||
private bool _disposed;
|
||||
|
||||
// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
|
||||
|
|
@ -120,6 +126,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
|
||||
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
|
||||
|
||||
// Camera world-space position for front-to-back sort (perf #2). The view
|
||||
// matrix is the inverse of the camera's world transform, so the world
|
||||
// translation lives in the inverse's translation row.
|
||||
Vector3 camPos = Vector3.Zero;
|
||||
if (Matrix4x4.Invert(camera.View, out var invView))
|
||||
camPos = invView.Translation;
|
||||
|
||||
// ── Phase 1: clear groups, walk entities, build groups ──────────────
|
||||
foreach (var grp in _groups.Values) grp.Matrices.Clear();
|
||||
|
||||
|
|
@ -146,12 +159,34 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
&& !visibleCellIds.Contains(entity.ParentCellId.Value))
|
||||
continue;
|
||||
|
||||
// Per-entity AABB frustum cull (perf #3). Skips work for distant
|
||||
// entities even when their landblock is visible. Animated
|
||||
// entities bypass — they're tracked at landblock level + need
|
||||
// per-frame work for animation regardless. Conservative 5m
|
||||
// radius covers typical entity bounds.
|
||||
if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
|
||||
{
|
||||
var p = entity.Position;
|
||||
var aMin = new Vector3(p.X - PerEntityCullRadius, p.Y - PerEntityCullRadius, p.Z - PerEntityCullRadius);
|
||||
var aMax = new Vector3(p.X + PerEntityCullRadius, p.Y + PerEntityCullRadius, p.Z + PerEntityCullRadius);
|
||||
if (!FrustumCuller.IsAabbVisible(frustum.Value, aMin, aMax))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (diag) _entitiesSeen++;
|
||||
|
||||
var entityWorld =
|
||||
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
|
||||
Matrix4x4.CreateTranslation(entity.Position);
|
||||
|
||||
// Compute palette-override hash ONCE per entity (perf #4).
|
||||
// Reused across every (part, batch) lookup so the FNV-1a fold
|
||||
// over SubPalettes runs once instead of N times. Zero when the
|
||||
// entity has no palette override (trees, scenery).
|
||||
ulong palHash = 0;
|
||||
if (entity.PaletteOverride is not null)
|
||||
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
|
||||
|
||||
bool drewAny = false;
|
||||
for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++)
|
||||
{
|
||||
|
|
@ -183,13 +218,13 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
var model = ComposePartWorldMatrix(
|
||||
entityWorld, meshRef.PartTransform, partTransform);
|
||||
|
||||
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, metaTable);
|
||||
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
var model = meshRef.PartTransform * entityWorld;
|
||||
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, metaTable);
|
||||
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -204,7 +239,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
return;
|
||||
}
|
||||
|
||||
// ── Phase 2: lay matrices out contiguously, assign per-group offsets ──
|
||||
// ── Phase 2: lay matrices out contiguously, assign per-group offsets,
|
||||
// split into opaque/translucent + compute sort keys ─────────
|
||||
int totalInstances = 0;
|
||||
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
|
||||
if (totalInstances == 0)
|
||||
|
|
@ -217,14 +253,25 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
if (_instanceBuffer.Length < needed)
|
||||
_instanceBuffer = new float[needed + 256 * 16]; // headroom
|
||||
|
||||
_opaqueDraws.Clear();
|
||||
_translucentDraws.Clear();
|
||||
|
||||
int cursor = 0;
|
||||
int opaqueGroups = 0, translucentGroups = 0;
|
||||
foreach (var grp in _groups.Values)
|
||||
{
|
||||
if (grp.Matrices.Count == 0) continue;
|
||||
|
||||
grp.FirstInstance = cursor;
|
||||
grp.InstanceCount = grp.Matrices.Count;
|
||||
|
||||
// Use the first instance's translation as the group's representative
|
||||
// position for front-to-back sort (perf #2). Cheap heuristic; works
|
||||
// well when instances of one group are spatially coherent
|
||||
// (typical for trees in one landblock area, NPCs at one spawn).
|
||||
var firstM = grp.Matrices[0];
|
||||
var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43);
|
||||
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
|
||||
|
||||
for (int i = 0; i < grp.Matrices.Count; i++)
|
||||
{
|
||||
WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]);
|
||||
|
|
@ -232,11 +279,17 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
}
|
||||
|
||||
if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
|
||||
opaqueGroups++;
|
||||
_opaqueDraws.Add(grp);
|
||||
else
|
||||
translucentGroups++;
|
||||
_translucentDraws.Add(grp);
|
||||
}
|
||||
|
||||
// Front-to-back sort for opaque pass: nearer groups draw first so the
|
||||
// depth test rejects fragments hidden behind them, reducing fragment
|
||||
// shader cost from overdraw on dense scenes (Holtburg courtyard,
|
||||
// Foundry interior).
|
||||
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));
|
||||
|
||||
// ── Phase 3: one upload of all matrices ─────────────────────────────
|
||||
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
|
||||
fixed (float* p = _instanceBuffer)
|
||||
|
|
@ -247,16 +300,12 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
EnsureInstanceAttribs(anyVao);
|
||||
_gl.BindVertexArray(anyVao);
|
||||
|
||||
// ── Phase 5: opaque + ClipMap pass ──────────────────────────────────
|
||||
// ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ───────────
|
||||
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
|
||||
_gl.Disable(EnableCap.CullFace);
|
||||
|
||||
foreach (var grp in _groups.Values)
|
||||
foreach (var grp in _opaqueDraws)
|
||||
{
|
||||
if (grp.Matrices.Count == 0) continue;
|
||||
if (grp.Translucency != TranslucencyKind.Opaque && grp.Translucency != TranslucencyKind.ClipMap)
|
||||
continue;
|
||||
|
||||
_shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
|
||||
DrawGroup(grp);
|
||||
}
|
||||
|
|
@ -276,12 +325,8 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.FrontFace(FrontFaceDirection.Ccw);
|
||||
}
|
||||
|
||||
foreach (var grp in _groups.Values)
|
||||
foreach (var grp in _translucentDraws)
|
||||
{
|
||||
if (grp.Matrices.Count == 0) continue;
|
||||
if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
|
||||
continue;
|
||||
|
||||
switch (grp.Translucency)
|
||||
{
|
||||
case TranslucencyKind.Additive:
|
||||
|
|
@ -306,7 +351,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
|
||||
if (diag)
|
||||
{
|
||||
_drawsIssued += opaqueGroups + translucentGroups;
|
||||
_drawsIssued += _opaqueDraws.Count + _translucentDraws.Count;
|
||||
_instancesIssued += totalInstances;
|
||||
MaybeFlushDiag();
|
||||
}
|
||||
|
|
@ -349,6 +394,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
Matrix4x4 model,
|
||||
WorldEntity entity,
|
||||
MeshRef meshRef,
|
||||
ulong palHash,
|
||||
AcSurfaceMetadataTable metaTable)
|
||||
{
|
||||
for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
|
||||
|
|
@ -367,7 +413,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
: TranslucencyKind.Opaque;
|
||||
}
|
||||
|
||||
uint texHandle = ResolveTexture(entity, meshRef, batch);
|
||||
uint texHandle = ResolveTexture(entity, meshRef, batch, palHash);
|
||||
if (texHandle == 0) continue;
|
||||
|
||||
var key = new GroupKey(
|
||||
|
|
@ -391,7 +437,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
}
|
||||
}
|
||||
|
||||
private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch)
|
||||
private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
|
||||
{
|
||||
// WB stores the surface id on batch.Key.SurfaceId (TextureKey struct);
|
||||
// batch.SurfaceId is unset (zero) for batches built by ObjectMeshManager.
|
||||
|
|
@ -405,8 +451,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
|
||||
if (entity.PaletteOverride is not null)
|
||||
{
|
||||
// perf #4: pass the entity-precomputed palette hash so TextureCache
|
||||
// can skip its internal HashPaletteOverride for repeat lookups
|
||||
// within the same character.
|
||||
return _textures.GetOrUploadWithPaletteOverride(
|
||||
surfaceId, origTexOverride, entity.PaletteOverride);
|
||||
surfaceId, origTexOverride, entity.PaletteOverride, palHash);
|
||||
}
|
||||
else if (hasOrigTexOverride)
|
||||
{
|
||||
|
|
@ -466,6 +515,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
public TranslucencyKind Translucency;
|
||||
public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
|
||||
public int InstanceCount;
|
||||
public float SortDistance; // squared distance from camera to first instance, for opaque sort
|
||||
public readonly List<Matrix4x4> Matrices = new();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue