fix(A.5): WalkEntities scratch-list pattern (Bug B — T17 GC pressure)

T17's WalkEntities helper allocated a fresh List<(WorldEntity, int)>
per frame to hold the (entity, meshRefIndex) pairs that pass visibility
filters. At ~10K entities × ~3 mesh refs = ~30K tuples × 16 bytes =
~480 KB / frame of GC pressure on the render thread. The implementer's
self-review flagged this as a future N.6 optimization; the post-T26
diagnostic showed it materially contributing to the perf regression
(though Bug A — far-tier entity load — was the dominant factor).

Refactor: split WalkEntities into two overloads.
- WalkEntities(...) — test-friendly, allocates a fresh ToDraw list per
  call. Tests keep using this signature unchanged.
- WalkEntitiesInto(..., scratch, ref result) — no-alloc, clears + populates
  a caller-provided scratch list. Draw uses this with a per-dispatcher
  _walkScratch field reused across frames.

Test count unchanged (40 streaming + 8 bucketing tests still pass).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-10 09:13:20 +02:00
parent 9217fd93cd
commit 0ad8c99c37

View file

@ -109,6 +109,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
private readonly List<InstanceGroup> _opaqueDraws = new();
private readonly List<InstanceGroup> _translucentDraws = new();
// A.5 T26 follow-up (Bug B): WalkEntities populates this scratch list
// instead of allocating a fresh List<(WorldEntity, int)> per frame. At
// ~10K entities × ~3 mesh refs = ~30K tuples × 16 bytes = ~480 KB / frame
// of GC pressure on the render thread under the original T17 shape.
private readonly List<(WorldEntity Entity, int MeshRefIndex)> _walkScratch = new();
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
@ -207,6 +212,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
/// recomputing Position±5 each frame.
/// </para>
/// </summary>
/// <summary>
/// Test-friendly overload that allocates a fresh ToDraw list per call.
/// Production code (<see cref="Draw"/>) uses the no-alloc overload below
/// with a caller-provided scratch list.
/// </summary>
internal static WalkResult WalkEntities(
IEnumerable<LandblockEntry> landblockEntries,
FrustumPlanes? frustum,
@ -214,7 +224,32 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
HashSet<uint>? visibleCellIds,
HashSet<uint>? animatedEntityIds)
{
var result = new WalkResult { ToDraw = new List<(WorldEntity, int)>() };
var scratch = new List<(WorldEntity Entity, int MeshRefIndex)>();
var result = new WalkResult { ToDraw = scratch };
WalkEntitiesInto(
landblockEntries, frustum, neverCullLandblockId,
visibleCellIds, animatedEntityIds, scratch, ref result);
return result;
}
/// <summary>
/// No-alloc overload: clears + populates the caller-provided <paramref name="scratch"/>
/// list. <see cref="Draw"/> reuses a per-dispatcher scratch field across frames to
/// avoid the 480+ KB / frame GC pressure that the test-friendly overload incurs.
/// Returns walk count via <paramref name="result"/>'s <c>EntitiesWalked</c> field.
/// </summary>
internal static void WalkEntitiesInto(
IEnumerable<LandblockEntry> landblockEntries,
FrustumPlanes? frustum,
uint? neverCullLandblockId,
HashSet<uint>? visibleCellIds,
HashSet<uint>? animatedEntityIds,
List<(WorldEntity Entity, int MeshRefIndex)> scratch,
ref WalkResult result)
{
scratch.Clear();
result.EntitiesWalked = 0;
result.ToDraw = scratch;
foreach (var entry in landblockEntries)
{
@ -236,7 +271,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
&& !visibleCellIds.Contains(entity.ParentCellId.Value)) continue;
result.EntitiesWalked++;
for (int i = 0; i < entity.MeshRefs.Count; i++)
result.ToDraw.Add((entity, i));
scratch.Add((entity, i));
}
continue;
}
@ -262,10 +297,9 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
result.EntitiesWalked++;
for (int i = 0; i < entity.MeshRefs.Count; i++)
result.ToDraw.Add((entity, i));
scratch.Add((entity, i));
}
}
return result;
}
public void Draw(
@ -317,14 +351,20 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
yield return new LandblockEntry(e.LandblockId, e.AabbMin, e.AabbMax, e.Entities, e.AnimatedById);
}
var walkResult = WalkEntities(
// A.5 T26 follow-up (Bug B): use the no-alloc WalkEntitiesInto overload
// that populates _walkScratch (a per-dispatcher field reused across frames)
// instead of allocating a fresh List<(WorldEntity, int)> per frame.
var walkResult = default(WalkResult);
WalkEntitiesInto(
ToEntries(landblockEntries),
frustum,
neverCullLandblockId,
visibleCellIds,
animatedEntityIds);
animatedEntityIds,
_walkScratch,
ref walkResult);
foreach (var (entity, partIdx) in walkResult.ToDraw)
foreach (var (entity, partIdx) in _walkScratch)
{
if (diag) _entitiesSeen++;