acdream/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs
Erik 58822fed96 fix(render): R1 — repurpose the ParentCellId==null cell-gate bypass (#78)
EntityPassesVisibleCellGate no longer returns true unconditionally for outdoor
scenery under a cell filter (was the headline #78 bleed). Outdoor scenery now
draws only via the unfiltered bucket (visibleCellIds: null) + ResolveEntitySlot's
OutsideView routing. The outdoor-root global Draw passes visibleCellIds: null
(no portal-cell scoping outdoors; retires VisibleCellIds as a render gate — peering
into buildings is R5). Updated the EntityClipTests case that pinned the old bypass
(Included -> Excluded). 174/174 App tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 20:10:26 +02:00

1959 lines
92 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.InteropServices;
using AcDream.Core.Meshing;
using AcDream.Core.Rendering;
using AcDream.Core.Terrain;
using AcDream.Core.World;
using DatReaderWriter.Enums;
using Silk.NET.OpenGL;
namespace AcDream.App.Rendering.Wb;
/// <summary>
/// Draws entities using WB's <see cref="ObjectRenderData"/> (a single global
/// VAO/VBO/IBO under modern rendering) with acdream's <see cref="TextureCache"/>
/// for bindless texture resolution and <see cref="AcSurfaceMetadataTable"/> for
/// translucency classification.
///
/// <para>
/// <b>Atlas-tier</b> entities (<c>ServerGuid == 0</c>): mesh data comes from WB's
/// <see cref="ObjectMeshManager"/> via <see cref="WbMeshAdapter.TryGetRenderData"/>.
/// Textures resolve through the bindless-suffixed
/// <see cref="TextureCache.GetOrUploadBindless"/> variants, returning 64-bit
/// resident handles stored in the per-group SSBO.
/// </para>
///
/// <para>
/// <b>Per-instance-tier</b> entities (<c>ServerGuid != 0</c>): mesh data also from
/// WB, but textures resolve through
/// <see cref="TextureCache.GetOrUploadWithPaletteOverrideBindless"/> with palette
/// and surface overrides applied. <see cref="AnimatedEntityState"/> is currently
/// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges +
/// GfxObjDegradeResolver (Issue #47 close-detail mesh) into <c>MeshRefs</c>.
/// </para>
///
/// <para>
/// <b>GL strategy (N.5 — mandatory):</b> <c>glMultiDrawElementsIndirect</c> with SSBOs
/// and <c>GL_ARB_bindless_texture</c> + <c>GL_ARB_shader_draw_parameters</c>.
/// All visible (entity, batch) pairs are bucketed by <see cref="GroupKey"/>;
/// each group becomes one <c>DrawElementsIndirectCommand</c>. Three GPU buffers
/// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch
/// metadata/texture handles (SSBO binding 1), and the indirect draw commands.
/// Two <c>glMultiDrawElementsIndirect</c> calls cover the opaque and transparent
/// passes respectively — one GL call per pass regardless of group count.
/// </para>
///
/// <para>
/// <b>Shader:</b> <c>mesh_modern</c> (bindless + <c>gl_DrawIDARB</c> /
/// <c>gl_BaseInstanceARB</c>). Missing bindless/draw-parameters throws
/// <see cref="NotSupportedException"/> at startup — there is no legacy fallback.
/// </para>
///
/// <para>
/// <b>Modern rendering assumption:</b> WB's <c>_useModernRendering</c> path (GL
/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses
/// <c>FirstIndex</c> + <c>BaseVertex</c> per batch. The dispatcher honors those
/// offsets inside each <c>DrawElementsIndirectCommand</c> via
/// <c>glMultiDrawElementsIndirect</c>.
/// </para>
/// </summary>
public sealed unsafe class WbDrawDispatcher : IDisposable
{
/// <summary>
/// Which subset of entities to walk in a single Draw call.
///
/// Phase U.1 (2026-05-30): the indoor/outdoor two-pipe split (IndoorPass /
/// OutdoorScenery / BuildingShells / LiveDynamic) was deleted along with the
/// inside-out render machinery. <see cref="All"/> is the sole remaining
/// member; the unified retail-faithful pass (Phase U) draws every entity in
/// one path. The <c>set:</c> parameter is retained on the Draw overloads so
/// the unified pass can re-introduce partitioning later without re-threading
/// the call sites.
/// </summary>
/// <remarks>
/// <see cref="All"/> is also the default value of the <c>set</c> parameter on
/// <c>WalkEntitiesInto</c> and <c>Draw</c>, so callers that never mention a set
/// get the unified walk.
/// </remarks>
public enum EntitySet
{
/// <summary>Every entity is a candidate. The walk still applies its
/// visible-cell gate (<c>ParentCellId ∈ visibleCellIds</c>) and its frustum
/// culling; this member adds no partitioning of its own.</summary>
All,
}
// Collaborators handed in through the constructor. The constructor rejects a
// null value for every one of them, so these are never null afterwards.
private readonly GL _gl;
private readonly Shader _shader;
private readonly TextureCache _textures;
private readonly WbMeshAdapter _meshAdapter;
private readonly EntitySpawnAdapter _entitySpawnAdapter;
private readonly BindlessSupport _bindless;
/// <summary>
/// Immutable bundle of per-Draw counters, published through
/// <see cref="LastDrawStats"/>.
/// NOTE(review): the code that fills these values is outside the visible part
/// of this file; the field meanings below are inferred from their names only —
/// confirm against the end of <c>Draw</c> before relying on them.
/// </summary>
public readonly record struct DrawStats(
EntitySet Set,
int EntitiesWalked,
int MeshRefs,
int Instances,
int Draws,
int CullRuns,
int OpaqueDraws,
int TransparentDraws,
long Triangles);
/// <summary>
/// The most recently published <see cref="DrawStats"/>. Readable by anyone;
/// only this class can assign it (private setter).
/// </summary>
public DrawStats LastDrawStats { get; private set; }
// Tier 1 cache (#53): per-entity classification results for static
// entities (those NOT in GameWindow._animatedEntities). Wired here in
// Task 7 for plumbing only — Tasks 9-10 wire the per-entity
// miss-populate / hit-fast-path through the loop.
// Assigned from the constructor's classificationCache argument.
private readonly EntityClassificationCache _cache;
// ACDREAM_DISABLE_TIER1_CACHE=1 A/B diagnostic — forces every static
// entity through the slow path. The environment variable is read exactly
// once per dispatcher, by this field initializer (i.e. at construction);
// changing the variable afterwards has no effect on an existing instance.
// Only the exact string "1" (ordinal comparison) enables the override.
private readonly bool _tier1CacheDisabled =
string.Equals(Environment.GetEnvironmentVariable("ACDREAM_DISABLE_TIER1_CACHE"), "1", StringComparison.Ordinal);
/// <summary>
/// A.5 T22.5: gate for GL_SAMPLE_ALPHA_TO_COVERAGE around the opaque pass.
/// Default true matches T20 behavior. Set false for Low/Medium presets that
/// have MsaaSamples=0 (A2C is a no-op without MSAA, but turning it off
/// avoids the unnecessary GL state thrash and is cleaner diagnostics).
/// Can be toggled mid-session via <see cref="GameWindow.ReapplyQualityPreset"/>.
/// </summary>
/// <value><see langword="true"/> by default; freely settable by callers.</value>
public bool AlphaToCoverage { get; set; } = true;
// SSBO / indirect buffer ids. All three are generated in the constructor
// with GenBuffer.
private uint _instanceSsbo;
private uint _batchSsbo;
private uint _indirectBuffer;
// Phase U.3: per-instance clip-slot SSBO (binding=3), parallel to
// _instanceSsbo. One uint per instance selecting its CellClip slot. In U.3
// this is ALL ZEROS (every instance → slot 0 → no-clip), so the render is
// identical to pre-U.3. U.4 populates real slot indices.
// The buffer id is generated in the constructor; _clipSlotData is the CPU-side
// staging array and starts with room for 256 entries.
private uint _clipSlotSsbo;
private uint[] _clipSlotData = new uint[256];
// Phase U.3: the SHARED per-cell clip-region SSBO (binding=2), owned by the
// GameWindow-level ClipFrame and handed to us via SetClipRegionSsbo. When 0
// (not yet wired), we bind our OWN fallback no-clip region buffer below so the
// shader never reads an unbound SSBO. The fallback holds exactly slot 0
// (count 0 = pass-all), matching ClipFrame.NoClip's slot 0.
// NOTE(review): _fallbackClipRegionSsbo is not created in the constructor; its
// creation site is outside the visible part of this file.
private uint _sharedClipRegionSsbo;
private uint _fallbackClipRegionSsbo;
// Phase U.4: per-frame clip-slot routing handed in via SetClipRouting before
// each Draw. When _clipRoutingActive is false (the U.3 path / outdoor root /
// no portal frame), every instance maps to slot 0 (no-clip) and no instance is
// culled — identical to U.3. When active, each instance's slot is resolved by
// ResolveEntitySlot per the U.4 policy (live-dynamic unclipped; cell statics to
// their cell slot; outdoor scenery to the OutsideView slot; non-visible culled).
// SetClipRouting writes all four fields; ClearClipRouting resets them to
// false / null / 0 / false.
private bool _clipRoutingActive;
private IReadOnlyDictionary<uint, int>? _cellIdToSlot;
private int _outdoorSlot;
private bool _outdoorVisible;
// Phase U.4: the clip slot of the entity currently being classified in Draw's
// per-entity loop. Set once per entity (before ClassifyBatches / ApplyCacheHit),
// read by the two matrix-append sites (AppendInstanceToGroup + ClassifyBatches)
// so every group's Slots[] stays in lockstep with its Matrices[]. Defaults to 0
// (no-clip) on the U.3 / outdoor path.
private uint _currentEntitySlot;
// Phase U.4: true when the current entity resolved to the CULL sentinel
// (cell not visible, or outdoor stab while no outdoors is visible). Persisted
// across the entity's tuples; the per-tuple body skips all instance emission.
private bool _currentEntityCulled;
// Per-frame scratch arrays — Tasks 9-10 fully wire these.
// Each starts sized for 256 entries (256 * 16 floats for the matrices).
private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
private BatchData[] _batchData = new BatchData[256];
private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];
private CullMode[] _drawCullModes = new CullMode[256];
// NOTE(review): the three values below are written outside the visible part of
// this file. By name they look like the opaque / transparent command counts and
// the byte offset of the first transparent command in the indirect buffer —
// confirm against Draw.
private int _opaqueDrawCount;
private int _transparentDrawCount;
private int _transparentByteOffset;
// CPU-side mirror of one per-batch record; _batchData is an array of these.
// std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
// at offset 8, uint Flags at offset 12. Total 16 bytes.
// Pack=8 (not 4) because std430's uvec2 requires 8-byte alignment — Pack=4
// works today by accident (TextureHandle is the first field, so offset 0 is
// always 8-byte aligned), but adding a 4-byte field before TextureHandle
// without bumping Pack would silently misalign the GPU struct.
// Field ORDER is part of the GPU contract (LayoutKind.Sequential): do not
// reorder or insert fields without updating the matching shader struct.
[StructLayout(LayoutKind.Sequential, Pack = 8)]
private struct BatchData
{
public ulong TextureHandle; // bindless handle (uvec2 in GLSL), offset 0
public uint TextureLayer; // offset 8
public uint Flags; // offset 12
}
// Per-frame scratch — reused across frames to avoid per-frame allocation.
// Draw clears each group's Matrices and Slots lists at the start of a frame
// rather than discarding the groups themselves.
private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
private readonly List<InstanceGroup> _opaqueDraws = new();
private readonly List<InstanceGroup> _translucentDraws = new();
// A.5 T26 follow-up (Bug B): WalkEntities populates this scratch list
// instead of allocating a fresh List<(WorldEntity, int)> per frame. At
// ~10K entities × ~3 mesh refs = ~30K tuples × 16 bytes = ~480 KB / frame
// of GC pressure on the render thread under the original T17 shape.
// Draw passes this list to WalkEntitiesInto, which clears and refills it.
private readonly List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> _walkScratch = new();
// Tier 1 cache (#53) — per-entity classification collector. Reused across
// frames; cleared at flush time when the per-entity loop crosses an entity
// boundary in _walkScratch (and once more at end-of-loop for the last
// entity). _walkScratch is in entity-order, so all MeshRefs of one entity
// are contiguous — accumulate them all before flushing one Populate call.
// Animated entities skip this scratch entirely (collector = null).
private readonly List<CachedBatch> _populateScratch = new();
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
// NOTE(review): the visible walk code culls against WorldEntity.AabbMin/AabbMax
// and does not read this constant directly — confirm where it is consumed.
private const float PerEntityCullRadius = 5.0f;
// NOTE(review): presumably the usual dispose-once guard; the Dispose
// implementation is outside the visible part of this file.
private bool _disposed;
/// <summary>
/// Per-cell-entity last-log frame number for rate-limiting the
/// [indoor-walk] / [indoor-lookup] / [indoor-xform] / [indoor-cull]
/// probes. Defaults to 30 frames at 30Hz = 1 sec.
/// Keyed by the id passed to <c>ShouldEmitIndoorProbe</c>; the value is the
/// <c>_indoorProbeFrameCounter</c> reading at the time of the last emission.
/// </summary>
private readonly Dictionary<ulong, int> _lastIndoorProbeFrame = new();
// Frame clock for the probe rate limiter: incremented once at the top of
// every Draw call.
private int _indoorProbeFrameCounter;
// Minimum number of Draw calls between two probe emissions for the same id.
private const int IndoorProbeRateLimitFrames = 30;
/// <summary>
/// Rate limiter for the indoor diagnostic probes. Allows one emission per
/// <paramref name="cellId"/> every <see cref="IndoorProbeRateLimitFrames"/>
/// frames, and always allows the first emission for an id never seen before.
/// A permitted emission is stamped with the current frame counter; a refused
/// one leaves the stored stamp untouched. The caller is responsible for
/// checking that the relevant probe flag is switched on.
/// </summary>
/// <param name="cellId">Id the rate limit is tracked against.</param>
/// <returns><see langword="true"/> when the caller may log this frame.</returns>
private bool ShouldEmitIndoorProbe(ulong cellId)
{
    bool seenBefore = _lastIndoorProbeFrame.TryGetValue(cellId, out int lastEmitFrame);

    // Still inside the quiet window for this id: refuse without re-stamping.
    if (seenBefore && _indoorProbeFrameCounter - lastEmitFrame < IndoorProbeRateLimitFrames)
    {
        return false;
    }

    _lastIndoorProbeFrame[cellId] = _indoorProbeFrameCounter;
    return true;
}
// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
// Draw only bumps _entitiesSeen / _entitiesDrawn when that flag is set.
private int _entitiesSeen;
private int _entitiesDrawn;
private int _meshesMissing;
private int _drawsIssued;
private int _instancesIssued;
private long _lastLogTick;
// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
// The stopwatch is restarted on every Draw regardless of the flag; only the
// logging is gated.
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
private readonly long[] _cpuSamples = new long[256]; // microseconds
private int _cpuSampleCursor;
// GPU timing uses a ring of 3 query-pair slots so the read of frame N-3's
// result lands when the GPU has finished (~50ms after issue on a typical
// 60fps frame). Ring of 3 is the vendor-neutral choice: NVIDIA drivers with
// triple-buffering+vsync can queue ~3 frames ahead, AMD typically 1-2,
// Intel iGPUs vary. ResultAvailable is the safety guard if the GPU is
// still working when we try to read.
private const int GpuQueryRingDepth = 3;
private readonly uint[] _gpuQueryOpaque = new uint[GpuQueryRingDepth];
private readonly uint[] _gpuQueryTransparent = new uint[GpuQueryRingDepth];
private int _gpuQueryFrameIndex;
private readonly long[] _gpuSamples = new long[256]; // microseconds
private int _gpuSampleCursor;
// The query objects are created lazily: Draw generates them the first time it
// runs with ACDREAM_WB_DIAG=1 and then sets this flag.
private bool _gpuQueriesInitialized;
// Why internal and not public: EntityClassificationCache is an internal type,
// and a public constructor may not expose an internal type in its signature
// (inconsistent accessibility). GameWindow, the only code that builds a
// dispatcher, lives in this assembly, so internal visibility is enough.
/// <summary>
/// Stores the injected collaborators and allocates the four GL buffer objects
/// the dispatcher owns (instance SSBO, batch SSBO, indirect command buffer and
/// the Phase U.3 clip-slot SSBO).
/// </summary>
/// <exception cref="ArgumentNullException">Any argument is null. Arguments are
/// checked before any GL buffer is created.</exception>
internal WbDrawDispatcher(
    GL gl,
    Shader shader,
    TextureCache textures,
    WbMeshAdapter meshAdapter,
    EntitySpawnAdapter entitySpawnAdapter,
    BindlessSupport bindless,
    EntityClassificationCache classificationCache)
{
    // Guard every dependency up front; bindless is deliberately checked last.
    ArgumentNullException.ThrowIfNull(gl);
    ArgumentNullException.ThrowIfNull(shader);
    ArgumentNullException.ThrowIfNull(textures);
    ArgumentNullException.ThrowIfNull(meshAdapter);
    ArgumentNullException.ThrowIfNull(entitySpawnAdapter);
    ArgumentNullException.ThrowIfNull(classificationCache);
    ArgumentNullException.ThrowIfNull(bindless);

    _gl = gl;
    _shader = shader;
    _textures = textures;
    _meshAdapter = meshAdapter;
    _entitySpawnAdapter = entitySpawnAdapter;
    _bindless = bindless;
    _cache = classificationCache;

    // Buffer creation order is kept stable: instance, batch, indirect, clip-slot.
    _instanceSsbo = gl.GenBuffer();
    _batchSsbo = gl.GenBuffer();
    _indirectBuffer = gl.GenBuffer();
    _clipSlotSsbo = gl.GenBuffer(); // Phase U.3 binding=3
}
/// <summary>
/// Phase U.3: records the id of the SHARED per-cell clip-region SSBO
/// (binding=2) created by <see cref="ClipFrame.UploadShared"/>. The dispatcher
/// re-binds that buffer to binding=2 right before each MDI, so another consumer
/// touching binding=2 in between cannot leave it pointing elsewhere.
/// </summary>
/// <param name="sharedClipRegionSsbo">GL buffer id of the shared clip-region
/// SSBO, or 0 to fall back to the dispatcher's internal no-clip region buffer.</param>
public void SetClipRegionSsbo(uint sharedClipRegionSsbo)
{
    _sharedClipRegionSsbo = sharedClipRegionSsbo;
}
/// <summary>
/// Phase U.4: arms per-frame clip-slot routing for an INDOOR root. Call it once
/// per frame, BEFORE <see cref="Draw"/>, whenever the camera's root cell is
/// non-null. The following <see cref="Draw"/> then picks every instance's
/// binding=3 clip slot using the U.4 policy: live-dynamic entities unclipped,
/// cell statics on their cell's slot, outdoor scenery on the OutsideView slot,
/// and anything not visible culled. On outdoor-root frames call
/// <see cref="ClearClipRouting"/> instead to return to the U.3
/// no-clip-everything behavior.
/// </summary>
/// <param name="cellIdToSlot">Map from cellId to CellClip slot. A cell missing
/// from the map is NOT visible, so its cell-static instances are culled.</param>
/// <param name="outdoorSlot">Slot used for outdoor scenery / building shells
/// while indoors (the OutsideView slot, or 0 for no-clip over-include).</param>
/// <param name="outdoorVisible">False ⇒ outdoor scenery / shells are culled this
/// frame (the OutsideView is empty).</param>
/// <exception cref="ArgumentNullException"><paramref name="cellIdToSlot"/> is
/// null; no routing state is modified in that case.</exception>
public void SetClipRouting(IReadOnlyDictionary<uint, int> cellIdToSlot, int outdoorSlot, bool outdoorVisible)
{
    ArgumentNullException.ThrowIfNull(cellIdToSlot);

    (_clipRoutingActive, _cellIdToSlot, _outdoorSlot, _outdoorVisible) =
        (true, cellIdToSlot, outdoorSlot, outdoorVisible);
}
/// <summary>
/// Phase U.4: disarms clip routing and returns to U.3 behavior, where every
/// instance maps to slot 0 (no-clip) and clip routing culls nothing. Intended
/// for outdoor-root frames (camera outdoors) and for any frame that has no
/// portal-visibility result. Also drops the reference to the previously
/// installed cell-to-slot map.
/// </summary>
public void ClearClipRouting()
{
    (_clipRoutingActive, _cellIdToSlot, _outdoorSlot, _outdoorVisible) =
        (false, null, 0, false);
}
// Phase U.4 CULL sentinel returned by ResolveEntitySlot: the entity's instances
// are dropped entirely (not emitted into the binding=0 instance buffer NOR the
// binding=3 slot buffer), matching the existing frustum / visible-cell cull.
// Internal (not private) so the clip-slot unit tests can assert against it
// directly — see WbDrawDispatcherClipSlotTests.
internal const int ClipSlotCull = -1;
/// <summary>
/// Phase U.4: resolve the clip slot for one entity per the slot/gate policy.
/// Returns <see cref="ClipSlotCull"/> to drop the entity's instances entirely.
/// <list type="bullet">
/// <item>ServerGuid != 0 (live dynamic: player / NPC / items / doors) ⇒ slot 0
/// (UNCLIPPED — retail draws live-dynamic unclipped; depth only).</item>
/// <item>ParentCellId != null (cell static) ⇒ the cell's slot, or CULL when the
/// cell isn't in <paramref name="cellIdToSlot"/> (not visible / nothing-visible).</item>
/// <item>ParentCellId == null (outdoor scenery / building shell) ⇒ the OutsideView
/// slot when <paramref name="outdoorVisible"/>, else CULL.</item>
/// </list>
/// Only called when <c>_clipRoutingActive</c> (indoor root). On the U.3 / outdoor
/// path every instance is slot 0 and nothing is culled — see
/// <see cref="ResolveSlotForFrame"/>, which gates on that flag.
/// <para>
/// INVARIANT: <paramref name="parentCellId"/> and the keys of
/// <paramref name="cellIdToSlot"/> MUST live in the same FULL cell-id space
/// (<c>lbMask | OtherCellId</c>, e.g. <c>0xA9B40164</c>). A bare-low-byte
/// ParentCellId (e.g. <c>0x64</c>) would never match a full-id key and would
/// silently CULL every indoor stab — cf. the L.2e bare-low-byte finding in
/// CLAUDE.md where player CellId was tracked without its landblock prefix.
/// </para>
/// <para>
/// <c>internal static</c> + pure (reads no instance state) so the clip-slot
/// unit tests exercise every branch without a GL context. The caller hands in
/// the routing fields it would otherwise read from <c>_cellIdToSlot</c> etc.
/// </para>
/// </summary>
/// <param name="serverGuid">Non-zero marks a live-dynamic entity.</param>
/// <param name="parentCellId">Full cell id of a cell static, or null for outdoor
/// scenery / building shells.</param>
/// <param name="cellIdToSlot">Visible-cell map; only dereferenced for cell
/// statics (<paramref name="serverGuid"/> == 0 and a non-null
/// <paramref name="parentCellId"/>).</param>
/// <param name="outdoorSlot">Slot returned for outdoor entities when
/// <paramref name="outdoorVisible"/> is true.</param>
/// <param name="outdoorVisible">Whether anything outdoors is visible this frame.</param>
/// <returns>The slot exactly as supplied by the map / <paramref name="outdoorSlot"/>,
/// 0 for live-dynamic entities, or <see cref="ClipSlotCull"/>. Values are passed
/// through unvalidated; <see cref="ResolveSlotForFrame"/> is where negative
/// results are turned into a cull.</returns>
internal static int ResolveEntitySlot(
    uint serverGuid,
    uint? parentCellId,
    IReadOnlyDictionary<uint, int> cellIdToSlot,
    int outdoorSlot,
    bool outdoorVisible)
{
    // Live-dynamic entities render unclipped regardless of cell — retail draws
    // the player / NPCs / dropped items through the depth buffer without portal
    // clipping. ServerGuid is the live-dynamic marker (0 for dat-hydrated).
    if (serverGuid != 0)
        return 0;
    if (parentCellId is uint parentCell)
        return cellIdToSlot.TryGetValue(parentCell, out int slot) ? slot : ClipSlotCull;
    // Outdoor scenery / building shell (no ParentCellId). Indoor root: gate to
    // the OutsideView slot, or cull when nothing outdoors is visible.
    return outdoorVisible ? outdoorSlot : ClipSlotCull;
}
/// <summary>
/// Phase U.4: the call-site clip-slot decision for one entity, returning the
/// <c>(Slot, Culled)</c> pair the per-entity loop body consumes. Wraps
/// <see cref="ResolveEntitySlot"/> with the <paramref name="clipRoutingActive"/>
/// gate: when routing is INACTIVE (outdoor root / no portal frame), every entity
/// is slot 0 and nothing is clip-culled — the bit-identical-to-U.3 property, so
/// the resolver (and <paramref name="cellIdToSlot"/>) is bypassed entirely.
/// When active, a CULL sentinel maps to <c>(0, culled=true)</c> — the slot value
/// is never emitted for a culled entity.
/// <para>
/// Any NEGATIVE resolution is treated as a cull, not just the exact
/// <see cref="ClipSlotCull"/> sentinel. A stray negative slot (a bad map value or
/// a bad <paramref name="outdoorSlot"/>) would otherwise be cast to
/// <see cref="uint"/> and wrap to a value near <c>uint.MaxValue</c>, i.e. an
/// out-of-range clip-slot index. Dropping the instance is the safe outcome.
/// </para>
/// <c>internal static</c> + pure so the whole policy (including the routing-
/// inactive branch) is unit-testable — see WbDrawDispatcherClipSlotTests.
/// </summary>
/// <param name="clipRoutingActive">False ⇒ always <c>(0, false)</c>; no other
/// argument is inspected.</param>
/// <param name="serverGuid">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <param name="parentCellId">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <param name="cellIdToSlot">May be null only while routing is inactive. With
/// routing active it must be non-null, because it is dereferenced for cell
/// statics.</param>
/// <param name="outdoorSlot">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <param name="outdoorVisible">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <returns><c>(slot, false)</c> for an entity to draw, <c>(0, true)</c> for an
/// entity to drop.</returns>
internal static (uint Slot, bool Culled) ResolveSlotForFrame(
    bool clipRoutingActive,
    uint serverGuid,
    uint? parentCellId,
    IReadOnlyDictionary<uint, int>? cellIdToSlot,
    int outdoorSlot,
    bool outdoorVisible)
{
    if (!clipRoutingActive)
        return (0u, false);
    int resolved = ResolveEntitySlot(serverGuid, parentCellId, cellIdToSlot!, outdoorSlot, outdoorVisible);
    // ClipSlotCull is negative, so this one test covers the sentinel AND any
    // other negative value that must never reach the uint cast below.
    if (resolved < 0)
        return (0u, true);
    return ((uint)resolved, false);
}
/// <summary>
/// Builds the world matrix of one mesh part as
/// <c>restPose * animOverride * entityWorld</c>, evaluated left to right.
/// Under System.Numerics' row-vector convention this applies the rest pose
/// first, then the animation override, then the entity's world transform.
/// </summary>
/// <param name="entityWorld">Entity-to-world transform (applied last).</param>
/// <param name="animOverride">Animation override for the part (applied second).</param>
/// <param name="restPose">Rest-pose transform of the part (applied first).</param>
/// <returns>The composed part-to-world matrix.</returns>
public static Matrix4x4 ComposePartWorldMatrix(
    Matrix4x4 entityWorld,
    Matrix4x4 animOverride,
    Matrix4x4 restPose)
{
    // Keep the (rest * anim) * world association so rounding is unchanged.
    Matrix4x4 partLocal = Matrix4x4.Multiply(restPose, animOverride);
    return Matrix4x4.Multiply(partLocal, entityWorld);
}
/// <summary>
/// Entry for <see cref="WalkEntities"/> per-landblock iteration.
/// Mirrors the shape yielded by <c>GpuWorldState.LandblockEntries</c>.
/// </summary>
/// <param name="LandblockId">Id of the landblock; compared against the walk's
/// never-cull landblock id and copied into every emitted tuple.</param>
/// <param name="AabbMin">Minimum corner of the landblock AABB used for the
/// landblock-level frustum test.</param>
/// <param name="AabbMax">Maximum corner of the landblock AABB used for the
/// landblock-level frustum test.</param>
/// <param name="Entities">Every entity in the landblock; walked when the
/// landblock is visible.</param>
/// <param name="AnimatedById">Optional lookup of the landblock's animated
/// entities by id. When the landblock is frustum-culled the walk consults only
/// this map; a null map means nothing is walked for a culled landblock.</param>
public readonly record struct LandblockEntry(
uint LandblockId,
Vector3 AabbMin,
Vector3 AabbMax,
IReadOnlyList<WorldEntity> Entities,
IReadOnlyDictionary<uint, WorldEntity>? AnimatedById);
/// <summary>
/// Result of <see cref="WalkEntities"/> — the list of
/// (entity, meshRef index, landblock id) tuples that passed all visibility
/// filters, plus diagnostic counters.
/// </summary>
public struct WalkResult
{
// Number of ENTITIES (not tuples) that passed every filter.
public int EntitiesWalked;
// Building-shell entities that passed / failed the visible-cell gate. Only
// counted for shell-scoped walks with a non-null visibleCellIds set.
public int BuildingShellAnchorPass;
public int BuildingShellAnchorReject;
// One tuple per (entity, MeshRef index), in entity order. The walk stores the
// caller's scratch list here directly — it is an alias, not a copy.
public List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> ToDraw;
}
/// <summary>
/// Pure-CPU visibility filter over <paramref name="landblockEntries"/>, kept
/// apart from <see cref="Draw"/> so tests can run it without any GL state.
/// This is the test-friendly overload: it allocates a fresh ToDraw list on
/// every call and then delegates to <see cref="WalkEntitiesInto"/>. Production
/// code (<see cref="Draw"/>) calls <see cref="WalkEntitiesInto"/> directly with
/// a reusable scratch list. Indoor probes are disabled on this path.
///
/// <para>
/// A.5 T17 Change #1: for a frustum-culled landblock with a non-empty
/// <paramref name="animatedEntityIds"/> set, the walk iterates
/// <paramref name="animatedEntityIds"/> and looks each id up in
/// <c>entry.AnimatedById</c> (typically &lt;50 animated, up to ~10K total),
/// where the OLD path scanned every entity in the landblock to find them.
/// </para>
///
/// <para>
/// A.5 T18 Change #2: the per-entity AABB cull reads the cached
/// <see cref="WorldEntity.AabbMin"/>/<see cref="WorldEntity.AabbMax"/>
/// (refreshed lazily when <see cref="WorldEntity.AabbDirty"/> is set) rather
/// than recomputing Position±5 each frame.
/// </para>
/// </summary>
/// <returns>A <see cref="WalkResult"/> whose <c>ToDraw</c> is the newly
/// allocated list.</returns>
internal static WalkResult WalkEntities(
    IEnumerable<LandblockEntry> landblockEntries,
    FrustumPlanes? frustum,
    uint? neverCullLandblockId,
    HashSet<uint>? visibleCellIds,
    HashSet<uint>? animatedEntityIds)
{
    var toDraw = new List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)>();
    WalkResult walk = default;
    walk.ToDraw = toDraw;

    WalkEntitiesInto(
        landblockEntries: landblockEntries,
        frustum: frustum,
        neverCullLandblockId: neverCullLandblockId,
        visibleCellIds: visibleCellIds,
        animatedEntityIds: animatedEntityIds,
        scratch: toDraw,
        result: ref walk);

    return walk;
}
/// <summary>
/// No-alloc overload: clears + populates the caller-provided <paramref name="scratch"/>
/// list. <see cref="Draw"/> reuses a per-dispatcher scratch field across frames to
/// avoid the 480+ KB / frame GC pressure that the test-friendly overload incurs.
/// Returns walk count via <paramref name="result"/>'s <c>EntitiesWalked</c> field.
///
/// <para>
/// Every counter on <paramref name="result"/> (<c>EntitiesWalked</c>,
/// <c>BuildingShellAnchorPass</c>, <c>BuildingShellAnchorReject</c>) is reset at
/// the start of the call, so a <see cref="WalkResult"/> reused across frames
/// never carries stale counts.
/// </para>
///
/// <para>
/// When <paramref name="indoorProbeState"/> is non-null the method emits
/// <c>[indoor-cull]</c> lines for cell entities rejected by the
/// visibleCellIds or frustum filters, and <c>[indoor-walk]</c> lines for
/// cell entities that pass all filters. Rate-limited by
/// <see cref="IndoorProbeState"/>. Pass <see langword="null"/> (the default)
/// to disable all probe emission — used by the test-friendly
/// <see cref="WalkEntities"/> overload.
/// </para>
/// </summary>
/// <param name="landblockEntries">Landblocks to walk, enumerated once.</param>
/// <param name="frustum">Culling frustum, or null to disable frustum culling.</param>
/// <param name="neverCullLandblockId">Landblock exempt from both the
/// landblock-level and the per-entity frustum test.</param>
/// <param name="visibleCellIds">Visible-cell filter handed to the visible-cell
/// gate; null means no cell filter.</param>
/// <param name="animatedEntityIds">Ids of animated entities. They bypass the
/// per-entity frustum cull and are the only entities walked in a culled
/// landblock.</param>
/// <param name="scratch">Output list; cleared first, then filled with one tuple
/// per (entity, MeshRef index) in entity order.</param>
/// <param name="result">Receives the counters and an alias of
/// <paramref name="scratch"/> in <c>ToDraw</c>.</param>
/// <param name="indoorProbeState">Rate limiter for the indoor probes, or null
/// to keep them silent.</param>
/// <param name="set">Entity subset to walk.</param>
internal static void WalkEntitiesInto(
    IEnumerable<LandblockEntry> landblockEntries,
    FrustumPlanes? frustum,
    uint? neverCullLandblockId,
    HashSet<uint>? visibleCellIds,
    HashSet<uint>? animatedEntityIds,
    List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> scratch,
    ref WalkResult result,
    IndoorProbeState? indoorProbeState = null,
    EntitySet set = EntitySet.All)
{
    scratch.Clear();
    result.EntitiesWalked = 0;
    // Reset alongside EntitiesWalked: these are per-call counters too.
    result.BuildingShellAnchorPass = 0;
    result.BuildingShellAnchorReject = 0;
    result.ToDraw = scratch;
    foreach (var entry in landblockEntries)
    {
        bool landblockVisible = frustum is null
            || entry.LandblockId == neverCullLandblockId
            || FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);
        if (!landblockVisible)
        {
            // A.5 T17 Change #1: walk only animated entities, not all entities.
            // Avoids O(N_entities) scan when only O(N_animated) work is needed.
            if (animatedEntityIds is null || animatedEntityIds.Count == 0) continue;
            if (entry.AnimatedById is null) continue;
            foreach (var animatedId in animatedEntityIds)
            {
                if (!entry.AnimatedById.TryGetValue(animatedId, out var entity)) continue;
                // Phase A8: EntitySet partition for indoor/outdoor split passes.
                if (!EntityMatchesSet(entity, set)) continue;
                if (entity.MeshRefs.Count == 0) continue;
                bool shellScoped = IsShellScopedSet(set)
                    && entity.IsBuildingShell
                    && visibleCellIds is not null;
                if (!EntityPassesVisibleCellGate(entity, visibleCellIds, set))
                {
                    if (shellScoped) result.BuildingShellAnchorReject++;
                    continue;
                }
                if (shellScoped) result.BuildingShellAnchorPass++;
                result.EntitiesWalked++;
                for (int i = 0; i < entity.MeshRefs.Count; i++)
                    scratch.Add((entity, i, entry.LandblockId));
            }
            continue;
        }
        foreach (var entity in entry.Entities)
        {
            // Phase A8: EntitySet partition for indoor/outdoor split passes.
            if (!EntityMatchesSet(entity, set)) continue;
            if (entity.MeshRefs.Count == 0) continue;
            // Detect cell entity for indoor probes — first MeshRef.GfxObjId
            // is an EnvCell id (low 16 bits ≥ 0x0100). Cheap to compute;
            // result reused for all probe checks below.
            ulong cellProbeId = (ulong)entity.MeshRefs[0].GfxObjId;
            bool isCellEntity = indoorProbeState is not null
                && RenderingDiagnostics.IsEnvCellId(cellProbeId);
            bool shellScoped = IsShellScopedSet(set)
                && entity.IsBuildingShell
                && visibleCellIds is not null;
            bool cellInVis = EntityPassesVisibleCellGate(entity, visibleCellIds, set);
            if (!cellInVis)
            {
                if (shellScoped) result.BuildingShellAnchorReject++;
                if (isCellEntity && RenderingDiagnostics.ProbeIndoorCullEnabled
                    && indoorProbeState!.ShouldEmit(cellProbeId))
                {
                    // ParentCellId may be null for an entity that fails the gate;
                    // print 0 in that case (same fallback as the [indoor-walk]
                    // probe) so a diagnostic line can never throw on the render
                    // thread.
                    Console.WriteLine(
                        $"[indoor-cull] cellEnt=0x{entity.Id:X8} " +
                        $"reason=visibleCellIds-miss " +
                        $"parentCell=0x{(entity.ParentCellId ?? 0u):X8}");
                }
                continue;
            }
            if (shellScoped) result.BuildingShellAnchorPass++;
            // Per-entity AABB frustum cull (perf #3). Animated entities bypass —
            // they're tracked at landblock level + need per-frame work regardless.
            // A.5 T18 Change #2: read cached AABB, refresh lazily on AabbDirty.
            bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
            bool aabbVisible = true;
            if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
            {
                if (entity.AabbDirty) entity.RefreshAabb();
                aabbVisible = FrustumCuller.IsAabbVisible(frustum.Value, entity.AabbMin, entity.AabbMax);
            }
            if (!aabbVisible)
            {
                if (isCellEntity && RenderingDiagnostics.ProbeIndoorCullEnabled
                    && indoorProbeState!.ShouldEmit(cellProbeId))
                {
                    Console.WriteLine(
                        $"[indoor-cull] cellEnt=0x{entity.Id:X8} " +
                        $"reason=frustum " +
                        $"aabbMin=({entity.AabbMin.X:F1},{entity.AabbMin.Y:F1},{entity.AabbMin.Z:F1}) " +
                        $"aabbMax=({entity.AabbMax.X:F1},{entity.AabbMax.Y:F1},{entity.AabbMax.Z:F1})");
                }
                continue;
            }
            // Passed all filters — emit walk probe.
            if (isCellEntity && RenderingDiagnostics.ProbeIndoorWalkEnabled
                && indoorProbeState!.ShouldEmit(cellProbeId))
            {
                Console.WriteLine(
                    $"[indoor-walk] cellEnt=0x{entity.Id:X8} " +
                    $"pos=({entity.Position.X:F1},{entity.Position.Y:F1},{entity.Position.Z:F1}) " +
                    $"parentCell=0x{(entity.ParentCellId ?? 0u):X8} " +
                    $"meshRef0=0x{cellProbeId:X8} " +
                    $"meshRefCount={entity.MeshRefs.Count} " +
                    $"landblockVisible=true aabbVisible=true cellInVis=true");
            }
            result.EntitiesWalked++;
            for (int i = 0; i < entity.MeshRefs.Count; i++)
                scratch.Add((entity, i, entry.LandblockId));
        }
    }
}
public void Draw(
ICamera camera,
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax,
IReadOnlyList<WorldEntity> Entities,
IReadOnlyDictionary<uint, WorldEntity>? AnimatedById)> landblockEntries,
FrustumPlanes? frustum = null,
uint? neverCullLandblockId = null,
HashSet<uint>? visibleCellIds = null,
HashSet<uint>? animatedEntityIds = null,
EntitySet set = EntitySet.All)
{
_shader.Use();
_indoorProbeFrameCounter++;
var vp = camera.View * camera.Projection;
_shader.SetMatrix4("uViewProjection", vp);
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
if (diag && !_gpuQueriesInitialized)
{
for (int i = 0; i < GpuQueryRingDepth; i++)
{
_gpuQueryOpaque[i] = _gl.GenQuery();
_gpuQueryTransparent[i] = _gl.GenQuery();
}
_gpuQueriesInitialized = true;
}
// Always run the CPU stopwatch — cheap; only logged under diag.
_cpuStopwatch.Restart();
// Camera world-space position for front-to-back sort (perf #2). The view
// matrix is the inverse of the camera's world transform, so the world
// translation lives in the inverse's translation row.
Vector3 camPos = Vector3.Zero;
if (Matrix4x4.Invert(camera.View, out var invView))
camPos = invView.Translation;
// ── Phase 1: clear groups, walk entities, build groups ──────────────
foreach (var grp in _groups.Values) { grp.Matrices.Clear(); grp.Slots.Clear(); }
var metaTable = _meshAdapter.MetadataTable;
uint anyVao = 0;
// Project the 5-tuple enumerable into LandblockEntry records for WalkEntities.
static IEnumerable<LandblockEntry> ToEntries(
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax,
IReadOnlyList<WorldEntity> Entities,
IReadOnlyDictionary<uint, WorldEntity>? AnimatedById)> src)
{
foreach (var e in src)
yield return new LandblockEntry(e.LandblockId, e.AabbMin, e.AabbMax, e.Entities, e.AnimatedById);
}
// A.5 T26 follow-up (Bug B): use the no-alloc WalkEntitiesInto overload
// that populates _walkScratch (a per-dispatcher field reused across frames)
// instead of allocating a fresh List<(WorldEntity, int)> per frame.
//
// Pass an IndoorProbeState when any indoor probe is active so the static
// WalkEntitiesInto can emit rate-limited [indoor-cull] / [indoor-walk]
// lines without needing access to instance fields. Null = probes off.
IndoorProbeState? probeState = null;
if (RenderingDiagnostics.ProbeIndoorCullEnabled || RenderingDiagnostics.ProbeIndoorWalkEnabled)
{
// _currentFrame is snapped at construction time. Construct
// once per Draw() call only — a second construction within
// the same frame would stamp the dictionary with the
// (already-advanced) counter value, suppressing the second
// pass's emissions for IndoorProbeRateLimitFrames frames.
// Today Draw() is called exactly once per frame; if a
// future refactor adds a shadow / reflection / second pass,
// this assumption needs revisiting.
probeState = new IndoorProbeState(_lastIndoorProbeFrame, _indoorProbeFrameCounter);
}
var walkResult = default(WalkResult);
WalkEntitiesInto(
ToEntries(landblockEntries),
frustum,
neverCullLandblockId,
visibleCellIds,
animatedEntityIds,
_walkScratch,
ref walkResult,
probeState,
set);
// Tier 1 cache (#53) flush-tracking locals. _walkScratch holds one tuple
// per (entity, MeshRefIndex) and is in entity-order, so all MeshRefs of
// a given entity are contiguous. We accumulate ALL of an entity's
// batches into _populateScratch, then flush exactly once per entity:
// either when the iteration crosses to a different entity, or at the
// end of the loop for the last entity. Flushing per-tuple would
// overwrite earlier MeshRefs (the cache is keyed by entity.Id), so
// multi-part Setup-backed entities would only retain their LAST
// MeshRef's batches — bug fixed in commit after 2f489a8.
uint? populateEntityId = null;
uint populateLandblockId = 0;
// Tier 1 cache (#53) — fast-path one-shot tracker. The cache stores a
// FLAT list of batches across all MeshRefs of an entity, so a single
// ApplyCacheHit call already drew every batch. _walkScratch yields
// one tuple per (entity, MeshRefIndex), so without this guard a
// 3-MeshRef static entity on a frame-2 cache hit would call
// ApplyCacheHit 3 times — appending all 6 batches × 3 = 18 instances
// to _groups instead of 6. Result: severe Z-fighting + 3× perf hit
// on every multi-part static entity (buildings, statues, multi-MeshRef
// NPCs). The fast path must fire only on the FIRST tuple of each
// entity; subsequent tuples skip via this tracker.
uint? lastHitEntityId = null;
// Tier 1 cache (#53) — incomplete-entity guard. When any MeshRef of
// the current entity has _meshAdapter.TryGetRenderData return null
// (mesh still async-decoding via ObjectMeshManager.PrepareMeshDataAsync),
// we mark the entity incomplete and DROP the accumulated populate
// scratch at entity boundary instead of writing it to the cache.
// Otherwise the cache would hold a partial classification (some parts
// missing), and frame-2 cache hits would persist that partial render
// even after the missing mesh loads — every subsequent frame sees the
// cache hit and skips re-classification, so the missing parts never
// recover. User-visible symptom: the drudge statue on top of the
// Foundry (multi-part Setup entity with AnimPartChange) renders with
// some parts missing permanently. Reset on entity change.
bool currentEntityIncomplete = false;
// Per-tuple entity tracker used purely for entity-change detection.
// Updated UNCONDITIONALLY at end of every tuple (including tuples that
// skip via null renderData), so the flag-reset block below correctly
// distinguishes "new entity" from "same entity, different tuple."
// populateEntityId can't be used for this because it's only set after
// a successful slow-path classification.
uint? prevTupleEntityId = null;
foreach (var (entity, partIdx, landblockId) in _walkScratch)
{
if (diag) _entitiesSeen++;
// Skip subsequent tuples of an entity that already cache-hit on
// its first tuple. ApplyCacheHit drew the full flat batch list;
// re-firing here would N-multiply the instance count. Diag
// _entitiesDrawn is bumped here to preserve per-tuple parity with
// the previous counting semantics.
if (lastHitEntityId == entity.Id)
{
if (diag) _entitiesDrawn++;
continue;
}
// Reset the hit tracker on entity change so the next entity's
// first tuple re-checks the cache. (When this iteration is the
// FIRST tuple of a new entity after a cache-hit entity, we must
// not retain the previous entity's id.)
if (lastHitEntityId.HasValue && lastHitEntityId.Value != entity.Id)
{
lastHitEntityId = null;
}
// Tier 1 cache (#53) — drop the previous entity's accumulated
// populate scratch BEFORE MaybeFlushOnEntityChange runs. If the
// previous entity ended incomplete (≥1 null renderData), we MUST
// NOT cache its partial classification: clear scratch and null
// the tracker so MaybeFlushOnEntityChange sees the cleaned state
// and no-ops for this entity. Reset the incomplete flag for the
// new entity so each one gets a fresh measurement.
//
// CRITICAL: the flag reset must fire ONLY on entity change, not
// every tuple. Resetting per-tuple within the same entity would
// undo a null-renderData flag set by a previous tuple of the same
// entity → if the missing MeshRef sits in the MIDDLE of the
// entity's MeshRefs list, a later valid tuple's reset would
// re-mark the entity "complete" and let partial data populate
// the cache. Trees with [trunk valid, branches null, leaves
// valid] hit this exactly — branches never recover.
bool isNewEntity = !prevTupleEntityId.HasValue || prevTupleEntityId.Value != entity.Id;
if (isNewEntity)
{
if (populateEntityId.HasValue && currentEntityIncomplete)
{
_populateScratch.Clear();
populateEntityId = null;
}
currentEntityIncomplete = false;
// Phase U.4: resolve this entity's clip slot ONCE per entity
// (constant across its tuples). On the U.3 / outdoor path
// (_clipRoutingActive false) every entity is slot 0, never culled.
// The whole decision (including the routing-active gate) lives in
// the pure ResolveSlotForFrame helper so it's unit-testable.
(_currentEntitySlot, _currentEntityCulled) = ResolveSlotForFrame(
_clipRoutingActive, entity.ServerGuid, entity.ParentCellId,
_cellIdToSlot, _outdoorSlot, _outdoorVisible);
}
prevTupleEntityId = entity.Id;
// Flush-on-entity-change: if the previous entity accumulated any
// batches AND this iteration is for a different entity, populate
// its cache entry now and reset the scratch buffer. Runs for ALL
// entities (including this-entity-culled) so the PREVIOUS entity's
// cache always flushes at the boundary.
(populateEntityId, populateLandblockId) = MaybeFlushOnEntityChange(
populateEntityId, populateLandblockId, entity.Id, _cache, _populateScratch);
// Phase U.4: a culled entity (cell not visible, or no outdoors visible
// for an outdoor stab) contributes NO instances. Skip after the
// boundary flush above so the previous entity still committed; the
// next entity's isNewEntity logic is unaffected (prevTupleEntityId is
// already updated). Matches the existing visible-cell / frustum cull:
// nothing enters _groups, so neither binding=0 nor binding=3 sees it.
if (_currentEntityCulled)
continue;
var entityWorld =
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position);
bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
// Cache-hit fast path (Task 10): static entity with a populated
// cache entry skips classification entirely. Walk the cached
// (GroupKey, RestPose) flat list and append cached.RestPose *
// entityWorld to each matching group's matrices. Animated entities
// bypass the cache (collector is set null below; their entries are
// never populated in the first place).
//
// Placed AFTER the entity-change flush above so that, on a
// hit, this iteration also finishes flushing any pending
// populate state from a previous entity. Animated entities never
// enter this branch — the !isAnimated guard makes that explicit.
//
// Fires ONCE per entity: the first tuple reaches here, runs
// ApplyCacheHit, sets lastHitEntityId, and continues. Subsequent
// tuples of the same entity short-circuit at the top of the loop
// body via the lastHitEntityId == entity.Id check above.
if (!isAnimated && !_tier1CacheDisabled && _cache.TryGet(entity.Id, landblockId, out var cachedEntry))
{
ApplyCacheHit(cachedEntry!, entityWorld, AppendInstanceToGroup);
// anyVao recovery: when the first visible entity in the frame
// takes the fast path, no slow-path lookup has populated
// anyVao yet. Look up THIS entity's first MeshRef once via
// the mesh adapter — cheap dict lookup, not a re-classify.
if (anyVao == 0)
{
var firstMeshRef = entity.MeshRefs[partIdx];
var firstRenderData = _meshAdapter.TryGetRenderData(firstMeshRef.GfxObjId);
if (firstRenderData is not null) anyVao = firstRenderData.VAO;
}
if (diag) _entitiesDrawn++;
lastHitEntityId = entity.Id;
#if DEBUG
// Cross-check guard: assert the membership predicate held at hit time.
// The full re-classification cross-check (spec section 6.5) is a stretch
// goal; this simpler assert catches the prior Tier 1 bug class — a
// static entity that turns out to actually be animated would fire here.
//
// Structurally redundant with the `if (!isAnimated && ...)` branch
// condition, but serves as a TRIPWIRE: a future refactor that
// incorrectly relaxes the branch condition (e.g., removes
// `!isAnimated` from the guard) would silently allow animated
// entities into the fast path; the assert catches that immediately.
System.Diagnostics.Debug.Assert(
!isAnimated,
$"EntityClassificationCache hit on animated entity {entity.Id} — invariant violated");
#endif
continue;
}
// Compute palette-override hash ONCE per entity (perf #4).
// Reused across every (part, batch) lookup so the FNV-1a fold
// over SubPalettes runs once instead of N times. Zero when the
// entity has no palette override (trees, scenery).
ulong palHash = 0;
if (entity.PaletteOverride is not null)
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
// Note: GameWindow's spawn path already applies
// AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix —
// close-detail mesh swap for humanoids) to MeshRefs. We
// trust MeshRefs as the source of truth here. AnimatedEntityState's
// overrides become relevant only for hot-swap (0xF625
// ObjDescEvent) which today rebuilds MeshRefs anyway.
var meshRef = entity.MeshRefs[partIdx];
ulong gfxObjId = meshRef.GfxObjId;
var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
// [indoor-lookup] probe — emit once per cell entity per sec.
// Fires BEFORE the null-renderData early-continue so a miss still
// emits hit=false, distinguishing H2 (empty batches) from H6
// (dispatcher fails to traverse Setup).
ulong lookupCellId = (ulong)gfxObjId;
if (RenderingDiagnostics.IsEnvCellId(lookupCellId)
&& RenderingDiagnostics.ProbeIndoorLookupEnabled
// Rate-limit in a separate namespace from [indoor-walk]/[indoor-cull]
// (which key on the same gfxObjId). Without this, IndoorAll=1 would
// silence the lookup probe whenever the walk probe fired first.
&& ShouldEmitIndoorProbe(lookupCellId | 0x8000_0000_0000_0000UL))
{
bool hit = renderData is not null;
bool isSetup = hit && renderData!.IsSetup;
int partCount = isSetup ? renderData!.SetupParts.Count : 0;
int partsHit = 0, partsMiss = 0;
if (isSetup)
{
foreach (var (partId, _) in renderData!.SetupParts)
{
if (_meshAdapter.TryGetRenderData(partId) is not null) partsHit++;
else partsMiss++;
}
}
bool hasEnvCellGeom = isSetup
&& renderData!.SetupParts.Exists(t => (t.GfxObjId & 0x1_0000_0000UL) != 0);
Console.WriteLine(
$"[indoor-lookup] cellId=0x{lookupCellId:X8} " +
$"hit={hit} isSetup={isSetup} partCount={partCount} " +
$"hasEnvCellGeom={hasEnvCellGeom} partsHit={partsHit} partsMiss={partsMiss}");
}
if (renderData is null)
{
// Tier 1 cache (#53): mesh data is still async-decoding via
// WB's ObjectMeshManager.PrepareMeshDataAsync. Flag the entity
// as incomplete so the entity-boundary check (or end-of-loop
// check) drops the accumulated populate scratch instead of
// caching a partial classification. The slow path retries on
// the next frame; once all this entity's meshes have loaded,
// the populate fires with the complete batch set.
currentEntityIncomplete = true;
if (diag) _meshesMissing++;
continue;
}
if (anyVao == 0) anyVao = renderData.VAO;
// Cache-miss path (animated entities skip cache entirely).
// Static entities accumulate into _populateScratch across ALL
// their MeshRefs; the flush at next-entity-boundary (or
// end-of-loop) commits them as a single Populate call.
var collector = isAnimated ? null : _populateScratch;
bool drewAny = false;
if (renderData.IsSetup && renderData.SetupParts.Count > 0)
{
foreach (var (partGfxObjId, partTransform) in renderData.SetupParts)
{
var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
if (partData is null) continue;
var model = ComposePartWorldMatrix(
entityWorld, meshRef.PartTransform, partTransform);
// [indoor-xform] probe — only for the cell's synthetic
// geometry part (bit 32 set, per WB's PrepareEnvCellMeshData
// cellGeomId convention). One line per part per sec.
// Disambiguates hypothesis H5 (transform double-apply —
// composedT lands at 2 × cellOrigin).
if ((partGfxObjId & 0x1_0000_0000UL) != 0
&& RenderingDiagnostics.ProbeIndoorXformEnabled
&& ShouldEmitIndoorProbe(partGfxObjId))
{
Console.WriteLine(
$"[indoor-xform] cellGeomId=0x{partGfxObjId:X16} " +
$"entityWorldT=({entityWorld.Translation.X:F2},{entityWorld.Translation.Y:F2},{entityWorld.Translation.Z:F2}) " +
$"meshRefT=({meshRef.PartTransform.Translation.X:F2},{meshRef.PartTransform.Translation.Y:F2},{meshRef.PartTransform.Translation.Z:F2}) " +
$"partT=({partTransform.Translation.X:F2},{partTransform.Translation.Y:F2},{partTransform.Translation.Z:F2}) " +
$"composedT=({model.Translation.X:F2},{model.Translation.Y:F2},{model.Translation.Z:F2})");
}
var restPose = partTransform * meshRef.PartTransform;
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable, restPose, collector);
drewAny = true;
}
}
else
{
var model = meshRef.PartTransform * entityWorld;
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable, restPose: meshRef.PartTransform, collector: collector);
drewAny = true;
}
// Track THIS entity for the next iteration's flush check. Only
// when collector is non-null (entity is static); animated entities
// leave the tracker null so we don't try to flush them.
if (collector is not null)
{
populateEntityId = entity.Id;
populateLandblockId = landblockId;
}
if (diag && drewAny) _entitiesDrawn++;
}
// Tier 1 cache (#53) — drop the accumulated populate scratch if the
// LAST entity in the loop ended incomplete (had ≥1 null renderData).
// Same reason as the entity-boundary handling above: avoid caching a
// partial classification. The slow path will retry on the next frame
// and populate correctly once all meshes have loaded.
if (currentEntityIncomplete)
{
_populateScratch.Clear();
populateEntityId = null;
}
// Final flush: the last entity in _walkScratch has no "next iteration"
// to trigger the entity-change flush, so commit its accumulated batches
// here. No-op when the last entity was animated (populateEntityId stays
// null) or when no entities walked at all.
FinalFlushPopulate(populateEntityId, populateLandblockId, _cache, _populateScratch);
// Nothing visible — skip the GL pass entirely.
if (anyVao == 0)
{
LastDrawStats = new DrawStats(set, walkResult.EntitiesWalked, _walkScratch.Count, 0, 0, 0, 0, 0, 0);
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag();
return;
}
// ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ──
int totalInstances = 0;
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
if (totalInstances == 0)
{
LastDrawStats = new DrawStats(set, walkResult.EntitiesWalked, _walkScratch.Count, 0, 0, 0, 0, 0, 0);
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag();
return;
}
int needed = totalInstances * 16;
if (_instanceData.Length < needed)
_instanceData = new float[needed + 256 * 16];
// Phase U.4: size the per-instance clip-slot buffer to match the instance
// count and lay it out in the SAME group order / cursor as _instanceData,
// so instanceClipSlot[i] (binding=3) tracks Instances[i] (binding=0). On
// the U.3 / outdoor path every Slots entry is 0 ⇒ identical to U.3.
if (_clipSlotData.Length < totalInstances)
_clipSlotData = new uint[totalInstances + 256];
_opaqueDraws.Clear();
_translucentDraws.Clear();
int cursor = 0;
foreach (var grp in _groups.Values)
{
if (grp.Matrices.Count == 0) continue;
grp.FirstInstance = cursor;
grp.InstanceCount = grp.Matrices.Count;
// Use the first instance's translation as the group's representative
// position for front-to-back sort (perf #2). Cheap heuristic; works
// well when instances of one group are spatially coherent
// (typical for trees in one landblock area, NPCs at one spawn).
var first = grp.Matrices[0];
var grpPos = new Vector3(first.M41, first.M42, first.M43);
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
for (int i = 0; i < grp.Matrices.Count; i++)
{
WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]);
// Slots[] is parallel to Matrices[] within the group; write the
// slot at the same cursor so binding=3 stays aligned with binding=0.
_clipSlotData[cursor] = grp.Slots[i];
cursor++;
}
if (IsOpaque(grp.Translucency))
_opaqueDraws.Add(grp);
else
_translucentDraws.Add(grp);
}
// Front-to-back sort within each cull mode. DrawIndirectRange must
// split MDI calls whenever CullMode changes because GL state is not
// part of an indirect command. Sorting by distance alone can turn a
// stable 1k-draw live scene into hundreds of tiny MDI runs after a
// landblock transition, which shows up as a GPU-command bottleneck
// without a triangle-count spike.
_opaqueDraws.Sort(CompareOpaqueSubmissionOrder);
_translucentDraws.Sort(CompareTransparentSubmissionOrder);
// ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent),
// fill via BuildIndirectArrays ──────────────────────────────────
int totalDraws = _opaqueDraws.Count + _translucentDraws.Count;
if (_batchData.Length < totalDraws)
_batchData = new BatchData[totalDraws + 64];
if (_indirectCommands.Length < totalDraws)
_indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64];
if (_drawCullModes.Length < totalDraws)
_drawCullModes = new CullMode[totalDraws + 64];
var groupInputs = new List<IndirectGroupInput>(totalDraws);
foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g));
foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g));
// Cast _batchData (private BatchData) to public-mirror BatchDataPublic for BuildIndirectArrays.
// Layout is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test).
var batchPublic = new BatchDataPublic[totalDraws];
var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic, _drawCullModes);
long totalTriangles = 0;
foreach (var input in groupInputs)
totalTriangles += (long)(input.IndexCount / 3) * input.InstanceCount;
int cullRuns =
CountCullRuns(_drawCullModes, 0, layout.OpaqueCount) +
CountCullRuns(_drawCullModes, layout.OpaqueCount, layout.TransparentCount);
// Copy back into _batchData
for (int i = 0; i < totalDraws; i++)
{
_batchData[i] = new BatchData
{
TextureHandle = batchPublic[i].TextureHandle,
TextureLayer = batchPublic[i].TextureLayer,
Flags = batchPublic[i].Flags,
};
}
_opaqueDrawCount = layout.OpaqueCount;
_transparentDrawCount = layout.TransparentCount;
_transparentByteOffset = layout.TransparentByteOffset;
LastDrawStats = new DrawStats(
set,
walkResult.EntitiesWalked,
_walkScratch.Count,
totalInstances,
totalDraws,
cullRuns,
_opaqueDrawCount,
_transparentDrawCount,
totalTriangles);
// ── Phase 5: upload four buffers ────────────────────────────────────
fixed (float* ip = _instanceData)
UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
fixed (BatchData* bp = _batchData)
UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
// Phase U.4: per-instance clip-slot buffer (binding=3), one uint per
// instance, laid out parallel to _instanceData in Phase 3's group loop so
// instanceClipSlot[instanceIndex] tracks Instances[instanceIndex]. On the
// U.3 / outdoor path every entry is 0 ⇒ slot 0 ⇒ no-clip (identical to
// U.3); under indoor routing it holds the per-instance slot from
// ResolveEntitySlot. No clear here — Phase 3 wrote exactly totalInstances
// entries; only [0..totalInstances) is uploaded, so any stale tail is
// never read by the shader (BaseInstance + gl_InstanceID < totalInstances).
fixed (uint* sp = _clipSlotData)
UploadSsbo(_clipSlotSsbo, 3, sp, totalInstances * sizeof(uint));
fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
{
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
_gl.BufferData(BufferTargetARB.DrawIndirectBuffer,
(nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
}
// Phase U.3: bind the SHARED per-cell clip-region SSBO (binding=2). The
// GameWindow-level ClipFrame already uploaded + bound it this frame; we
// re-bind defensively in case another consumer touched binding=2 since.
// When no shared id is set (0), bind our own no-clip fallback so the
// shader never reads an unbound SSBO at binding=2.
BindClipRegionBinding2();
// ── Phase 6: bind global VAO once ───────────────────────────────────
_gl.BindVertexArray(anyVao);
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
_gl.Disable(EnableCap.CullFace);
// GPU timing: compute this frame's ring slot. We read frame N-3's
// result (the oldest data in the ring) before overwriting it with
// frame N's queries. Hoisted to function scope so both the opaque
// and transparent passes below can reference gpuQuerySlot. See spec
// §3 Q1/Q2 + §4 in
// docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
int gpuQuerySlot = _gpuQueryFrameIndex % GpuQueryRingDepth;
// diag is part of the gate so the read/issue/increment trio stays
// symmetric — without it, toggling ACDREAM_WB_DIAG mid-session would
// freeze the frame counter (gated by diag below) while the read kept
// re-reading the same slot, producing duplicate stale samples.
if (diag && _gpuQueriesInitialized && _gpuQueryFrameIndex >= GpuQueryRingDepth)
{
_gl.GetQueryObject(_gpuQueryOpaque[gpuQuerySlot], QueryObjectParameterName.ResultAvailable, out int avail);
if (avail != 0)
{
_gl.GetQueryObject(_gpuQueryOpaque[gpuQuerySlot], QueryObjectParameterName.Result, out ulong opaqueNs);
_gl.GetQueryObject(_gpuQueryTransparent[gpuQuerySlot], QueryObjectParameterName.Result, out ulong transNs);
long gpuUs = (long)((opaqueNs + transNs) / 1000UL);
_gpuSamples[_gpuSampleCursor] = gpuUs;
_gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length;
}
// If avail==0 the sample is dropped silently. MedianMicros
// computes over the non-zero subset, so dropped samples don't
// poison the median.
}
// ── Phase 7: opaque pass ─────────────────────────────────────────────
if (_opaqueDrawCount > 0)
{
_gl.Disable(EnableCap.Blend);
_gl.DepthMask(true);
// A.5 T20: enable A2C for ClipMap foliage — GPU derives sample mask
// from the alpha written by mesh_modern.frag so foliage edges are
// smooth under MSAA 4x. A no-op for fully-opaque (α=1) batches.
// A.5 T22.5: gated by AlphaToCoverage property so Low/Medium presets
// (no MSAA) skip the unnecessary GL state change.
if (AlphaToCoverage) _gl.Enable(EnableCap.SampleAlphaToCoverage);
_shader.SetInt("uRenderPass", 0);
// Phase Post-A.5 (ISSUE #52, 2026-05-10): opaque section of
// Batches[] starts at index 0. See uDrawIDOffset comment in
// mesh_modern.vert for why this is needed.
_shader.SetInt("uDrawIDOffset", 0);
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque[gpuQuerySlot]);
DrawIndirectRange(0, _opaqueDrawCount);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
if (AlphaToCoverage) _gl.Disable(EnableCap.SampleAlphaToCoverage);
}
// ── Phase 8: transparent pass ────────────────────────────────────────
if (_transparentDrawCount > 0)
{
_gl.Enable(EnableCap.Blend);
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
_gl.DepthMask(false);
// Phase Post-A.5 (ISSUE #52, 2026-05-10): transparent section of
// Batches[] starts at index _opaqueDrawCount. Without this offset,
// each transparent draw reads BatchData[0..transparentCount) — the
// OPAQUE section — and the lifestone crystal's apparent texture
// flickers to whatever opaque batch sorted first that frame. See
// uDrawIDOffset comment in mesh_modern.vert.
_shader.SetInt("uDrawIDOffset", _opaqueDrawCount);
// Closed-shell translucent meshes still need culling, but the
// cull side must come from each dat batch just like the opaque
// section. BuildIndirectArrays preserves CullMode in _drawCullModes.
_gl.FrontFace(FrontFaceDirection.CW);
_shader.SetInt("uRenderPass", 1);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent[gpuQuerySlot]);
DrawIndirectRange(_opaqueDrawCount, _transparentDrawCount);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
_gl.DepthMask(true);
_gl.Disable(EnableCap.Blend);
}
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
_cpuStopwatch.Stop();
if (diag)
{
long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency;
_cpuSamples[_cpuSampleCursor] = cpuUs;
_cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length;
// GPU sample read happens BEFORE issuing the next frame's queries
// (see step 1.3 above). Increment the frame counter here so the
// next call computes a fresh slot.
if (_gpuQueriesInitialized) _gpuQueryFrameIndex++;
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
_instancesIssued += totalInstances;
MaybeFlushDiag();
}
}
/// <summary>
/// Phase A8 RR5 (2026-05-26): per-building draw overload that accepts an
/// explicit list of cell ids instead of a ready-made <see cref="HashSet{T}"/>.
/// Used by the indoor render branch to scope rendering to the
/// camera-buildings' cells.
///
/// <para>This overload does no filtering of its own. It converts
/// <paramref name="cellIds"/> to a <see cref="HashSet{T}"/> and forwards every
/// argument to the <c>visibleCellIds</c>-based Draw, so the dispatcher's
/// internal logic is the same for both entry points. The semantic difference
/// lives at the caller: <paramref name="cellIds"/> is the camera-buildings'
/// EnvCellIds rather than the portal-BFS visibility result.</para>
/// </summary>
/// <param name="camera">Camera forwarded unchanged to the set-based Draw.</param>
/// <param name="landblockEntries">Per-landblock entity lists, forwarded unchanged.</param>
/// <param name="cellIds">Cells to draw. Passed through as-is when it already is a
/// <see cref="HashSet{T}"/>; otherwise copied into a new set on every call.</param>
/// <param name="frustum">Optional frustum, forwarded unchanged.</param>
/// <param name="neverCullLandblockId">Optional landblock id, forwarded unchanged.</param>
/// <param name="animatedEntityIds">Optional animated-entity id set, forwarded unchanged.</param>
/// <param name="set">Entity partition selector, forwarded unchanged.</param>
public void Draw(
    ICamera camera,
    IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax,
        IReadOnlyList<WorldEntity> Entities,
        IReadOnlyDictionary<uint, WorldEntity>? AnimatedById)> landblockEntries,
    IReadOnlyCollection<uint> cellIds,
    FrustumPlanes? frustum = null,
    uint? neverCullLandblockId = null,
    HashSet<uint>? animatedEntityIds = null,
    EntitySet set = EntitySet.All)
{
    // The set-based path wants a HashSet<uint>. Reuse the caller's instance
    // when it already is one; only allocate a copy for other collection types.
    var visibleCells = (cellIds as HashSet<uint>) ?? new HashSet<uint>(cellIds);

    // The named visibleCellIds argument selects the set-based overload.
    Draw(
        camera,
        landblockEntries,
        frustum: frustum,
        neverCullLandblockId: neverCullLandblockId,
        visibleCellIds: visibleCells,
        animatedEntityIds: animatedEntityIds,
        set: set);
}
/// <summary>
/// Copies the draw-relevant fields of an <see cref="InstanceGroup"/> into the
/// <see cref="IndirectGroupInput"/> shape consumed by BuildIndirectArrays.
/// Pure field-for-field projection; no values are transformed.
/// </summary>
private static IndirectGroupInput ToInput(InstanceGroup g)
{
    return new IndirectGroupInput(
        IndexCount: g.IndexCount,
        FirstIndex: g.FirstIndex,
        BaseVertex: g.BaseVertex,
        InstanceCount: g.InstanceCount,
        FirstInstance: g.FirstInstance,
        TextureHandle: g.BindlessTextureHandle,
        TextureLayer: g.TextureLayer,
        Translucency: g.Translucency,
        CullMode: g.CullMode);
}
/// <summary>
/// Sort comparison for the opaque draw list: primary key is
/// <c>CullMode</c> (so equal cull modes end up adjacent), secondary key is
/// ascending <c>SortDistance</c> (smallest distance first).
/// </summary>
/// <returns>Negative, zero, or positive per the usual comparison contract.</returns>
private static int CompareOpaqueSubmissionOrder(InstanceGroup a, InstanceGroup b)
{
    int byCullMode = a.CullMode.CompareTo(b.CullMode);
    if (byCullMode != 0)
        return byCullMode;

    // Same cull mode: nearer group sorts first.
    return a.SortDistance.CompareTo(b.SortDistance);
}
/// <summary>
/// Sort comparison for the translucent draw list: primary key is
/// <c>CullMode</c> (so equal cull modes end up adjacent), secondary key is
/// descending <c>SortDistance</c> (largest distance first).
/// </summary>
/// <returns>Negative, zero, or positive per the usual comparison contract.</returns>
private static int CompareTransparentSubmissionOrder(InstanceGroup a, InstanceGroup b)
{
    int byCullMode = a.CullMode.CompareTo(b.CullMode);
    if (byCullMode != 0)
        return byCullMode;

    // Same cull mode: operands are swapped so the farther group sorts first.
    return b.SortDistance.CompareTo(a.SortDistance);
}
/// <summary>
/// Counts how many maximal runs of identical <c>CullMode</c> values occur in
/// <paramref name="modes"/> over the window
/// <c>[startCommand, startCommand + commandCount)</c>.
/// </summary>
/// <param name="modes">Per-draw cull modes, indexed by command.</param>
/// <param name="startCommand">Index of the first command in the window.</param>
/// <param name="commandCount">Number of commands in the window.</param>
/// <returns>0 when <paramref name="commandCount"/> is not positive; otherwise
/// one plus the number of positions where the mode differs from its predecessor.</returns>
private static int CountCullRuns(CullMode[] modes, int startCommand, int commandCount)
{
    if (commandCount <= 0) return 0;

    int stop = startCommand + commandCount;
    int index = startCommand;
    var runMode = modes[index];
    int runs = 1;

    while (++index < stop)
    {
        if (modes[index] != runMode)
        {
            // Mode changed: a new run starts at this command.
            runMode = modes[index];
            runs++;
        }
    }

    return runs;
}
/// <summary>
/// Submits the indirect commands in
/// <c>[startCommand, startCommand + commandCount)</c>, split into consecutive
/// runs that share one <c>CullMode</c>. Each run applies its cull state via
/// <see cref="ApplyCullMode"/> and is issued as one
/// <c>MultiDrawElementsIndirect</c> call.
/// </summary>
/// <param name="startCommand">Index of the first command to submit.</param>
/// <param name="commandCount">Number of commands to submit.</param>
private unsafe void DrawIndirectRange(int startCommand, int commandCount)
{
    int rangeEnd = startCommand + commandCount;

    for (int runStart = startCommand; runStart < rangeEnd;)
    {
        var runMode = _drawCullModes[runStart];
        ApplyCullMode(runMode);

        // Extend the run while the following commands use the same cull mode.
        int runEnd = runStart + 1;
        while (runEnd < rangeEnd && _drawCullModes[runEnd] == runMode)
            runEnd++;
        int runLength = runEnd - runStart;

        // Each glMultiDrawElementsIndirect call restarts gl_DrawID at 0.
        // Because one logical opaque/transparent pass is split into CullMode
        // runs here, the shader must be given this run's absolute command
        // index, or it would read BatchData[0] again and bind the wrong
        // texture for later runs.
        _shader.SetInt("uDrawIDOffset", runStart);
        _gl.MultiDrawElementsIndirect(
            PrimitiveType.Triangles,
            DrawElementsType.UnsignedShort,
            (void*)(runStart * DrawCommandStride),
            (uint)runLength,
            (uint)DrawCommandStride);

        runStart = runEnd;
    }
}
/// <summary>
/// Sets GL face-culling state for one <c>CullMode</c>. Front-face winding is
/// always forced to CW first; the mode then selects no culling, front-face
/// culling (<c>Clockwise</c>), or back-face culling
/// (<c>CounterClockwise</c> / <c>Landblock</c>). Any other value leaves the
/// cull enable/side untouched.
/// </summary>
private void ApplyCullMode(CullMode mode)
{
    // WB BaseObjectRenderManager.cs:850-866 applies CullMode per MDI group.
    // WB GameScene.cs:843 sets FrontFace(CW) globally; SetCullMode then only
    // chooses front/back culling. Same convention here, so splitting MDI
    // commands by CullMode cannot resurrect stale CCW state.
    _gl.FrontFace(FrontFaceDirection.CW);

    if (mode == CullMode.None)
    {
        _gl.Disable(EnableCap.CullFace);
        return;
    }

    TriangleFace culledSide;
    if (mode == CullMode.Clockwise)
        culledSide = TriangleFace.Front;
    else if (mode == CullMode.CounterClockwise || mode == CullMode.Landblock)
        culledSide = TriangleFace.Back;
    else
        return; // Unrecognised mode: keep whatever cull state is current.

    _gl.Enable(EnableCap.CullFace);
    _gl.CullFace(culledSide);
}
/// <summary>
/// Uploads <paramref name="byteCount"/> bytes from <paramref name="data"/>
/// into <paramref name="ssbo"/> with <c>BufferData</c> (DynamicDraw usage) and
/// then attaches the buffer to shader-storage binding point
/// <paramref name="binding"/>.
/// </summary>
/// <param name="ssbo">GL buffer name to fill and bind.</param>
/// <param name="binding">Indexed shader-storage binding point.</param>
/// <param name="data">Pointer to the source bytes.</param>
/// <param name="byteCount">Number of bytes to upload.</param>
private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount)
{
    const BufferTargetARB Target = BufferTargetARB.ShaderStorageBuffer;
    nuint size = (nuint)byteCount;

    _gl.BindBuffer(Target, ssbo);
    _gl.BufferData(Target, size, data, BufferUsageARB.DynamicDraw);
    // Indexed bind last, so the binding point refers to the freshly filled buffer.
    _gl.BindBufferBase(Target, binding, ssbo);
}
/// <summary>
/// Phase U.3: attaches a clip-region SSBO to the binding point
/// <c>ClipFrame.MeshClipSsboBinding</c>. When a shared buffer id is set
/// (non-zero <c>_sharedClipRegionSsbo</c>, see <see cref="SetClipRegionSsbo"/>)
/// that buffer is bound. Otherwise a fallback buffer is created on first use
/// and bound instead, so the shader never reads an unbound SSBO.
/// The fallback holds one all-zero slot of <c>ClipFrame.CellClipStrideBytes</c>
/// bytes: count 0, i.e. pass-all, matching <see cref="ClipFrame.NoClip"/>'s slot 0.
/// </summary>
private unsafe void BindClipRegionBinding2()
{
    if (_sharedClipRegionSsbo == 0)
    {
        if (_fallbackClipRegionSsbo == 0)
        {
            // First use: create the fallback and fill it with one zeroed
            // CellClip slot (count 0 => shader passes every plane).
            _fallbackClipRegionSsbo = _gl.GenBuffer();

            byte* zeroSlot = stackalloc byte[ClipFrame.CellClipStrideBytes];
            // stackalloc memory is not guaranteed to be zeroed; clear it explicitly.
            new Span<byte>(zeroSlot, ClipFrame.CellClipStrideBytes).Clear();

            _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, _fallbackClipRegionSsbo);
            _gl.BufferData(BufferTargetARB.ShaderStorageBuffer,
                (nuint)ClipFrame.CellClipStrideBytes, zeroSlot, BufferUsageARB.DynamicDraw);
        }

        _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
            ClipFrame.MeshClipSsboBinding, _fallbackClipRegionSsbo);
    }
    else
    {
        _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
            ClipFrame.MeshClipSsboBinding, _sharedClipRegionSsbo);
    }
}
/// <summary>
/// Rate-limited diagnostics dump. When more than 5000 ms (per
/// <see cref="Environment.TickCount64"/>) have passed since the last dump,
/// writes one <c>[WB-DIAG]</c> line with the interval counters and the
/// CPU/GPU median and p95 timings, then zeroes the interval counters.
/// Does nothing otherwise.
/// </summary>
private void MaybeFlushDiag()
{
    long now = Environment.TickCount64;
    if (now - _lastLogTick <= 5000)
        return;

    long cpuMedian = MedianMicros(_cpuSamples);
    long cpu95 = Percentile95Micros(_cpuSamples);
    long gpuMedian = MedianMicros(_gpuSamples);
    long gpu95 = Percentile95Micros(_gpuSamples);

    // A.5 T23: flag when the entity dispatcher's median CPU time exceeds the
    // 2.0ms budget (Phase A.5 spec §2 acceptance criterion 6). The prefix is
    // meant to be grep-friendly.
    const long BudgetUs = 2000;
    string overBudget = cpuMedian > BudgetUs ? " BUDGET_OVER" : "";

    Console.WriteLine(
        $"[WB-DIAG]{overBudget} entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " +
        $"cpu_us={cpuMedian}m/{cpu95}p95 gpu_us={gpuMedian}m/{gpu95}p95");

    // Counters are per-interval; start the next interval from zero.
    _instancesIssued = 0;
    _drawsIssued = 0;
    _meshesMissing = 0;
    _entitiesDrawn = 0;
    _entitiesSeen = 0;
    _lastLogTick = now;

    // The timing sample buffers are deliberately NOT reset: they are a
    // rolling window of recent frames, and clearing them on every flush
    // would throw away recent history.
}
/// <summary>
/// Median of the positive entries in <paramref name="samples"/>. Entries
/// that are zero (or negative) are treated as "no sample" and ignored, so a
/// partially filled sample ring does not drag the median towards zero.
/// </summary>
/// <param name="samples">Timing samples in microseconds; not modified.</param>
/// <returns>
/// 0 when there is no positive sample; otherwise the middle positive sample
/// (the upper of the two middle values when their count is even).
/// </returns>
private static long MedianMicros(long[] samples)
{
    // Sort a copy so the caller's ring buffer keeps its insertion order.
    var sorted = (long[])samples.Clone();
    Array.Sort(sorted);

    int positive = 0;
    foreach (long value in sorted)
        if (value > 0) positive++;
    if (positive == 0) return 0;

    // After an ascending sort the positive samples occupy the LAST `positive`
    // slots. The median must be indexed relative to the start of that tail.
    // The previous `Length - positive / 2` ran past the end of the array when
    // exactly one sample was present and otherwise picked too high an element
    // whenever the buffer was only partially filled.
    int firstPositive = sorted.Length - positive;
    return sorted[firstPositive + positive / 2];
}
/// <summary>
/// Approximate 95th percentile of the positive entries in
/// <paramref name="samples"/>. Non-positive entries are ignored.
/// </summary>
/// <param name="samples">Timing samples in microseconds; not modified.</param>
/// <returns>
/// 0 when there is no positive sample; otherwise the sorted element that lies
/// <c>floor(positiveCount * 0.05)</c> places below the maximum.
/// </returns>
private static long Percentile95Micros(long[] samples)
{
    // Work on a sorted copy so the caller's buffer keeps its order.
    var sorted = (long[])samples.Clone();
    Array.Sort(sorted);

    // Ascending order puts every positive value at the tail, so walking
    // backwards until the first non-positive entry counts them all.
    int positive = 0;
    for (int i = sorted.Length - 1; i >= 0 && sorted[i] > 0; i--)
        positive++;

    if (positive == 0)
        return 0;

    int stepsBelowMax = (int)(positive * 0.05);
    return sorted[sorted.Length - 1 - stepsBelowMax];
}
// ── Tier 1 cache (#53) helpers extracted for testability ─────────────────
//
// Three pure-CPU static helpers carved out of Draw's per-entity loop so
// unit tests can exercise the populate/flush algorithm + cache-hit fast
// path without needing a real GL context. Production code (Draw) calls
// these helpers; the dispatcher integration tests in
// WbDrawDispatcherBucketingTests use them to drive the same algorithm
// through deterministic inputs.
/// <summary>
/// Replays a cache entry into the per-frame groups: for every cached batch,
/// computes <c>RestPose * entityWorld</c> and hands the batch key plus that
/// matrix to <paramref name="appendInstance"/>. Routing through a delegate
/// (instead of touching <see cref="InstanceGroup"/> directly) keeps this
/// helper GL-free and unit-testable.
/// </summary>
/// <remarks>
/// Matrix multiplication is non-commutative: the product MUST be
/// <c>RestPose * entityWorld</c>, not the reverse. See
/// <see cref="ComposePartWorldMatrix"/> for the full part-world product.
/// </remarks>
/// <param name="entry">Cache entry whose <c>Batches</c> are replayed in order.</param>
/// <param name="entityWorld">The entity's world matrix for this frame.</param>
/// <param name="appendInstance">Receives one (key, world matrix) pair per cached batch.</param>
internal static void ApplyCacheHit(
    EntityCacheEntry entry,
    Matrix4x4 entityWorld,
    Action<GroupKey, Matrix4x4> appendInstance)
{
    foreach (var batch in entry.Batches)
    {
        var key = batch.Key;
        var instanceWorld = batch.RestPose * entityWorld;
        appendInstance(key, instanceWorld);
    }
}
/// <summary>
/// Entity-boundary flush check, run once per walked tuple. When a populate is
/// pending (<paramref name="populateEntityId"/> has a value) and the walk has
/// moved on to a different entity (<paramref name="currentEntityId"/>), the
/// pending entity's batches in <paramref name="populateScratch"/> are committed
/// to <paramref name="cache"/> (only if there are any) and the scratch list is
/// cleared.
/// </summary>
/// <returns>
/// The tracker tuple the caller should carry forward: <c>(null, 0)</c> after a
/// flush, otherwise the inputs unchanged.
/// </returns>
/// <remarks>
/// Structure introduced by the fix in commit 00fa8ae: populating per MeshRef
/// overwrote earlier MeshRefs because the cache is keyed by entity.Id. Flushing
/// only when the entity changes keeps every MeshRef's batches. This relies on
/// _walkScratch being in entity order, so all MeshRefs of one entity arrive
/// contiguously.
/// </remarks>
internal static (uint? PopulateEntityId, uint PopulateLandblockId)
    MaybeFlushOnEntityChange(
        uint? populateEntityId,
        uint populateLandblockId,
        uint currentEntityId,
        EntityClassificationCache cache,
        List<CachedBatch> populateScratch)
{
    // Nothing pending, or still inside the same entity's run: pass the tracker through.
    if (populateEntityId is not uint pendingId || pendingId == currentEntityId)
    {
        return (populateEntityId, populateLandblockId);
    }

    // Entity boundary crossed: commit whatever the previous entity accumulated.
    if (populateScratch.Count != 0)
    {
        cache.Populate(pendingId, populateLandblockId, populateScratch.ToArray());
    }

    populateScratch.Clear();
    return (null, 0u);
}
/// <summary>
/// Final flush after the walk loop ends. The last entity in <c>_walkScratch</c>
/// never sees a "next" tuple that would trigger
/// <see cref="MaybeFlushOnEntityChange"/>, so its accumulated batches are
/// committed here. Does nothing when no populate is pending or the scratch
/// list is empty.
/// <para>
/// This runs at end-of-loop only and deliberately does NOT reset the caller's
/// tracker locals, which go out of scope right afterwards.
/// </para>
/// </summary>
internal static void FinalFlushPopulate(
    uint? populateEntityId,
    uint populateLandblockId,
    EntityClassificationCache cache,
    List<CachedBatch> populateScratch)
{
    if (!populateEntityId.HasValue || populateScratch.Count == 0)
    {
        return;
    }

    CachedBatch[] committed = populateScratch.ToArray();
    cache.Populate(populateEntityId.Value, populateLandblockId, committed);
    populateScratch.Clear();
}
/// <summary>
/// Instance-side sink for <see cref="ApplyCacheHit"/>. Finds the
/// <see cref="InstanceGroup"/> registered under <paramref name="key"/> in
/// <c>_groups</c>, creating it from the key's draw parameters on first use,
/// then appends the instance's world matrix together with the current
/// entity clip slot.
/// </summary>
/// <param name="key">Group identity; also the source of a new group's draw parameters.</param>
/// <param name="model">Per-instance world matrix to append.</param>
private void AppendInstanceToGroup(GroupKey key, Matrix4x4 model)
{
    bool known = _groups.TryGetValue(key, out var target);
    if (!known)
    {
        // First instance for this key this time round: seed the group from the key.
        target = new InstanceGroup
        {
            CullMode = key.CullMode,
            Translucency = key.Translucency,
            TextureLayer = key.TextureLayer,
            BindlessTextureHandle = key.BindlessTextureHandle,
            IndexCount = key.IndexCount,
            BaseVertex = key.BaseVertex,
            FirstIndex = key.FirstIndex,
            Ibo = key.Ibo,
        };
        _groups[key] = target;
    }

    // Phase U.4 — Slots is kept parallel to Matrices: one slot per matrix.
    target.Matrices.Add(model);
    target.Slots.Add(_currentEntitySlot);
}
/// <summary>
/// Classifies every batch of <paramref name="renderData"/> for one mesh
/// reference and appends one instance per drawable batch to the group
/// dictionary. Batches whose texture cannot be resolved (handle 0) are skipped.
/// </summary>
/// <param name="renderData">Render data whose <c>Batches</c> are walked in order.</param>
/// <param name="gfxObjId">Id used, together with the batch index, to look up surface metadata.</param>
/// <param name="model">World matrix appended to the group for each drawable batch.</param>
/// <param name="entity">Entity being drawn; forwarded to texture resolution.</param>
/// <param name="meshRef">Mesh reference being drawn; forwarded to texture resolution.</param>
/// <param name="palHash">Palette hash forwarded to texture resolution.</param>
/// <param name="metaTable">Metadata table consulted first for translucency.</param>
/// <param name="restPose">Rest pose stored with each collected <see cref="CachedBatch"/>.</param>
/// <param name="collector">
/// Optional list receiving one <see cref="CachedBatch"/> per drawable batch;
/// pass <c>null</c> to skip collection.
/// </param>
private void ClassifyBatches(
    ObjectRenderData renderData,
    ulong gfxObjId,
    Matrix4x4 model,
    WorldEntity entity,
    MeshRef meshRef,
    ulong palHash,
    AcSurfaceMetadataTable metaTable,
    Matrix4x4 restPose,
    List<CachedBatch>? collector = null)
{
    for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
    {
        var batch = renderData.Batches[batchIdx];

        // Prefer the metadata table; fall back to the batch's own flags.
        TranslucencyKind translucency;
        if (metaTable.TryLookup(gfxObjId, batchIdx, out var meta))
        {
            translucency = meta.Translucency;
        }
        else
        {
            translucency = batch.IsAdditive ? TranslucencyKind.Additive
                : batch.IsTransparent ? TranslucencyKind.AlphaBlend
                : TranslucencyKind.Opaque;
        }

        ulong texHandle = ResolveTexture(entity, meshRef, batch, palHash);
        if (texHandle == 0) continue;

        // TextureLayer is always 0 for per-instance composites; non-zero when
        // WB atlas is adopted in N.6+ and batches reference a shared atlas layer.
        uint texLayer = 0;

        var key = new GroupKey(
            batch.IBO, batch.FirstIndex, (int)batch.BaseVertex,
            batch.IndexCount, texHandle, texLayer, translucency, batch.CullMode);

        // Same lookup-or-create + append (matrix and clip slot, Phase U.4) that
        // the cache-hit path uses, so both paths build groups identically.
        AppendInstanceToGroup(key, model);

        collector?.Add(new CachedBatch(key, texHandle, restPose));
    }
}
/// <summary>
/// Resolves the bindless texture handle for one batch. Returns 0 when the
/// batch's surface id is 0 or 0xFFFFFFFF. Otherwise picks the texture-cache
/// entry point by precedence: an entity palette override first (optionally
/// combined with a per-surface original-texture override), then an
/// original-texture override alone, then the plain surface lookup.
/// </summary>
private ulong ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
{
    uint surfaceId = batch.Key.SurfaceId;
    if (surfaceId is 0 or 0xFFFFFFFF)
    {
        return 0;
    }

    // Per-surface original-texture replacement, if the mesh reference carries one.
    uint replacementTex = 0;
    bool hasReplacement =
        meshRef.SurfaceOverrides?.TryGetValue(surfaceId, out replacementTex) is true;

    if (entity.PaletteOverride is not null)
    {
        uint? replacementOrNull = hasReplacement ? replacementTex : (uint?)null;
        return _textures.GetOrUploadWithPaletteOverrideBindless(
            surfaceId, replacementOrNull, entity.PaletteOverride, palHash);
    }

    return hasReplacement
        ? _textures.GetOrUploadWithOrigTextureOverrideBindless(surfaceId, replacementTex)
        : _textures.GetOrUploadBindless(surfaceId);
}
/// <summary>
/// Writes the 16 components of <paramref name="m"/> into <paramref name="buf"/>
/// starting at <paramref name="offset"/>, row by row (M11..M14, M21..M24,
/// M31..M34, M41..M44).
/// </summary>
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
    int cursor = offset;

    // Row 1
    buf[cursor++] = m.M11;
    buf[cursor++] = m.M12;
    buf[cursor++] = m.M13;
    buf[cursor++] = m.M14;

    // Row 2
    buf[cursor++] = m.M21;
    buf[cursor++] = m.M22;
    buf[cursor++] = m.M23;
    buf[cursor++] = m.M24;

    // Row 3
    buf[cursor++] = m.M31;
    buf[cursor++] = m.M32;
    buf[cursor++] = m.M33;
    buf[cursor++] = m.M34;

    // Row 4
    buf[cursor++] = m.M41;
    buf[cursor++] = m.M42;
    buf[cursor++] = m.M43;
    buf[cursor] = m.M44;
}
/// <summary>
/// Entity-set membership test. Since Phase U.1 (2026-05-30) removed the
/// two-pipe partition, <see cref="EntitySet.All"/> is the only member and it
/// matches every entity, so this always answers <c>true</c>. Kept as a seam so
/// the unified pass can re-introduce partitioning later.
/// </summary>
private static bool EntityMatchesSet(WorldEntity entity, EntitySet set)
{
    // Both arguments are intentionally unused while only one set exists.
    return true;
}
/// <summary>
/// Cell-membership gate for one entity.
/// </summary>
/// <param name="entity">Entity under test; only its <c>ParentCellId</c> is read.</param>
/// <param name="visibleCellIds">
/// Active cell filter, or <c>null</c> for "no filter" (outdoor root, or a bucket
/// drawn unfiltered such as live-dynamics / outdoor scenery).
/// </param>
/// <param name="set">Currently unused by the gate.</param>
/// <returns>
/// <c>true</c> when no filter is active, or when the entity has a parent cell
/// that is in the filter; <c>false</c> otherwise.
/// </returns>
internal static bool EntityPassesVisibleCellGate(
    WorldEntity entity,
    HashSet<uint>? visibleCellIds,
    EntitySet set)
{
    // Unfiltered draw: everything passes here and clip-slot routing
    // (ResolveEntitySlot) does the gating instead.
    if (visibleCellIds is null)
    {
        return true;
    }

    // Filter active. Interior statics pass iff their cell is visible. Entities
    // with ParentCellId == null (outdoor scenery / building shell) belong to no
    // interior cell and therefore fail the filter — this is the R1 bleed fix
    // (it used to be an unconditional `return true`). When such entities must
    // draw (through the doorway), the caller passes visibleCellIds: null and
    // relies on ResolveEntitySlot's OutsideView routing.
    return entity.ParentCellId is { } cellId && visibleCellIds.Contains(cellId);
}
/// <summary>
/// Whether <paramref name="set"/> is a shell-scoped entity set. Always
/// <c>false</c>: the shell-scoped sets (IndoorPass / BuildingShells) were
/// deleted with the two-pipe machinery in Phase U.1 (2026-05-30), and
/// EntitySet.All is never shell-scoped.
/// </summary>
private static bool IsShellScopedSet(EntitySet set)
{
    return false;
}
/// <summary>
/// Releases the GL buffers and timer queries owned by this dispatcher.
/// Safe to call more than once: calls after the first return immediately.
/// </summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }
    _disposed = true;

    // Buffers that are always deleted.
    _gl.DeleteBuffer(_instanceSsbo);
    _gl.DeleteBuffer(_batchSsbo);
    _gl.DeleteBuffer(_indirectBuffer);

    // Phase U.3 buffers are only deleted when their handle is non-zero.
    if (_clipSlotSsbo != 0)
    {
        _gl.DeleteBuffer(_clipSlotSsbo);
    }
    if (_fallbackClipRegionSsbo != 0)
    {
        _gl.DeleteBuffer(_fallbackClipRegionSsbo);
    }

    if (!_gpuQueriesInitialized)
    {
        return;
    }

    // One opaque + one transparent query per ring slot.
    for (int slot = 0; slot < GpuQueryRingDepth; ++slot)
    {
        _gl.DeleteQuery(_gpuQueryOpaque[slot]);
        _gl.DeleteQuery(_gpuQueryTransparent[slot]);
    }
}
// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
//
// These are public so the pure-CPU unit tests in AcDream.Core.Tests can
// exercise BuildIndirectArrays without needing a GL context.
/// <summary>
/// Stride in bytes of <c>DrawElementsIndirectCommand</c> in the indirect buffer.
/// 5 × <c>uint</c> = 20 bytes. Tests and callers reference this symbolically
/// rather than hard-coding <c>20</c>, so the value is defined in exactly one place.
/// </summary>
/// <remarks>
/// <see cref="BuildIndirectArrays"/> multiplies the opaque command count by this
/// stride to produce the byte offset of the transparent section.
/// </remarks>
public const int DrawCommandStride = 20; // sizeof(DrawElementsIndirectCommand): 5 × uint
/// <summary>
/// Public view of the per-group inputs to <see cref="BuildIndirectArrays"/> — used in tests.
/// </summary>
/// <param name="IndexCount">Written to the indirect command's <c>Count</c>.</param>
/// <param name="FirstIndex">Written to the indirect command's <c>FirstIndex</c>.</param>
/// <param name="BaseVertex">Written to the indirect command's <c>BaseVertex</c>.</param>
/// <param name="InstanceCount">Written to the indirect command's <c>InstanceCount</c>.</param>
/// <param name="FirstInstance">Written to the indirect command's <c>BaseInstance</c>.</param>
/// <param name="TextureHandle">Copied into the parallel <see cref="BatchDataPublic.TextureHandle"/>.</param>
/// <param name="TextureLayer">Copied into the parallel <see cref="BatchDataPublic.TextureLayer"/>.</param>
/// <param name="Translucency">Decides whether the group lands in the opaque or the transparent section.</param>
/// <param name="CullMode">Copied into the optional cull scratch array; defaults to <c>CullMode.CounterClockwise</c>.</param>
public readonly record struct IndirectGroupInput(
int IndexCount,
uint FirstIndex,
int BaseVertex,
int InstanceCount,
int FirstInstance,
ulong TextureHandle,
uint TextureLayer,
TranslucencyKind Translucency,
CullMode CullMode = CullMode.CounterClockwise);
/// <summary>
/// Public mirror of the per-group <see cref="BatchData"/> uploaded to the SSBO.
/// Tests verify the layout. Same field shape as the private BatchData.
/// </summary>
/// <remarks>
/// Declared with sequential layout and <c>Pack = 8</c>; field order is part of
/// the layout and must not be rearranged.
/// </remarks>
[StructLayout(LayoutKind.Sequential, Pack = 8)]
public struct BatchDataPublic
{
// Texture handle for the group; BuildIndirectArrays copies it from IndirectGroupInput.TextureHandle.
public ulong TextureHandle;
// Texture layer for the group; BuildIndirectArrays copies it from IndirectGroupInput.TextureLayer.
public uint TextureLayer;
// BuildIndirectArrays always writes 0 here.
public uint Flags;
}
/// <summary>Result of <see cref="BuildIndirectArrays"/>.</summary>
/// <param name="OpaqueCount">Number of groups written to the opaque section, which starts at index 0.</param>
/// <param name="TransparentCount">Number of groups written to the transparent section, which follows the opaque one.</param>
/// <param name="TransparentByteOffset">
/// Byte offset of the transparent section in the indirect buffer:
/// <c>OpaqueCount * DrawCommandStride</c>.
/// </param>
public readonly record struct IndirectLayoutResult(
int OpaqueCount,
int TransparentCount,
int TransparentByteOffset);
/// <summary>
/// Packs the indirect draw commands and the parallel BatchData entries into
/// two contiguous sections: opaque groups first, transparent groups after
/// them. Within each section the input order is preserved (the caller sorts
/// beforehand). Pure CPU, no GL state; the scratch arrays must already be
/// large enough to hold one entry per group.
/// </summary>
/// <param name="groups">Per-group inputs, in the order they should appear within their section.</param>
/// <param name="indirectScratch">Destination for the indirect commands.</param>
/// <param name="batchScratch">Destination for the per-group batch data, index-parallel to <paramref name="indirectScratch"/>.</param>
/// <param name="cullScratch">Optional destination for each group's cull mode, index-parallel as well.</param>
/// <returns>Section sizes plus the byte offset of the transparent section.</returns>
/// <remarks>
/// Classification: Opaque + ClipMap → opaque pass (ClipMap uses discard, not
/// blending). Everything else (AlphaBlend, Additive, InvAlpha) → transparent pass.
/// </remarks>
public static IndirectLayoutResult BuildIndirectArrays(
    IReadOnlyList<IndirectGroupInput> groups,
    DrawElementsIndirectCommand[] indirectScratch,
    BatchDataPublic[] batchScratch,
    CullMode[]? cullScratch = null)
{
    // Pass 1: size the opaque section so the transparent cursor knows where to start.
    int opaqueTotal = 0;
    int transparentTotal = 0;
    for (int i = 0; i < groups.Count; i++)
    {
        if (IsOpaque(groups[i].Translucency))
        {
            opaqueTotal++;
        }
        else
        {
            transparentTotal++;
        }
    }

    // Pass 2: scatter each group to the next free slot of its section.
    int nextOpaque = 0;                // fills [0..opaqueTotal)
    int nextTransparent = opaqueTotal; // fills [opaqueTotal..end)
    for (int i = 0; i < groups.Count; i++)
    {
        IndirectGroupInput group = groups[i];
        int slot = IsOpaque(group.Translucency) ? nextOpaque++ : nextTransparent++;

        indirectScratch[slot] = new DrawElementsIndirectCommand
        {
            Count = (uint)group.IndexCount,
            InstanceCount = (uint)group.InstanceCount,
            FirstIndex = group.FirstIndex,
            BaseVertex = group.BaseVertex,
            BaseInstance = (uint)group.FirstInstance,
        };
        batchScratch[slot] = new BatchDataPublic
        {
            TextureHandle = group.TextureHandle,
            TextureLayer = group.TextureLayer,
            Flags = 0,
        };
        if (cullScratch is not null)
        {
            cullScratch[slot] = group.CullMode;
        }
    }

    int transparentByteOffset = opaqueTotal * DrawCommandStride;
    return new IndirectLayoutResult(opaqueTotal, transparentTotal, transparentByteOffset);
}
/// <summary>
/// Test-facing wrapper over <see cref="IsOpaque"/>. Pins the N.5 Decision 2
/// translucency partition: Opaque + ClipMap go to the opaque indirect section;
/// AlphaBlend + Additive + InvAlpha go to the transparent one.
/// </summary>
public static bool IsOpaquePublic(TranslucencyKind t)
{
    return IsOpaque(t);
}
/// <summary>
/// True for the translucency kinds drawn in the opaque section
/// (<c>Opaque</c> and <c>ClipMap</c>); false for every other kind.
/// </summary>
private static bool IsOpaque(TranslucencyKind t)
{
    return t is TranslucencyKind.Opaque or TranslucencyKind.ClipMap;
}
// ────────────────────────────────────────────────────────────────────────
/// <summary>
/// Bundles an instance's rate-limit dictionary with the current frame number
/// so the static <see cref="WalkEntitiesInto"/> overload can emit rate-limited
/// probe lines without reaching into instance fields. A null state means
/// probes are disabled (the test-friendly overload).
/// </summary>
internal sealed class IndoorProbeState
{
    private const int RateLimit = IndoorProbeRateLimitFrames;

    private readonly Dictionary<ulong, int> _lastFrame;
    private readonly int _currentFrame;

    internal IndoorProbeState(Dictionary<ulong, int> lastFrame, int currentFrame)
    {
        (_lastFrame, _currentFrame) = (lastFrame, currentFrame);
    }

    /// <summary>
    /// Answers true at most once every <see cref="IndoorProbeRateLimitFrames"/>
    /// frames for a given <paramref name="cellId"/>. When it answers true it
    /// also records the current frame number for that cell in the dictionary.
    /// </summary>
    internal bool ShouldEmit(ulong cellId)
    {
        bool emittedRecently =
            _lastFrame.TryGetValue(cellId, out int stampedFrame)
            && _currentFrame - stampedFrame < RateLimit;

        if (emittedRecently)
        {
            return false;
        }

        _lastFrame[cellId] = _currentFrame;
        return true;
    }
}
/// <summary>
/// Accumulation bucket for all instances that share one <see cref="GroupKey"/>
/// in the per-frame group dictionary. The draw parameters (Ibo through CullMode)
/// are copied from the key when the group is first created; <see cref="Matrices"/>
/// and <see cref="Slots"/> then receive one entry per appended instance.
/// </summary>
private sealed class InstanceGroup
{
public uint Ibo;
public uint FirstIndex;
public int BaseVertex;
public int IndexCount;
public ulong BindlessTextureHandle; // 64-bit (was uint TextureHandle in N.4)
public uint TextureLayer; // 0 for per-instance composites; non-zero when WB atlas is adopted in N.6+
public TranslucencyKind Translucency;
public CullMode CullMode;
public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
public int InstanceCount;
public float SortDistance; // squared distance from camera to first instance, for opaque sort
// Per-instance world matrices, in append order.
public readonly List<Matrix4x4> Matrices = new();
// Phase U.4: per-instance clip-slot index, parallel to Matrices (Slots[i]
// is the binding=2 CellClip slot for the instance whose matrix is
// Matrices[i]). At layout time the dispatcher writes Slots[i] into
// _clipSlotData at the same cursor it writes Matrices[i] into _instanceData,
// so the binding=3 instanceClipSlot[] tracks the binding=0 instance.
public readonly List<uint> Slots = new();
}
}