using System;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.InteropServices;
using AcDream.Core.Meshing;
using AcDream.Core.Rendering;
using AcDream.Core.Terrain;
using AcDream.Core.World;
using DatReaderWriter.Enums;
using Silk.NET.OpenGL;
namespace AcDream.App.Rendering.Wb;
///
/// Draws entities using WB's (a single global
/// VAO/VBO/IBO under modern rendering) with acdream's
/// for bindless texture resolution and for
/// translucency classification.
///
///
/// Atlas-tier entities (ServerGuid == 0): mesh data comes from WB's
/// via .
/// Textures resolve through the bindless-suffixed
/// variants, returning 64-bit
/// resident handles stored in the per-group SSBO.
///
///
///
/// Per-instance-tier entities (ServerGuid != 0): mesh data also from
/// WB, but textures resolve through
/// with palette
/// and surface overrides applied. is currently
/// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges +
/// GfxObjDegradeResolver (Issue #47 close-detail mesh) into MeshRefs.
///
///
///
/// GL strategy (N.5 — mandatory): glMultiDrawElementsIndirect with SSBOs
/// and GL_ARB_bindless_texture + GL_ARB_shader_draw_parameters.
/// All visible (entity, batch) pairs are bucketed by ;
/// each group becomes one DrawElementsIndirectCommand. Three GPU buffers
/// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch
/// metadata/texture handles (SSBO binding 1), and the indirect draw commands.
/// Two glMultiDrawElementsIndirect calls cover the opaque and transparent
/// passes respectively — one GL call per pass regardless of group count.
///
///
///
/// Shader: mesh_modern (bindless + gl_DrawIDARB /
/// gl_BaseInstanceARB). Missing bindless/draw-parameters throws
/// at startup — there is no legacy fallback.
///
///
///
/// Modern rendering assumption: WB's _useModernRendering path (GL
/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses
/// FirstIndex + BaseVertex per batch. The dispatcher honors those
/// offsets inside each DrawElementsIndirectCommand via
/// glMultiDrawElementsIndirect.
///
///
public sealed unsafe class WbDrawDispatcher : IDisposable
{
/// <summary>
/// Which subset of entities to walk in a single Draw call.
/// </summary>
/// <remarks>
/// Phase U.1 (2026-05-30): the indoor/outdoor two-pipe split (IndoorPass /
/// OutdoorScenery / BuildingShells / LiveDynamic) was deleted along with the
/// inside-out render machinery. <see cref="All"/> is the sole remaining
/// member; the unified retail-faithful pass (Phase U) draws every entity in
/// one path. The <c>set:</c> parameter is retained on the Draw overloads so
/// the unified pass can re-introduce partitioning later without re-threading
/// the call sites.
/// </remarks>
public enum EntitySet
{
    /// <summary>
    /// Every entity walked, gated only by the existing
    /// ParentCellId ∈ visibleCellIds filter.
    /// </summary>
    All,
}
private readonly GL _gl;
private readonly Shader _shader;
private readonly TextureCache _textures;
private readonly WbMeshAdapter _meshAdapter;
private readonly EntitySpawnAdapter _entitySpawnAdapter;
private readonly BindlessSupport _bindless;
// Immutable bundle of draw counters, exposed through the LastDrawStats property.
// NOTE(review): the code that fills these values is outside this view — the
// individual counters presumably mean what their names say; confirm against the
// assignment site before relying on them.
public readonly record struct DrawStats(
    EntitySet Set,
    int EntitiesWalked,
    int MeshRefs,
    int Instances,
    int Draws,
    int CullRuns,
    int OpaqueDraws,
    int TransparentDraws,
    long Triangles);
public DrawStats LastDrawStats { get; private set; }
// Tier 1 cache (#53): per-entity classification results for static
// entities (those NOT in GameWindow._animatedEntities). Wired here in
// Task 7 for plumbing only — Tasks 9-10 wire the per-entity
// miss-populate / hit-fast-path through the loop.
private readonly EntityClassificationCache _cache;
// ACDREAM_DISABLE_TIER1_CACHE=1 A/B diagnostic — forces every static
// entity through the slow path. Read once in ctor.
private readonly bool _tier1CacheDisabled =
string.Equals(Environment.GetEnvironmentVariable("ACDREAM_DISABLE_TIER1_CACHE"), "1", StringComparison.Ordinal);
///
/// A.5 T22.5: gate for GL_SAMPLE_ALPHA_TO_COVERAGE around the opaque pass.
/// Default true matches T20 behavior. Set false for Low/Medium presets that
/// have MsaaSamples=0 (A2C is a no-op without MSAA, but turning it off
/// avoids the unnecessary GL state thrash and is cleaner diagnostics).
/// Can be toggled mid-session via .
///
public bool AlphaToCoverage { get; set; } = true;
// SSBO buffer ids
private uint _instanceSsbo;
private uint _batchSsbo;
private uint _indirectBuffer;
// Phase U.3: per-instance clip-slot SSBO (binding=3), parallel to
// _instanceSsbo. One uint per instance selecting its CellClip slot. In U.3
// this is ALL ZEROS (every instance → slot 0 → no-clip), so the render is
// identical to pre-U.3. U.4 populates real slot indices.
private uint _clipSlotSsbo;
private uint[] _clipSlotData = new uint[256];
// Phase U.3: the SHARED per-cell clip-region SSBO (binding=2), owned by the
// GameWindow-level ClipFrame and handed to us via SetClipRegionSsbo. When 0
// (not yet wired), we bind our OWN fallback no-clip region buffer below so the
// shader never reads an unbound SSBO. The fallback holds exactly slot 0
// (count 0 = pass-all), matching ClipFrame.NoClip's slot 0.
private uint _sharedClipRegionSsbo;
private uint _fallbackClipRegionSsbo;
// Phase U.4: per-frame clip-slot routing handed in via SetClipRouting before
// each Draw. When _clipRoutingActive is false (the U.3 path / outdoor root /
// no portal frame), every instance maps to slot 0 (no-clip) and no instance is
// culled — identical to U.3. When active, each instance's slot is resolved by
// ResolveEntitySlot per the U.4 policy (live-dynamic unclipped; cell statics to
// their cell slot; outdoor scenery to the OutsideView slot; non-visible culled).
private bool _clipRoutingActive;
private IReadOnlyDictionary? _cellIdToSlot;
private int _outdoorSlot;
private bool _outdoorVisible;
// Phase U.4: the clip slot of the entity currently being classified in Draw's
// per-entity loop. Set once per entity (before ClassifyBatches / ApplyCacheHit),
// read by the two matrix-append sites (AppendInstanceToGroup + ClassifyBatches)
// so every group's Slots[] stays in lockstep with its Matrices[]. Defaults to 0
// (no-clip) on the U.3 / outdoor path.
private uint _currentEntitySlot;
// Phase U.4: true when the current entity resolved to the CULL sentinel
// (cell not visible, or outdoor stab while no outdoors is visible). Persisted
// across the entity's tuples; the per-tuple body skips all instance emission.
private bool _currentEntityCulled;
// Per-frame scratch arrays — Tasks 9-10 fully wire these.
private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
private BatchData[] _batchData = new BatchData[256];
private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];
private CullMode[] _drawCullModes = new CullMode[256];
private int _opaqueDrawCount;
private int _transparentDrawCount;
private int _transparentByteOffset;
// std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
// at offset 8, uint Flags at offset 12. Total 16 bytes.
// Pack=8 (not 4) because std430's uvec2 requires 8-byte alignment — Pack=4
// works today by accident (TextureHandle is the first field, so offset 0 is
// always 8-byte aligned), but adding a 4-byte field before TextureHandle
// without bumping Pack would silently misalign the GPU struct.
[StructLayout(LayoutKind.Sequential, Pack = 8)]
private struct BatchData
{
    // LayoutKind.Sequential: declaration order IS the memory order, so these
    // fields must not be reordered without updating the matching GLSL struct.
    public ulong TextureHandle; // bindless handle (uvec2 in GLSL) — 8 bytes at offset 0
    public uint TextureLayer;   // 4 bytes at offset 8
    public uint Flags;          // 4 bytes at offset 12 (struct totals 16 bytes)
}
// Per-frame scratch — reused across frames to avoid per-frame allocation.
private readonly Dictionary _groups = new();
private readonly List _opaqueDraws = new();
private readonly List _translucentDraws = new();
// A.5 T26 follow-up (Bug B): WalkEntities populates this scratch list
// instead of allocating a fresh List<(WorldEntity, int)> per frame. At
// ~10K entities × ~3 mesh refs = ~30K tuples × 16 bytes = ~480 KB / frame
// of GC pressure on the render thread under the original T17 shape.
private readonly List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> _walkScratch = new();
// Tier 1 cache (#53) — per-entity classification collector. Reused across
// frames; cleared at flush time when the per-entity loop crosses an entity
// boundary in _walkScratch (and once more at end-of-loop for the last
// entity). _walkScratch is in entity-order, so all MeshRefs of one entity
// are contiguous — accumulate them all before flushing one Populate call.
// Animated entities skip this scratch entirely (collector = null).
private readonly List _populateScratch = new();
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
private const float PerEntityCullRadius = 5.0f;
private bool _disposed;
///
/// Per-cell-entity last-log frame number for rate-limiting the
/// [indoor-walk] / [indoor-lookup] / [indoor-xform] / [indoor-cull]
/// probes. Defaults to 30 frames at 30Hz = 1 sec.
///
private readonly Dictionary _lastIndoorProbeFrame = new();
private int _indoorProbeFrameCounter;
private const int IndoorProbeRateLimitFrames = 30;
/// <summary>
/// Returns true at most once per <see cref="IndoorProbeRateLimitFrames"/>
/// frames per cellId. Caller must already have checked that an indoor
/// probe flag is enabled.
/// </summary>
private bool ShouldEmitIndoorProbe(ulong cellId)
{
    // A cell that already logged inside the current rate-limit window stays quiet.
    bool loggedBefore = _lastIndoorProbeFrame.TryGetValue(cellId, out int previousFrame);
    if (loggedBefore && _indoorProbeFrameCounter - previousFrame < IndoorProbeRateLimitFrames)
    {
        return false;
    }

    // First sighting of this cell, or the window has elapsed: stamp the current
    // frame so the next window starts now, and allow the emission.
    _lastIndoorProbeFrame[cellId] = _indoorProbeFrameCounter;
    return true;
}
// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
private int _entitiesSeen;
private int _entitiesDrawn;
private int _meshesMissing;
private int _drawsIssued;
private int _instancesIssued;
private long _lastLogTick;
// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
private readonly long[] _cpuSamples = new long[256]; // microseconds
private int _cpuSampleCursor;
// GPU timing uses a ring of 3 query-pair slots so the read of frame N-3's
// result lands when the GPU has finished (~50ms after issue on a typical
// 60fps frame). Ring of 3 is the vendor-neutral choice: NVIDIA drivers with
// triple-buffering+vsync can queue ~3 frames ahead, AMD typically 1-2,
// Intel iGPUs vary. ResultAvailable is the safety guard if the GPU is
// still working when we try to read.
private const int GpuQueryRingDepth = 3;
private readonly uint[] _gpuQueryOpaque = new uint[GpuQueryRingDepth];
private readonly uint[] _gpuQueryTransparent = new uint[GpuQueryRingDepth];
private int _gpuQueryFrameIndex;
private readonly long[] _gpuSamples = new long[256]; // microseconds
private int _gpuSampleCursor;
private bool _gpuQueriesInitialized;
// Constructor accessibility is internal because EntityClassificationCache
// is internal — a public ctor with an internal-typed parameter would be
// an inconsistent-accessibility error. The dispatcher is constructed
// exclusively from GameWindow (same assembly), so internal is fine.
internal WbDrawDispatcher(
    GL gl,
    Shader shader,
    TextureCache textures,
    WbMeshAdapter meshAdapter,
    EntitySpawnAdapter entitySpawnAdapter,
    BindlessSupport bindless,
    EntityClassificationCache classificationCache)
{
    // Validate every dependency before any GL object is created, so a bad
    // argument can never leave a half-built dispatcher holding buffer names.
    // (bindless is deliberately checked last, after classificationCache.)
    ArgumentNullException.ThrowIfNull(gl);
    ArgumentNullException.ThrowIfNull(shader);
    ArgumentNullException.ThrowIfNull(textures);
    ArgumentNullException.ThrowIfNull(meshAdapter);
    ArgumentNullException.ThrowIfNull(entitySpawnAdapter);
    ArgumentNullException.ThrowIfNull(classificationCache);
    ArgumentNullException.ThrowIfNull(bindless);

    // Collaborators.
    _gl = gl;
    _shader = shader;
    _textures = textures;
    _meshAdapter = meshAdapter;
    _entitySpawnAdapter = entitySpawnAdapter;
    _bindless = bindless;
    _cache = classificationCache;

    // GPU buffer names: instance matrices, per-group batch metadata, the
    // indirect command buffer, and the Phase U.3 per-instance clip-slot
    // buffer (binding=3).
    _instanceSsbo = _gl.GenBuffer();
    _batchSsbo = _gl.GenBuffer();
    _indirectBuffer = _gl.GenBuffer();
    _clipSlotSsbo = _gl.GenBuffer();
}
/// <summary>
/// Phase U.3: hand the dispatcher the SHARED per-cell clip-region SSBO
/// (binding=2) that the GameWindow-level ClipFrame created. The
/// dispatcher re-binds it to binding=2 immediately before each MDI so a
/// consumer that touched binding=2 in between can't leave it pointing
/// elsewhere. Pass 0 to fall back to the internal no-clip region buffer.
/// </summary>
public void SetClipRegionSsbo(uint sharedClipRegionSsbo)
{
    // Only records the buffer name; 0 means "no shared buffer handed in".
    _sharedClipRegionSsbo = sharedClipRegionSsbo;
}
///
/// <summary>
/// Phase U.4: install the per-frame clip-slot routing for an INDOOR root.
/// Call once per frame BEFORE <c>Draw</c> when the camera's root cell is
/// non-null; the next <c>Draw</c> resolves each instance's binding=3
/// clip slot via the U.4 policy (live-dynamic unclipped, cell statics to their
/// cell slot, outdoor scenery to the OutsideView slot, non-visible culled).
/// Pair with <see cref="ClearClipRouting"/> on outdoor-root frames so the
/// dispatcher reverts to the U.3 no-clip-everything behavior.
/// </summary>
/// <param name="cellIdToSlot">cellId → CellClip slot. A cell absent from the map
/// is NOT visible → its cell-static instances are culled.</param>
/// <param name="outdoorSlot">Slot for outdoor scenery / building shells while
/// indoors (the OutsideView slot, or 0 for no-clip over-include).</param>
/// <param name="outdoorVisible">False ⇒ cull outdoor scenery / shells this frame
/// (the OutsideView is empty).</param>
public void SetClipRouting(IReadOnlyDictionary cellIdToSlot, int outdoorSlot, bool outdoorVisible)
{
    // Reject a null map before touching any routing state, so a failed call
    // leaves the previous routing untouched.
    _cellIdToSlot = cellIdToSlot ?? throw new ArgumentNullException(nameof(cellIdToSlot));
    _outdoorSlot = outdoorSlot;
    _outdoorVisible = outdoorVisible;

    // Flip the gate last: routing is now live for the next Draw.
    _clipRoutingActive = true;
}
///
/// Phase U.4: revert to U.3 behavior — every instance maps to slot 0 (no-clip),
/// nothing is culled by clip routing. Call on outdoor-root frames (camera
/// outdoors) and any frame without a portal-visibility result.
///
public void ClearClipRouting()
{
    // Close the gate first, then drop the per-frame inputs so no stale map or
    // outdoor slot survives into a later routed frame.
    _clipRoutingActive = false;
    _outdoorVisible = false;
    _outdoorSlot = 0;
    _cellIdToSlot = null;
}
// Phase U.4 CULL sentinel returned by ResolveEntitySlot: the entity's instances
// are dropped entirely (not emitted into the binding=0 instance buffer NOR the
// binding=3 slot buffer), matching the existing frustum / visible-cell cull.
// Internal (not private) so the clip-slot unit tests can assert against it
// directly — see WbDrawDispatcherClipSlotTests.
internal const int ClipSlotCull = -1;
/// <summary>
/// Phase U.4: resolve the clip slot for one entity per the slot/gate policy.
/// Returns <see cref="ClipSlotCull"/> to drop the entity's instances entirely.
/// </summary>
/// <remarks>
/// <para>
/// - ServerGuid != 0 (live dynamic: player / NPC / items / doors) ⇒ slot 0
/// (UNCLIPPED — retail draws live-dynamic unclipped; depth only).
/// - ParentCellId != null (cell static) ⇒ the cell's slot, or CULL when the
/// cell isn't in <paramref name="cellIdToSlot"/> (not visible / nothing-visible).
/// - ParentCellId == null (outdoor scenery / building shell) ⇒ the OutsideView
/// slot when <paramref name="outdoorVisible"/>, else CULL.
/// </para>
/// <para>
/// Only called when _clipRoutingActive (indoor root). On the U.3 / outdoor
/// path every instance is slot 0 and nothing is culled — see
/// <see cref="ResolveSlotForFrame"/>, which gates on that flag.
/// </para>
/// <para>
/// INVARIANT: <paramref name="parentCellId"/> and the keys of
/// <paramref name="cellIdToSlot"/> MUST live in the same FULL cell-id space
/// (lbMask | OtherCellId, e.g. 0xA9B40164). A bare-low-byte
/// ParentCellId (e.g. 0x64) would never match a full-id key and would
/// silently CULL every indoor stab — cf. the L.2e bare-low-byte finding in
/// CLAUDE.md where player CellId was tracked without its landblock prefix.
/// </para>
/// <para>
/// <c>internal static</c> + pure (reads no instance state) so the clip-slot
/// unit tests exercise every branch without a GL context. The caller hands in
/// the routing fields it would otherwise read from _cellIdToSlot etc.
/// </para>
/// </remarks>
internal static int ResolveEntitySlot(
    uint serverGuid,
    uint? parentCellId,
    IReadOnlyDictionary cellIdToSlot,
    int outdoorSlot,
    bool outdoorVisible)
{
    // A non-zero ServerGuid marks a live-dynamic entity (0 for dat-hydrated);
    // those always take slot 0, i.e. they are never portal-clipped.
    bool isLiveDynamic = serverGuid != 0;
    if (isLiveDynamic)
    {
        return 0;
    }

    // No parent cell: outdoor scenery / building shell. It uses the caller's
    // outdoor slot while outdoors is visible, otherwise it is dropped.
    if (!parentCellId.HasValue)
    {
        if (outdoorVisible)
        {
            return outdoorSlot;
        }

        return ClipSlotCull;
    }

    // Cell static: use the slot mapped for its cell; a cell missing from the
    // map is not visible this frame, so the entity is dropped.
    if (cellIdToSlot.TryGetValue(parentCellId.Value, out int cellSlot))
    {
        return cellSlot;
    }

    return ClipSlotCull;
}
/// <summary>
/// Phase U.4: the call-site clip-slot decision for one entity, returning the
/// (Slot, Culled) pair the per-entity loop body consumes. Wraps
/// <see cref="ResolveEntitySlot"/> with the <paramref name="clipRoutingActive"/>
/// gate: when routing is INACTIVE (outdoor root / no portal frame), every entity
/// is slot 0 and nothing is clip-culled — the bit-identical-to-U.3 property, so
/// the resolver (and <paramref name="cellIdToSlot"/>) is bypassed entirely.
/// When active, a CULL sentinel maps to <c>(0, culled=true)</c> — the slot value
/// is never emitted for a culled entity.
/// <c>internal static</c> + pure so the whole policy (including the routing-
/// inactive branch) is unit-testable — see WbDrawDispatcherClipSlotTests.
/// </summary>
internal static (uint Slot, bool Culled) ResolveSlotForFrame(
    bool clipRoutingActive,
    uint serverGuid,
    uint? parentCellId,
    IReadOnlyDictionary? cellIdToSlot,
    int outdoorSlot,
    bool outdoorVisible)
{
    // Routing inactive: every entity is slot 0 and nothing is clip-culled. The
    // resolver (and the possibly-null map) is not consulted at all.
    if (!clipRoutingActive)
    {
        return (0u, false);
    }

    // SetClipRouting rejects a null map, so an active routing state always
    // carries a non-null dictionary; the null-forgiving operator records that
    // invariant for the static helper.
    int resolved = ResolveEntitySlot(serverGuid, parentCellId, cellIdToSlot!, outdoorSlot, outdoorVisible);

    // Treat EVERY negative result as culled, not just the ClipSlotCull sentinel.
    // A stray negative (bad outdoorSlot or map value) would otherwise be cast to
    // uint and wrap to a huge slot index. Slot 0 is returned as a placeholder;
    // it is never emitted for a culled entity.
    if (resolved < 0)
    {
        return (0u, true);
    }

    return ((uint)resolved, false);
}
public static Matrix4x4 ComposePartWorldMatrix(
    Matrix4x4 entityWorld,
    Matrix4x4 animOverride,
    Matrix4x4 restPose)
{
    // Left-to-right product restPose * animOverride * entityWorld, evaluated in
    // that association order so the floating-point result is unchanged.
    Matrix4x4 partLocal = Matrix4x4.Multiply(restPose, animOverride);
    return Matrix4x4.Multiply(partLocal, entityWorld);
}
/// <summary>
/// Entry for <see cref="WalkEntities"/> per-landblock iteration.
/// Mirrors the shape yielded by <c>GpuWorldState.LandblockEntries</c>.
/// </summary>
// One landblock's worth of input to the entity walk.
public readonly record struct LandblockEntry(
    // Compared against neverCullLandblockId and copied into every emitted tuple.
    uint LandblockId,
    // Landblock bounds; the walk tests them against the frustum to decide
    // whether the whole landblock is visible.
    Vector3 AabbMin,
    Vector3 AabbMax,
    // Walked in full when the landblock is visible.
    IReadOnlyList Entities,
    // Consulted only when the landblock is frustum-culled: animated entity ids
    // are looked up here instead of scanning Entities. Null skips that pass.
    IReadOnlyDictionary? AnimatedById);
/// <summary>
/// Result of <see cref="WalkEntities"/> — the list of (entity, meshRef index)
/// pairs that passed all visibility filters, plus a diagnostic walk count.
/// </summary>
public struct WalkResult
{
    // Entities that passed every filter in the walk — bumped once per entity,
    // not once per MeshRef.
    public int EntitiesWalked;
    // Building-shell entities that passed the visible-cell gate while the walk
    // was shell-scoped (shell-scoped set AND a non-null visibleCellIds).
    public int BuildingShellAnchorPass;
    // Building-shell entities rejected by the visible-cell gate under the same
    // shell-scoped condition.
    public int BuildingShellAnchorReject;
    // One tuple per (entity, MeshRef index) that survived filtering, tagged with
    // the id of the landblock entry it came from. The walk points this at the
    // list it was asked to fill.
    public List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> ToDraw;
}
/// <summary>
/// Pure-CPU visibility filter over <paramref name="landblockEntries"/>.
/// Separated from <c>Draw</c> so tests can exercise it without GL state.
/// </summary>
/// <remarks>
/// <para>
/// A.5 T17 Change #1: when an LB is frustum-culled AND
/// <paramref name="animatedEntityIds"/> is non-empty, the OLD path walked
/// every entity in the LB just to find the few animated ones. This helper
/// fixes that: if the LB is invisible, we iterate <paramref name="animatedEntityIds"/>
/// directly and look each up in
/// <c>entry.AnimatedById</c> (typically &lt;50 animated, up to ~10K total).
/// </para>
/// <para>
/// A.5 T18 Change #2: per-entity AABB cull reads from the cached
/// <c>WorldEntity.AabbMin</c> / <c>WorldEntity.AabbMax</c>
/// (refreshed lazily if <c>WorldEntity.AabbDirty</c>), instead of
/// recomputing Position±5 each frame.
/// </para>
/// <para>
/// Test-friendly overload that allocates a fresh ToDraw list per call.
/// Production code (<c>Draw</c>) uses the no-alloc overload below
/// with a caller-provided scratch list.
/// </para>
/// </remarks>
internal static WalkResult WalkEntities(
    IEnumerable landblockEntries,
    FrustumPlanes? frustum,
    uint? neverCullLandblockId,
    HashSet? visibleCellIds,
    HashSet? animatedEntityIds)
{
    // Convenience wrapper: allocate a fresh output list on every call and let the
    // no-alloc overload fill it. Probe state and entity set keep their defaults.
    var walk = new WalkResult
    {
        ToDraw = new List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)>(),
    };

    WalkEntitiesInto(
        landblockEntries,
        frustum,
        neverCullLandblockId,
        visibleCellIds,
        animatedEntityIds,
        walk.ToDraw,
        ref walk);

    return walk;
}
/// <summary>
/// No-alloc overload: clears + populates the caller-provided <paramref name="scratch"/>
/// list. <c>Draw</c> reuses a per-dispatcher scratch field across frames to
/// avoid the 480+ KB / frame GC pressure that the test-friendly overload incurs.
/// Returns walk count via <paramref name="result"/>'s EntitiesWalked field.
/// </summary>
/// <remarks>
/// <para>
/// When <paramref name="indoorProbeState"/> is non-null the method emits
/// <c>[indoor-cull]</c> lines for cell entities rejected by the
/// visibleCellIds or frustum filters, and <c>[indoor-walk]</c> lines for
/// cell entities that pass all filters. Rate-limited by
/// <c>IndoorProbeState.ShouldEmit</c>. Pass <c>null</c> (the default)
/// to disable all probe emission — used by the test-friendly
/// <c>WalkEntities</c> overload.
/// </para>
/// </remarks>
internal static void WalkEntitiesInto(
    IEnumerable landblockEntries,
    FrustumPlanes? frustum,
    uint? neverCullLandblockId,
    HashSet? visibleCellIds,
    HashSet? animatedEntityIds,
    List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> scratch,
    ref WalkResult result,
    IndoorProbeState? indoorProbeState = null,
    EntitySet set = EntitySet.All)
{
    // Reset EVERY output of the walk, not just the walk count: a caller that
    // reuses one WalkResult across calls must not see shell-anchor counters
    // accumulated from a previous walk.
    scratch.Clear();
    result.EntitiesWalked = 0;
    result.BuildingShellAnchorPass = 0;
    result.BuildingShellAnchorReject = 0;
    result.ToDraw = scratch;

    foreach (var entry in landblockEntries)
    {
        // A landblock is walked in full when there is no frustum, when it is the
        // never-cull landblock, or when its AABB passes the frustum test.
        bool landblockVisible = frustum is null
            || entry.LandblockId == neverCullLandblockId
            || FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);

        if (!landblockVisible)
        {
            // A.5 T17 Change #1: walk only animated entities, not all entities.
            // Avoids O(N_entities) scan when only O(N_animated) work is needed.
            if (animatedEntityIds is null || animatedEntityIds.Count == 0) continue;
            if (entry.AnimatedById is null) continue;

            foreach (var animatedId in animatedEntityIds)
            {
                if (!entry.AnimatedById.TryGetValue(animatedId, out var entity)) continue;

                // Phase A8: EntitySet partition for indoor/outdoor split passes.
                if (!EntityMatchesSet(entity, set)) continue;
                if (entity.MeshRefs.Count == 0) continue;

                // Shell-anchor counters only apply to building shells walked under
                // a shell-scoped set with an actual visible-cell list.
                bool shellScoped = IsShellScopedSet(set)
                    && entity.IsBuildingShell
                    && visibleCellIds is not null;

                if (!EntityPassesVisibleCellGate(entity, visibleCellIds, set))
                {
                    if (shellScoped) result.BuildingShellAnchorReject++;
                    continue;
                }
                if (shellScoped) result.BuildingShellAnchorPass++;

                // No per-entity AABB cull and no probes on this path: animated
                // entities in a culled landblock are emitted as-is.
                result.EntitiesWalked++;
                for (int i = 0; i < entity.MeshRefs.Count; i++)
                    scratch.Add((entity, i, entry.LandblockId));
            }
            continue;
        }

        foreach (var entity in entry.Entities)
        {
            // Phase A8: EntitySet partition for indoor/outdoor split passes.
            if (!EntityMatchesSet(entity, set)) continue;
            if (entity.MeshRefs.Count == 0) continue;

            // Detect cell entity for indoor probes — first MeshRef.GfxObjId
            // is an EnvCell id (low 16 bits ≥ 0x0100). Cheap to compute;
            // result reused for all probe checks below. Always false when no
            // probe state was supplied, which disables every probe branch.
            ulong cellProbeId = (ulong)entity.MeshRefs[0].GfxObjId;
            bool isCellEntity = indoorProbeState is not null
                && RenderingDiagnostics.IsEnvCellId(cellProbeId);

            bool shellScoped = IsShellScopedSet(set)
                && entity.IsBuildingShell
                && visibleCellIds is not null;

            bool cellInVis = EntityPassesVisibleCellGate(entity, visibleCellIds, set);
            if (!cellInVis)
            {
                if (shellScoped) result.BuildingShellAnchorReject++;
                if (isCellEntity && RenderingDiagnostics.ProbeIndoorCullEnabled
                    && indoorProbeState!.ShouldEmit(cellProbeId))
                {
                    // Diagnostics must never throw: fall back to 0 when the entity
                    // has no parent cell (same convention as the [indoor-walk]
                    // probe below) instead of dereferencing a null ParentCellId.
                    Console.WriteLine(
                        $"[indoor-cull] cellEnt=0x{entity.Id:X8} " +
                        $"reason=visibleCellIds-miss " +
                        $"parentCell=0x{(entity.ParentCellId ?? 0u):X8}");
                }
                continue;
            }
            if (shellScoped) result.BuildingShellAnchorPass++;

            // Per-entity AABB frustum cull (perf #3). Animated entities bypass —
            // they're tracked at landblock level + need per-frame work regardless.
            // A.5 T18 Change #2: read cached AABB, refresh lazily on AabbDirty.
            bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
            bool aabbVisible = true;
            if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
            {
                if (entity.AabbDirty) entity.RefreshAabb();
                aabbVisible = FrustumCuller.IsAabbVisible(frustum.Value, entity.AabbMin, entity.AabbMax);
            }

            if (!aabbVisible)
            {
                if (isCellEntity && RenderingDiagnostics.ProbeIndoorCullEnabled
                    && indoorProbeState!.ShouldEmit(cellProbeId))
                {
                    Console.WriteLine(
                        $"[indoor-cull] cellEnt=0x{entity.Id:X8} " +
                        $"reason=frustum " +
                        $"aabbMin=({entity.AabbMin.X:F1},{entity.AabbMin.Y:F1},{entity.AabbMin.Z:F1}) " +
                        $"aabbMax=({entity.AabbMax.X:F1},{entity.AabbMax.Y:F1},{entity.AabbMax.Z:F1})");
                }
                continue;
            }

            // Passed all filters — emit walk probe.
            if (isCellEntity && RenderingDiagnostics.ProbeIndoorWalkEnabled
                && indoorProbeState!.ShouldEmit(cellProbeId))
            {
                Console.WriteLine(
                    $"[indoor-walk] cellEnt=0x{entity.Id:X8} " +
                    $"pos=({entity.Position.X:F1},{entity.Position.Y:F1},{entity.Position.Z:F1}) " +
                    $"parentCell=0x{(entity.ParentCellId ?? 0u):X8} " +
                    $"meshRef0=0x{cellProbeId:X8} " +
                    $"meshRefCount={entity.MeshRefs.Count} " +
                    $"landblockVisible=true aabbVisible=true cellInVis=true");
            }

            // One tuple per MeshRef so all parts of an entity stay contiguous in
            // the output list.
            result.EntitiesWalked++;
            for (int i = 0; i < entity.MeshRefs.Count; i++)
                scratch.Add((entity, i, entry.LandblockId));
        }
    }
}
public void Draw(
ICamera camera,
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax,
IReadOnlyList Entities,
IReadOnlyDictionary? AnimatedById)> landblockEntries,
FrustumPlanes? frustum = null,
uint? neverCullLandblockId = null,
HashSet? visibleCellIds = null,
HashSet? animatedEntityIds = null,
EntitySet set = EntitySet.All)
{
_shader.Use();
_indoorProbeFrameCounter++;
var vp = camera.View * camera.Projection;
_shader.SetMatrix4("uViewProjection", vp);
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
if (diag && !_gpuQueriesInitialized)
{
for (int i = 0; i < GpuQueryRingDepth; i++)
{
_gpuQueryOpaque[i] = _gl.GenQuery();
_gpuQueryTransparent[i] = _gl.GenQuery();
}
_gpuQueriesInitialized = true;
}
// Always run the CPU stopwatch — cheap; only logged under diag.
_cpuStopwatch.Restart();
// Camera world-space position for front-to-back sort (perf #2). The view
// matrix is the inverse of the camera's world transform, so the world
// translation lives in the inverse's translation row.
Vector3 camPos = Vector3.Zero;
if (Matrix4x4.Invert(camera.View, out var invView))
camPos = invView.Translation;
// ── Phase 1: clear groups, walk entities, build groups ──────────────
foreach (var grp in _groups.Values) { grp.Matrices.Clear(); grp.Slots.Clear(); }
var metaTable = _meshAdapter.MetadataTable;
uint anyVao = 0;
// Project the 5-tuple enumerable into LandblockEntry records for WalkEntities.
static IEnumerable ToEntries(
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax,
IReadOnlyList Entities,
IReadOnlyDictionary? AnimatedById)> src)
{
foreach (var e in src)
yield return new LandblockEntry(e.LandblockId, e.AabbMin, e.AabbMax, e.Entities, e.AnimatedById);
}
// A.5 T26 follow-up (Bug B): use the no-alloc WalkEntitiesInto overload
// that populates _walkScratch (a per-dispatcher field reused across frames)
// instead of allocating a fresh List<(WorldEntity, int)> per frame.
//
// Pass an IndoorProbeState when any indoor probe is active so the static
// WalkEntitiesInto can emit rate-limited [indoor-cull] / [indoor-walk]
// lines without needing access to instance fields. Null = probes off.
IndoorProbeState? probeState = null;
if (RenderingDiagnostics.ProbeIndoorCullEnabled || RenderingDiagnostics.ProbeIndoorWalkEnabled)
{
// _currentFrame is snapped at construction time. Construct
// once per Draw() call only — a second construction within
// the same frame would stamp the dictionary with the
// (already-advanced) counter value, suppressing the second
// pass's emissions for IndoorProbeRateLimitFrames frames.
// Today Draw() is called exactly once per frame; if a
// future refactor adds a shadow / reflection / second pass,
// this assumption needs revisiting.
probeState = new IndoorProbeState(_lastIndoorProbeFrame, _indoorProbeFrameCounter);
}
var walkResult = default(WalkResult);
WalkEntitiesInto(
ToEntries(landblockEntries),
frustum,
neverCullLandblockId,
visibleCellIds,
animatedEntityIds,
_walkScratch,
ref walkResult,
probeState,
set);
// Tier 1 cache (#53) flush-tracking locals. _walkScratch holds one tuple
// per (entity, MeshRefIndex) and is in entity-order, so all MeshRefs of
// a given entity are contiguous. We accumulate ALL of an entity's
// batches into _populateScratch, then flush exactly once per entity:
// either when the iteration crosses to a different entity, or at the
// end of the loop for the last entity. Flushing per-tuple would
// overwrite earlier MeshRefs (the cache is keyed by entity.Id), so
// multi-part Setup-backed entities would only retain their LAST
// MeshRef's batches — bug fixed in commit after 2f489a8.
uint? populateEntityId = null;
uint populateLandblockId = 0;
// Tier 1 cache (#53) — fast-path one-shot tracker. The cache stores a
// FLAT list of batches across all MeshRefs of an entity, so a single
// ApplyCacheHit call already drew every batch. _walkScratch yields
// one tuple per (entity, MeshRefIndex), so without this guard a
// 3-MeshRef static entity on a frame-2 cache hit would call
// ApplyCacheHit 3 times — appending all 6 batches × 3 = 18 instances
// to _groups instead of 6. Result: severe Z-fighting + 3× perf hit
// on every multi-part static entity (buildings, statues, multi-MeshRef
// NPCs). The fast path must fire only on the FIRST tuple of each
// entity; subsequent tuples skip via this tracker.
uint? lastHitEntityId = null;
// Tier 1 cache (#53) — incomplete-entity guard. When any MeshRef of
// the current entity has _meshAdapter.TryGetRenderData return null
// (mesh still async-decoding via ObjectMeshManager.PrepareMeshDataAsync),
// we mark the entity incomplete and DROP the accumulated populate
// scratch at entity boundary instead of writing it to the cache.
// Otherwise the cache would hold a partial classification (some parts
// missing), and frame-2 cache hits would persist that partial render
// even after the missing mesh loads — every subsequent frame sees the
// cache hit and skips re-classification, so the missing parts never
// recover. User-visible symptom: the drudge statue on top of the
// Foundry (multi-part Setup entity with AnimPartChange) renders with
// some parts missing permanently. Reset on entity change.
bool currentEntityIncomplete = false;
// Per-tuple entity tracker used purely for entity-change detection.
// Updated UNCONDITIONALLY at end of every tuple (including tuples that
// skip via null renderData), so the flag-reset block below correctly
// distinguishes "new entity" from "same entity, different tuple."
// populateEntityId can't be used for this because it's only set after
// a successful slow-path classification.
uint? prevTupleEntityId = null;
foreach (var (entity, partIdx, landblockId) in _walkScratch)
{
if (diag) _entitiesSeen++;
// Skip subsequent tuples of an entity that already cache-hit on
// its first tuple. ApplyCacheHit drew the full flat batch list;
// re-firing here would N-multiply the instance count. Diag
// _entitiesDrawn is bumped here to preserve per-tuple parity with
// the previous counting semantics.
if (lastHitEntityId == entity.Id)
{
if (diag) _entitiesDrawn++;
continue;
}
// Reset the hit tracker on entity change so the next entity's
// first tuple re-checks the cache. (When this iteration is the
// FIRST tuple of a new entity after a cache-hit entity, we must
// not retain the previous entity's id.)
if (lastHitEntityId.HasValue && lastHitEntityId.Value != entity.Id)
{
lastHitEntityId = null;
}
// Tier 1 cache (#53) — drop the previous entity's accumulated
// populate scratch BEFORE MaybeFlushOnEntityChange runs. If the
// previous entity ended incomplete (≥1 null renderData), we MUST
// NOT cache its partial classification: clear scratch and null
// the tracker so MaybeFlushOnEntityChange sees the cleaned state
// and no-ops for this entity. Reset the incomplete flag for the
// new entity so each one gets a fresh measurement.
//
// CRITICAL: the flag reset must fire ONLY on entity change, not
// every tuple. Resetting per-tuple within the same entity would
// undo a null-renderData flag set by a previous tuple of the same
// entity → if the missing MeshRef sits in the MIDDLE of the
// entity's MeshRefs list, a later valid tuple's reset would
// re-mark the entity "complete" and let partial data populate
// the cache. Trees with [trunk valid, branches null, leaves
// valid] hit this exactly — branches never recover.
bool isNewEntity = !prevTupleEntityId.HasValue || prevTupleEntityId.Value != entity.Id;
if (isNewEntity)
{
if (populateEntityId.HasValue && currentEntityIncomplete)
{
_populateScratch.Clear();
populateEntityId = null;
}
currentEntityIncomplete = false;
// Phase U.4: resolve this entity's clip slot ONCE per entity
// (constant across its tuples). On the U.3 / outdoor path
// (_clipRoutingActive false) every entity is slot 0, never culled.
// The whole decision (including the routing-active gate) lives in
// the pure ResolveSlotForFrame helper so it's unit-testable.
(_currentEntitySlot, _currentEntityCulled) = ResolveSlotForFrame(
_clipRoutingActive, entity.ServerGuid, entity.ParentCellId,
_cellIdToSlot, _outdoorSlot, _outdoorVisible);
}
prevTupleEntityId = entity.Id;
// Flush-on-entity-change: if the previous entity accumulated any
// batches AND this iteration is for a different entity, populate
// its cache entry now and reset the scratch buffer. Runs for ALL
// entities (including this-entity-culled) so the PREVIOUS entity's
// cache always flushes at the boundary.
(populateEntityId, populateLandblockId) = MaybeFlushOnEntityChange(
populateEntityId, populateLandblockId, entity.Id, _cache, _populateScratch);
// Phase U.4: a culled entity (cell not visible, or no outdoors visible
// for an outdoor stab) contributes NO instances. Skip after the
// boundary flush above so the previous entity still committed; the
// next entity's isNewEntity logic is unaffected (prevTupleEntityId is
// already updated). Matches the existing visible-cell / frustum cull:
// nothing enters _groups, so neither binding=0 nor binding=3 sees it.
if (_currentEntityCulled)
continue;
var entityWorld =
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position);
bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
// Cache-hit fast path (Task 10): static entity with a populated
// cache entry skips classification entirely. Walk the cached
// (GroupKey, RestPose) flat list and append cached.RestPose *
// entityWorld to each matching group's matrices. Animated entities
// bypass the cache (collector is set null below; their entries are
// never populated in the first place).
//
// Placed AFTER the entity-change flush above so that, on a
// hit, this iteration also finishes flushing any pending
// populate state from a previous entity. Animated entities never
// enter this branch — the !isAnimated guard makes that explicit.
//
// Fires ONCE per entity: the first tuple reaches here, runs
// ApplyCacheHit, sets lastHitEntityId, and continues. Subsequent
// tuples of the same entity short-circuit at the top of the loop
// body via the lastHitEntityId == entity.Id check above.
if (!isAnimated && !_tier1CacheDisabled && _cache.TryGet(entity.Id, landblockId, out var cachedEntry))
{
ApplyCacheHit(cachedEntry!, entityWorld, AppendInstanceToGroup);
// anyVao recovery: when the first visible entity in the frame
// takes the fast path, no slow-path lookup has populated
// anyVao yet. Look up THIS entity's first MeshRef once via
// the mesh adapter — cheap dict lookup, not a re-classify.
if (anyVao == 0)
{
var firstMeshRef = entity.MeshRefs[partIdx];
var firstRenderData = _meshAdapter.TryGetRenderData(firstMeshRef.GfxObjId);
if (firstRenderData is not null) anyVao = firstRenderData.VAO;
}
if (diag) _entitiesDrawn++;
lastHitEntityId = entity.Id;
#if DEBUG
// Cross-check guard: assert the membership predicate held at hit time.
// The full re-classification cross-check (spec section 6.5) is a stretch
// goal; this simpler assert catches the prior Tier 1 bug class — a
// static entity that turns out to actually be animated would fire here.
//
// Structurally redundant with the `if (!isAnimated && ...)` branch
// condition, but serves as a TRIPWIRE: a future refactor that
// incorrectly relaxes the branch condition (e.g., removes
// `!isAnimated` from the guard) would silently allow animated
// entities into the fast path; the assert catches that immediately.
System.Diagnostics.Debug.Assert(
!isAnimated,
$"EntityClassificationCache hit on animated entity {entity.Id} — invariant violated");
#endif
continue;
}
// Compute palette-override hash ONCE per entity (perf #4).
// Reused across every (part, batch) lookup so the FNV-1a fold
// over SubPalettes runs once instead of N times. Zero when the
// entity has no palette override (trees, scenery).
ulong palHash = 0;
if (entity.PaletteOverride is not null)
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
// Note: GameWindow's spawn path already applies
// AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix —
// close-detail mesh swap for humanoids) to MeshRefs. We
// trust MeshRefs as the source of truth here. AnimatedEntityState's
// overrides become relevant only for hot-swap (0xF625
// ObjDescEvent) which today rebuilds MeshRefs anyway.
var meshRef = entity.MeshRefs[partIdx];
ulong gfxObjId = meshRef.GfxObjId;
var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
// [indoor-lookup] probe — emit once per cell entity per sec.
// Fires BEFORE the null-renderData early-continue so a miss still
// emits hit=false, distinguishing H2 (empty batches) from H6
// (dispatcher fails to traverse Setup).
ulong lookupCellId = (ulong)gfxObjId;
if (RenderingDiagnostics.IsEnvCellId(lookupCellId)
&& RenderingDiagnostics.ProbeIndoorLookupEnabled
// Rate-limit in a separate namespace from [indoor-walk]/[indoor-cull]
// (which key on the same gfxObjId). Without this, IndoorAll=1 would
// silence the lookup probe whenever the walk probe fired first.
&& ShouldEmitIndoorProbe(lookupCellId | 0x8000_0000_0000_0000UL))
{
bool hit = renderData is not null;
bool isSetup = hit && renderData!.IsSetup;
int partCount = isSetup ? renderData!.SetupParts.Count : 0;
int partsHit = 0, partsMiss = 0;
if (isSetup)
{
foreach (var (partId, _) in renderData!.SetupParts)
{
if (_meshAdapter.TryGetRenderData(partId) is not null) partsHit++;
else partsMiss++;
}
}
bool hasEnvCellGeom = isSetup
&& renderData!.SetupParts.Exists(t => (t.GfxObjId & 0x1_0000_0000UL) != 0);
Console.WriteLine(
$"[indoor-lookup] cellId=0x{lookupCellId:X8} " +
$"hit={hit} isSetup={isSetup} partCount={partCount} " +
$"hasEnvCellGeom={hasEnvCellGeom} partsHit={partsHit} partsMiss={partsMiss}");
}
if (renderData is null)
{
// Tier 1 cache (#53): mesh data is still async-decoding via
// WB's ObjectMeshManager.PrepareMeshDataAsync. Flag the entity
// as incomplete so the entity-boundary check (or end-of-loop
// check) drops the accumulated populate scratch instead of
// caching a partial classification. The slow path retries on
// the next frame; once all this entity's meshes have loaded,
// the populate fires with the complete batch set.
currentEntityIncomplete = true;
if (diag) _meshesMissing++;
continue;
}
if (anyVao == 0) anyVao = renderData.VAO;
// Cache-miss path (animated entities skip cache entirely).
// Static entities accumulate into _populateScratch across ALL
// their MeshRefs; the flush at next-entity-boundary (or
// end-of-loop) commits them as a single Populate call.
var collector = isAnimated ? null : _populateScratch;
bool drewAny = false;
if (renderData.IsSetup && renderData.SetupParts.Count > 0)
{
foreach (var (partGfxObjId, partTransform) in renderData.SetupParts)
{
var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
if (partData is null) continue;
var model = ComposePartWorldMatrix(
entityWorld, meshRef.PartTransform, partTransform);
// [indoor-xform] probe — only for the cell's synthetic
// geometry part (bit 32 set, per WB's PrepareEnvCellMeshData
// cellGeomId convention). One line per part per sec.
// Disambiguates hypothesis H5 (transform double-apply —
// composedT lands at 2 × cellOrigin).
if ((partGfxObjId & 0x1_0000_0000UL) != 0
&& RenderingDiagnostics.ProbeIndoorXformEnabled
&& ShouldEmitIndoorProbe(partGfxObjId))
{
Console.WriteLine(
$"[indoor-xform] cellGeomId=0x{partGfxObjId:X16} " +
$"entityWorldT=({entityWorld.Translation.X:F2},{entityWorld.Translation.Y:F2},{entityWorld.Translation.Z:F2}) " +
$"meshRefT=({meshRef.PartTransform.Translation.X:F2},{meshRef.PartTransform.Translation.Y:F2},{meshRef.PartTransform.Translation.Z:F2}) " +
$"partT=({partTransform.Translation.X:F2},{partTransform.Translation.Y:F2},{partTransform.Translation.Z:F2}) " +
$"composedT=({model.Translation.X:F2},{model.Translation.Y:F2},{model.Translation.Z:F2})");
}
var restPose = partTransform * meshRef.PartTransform;
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable, restPose, collector);
drewAny = true;
}
}
else
{
var model = meshRef.PartTransform * entityWorld;
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable, restPose: meshRef.PartTransform, collector: collector);
drewAny = true;
}
// Track THIS entity for the next iteration's flush check. Only
// when collector is non-null (entity is static); animated entities
// leave the tracker null so we don't try to flush them.
if (collector is not null)
{
populateEntityId = entity.Id;
populateLandblockId = landblockId;
}
if (diag && drewAny) _entitiesDrawn++;
}
// Tier 1 cache (#53) — drop the accumulated populate scratch if the
// LAST entity in the loop ended incomplete (had ≥1 null renderData).
// Same reason as the entity-boundary handling above: avoid caching a
// partial classification. The slow path will retry on the next frame
// and populate correctly once all meshes have loaded.
if (currentEntityIncomplete)
{
_populateScratch.Clear();
populateEntityId = null;
}
// Final flush: the last entity in _walkScratch has no "next iteration"
// to trigger the entity-change flush, so commit its accumulated batches
// here. No-op when the last entity was animated (populateEntityId stays
// null) or when no entities walked at all.
FinalFlushPopulate(populateEntityId, populateLandblockId, _cache, _populateScratch);
// Nothing visible — skip the GL pass entirely.
if (anyVao == 0)
{
LastDrawStats = new DrawStats(set, walkResult.EntitiesWalked, _walkScratch.Count, 0, 0, 0, 0, 0, 0);
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag();
return;
}
// ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ──
int totalInstances = 0;
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
if (totalInstances == 0)
{
LastDrawStats = new DrawStats(set, walkResult.EntitiesWalked, _walkScratch.Count, 0, 0, 0, 0, 0, 0);
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag();
return;
}
int needed = totalInstances * 16;
if (_instanceData.Length < needed)
_instanceData = new float[needed + 256 * 16];
// Phase U.4: size the per-instance clip-slot buffer to match the instance
// count and lay it out in the SAME group order / cursor as _instanceData,
// so instanceClipSlot[i] (binding=3) tracks Instances[i] (binding=0). On
// the U.3 / outdoor path every Slots entry is 0 ⇒ identical to U.3.
if (_clipSlotData.Length < totalInstances)
_clipSlotData = new uint[totalInstances + 256];
_opaqueDraws.Clear();
_translucentDraws.Clear();
int cursor = 0;
foreach (var grp in _groups.Values)
{
if (grp.Matrices.Count == 0) continue;
grp.FirstInstance = cursor;
grp.InstanceCount = grp.Matrices.Count;
// Use the first instance's translation as the group's representative
// position for front-to-back sort (perf #2). Cheap heuristic; works
// well when instances of one group are spatially coherent
// (typical for trees in one landblock area, NPCs at one spawn).
var first = grp.Matrices[0];
var grpPos = new Vector3(first.M41, first.M42, first.M43);
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
for (int i = 0; i < grp.Matrices.Count; i++)
{
WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]);
// Slots[] is parallel to Matrices[] within the group; write the
// slot at the same cursor so binding=3 stays aligned with binding=0.
_clipSlotData[cursor] = grp.Slots[i];
cursor++;
}
if (IsOpaque(grp.Translucency))
_opaqueDraws.Add(grp);
else
_translucentDraws.Add(grp);
}
// Front-to-back sort within each cull mode. DrawIndirectRange must
// split MDI calls whenever CullMode changes because GL state is not
// part of an indirect command. Sorting by distance alone can turn a
// stable 1k-draw live scene into hundreds of tiny MDI runs after a
// landblock transition, which shows up as a GPU-command bottleneck
// without a triangle-count spike.
_opaqueDraws.Sort(CompareOpaqueSubmissionOrder);
_translucentDraws.Sort(CompareTransparentSubmissionOrder);
// ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent),
// fill via BuildIndirectArrays ──────────────────────────────────
int totalDraws = _opaqueDraws.Count + _translucentDraws.Count;
if (_batchData.Length < totalDraws)
_batchData = new BatchData[totalDraws + 64];
if (_indirectCommands.Length < totalDraws)
_indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64];
if (_drawCullModes.Length < totalDraws)
_drawCullModes = new CullMode[totalDraws + 64];
var groupInputs = new List(totalDraws);
foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g));
foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g));
// Cast _batchData (private BatchData) to public-mirror BatchDataPublic for BuildIndirectArrays.
// Layout is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test).
var batchPublic = new BatchDataPublic[totalDraws];
var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic, _drawCullModes);
long totalTriangles = 0;
foreach (var input in groupInputs)
totalTriangles += (long)(input.IndexCount / 3) * input.InstanceCount;
int cullRuns =
CountCullRuns(_drawCullModes, 0, layout.OpaqueCount) +
CountCullRuns(_drawCullModes, layout.OpaqueCount, layout.TransparentCount);
// Copy back into _batchData
for (int i = 0; i < totalDraws; i++)
{
_batchData[i] = new BatchData
{
TextureHandle = batchPublic[i].TextureHandle,
TextureLayer = batchPublic[i].TextureLayer,
Flags = batchPublic[i].Flags,
};
}
_opaqueDrawCount = layout.OpaqueCount;
_transparentDrawCount = layout.TransparentCount;
_transparentByteOffset = layout.TransparentByteOffset;
LastDrawStats = new DrawStats(
set,
walkResult.EntitiesWalked,
_walkScratch.Count,
totalInstances,
totalDraws,
cullRuns,
_opaqueDrawCount,
_transparentDrawCount,
totalTriangles);
// ── Phase 5: upload four buffers ────────────────────────────────────
fixed (float* ip = _instanceData)
UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
fixed (BatchData* bp = _batchData)
UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
// Phase U.4: per-instance clip-slot buffer (binding=3), one uint per
// instance, laid out parallel to _instanceData in Phase 3's group loop so
// instanceClipSlot[instanceIndex] tracks Instances[instanceIndex]. On the
// U.3 / outdoor path every entry is 0 ⇒ slot 0 ⇒ no-clip (identical to
// U.3); under indoor routing it holds the per-instance slot from
// ResolveEntitySlot. No clear here — Phase 3 wrote exactly totalInstances
// entries; only [0..totalInstances) is uploaded, so any stale tail is
// never read by the shader (BaseInstance + gl_InstanceID < totalInstances).
fixed (uint* sp = _clipSlotData)
UploadSsbo(_clipSlotSsbo, 3, sp, totalInstances * sizeof(uint));
fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
{
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
_gl.BufferData(BufferTargetARB.DrawIndirectBuffer,
(nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
}
// Phase U.3: bind the SHARED per-cell clip-region SSBO (binding=2). The
// GameWindow-level ClipFrame already uploaded + bound it this frame; we
// re-bind defensively in case another consumer touched binding=2 since.
// When no shared id is set (0), bind our own no-clip fallback so the
// shader never reads an unbound SSBO at binding=2.
BindClipRegionBinding2();
// ── Phase 6: bind global VAO once ───────────────────────────────────
_gl.BindVertexArray(anyVao);
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
_gl.Disable(EnableCap.CullFace);
// GPU timing: compute this frame's ring slot. We read frame N-3's
// result (the oldest data in the ring) before overwriting it with
// frame N's queries. Hoisted to function scope so both the opaque
// and transparent passes below can reference gpuQuerySlot. See spec
// §3 Q1/Q2 + §4 in
// docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
int gpuQuerySlot = _gpuQueryFrameIndex % GpuQueryRingDepth;
// diag is part of the gate so the read/issue/increment trio stays
// symmetric — without it, toggling ACDREAM_WB_DIAG mid-session would
// freeze the frame counter (gated by diag below) while the read kept
// re-reading the same slot, producing duplicate stale samples.
if (diag && _gpuQueriesInitialized && _gpuQueryFrameIndex >= GpuQueryRingDepth)
{
_gl.GetQueryObject(_gpuQueryOpaque[gpuQuerySlot], QueryObjectParameterName.ResultAvailable, out int avail);
if (avail != 0)
{
_gl.GetQueryObject(_gpuQueryOpaque[gpuQuerySlot], QueryObjectParameterName.Result, out ulong opaqueNs);
_gl.GetQueryObject(_gpuQueryTransparent[gpuQuerySlot], QueryObjectParameterName.Result, out ulong transNs);
long gpuUs = (long)((opaqueNs + transNs) / 1000UL);
_gpuSamples[_gpuSampleCursor] = gpuUs;
_gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length;
}
// If avail==0 the sample is dropped silently. MedianMicros
// computes over the non-zero subset, so dropped samples don't
// poison the median.
}
// ── Phase 7: opaque pass ─────────────────────────────────────────────
if (_opaqueDrawCount > 0)
{
_gl.Disable(EnableCap.Blend);
_gl.DepthMask(true);
// A.5 T20: enable A2C for ClipMap foliage — GPU derives sample mask
// from the alpha written by mesh_modern.frag so foliage edges are
// smooth under MSAA 4x. A no-op for fully-opaque (α=1) batches.
// A.5 T22.5: gated by AlphaToCoverage property so Low/Medium presets
// (no MSAA) skip the unnecessary GL state change.
if (AlphaToCoverage) _gl.Enable(EnableCap.SampleAlphaToCoverage);
_shader.SetInt("uRenderPass", 0);
// Phase Post-A.5 (ISSUE #52, 2026-05-10): opaque section of
// Batches[] starts at index 0. See uDrawIDOffset comment in
// mesh_modern.vert for why this is needed.
_shader.SetInt("uDrawIDOffset", 0);
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque[gpuQuerySlot]);
DrawIndirectRange(0, _opaqueDrawCount);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
if (AlphaToCoverage) _gl.Disable(EnableCap.SampleAlphaToCoverage);
}
// ── Phase 8: transparent pass ────────────────────────────────────────
if (_transparentDrawCount > 0)
{
_gl.Enable(EnableCap.Blend);
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
_gl.DepthMask(false);
// Phase Post-A.5 (ISSUE #52, 2026-05-10): transparent section of
// Batches[] starts at index _opaqueDrawCount. Without this offset,
// each transparent draw reads BatchData[0..transparentCount) — the
// OPAQUE section — and the lifestone crystal's apparent texture
// flickers to whatever opaque batch sorted first that frame. See
// uDrawIDOffset comment in mesh_modern.vert.
_shader.SetInt("uDrawIDOffset", _opaqueDrawCount);
// Closed-shell translucent meshes still need culling, but the
// cull side must come from each dat batch just like the opaque
// section. BuildIndirectArrays preserves CullMode in _drawCullModes.
_gl.FrontFace(FrontFaceDirection.CW);
_shader.SetInt("uRenderPass", 1);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent[gpuQuerySlot]);
DrawIndirectRange(_opaqueDrawCount, _transparentDrawCount);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
_gl.DepthMask(true);
_gl.Disable(EnableCap.Blend);
}
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
_cpuStopwatch.Stop();
if (diag)
{
long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency;
_cpuSamples[_cpuSampleCursor] = cpuUs;
_cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length;
// GPU sample read happens BEFORE issuing the next frame's queries
// (see step 1.3 above). Increment the frame counter here so the
// next call computes a fresh slot.
if (_gpuQueriesInitialized) _gpuQueryFrameIndex++;
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
_instancesIssued += totalInstances;
MaybeFlushDiag();
}
}
/// <summary>
/// Phase A8 RR5 (2026-05-26): per-building draw overload. Walks only
/// entities whose ParentCellId is in <paramref name="cellIds"/>, plus
/// outdoor-style entities matching the EntitySet partition. Used by
/// the indoor render branch to scope rendering to the camera-buildings'
/// cells.
/// </summary>
/// <remarks>
/// Mirrors the existing visibleCellIds-based Draw but with an
/// explicit cell list (not the BFS-derived visibility set). The semantic
/// difference is at the caller: cellIds = the camera-buildings' EnvCellIds,
/// not the portal BFS result. The dispatcher's internal logic is identical
/// — it filters indoor entities by membership in the provided set.
/// This overload itself only adapts <paramref name="cellIds"/> to a
/// HashSet and forwards every other argument unchanged.
/// </remarks>
/// <param name="camera">Forwarded unchanged to the visibleCellIds-based Draw.</param>
/// <param name="landblockEntries">Forwarded unchanged to the visibleCellIds-based Draw.</param>
/// <param name="cellIds">
/// Explicit cell list, passed on as <c>visibleCellIds</c>. Reused as-is when
/// it is already a HashSet; otherwise copied into a new HashSet.
/// </param>
/// <param name="frustum">Forwarded unchanged.</param>
/// <param name="neverCullLandblockId">Forwarded unchanged.</param>
/// <param name="animatedEntityIds">Forwarded unchanged.</param>
/// <param name="set">Forwarded unchanged; defaults to <c>EntitySet.All</c>.</param>
public void Draw(
ICamera camera,
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax,
IReadOnlyList Entities,
IReadOnlyDictionary? AnimatedById)> landblockEntries,
IReadOnlyCollection cellIds,
FrustumPlanes? frustum = null,
uint? neverCullLandblockId = null,
HashSet? animatedEntityIds = null,
EntitySet set = EntitySet.All)
{
// Adapt IReadOnlyCollection → HashSet for the existing path.
// If the caller already passed a HashSet, avoid re-wrapping; otherwise a
// fresh HashSet copy is allocated on every call.
HashSet cellIdSet = cellIds is HashSet hs ? hs : new HashSet(cellIds);
// Delegate to the visibleCellIds-based overload; the adapted set is the
// only argument that differs from what the caller supplied.
Draw(camera, landblockEntries,
frustum: frustum,
neverCullLandblockId: neverCullLandblockId,
visibleCellIds: cellIdSet,
animatedEntityIds: animatedEntityIds,
set: set);
}
/// <summary>
/// Copies the draw parameters of an instance group into an
/// <c>IndirectGroupInput</c>. Every field maps one-to-one; the group's
/// <c>BindlessTextureHandle</c> becomes the input's <c>TextureHandle</c>.
/// </summary>
private static IndirectGroupInput ToInput(InstanceGroup g)
{
    return new IndirectGroupInput(
        IndexCount: g.IndexCount,
        FirstIndex: g.FirstIndex,
        BaseVertex: g.BaseVertex,
        InstanceCount: g.InstanceCount,
        FirstInstance: g.FirstInstance,
        TextureHandle: g.BindlessTextureHandle,
        TextureLayer: g.TextureLayer,
        Translucency: g.Translucency,
        CullMode: g.CullMode);
}
/// <summary>
/// Ordering for the opaque draw list: groups are bucketed by
/// <c>CullMode</c> first, and within one cull mode sorted by ascending
/// <c>SortDistance</c> (nearest first).
/// </summary>
private static int CompareOpaqueSubmissionOrder(InstanceGroup a, InstanceGroup b)
{
    int byCullMode = a.CullMode.CompareTo(b.CullMode);
    if (byCullMode != 0)
    {
        return byCullMode;
    }

    // Same cull mode: nearer group submits first.
    return a.SortDistance.CompareTo(b.SortDistance);
}
/// <summary>
/// Ordering for the translucent draw list: groups are bucketed by
/// <c>CullMode</c> first, and within one cull mode sorted by descending
/// <c>SortDistance</c> (farthest first).
/// </summary>
private static int CompareTransparentSubmissionOrder(InstanceGroup a, InstanceGroup b)
{
    int byCullMode = a.CullMode.CompareTo(b.CullMode);
    if (byCullMode != 0)
    {
        return byCullMode;
    }

    // Same cull mode: operands are swapped so the farther group submits first.
    return b.SortDistance.CompareTo(a.SortDistance);
}
/// <summary>
/// Counts how many maximal runs of identical cull mode exist in
/// <paramref name="modes"/> over the window
/// [<paramref name="startCommand"/>, <paramref name="startCommand"/> + <paramref name="commandCount"/>).
/// </summary>
/// <returns>0 for an empty (or negative-length) window; otherwise at least 1.</returns>
private static int CountCullRuns(CullMode[] modes, int startCommand, int commandCount)
{
    if (commandCount <= 0)
    {
        return 0;
    }

    int stop = startCommand + commandCount;
    var runMode = modes[startCommand];
    int runTotal = 1;

    int index = startCommand;
    while (++index < stop)
    {
        // A change of mode opens a new run.
        if (modes[index] != runMode)
        {
            runMode = modes[index];
            runTotal++;
        }
    }

    return runTotal;
}
/// <summary>
/// Submits the indirect commands in
/// [<paramref name="startCommand"/>, <paramref name="startCommand"/> + <paramref name="commandCount"/>)
/// as one <c>MultiDrawElementsIndirect</c> call per run of consecutive
/// commands that share a cull mode (read from <c>_drawCullModes</c>).
/// </summary>
private unsafe void DrawIndirectRange(int startCommand, int commandCount)
{
    int limit = startCommand + commandCount;

    for (int runStart = startCommand; runStart < limit;)
    {
        // Cull state is applied once per run, before the run's extent is measured.
        var runMode = _drawCullModes[runStart];
        ApplyCullMode(runMode);

        int runEnd = runStart + 1;
        while (runEnd < limit && _drawCullModes[runEnd] == runMode)
        {
            runEnd++;
        }

        // Each glMultiDrawElementsIndirect call restarts gl_DrawID at 0, so the
        // shader is handed the absolute index of the run's first command;
        // otherwise later runs would read BatchData[0] again and bind the
        // wrong texture.
        _shader.SetInt("uDrawIDOffset", runStart);

        _gl.MultiDrawElementsIndirect(
            PrimitiveType.Triangles,
            DrawElementsType.UnsignedShort,
            (void*)(runStart * DrawCommandStride),
            (uint)(runEnd - runStart),
            (uint)DrawCommandStride);

        runStart = runEnd;
    }
}
/// <summary>
/// Sets GL face-culling state for one cull mode. Front-face winding is
/// always forced to CW first; the mode then only selects whether culling is
/// disabled, culls front faces, or culls back faces. Modes not listed below
/// leave the cull-face state as it was.
/// </summary>
private void ApplyCullMode(CullMode mode)
{
    // WB BaseObjectRenderManager.cs:850-866 applies CullMode per MDI group.
    // WB GameScene.cs:843 sets FrontFace(CW) globally; SetCullMode then
    // only chooses front/back culling. Keep the same convention here so
    // splitting MDI commands by CullMode cannot resurrect stale CCW state.
    _gl.FrontFace(FrontFaceDirection.CW);

    if (mode == CullMode.None)
    {
        _gl.Disable(EnableCap.CullFace);
        return;
    }

    if (mode == CullMode.Clockwise)
    {
        _gl.Enable(EnableCap.CullFace);
        _gl.CullFace(TriangleFace.Front);
        return;
    }

    if (mode == CullMode.CounterClockwise || mode == CullMode.Landblock)
    {
        _gl.Enable(EnableCap.CullFace);
        _gl.CullFace(TriangleFace.Back);
    }
}
/// <summary>
/// Uploads <paramref name="byteCount"/> bytes from <paramref name="data"/>
/// into the shader-storage buffer <paramref name="ssbo"/> (re-specifying its
/// store with <c>DynamicDraw</c> usage) and then attaches the buffer to the
/// indexed binding point <paramref name="binding"/>.
/// </summary>
private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount)
{
    const BufferTargetARB Target = BufferTargetARB.ShaderStorageBuffer;
    nuint sizeInBytes = (nuint)byteCount;

    _gl.BindBuffer(Target, ssbo);
    _gl.BufferData(Target, sizeInBytes, data, BufferUsageARB.DynamicDraw);
    _gl.BindBufferBase(Target, binding, ssbo);
}
/// <summary>
/// Phase U.3: attaches a per-cell clip-region SSBO to the binding point
/// <c>ClipFrame.MeshClipSsboBinding</c>. The shared buffer
/// (<c>_sharedClipRegionSsbo</c>) is used when its id is non-zero; otherwise
/// a one-slot, all-zero fallback buffer is created on first use and bound
/// instead, so the shader never reads an unbound SSBO. The fallback's
/// single slot has count 0 (pass-all).
/// </summary>
private unsafe void BindClipRegionBinding2()
{
    var bufferToBind = _sharedClipRegionSsbo;

    if (bufferToBind == 0)
    {
        if (_fallbackClipRegionSsbo == 0)
        {
            _fallbackClipRegionSsbo = _gl.GenBuffer();

            // One CellClip slot, all zeros: count 0 ⇒ shader passes every plane.
            var zero = stackalloc byte[ClipFrame.CellClipStrideBytes];
            byte* write = zero;
            byte* stop = zero + ClipFrame.CellClipStrideBytes;
            while (write < stop)
            {
                *write++ = 0;
            }

            _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, _fallbackClipRegionSsbo);
            _gl.BufferData(BufferTargetARB.ShaderStorageBuffer,
                (nuint)ClipFrame.CellClipStrideBytes, zero, BufferUsageARB.DynamicDraw);
        }

        bufferToBind = _fallbackClipRegionSsbo;
    }

    _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
        ClipFrame.MeshClipSsboBinding, bufferToBind);
}
/// <summary>
/// Emits one <c>[WB-DIAG]</c> console line when more than 5000 ms (by
/// <c>Environment.TickCount64</c>) have passed since the previous one, then
/// zeroes the accumulated counters. The CPU/GPU sample buffers are left
/// untouched.
/// </summary>
private void MaybeFlushDiag()
{
    long nowMs = Environment.TickCount64;
    if (nowMs - _lastLogTick <= 5000)
    {
        return;
    }

    long cpuMedian = MedianMicros(_cpuSamples);
    long cpuTail = Percentile95Micros(_cpuSamples);
    long gpuMedian = MedianMicros(_gpuSamples);
    long gpuTail = Percentile95Micros(_gpuSamples);

    // A.5 T23: flag when entity dispatcher median exceeds 2.0ms budget
    // (Phase A.5 spec §2 acceptance criterion 6). Grep-friendly prefix.
    const long BudgetMicros = 2000;
    string overBudgetTag = string.Empty;
    if (cpuMedian > BudgetMicros)
    {
        overBudgetTag = " BUDGET_OVER";
    }

    Console.WriteLine(
        $"[WB-DIAG]{overBudgetTag} entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " +
        $"cpu_us={cpuMedian}m/{cpuTail}p95 gpu_us={gpuMedian}m/{gpuTail}p95");

    // Counters restart for the next reporting window.
    _instancesIssued = 0;
    _drawsIssued = 0;
    _meshesMissing = 0;
    _entitiesDrawn = 0;
    _entitiesSeen = 0;
    _lastLogTick = nowMs;

    // The sample buffers are deliberately NOT reset: they are a moving window
    // of recent frames, and clearing them on each flush would lose history.
}
/// <summary>
/// Median of the strictly-positive entries of <paramref name="samples"/>.
/// Zero (and negative) entries are treated as "no sample" and ignored.
/// The input array is not modified; a sorted copy is used.
/// </summary>
/// <param name="samples">Sample ring; unused slots are expected to hold 0.</param>
/// <returns>
/// 0 when there is no positive sample; otherwise the middle positive value
/// (the upper of the two middle values when the positive count is even).
/// </returns>
private static long MedianMicros(long[] samples)
{
    var sorted = (long[])samples.Clone();
    Array.Sort(sorted);

    int positiveCount = 0;
    foreach (var value in sorted)
    {
        if (value > 0) positiveCount++;
    }

    if (positiveCount == 0) return 0;

    // After an ascending sort the positive values occupy the last
    // positiveCount slots, i.e. [Length - positiveCount, Length). The median
    // sits positiveCount / 2 into that tail. (The previous index,
    // Length - positiveCount / 2, was one slot too high for odd counts and
    // read past the end of the array when exactly one sample was present.)
    int firstPositive = sorted.Length - positiveCount;
    return sorted[firstPositive + positiveCount / 2];
}
/// <summary>
/// Approximate 95th percentile of the strictly-positive entries of
/// <paramref name="samples"/>. Works on a sorted copy; the input is left
/// untouched.
/// </summary>
/// <returns>
/// 0 when there is no positive entry; otherwise the value
/// <c>(int)(positiveCount * 0.05)</c> slots below the largest entry.
/// </returns>
private static long Percentile95Micros(long[] samples)
{
    long[] ordered = new long[samples.Length];
    Array.Copy(samples, ordered, samples.Length);
    Array.Sort(ordered);

    // Ascending order puts every positive value in one contiguous tail, so
    // walking back from the end counts exactly the entries that are > 0.
    int tailStart = ordered.Length;
    while (tailStart > 0 && ordered[tailStart - 1] > 0)
    {
        tailStart--;
    }

    int positiveCount = ordered.Length - tailStart;
    if (positiveCount == 0)
    {
        return 0;
    }

    int stepsBelowMax = (int)(positiveCount * 0.05);
    return ordered[ordered.Length - 1 - stepsBelowMax];
}
// ── Tier 1 cache (#53) helpers extracted for testability ─────────────────
//
// Three pure-CPU static helpers carved out of Draw's per-entity loop so
// unit tests can exercise the populate/flush algorithm + cache-hit fast
// path without needing a real GL context. Production code (Draw) calls
// these helpers; the dispatcher integration tests in
// WbDrawDispatcherBucketingTests use them to drive the same algorithm
// through deterministic inputs.
/// <summary>
/// Replays a cache entry: for every cached batch, composes
/// <c>RestPose * entityWorld</c> and hands the batch's key together with
/// that matrix to <paramref name="appendInstance"/>. The delegate is the
/// only way results leave this helper, which keeps it GL-free and
/// unit-testable (Draw passes <c>AppendInstanceToGroup</c>).
/// </summary>
/// <remarks>
/// Matrix multiplication is non-commutative: the product MUST be
/// <c>RestPose * entityWorld</c>, not the reverse.
/// </remarks>
internal static void ApplyCacheHit(
EntityCacheEntry entry,
Matrix4x4 entityWorld,
Action appendInstance)
{
    var batches = entry.Batches;
    foreach (var batch in batches)
    {
        var instanceWorld = batch.RestPose * entityWorld;
        appendInstance(batch.Key, instanceWorld);
    }
}
/// <summary>
/// Per-tuple flush check. When a populate is pending
/// (<paramref name="populateEntityId"/> has a value) for an entity other
/// than <paramref name="currentEntityId"/>, the batches accumulated in
/// <paramref name="populateScratch"/> are committed to
/// <paramref name="cache"/> (skipped when the scratch is empty), the
/// scratch is cleared, and the tracker is reset.
/// </summary>
/// <remarks>
/// This is the bug-fix structure from commit 00fa8ae (per-MeshRef
/// Populate would overwrite earlier MeshRefs because the cache is
/// keyed by entity.Id; flushing only on entity boundary preserves all
/// MeshRefs' batches). _walkScratch is in entity-order so all MeshRefs
/// of one entity arrive contiguously.
/// </remarks>
/// <returns>
/// <c>(null, 0)</c> after a boundary was handled; otherwise the incoming
/// tracker values unchanged. The caller stores the result back into its
/// loop locals.
/// </returns>
internal static (uint? PopulateEntityId, uint PopulateLandblockId)
MaybeFlushOnEntityChange(
uint? populateEntityId,
uint populateLandblockId,
uint currentEntityId,
EntityClassificationCache cache,
List populateScratch)
{
    // Nothing pending, or still inside the same entity: leave state alone.
    if (populateEntityId is not uint pendingId || pendingId == currentEntityId)
    {
        return (populateEntityId, populateLandblockId);
    }

    if (populateScratch.Count > 0)
    {
        // ToArray snapshots the batches before the scratch list is reused.
        cache.Populate(pendingId, populateLandblockId, populateScratch.ToArray());
    }

    populateScratch.Clear();
    return (null, 0u);
}
/// <summary>
/// End-of-loop final flush. The last entity walked has no following
/// iteration to trigger <c>MaybeFlushOnEntityChange</c>, so its
/// accumulated batches are committed here. Does nothing when no populate is
/// pending (<paramref name="populateEntityId"/> is null) or when
/// <paramref name="populateScratch"/> is empty.
/// </summary>
/// <remarks>
/// End-of-loop only — does NOT reset the caller's tracker locals
/// (intentional, since they go out of scope immediately after).
/// </remarks>
internal static void FinalFlushPopulate(
uint? populateEntityId,
uint populateLandblockId,
EntityClassificationCache cache,
List populateScratch)
{
    if (populateEntityId is not uint pendingId)
    {
        return;
    }

    if (populateScratch.Count == 0)
    {
        return;
    }

    cache.Populate(pendingId, populateLandblockId, populateScratch.ToArray());
    populateScratch.Clear();
}
/// <summary>
/// Adds one instance to the per-frame group identified by
/// <paramref name="key"/>. The group is looked up in <c>_groups</c> and
/// created from the key's fields when absent; the world matrix and the
/// current entity's clip slot are then appended as a parallel pair.
/// Passed to <c>ApplyCacheHit</c> as its append delegate.
/// </summary>
private void AppendInstanceToGroup(GroupKey key, Matrix4x4 model)
{
    InstanceGroup target;
    if (_groups.TryGetValue(key, out var existing))
    {
        target = existing;
    }
    else
    {
        // First instance for this key this frame: materialize the group from the key.
        target = new InstanceGroup
        {
            Ibo = key.Ibo,
            FirstIndex = key.FirstIndex,
            BaseVertex = key.BaseVertex,
            IndexCount = key.IndexCount,
            BindlessTextureHandle = key.BindlessTextureHandle,
            TextureLayer = key.TextureLayer,
            Translucency = key.Translucency,
            CullMode = key.CullMode,
        };
        _groups[key] = target;
    }

    target.Matrices.Add(model);
    target.Slots.Add(_currentEntitySlot); // Phase U.4 — parallel to Matrices
}
/// <summary>
/// Slow-path classification of one mesh's batches. For each batch in
/// <paramref name="renderData"/>: picks a translucency kind, resolves the
/// bindless texture handle, builds the <c>GroupKey</c>, and appends
/// <paramref name="model"/> to the matching per-frame group.
/// </summary>
/// <param name="renderData">Mesh whose <c>Batches</c> are classified.</param>
/// <param name="gfxObjId">Id used together with the batch index for the <paramref name="metaTable"/> lookup.</param>
/// <param name="model">World matrix appended to every group this mesh contributes to.</param>
/// <param name="entity">Owning entity; consulted for palette overrides during texture resolution.</param>
/// <param name="meshRef">Mesh reference; consulted for surface overrides during texture resolution.</param>
/// <param name="palHash">Precomputed palette-override hash (0 when the entity has none).</param>
/// <param name="metaTable">Surface metadata; when it has an entry, its translucency wins over the batch flags.</param>
/// <param name="restPose">Entity-local transform recorded with each cached batch.</param>
/// <param name="collector">
/// When non-null, receives one <c>CachedBatch</c> per drawn batch so the
/// caller can populate the classification cache; null disables recording.
/// </param>
private void ClassifyBatches(
ObjectRenderData renderData,
ulong gfxObjId,
Matrix4x4 model,
WorldEntity entity,
MeshRef meshRef,
ulong palHash,
AcSurfaceMetadataTable metaTable,
Matrix4x4 restPose,
List? collector = null)
{
    for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
    {
        var batch = renderData.Batches[batchIdx];

        // Surface metadata is authoritative; fall back to the batch's own
        // additive/transparent flags when the table has no entry.
        TranslucencyKind translucency;
        if (metaTable.TryLookup(gfxObjId, batchIdx, out var meta))
        {
            translucency = meta.Translucency;
        }
        else
        {
            translucency = batch.IsAdditive ? TranslucencyKind.Additive
                : batch.IsTransparent ? TranslucencyKind.AlphaBlend
                : TranslucencyKind.Opaque;
        }

        // A zero handle means the texture could not be resolved: the batch
        // is neither drawn nor recorded for caching.
        ulong texHandle = ResolveTexture(entity, meshRef, batch, palHash);
        if (texHandle == 0) continue;

        // TextureLayer is always 0 for per-instance composites; non-zero when
        // WB atlas is adopted in N.6+ and batches reference a shared atlas layer.
        uint texLayer = 0;

        var key = new GroupKey(
            batch.IBO, batch.FirstIndex, (int)batch.BaseVertex,
            batch.IndexCount, texHandle, texLayer, translucency, batch.CullMode);

        // Route through the same helper the cache-hit path uses, so a group
        // is built from its key identically whether the instance arrives via
        // classification or via cache replay (also appends the clip slot).
        AppendInstanceToGroup(key, model);

        collector?.Add(new CachedBatch(key, texHandle, restPose));
    }
}
/// <summary>
/// Resolves the bindless texture handle for one batch, choosing the upload path from
/// the entity's palette override and the mesh's per-surface override.
/// </summary>
/// <returns>
/// The handle returned by the texture cache, or 0 when the batch's surface id is
/// 0 or 0xFFFFFFFF.
/// </returns>
private ulong ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
{
    uint surfaceId = batch.Key.SurfaceId;
    if (surfaceId == 0 || surfaceId == 0xFFFFFFFF)
    {
        return 0;
    }

    // Look for a per-surface replacement texture on the mesh reference.
    uint replacementTex = 0;
    var surfaceOverrides = meshRef.SurfaceOverrides;
    bool hasReplacement = surfaceOverrides is not null
        && surfaceOverrides.TryGetValue(surfaceId, out replacementTex);

    if (entity.PaletteOverride is null)
    {
        if (hasReplacement)
        {
            return _textures.GetOrUploadWithOrigTextureOverrideBindless(surfaceId, replacementTex);
        }

        return _textures.GetOrUploadBindless(surfaceId);
    }

    // Palette path: the replacement texture (if any) rides along as an optional argument.
    uint? optionalReplacement = hasReplacement ? replacementTex : (uint?)null;
    return _textures.GetOrUploadWithPaletteOverrideBindless(
        surfaceId, optionalReplacement, entity.PaletteOverride, palHash);
}
/// <summary>
/// Copies the sixteen components of <paramref name="m"/> into <paramref name="buf"/>
/// beginning at <paramref name="offset"/>, in M11, M12, … M44 order.
/// </summary>
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
    int cursor = offset;

    // Row 1
    buf[cursor++] = m.M11;
    buf[cursor++] = m.M12;
    buf[cursor++] = m.M13;
    buf[cursor++] = m.M14;

    // Row 2
    buf[cursor++] = m.M21;
    buf[cursor++] = m.M22;
    buf[cursor++] = m.M23;
    buf[cursor++] = m.M24;

    // Row 3
    buf[cursor++] = m.M31;
    buf[cursor++] = m.M32;
    buf[cursor++] = m.M33;
    buf[cursor++] = m.M34;

    // Row 4
    buf[cursor++] = m.M41;
    buf[cursor++] = m.M42;
    buf[cursor++] = m.M43;
    buf[cursor] = m.M44;
}
/// <summary>
/// Entity-set membership test. Phase U.1 (2026-05-30): with the two-pipe partition
/// deleted, the sole <c>EntitySet.All</c> member matches every entity, so this always
/// returns <c>true</c>. Retained as a seam for the unified pass to re-introduce
/// partitioning.
/// </summary>
private static bool EntityMatchesSet(WorldEntity entity, EntitySet set)
{
    // Both arguments are intentionally ignored while only one set exists.
    return true;
}
/// <summary>
/// Decides whether <paramref name="entity"/> survives the visible-cell filter.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>No filter (<paramref name="visibleCellIds"/> is null): always passes.</item>
/// <item>Entity has a parent cell: passes only if that cell id is in the filter.</item>
/// <item>Building shell under a shell-scoped set: passes only if it has an anchor cell id that is in the filter.</item>
/// <item>Anything else: passes.</item>
/// </list>
/// </remarks>
internal static bool EntityPassesVisibleCellGate(
    WorldEntity entity,
    HashSet? visibleCellIds,
    EntitySet set)
{
    if (visibleCellIds is null)
    {
        return true;
    }

    if (entity.ParentCellId is { } parentCellId)
    {
        return visibleCellIds.Contains(parentCellId);
    }

    bool gatedAsShell = IsShellScopedSet(set) && entity.IsBuildingShell;
    if (!gatedAsShell)
    {
        return true;
    }

    return entity.BuildingShellAnchorCellId is uint anchorCellId
        && visibleCellIds.Contains(anchorCellId);
}
/// <summary>
/// Reports whether <paramref name="set"/> is shell-scoped. Phase U.1 (2026-05-30):
/// the shell-scoped sets (IndoorPass / BuildingShells) were deleted with the two-pipe
/// machinery, and <c>EntitySet.All</c> is never shell-scoped, so this is always
/// <c>false</c>.
/// </summary>
private static bool IsShellScopedSet(EntitySet set)
{
    return false;
}
/// <summary>
/// Deletes the GL buffers and, if they were created, the GPU timer queries held by
/// this instance. Calls after the first are no-ops.
/// </summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    _gl.DeleteBuffer(_instanceSsbo);
    _gl.DeleteBuffer(_batchSsbo);
    _gl.DeleteBuffer(_indirectBuffer);

    // Phase U.3 buffers are only deleted when their name is non-zero.
    if (_clipSlotSsbo != 0)
    {
        _gl.DeleteBuffer(_clipSlotSsbo);
    }

    if (_fallbackClipRegionSsbo != 0)
    {
        _gl.DeleteBuffer(_fallbackClipRegionSsbo);
    }

    if (!_gpuQueriesInitialized)
    {
        return;
    }

    for (int ringSlot = 0; ringSlot < GpuQueryRingDepth; ringSlot++)
    {
        _gl.DeleteQuery(_gpuQueryOpaque[ringSlot]);
        _gl.DeleteQuery(_gpuQueryTransparent[ringSlot]);
    }
}
// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
//
// These are public so the pure-CPU unit tests in AcDream.Core.Tests can
// exercise BuildIndirectArrays without needing a GL context.
/// <summary>
/// Stride in bytes of one <c>DrawElementsIndirectCommand</c> in the indirect buffer:
/// five 32-bit fields, i.e. 20 bytes. Tests and callers reference this symbol instead
/// of hard-coding 20 so the stride is defined in exactly one place.
/// </summary>
public const int DrawCommandStride = 5 * sizeof(uint); // 5 × uint = 20 bytes
/// <summary>
/// Public view of the per-group inputs to <see cref="BuildIndirectArrays"/> — used in tests.
/// </summary>
/// <param name="IndexCount">Written (as uint) to the command's <c>Count</c>.</param>
/// <param name="FirstIndex">Written to the command's <c>FirstIndex</c>.</param>
/// <param name="BaseVertex">Written to the command's <c>BaseVertex</c>.</param>
/// <param name="InstanceCount">Written (as uint) to the command's <c>InstanceCount</c>.</param>
/// <param name="FirstInstance">Written (as uint) to the command's <c>BaseInstance</c>.</param>
/// <param name="TextureHandle">Copied into the group's batch record.</param>
/// <param name="TextureLayer">Copied into the group's batch record.</param>
/// <param name="Translucency">Selects the opaque or transparent section of the output arrays.</param>
/// <param name="CullMode">Copied into the optional cull-mode scratch array; defaults to <c>CullMode.CounterClockwise</c>.</param>
public readonly record struct IndirectGroupInput(
int IndexCount,
uint FirstIndex,
int BaseVertex,
int InstanceCount,
int FirstInstance,
ulong TextureHandle,
uint TextureLayer,
TranslucencyKind Translucency,
CullMode CullMode = CullMode.CounterClockwise);
/// <summary>
/// Public mirror of the per-group batch record uploaded to the SSBO.
/// Tests verify the layout. Same field shape as the private BatchData.
/// </summary>
/// <remarks>
/// Sequential layout: one 8-byte field followed by two 4-byte fields.
/// </remarks>
[StructLayout(LayoutKind.Sequential, Pack = 8)]
public struct BatchDataPublic
{
/// <summary>64-bit texture handle for the group.</summary>
public ulong TextureHandle;
/// <summary>Texture layer for the group.</summary>
public uint TextureLayer;
/// <summary>Flag bits; <see cref="BuildIndirectArrays"/> always writes 0 here.</summary>
public uint Flags;
}
/// <summary>Result of <see cref="BuildIndirectArrays"/>.</summary>
/// <param name="OpaqueCount">Number of groups written to the opaque section (indices [0, OpaqueCount)).</param>
/// <param name="TransparentCount">Number of groups written to the transparent section, which follows the opaque one.</param>
/// <param name="TransparentByteOffset">Byte offset of the first transparent command: OpaqueCount × <see cref="DrawCommandStride"/>.</param>
public readonly record struct IndirectLayoutResult(
int OpaqueCount,
int TransparentCount,
int TransparentByteOffset);
/// <summary>
/// Lays out the indirect commands and the parallel batch-data array contiguously:
/// opaque section first (caller sorts before calling), transparent section second.
/// Pure CPU, no GL state. Caller passes pre-sized scratch arrays.
/// </summary>
/// <remarks>
/// Classification: Opaque + ClipMap → opaque pass (ClipMap uses discard, not
/// blending). Everything else (AlphaBlend, Additive, InvAlpha) → transparent pass.
/// Within each section, groups keep the order they have in <paramref name="groups"/>.
/// </remarks>
/// <param name="groups">Groups to lay out.</param>
/// <param name="indirectScratch">Receives one draw command per group.</param>
/// <param name="batchScratch">Receives one batch record per group, at the same index as its command.</param>
/// <param name="cullScratch">Optional; when supplied, receives each group's cull mode at the same index.</param>
/// <returns>Section sizes and the byte offset of the transparent section.</returns>
public static IndirectLayoutResult BuildIndirectArrays(
    IReadOnlyList groups,
    DrawElementsIndirectCommand[] indirectScratch,
    BatchDataPublic[] batchScratch,
    CullMode[]? cullScratch = null)
{
    int groupTotal = groups.Count;

    // Pass 1: size the opaque section so the transparent cursor can start right after it.
    int opaqueTotal = 0;
    for (int i = 0; i < groupTotal; i++)
    {
        if (IsOpaque(groups[i].Translucency))
        {
            opaqueTotal++;
        }
    }

    int transparentTotal = groupTotal - opaqueTotal;

    // Pass 2: scatter each group into its section.
    int opaqueCursor = 0;                // fills [0..opaqueTotal)
    int transparentCursor = opaqueTotal; // fills [opaqueTotal..end)
    for (int i = 0; i < groupTotal; i++)
    {
        var group = groups[i];

        var command = new DrawElementsIndirectCommand
        {
            Count = (uint)group.IndexCount,
            InstanceCount = (uint)group.InstanceCount,
            FirstIndex = group.FirstIndex,
            BaseVertex = group.BaseVertex,
            BaseInstance = (uint)group.FirstInstance,
        };
        var batchRecord = new BatchDataPublic
        {
            TextureHandle = group.TextureHandle,
            TextureLayer = group.TextureLayer,
            Flags = 0,
        };

        int slot = IsOpaque(group.Translucency) ? opaqueCursor++ : transparentCursor++;

        indirectScratch[slot] = command;
        batchScratch[slot] = batchRecord;
        if (cullScratch is not null)
        {
            cullScratch[slot] = group.CullMode;
        }
    }

    return new IndirectLayoutResult(opaqueTotal, transparentTotal, opaqueTotal * DrawCommandStride);
}
/// <summary>
/// Public test shim for <see cref="IsOpaque"/>. Locks in the N.5 Decision 2
/// translucency partition: Opaque + ClipMap → opaque indirect; AlphaBlend +
/// Additive + InvAlpha → transparent indirect.
/// </summary>
public static bool IsOpaquePublic(TranslucencyKind t)
{
    return IsOpaque(t);
}
/// <summary>
/// Returns <c>true</c> for the kinds drawn in the opaque pass (Opaque and ClipMap);
/// every other kind belongs to the transparent pass.
/// </summary>
private static bool IsOpaque(TranslucencyKind t)
{
    if (t == TranslucencyKind.Opaque)
    {
        return true;
    }

    return t == TranslucencyKind.ClipMap;
}
// ────────────────────────────────────────────────────────────────────────
/// <summary>
/// Thin wrapper around an instance's rate-limit dictionary + frame counter, passed
/// into the static overload so it can emit rate-limited probe lines without access
/// to instance fields. Null = probes disabled (test-friendly overload).
/// </summary>
internal sealed class IndoorProbeState
{
    private const int RateLimit = IndoorProbeRateLimitFrames;

    private readonly Dictionary _lastFrame;
    private readonly int _currentFrame;

    /// <summary>
    /// Captures the shared last-emit dictionary and the frame number this state
    /// represents.
    /// </summary>
    internal IndoorProbeState(Dictionary lastFrame, int currentFrame)
    {
        _lastFrame = lastFrame;
        _currentFrame = currentFrame;
    }

    /// <summary>
    /// Returns true at most once per <see cref="RateLimit"/> frames per
    /// <paramref name="cellId"/>. Side-effect: stamps the frame number into the
    /// dictionary on success.
    /// </summary>
    internal bool ShouldEmit(ulong cellId)
    {
        bool seenBefore = _lastFrame.TryGetValue(cellId, out int previousFrame);
        if (seenBefore && _currentFrame - previousFrame < RateLimit)
        {
            // Emitted too recently for this cell; stay quiet.
            return false;
        }

        _lastFrame[cellId] = _currentFrame;
        return true;
    }
}
/// <summary>
/// Accumulator for one instanced draw group: the draw parameters that identify the
/// group plus the parallel per-instance lists (<see cref="Matrices"/> and
/// <see cref="Slots"/>) that grow by one entry per appended instance.
/// </summary>
private sealed class InstanceGroup
{
public uint Ibo;
public uint FirstIndex;
public int BaseVertex;
public int IndexCount;
public ulong BindlessTextureHandle; // 64-bit (was uint TextureHandle in N.4)
public uint TextureLayer; // 0 for per-instance composites; non-zero when WB atlas is adopted in N.6+
public TranslucencyKind Translucency;
public CullMode CullMode;
public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
public int InstanceCount;
public float SortDistance; // squared distance from camera to first instance, for opaque sort
// One model matrix per instance appended to this group.
public readonly List Matrices = new();
// Phase U.4: per-instance clip-slot index, parallel to Matrices (Slots[i]
// is the binding=2 CellClip slot for the instance whose matrix is
// Matrices[i]). At layout time the dispatcher writes Slots[i] into
// _clipSlotData at the same cursor it writes Matrices[i] into _instanceData,
// so the binding=3 instanceClipSlot[] tracks the binding=0 instance.
public readonly List Slots = new();
}
}