using System; using System.Collections.Generic; using System.Numerics; using System.Runtime.InteropServices; using AcDream.Core.Meshing; using AcDream.Core.Rendering; using AcDream.Core.Terrain; using AcDream.Core.World; using DatReaderWriter.Enums; using Silk.NET.OpenGL; namespace AcDream.App.Rendering.Wb; /// /// Draws entities using WB's (a single global /// VAO/VBO/IBO under modern rendering) with acdream's /// for bindless texture resolution and for /// translucency classification. /// /// /// Atlas-tier entities (ServerGuid == 0): mesh data comes from WB's /// via . /// Textures resolve through the bindless-suffixed /// variants, returning 64-bit /// resident handles stored in the per-group SSBO. /// /// /// /// Per-instance-tier entities (ServerGuid != 0): mesh data also from /// WB, but textures resolve through /// with palette /// and surface overrides applied. is currently /// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges + /// GfxObjDegradeResolver (Issue #47 close-detail mesh) into MeshRefs. /// /// /// /// GL strategy (N.5 — mandatory): glMultiDrawElementsIndirect with SSBOs /// and GL_ARB_bindless_texture + GL_ARB_shader_draw_parameters. /// All visible (entity, batch) pairs are bucketed by ; /// each group becomes one DrawElementsIndirectCommand. Three GPU buffers /// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch /// metadata/texture handles (SSBO binding 1), and the indirect draw commands. /// Two glMultiDrawElementsIndirect calls cover the opaque and transparent /// passes respectively — one GL call per pass regardless of group count. /// /// /// /// Shader: mesh_modern (bindless + gl_DrawIDARB / /// gl_BaseInstanceARB). Missing bindless/draw-parameters throws /// at startup — there is no legacy fallback. 
///
/// Modern rendering assumption: WB's _useModernRendering path (GL
/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses
/// FirstIndex + BaseVertex per batch. The dispatcher honors those
/// offsets inside each DrawElementsIndirectCommand via
/// glMultiDrawElementsIndirect.
///
public sealed unsafe class WbDrawDispatcher : IDisposable
{
/// <summary>
/// Which subset of entities to walk in a single Draw call.
/// </summary>
/// <remarks>
/// Phase U.1 (2026-05-30): the indoor/outdoor two-pipe split (IndoorPass /
/// OutdoorScenery / BuildingShells / LiveDynamic) was deleted along with the
/// inside-out render machinery. <see cref="All"/> is the sole remaining
/// member; the unified retail-faithful pass (Phase U) draws every entity in
/// one path. The <c>set:</c> parameter is retained on the Draw overloads so
/// the unified pass can re-introduce partitioning later without re-threading
/// the call sites.
/// </remarks>
public enum EntitySet
{
    /// <summary>Every entity walked, gated only by the existing
    /// ParentCellId ∈ visibleCellIds filter.</summary>
    All,
}

// Collaborators handed in through the constructor; none are reassigned afterwards.
private readonly GL _gl;
private readonly Shader _shader;
private readonly TextureCache _textures;
private readonly WbMeshAdapter _meshAdapter;
private readonly EntitySpawnAdapter _entitySpawnAdapter;
private readonly BindlessSupport _bindless;

/// <summary>
/// Immutable bundle of counters describing one draw pass. The values are
/// produced outside this part of the file; the component names are the only
/// contract visible here.
/// </summary>
public readonly record struct DrawStats(
    EntitySet Set,
    int EntitiesWalked,
    int MeshRefs,
    int Instances,
    int Draws,
    int CullRuns,
    int OpaqueDraws,
    int TransparentDraws,
    long Triangles);

/// <summary>Most recently recorded <see cref="DrawStats"/>; only this class can set it.</summary>
public DrawStats LastDrawStats { get; private set; }

// Tier 1 cache (#53): per-entity classification results for static
// entities (those NOT in GameWindow._animatedEntities). Wired here in
// Task 7 for plumbing only — Tasks 9-10 wire the per-entity
// miss-populate / hit-fast-path through the loop.
private readonly EntityClassificationCache _cache;

// ACDREAM_DISABLE_TIER1_CACHE=1 A/B diagnostic — forces every static
// entity through the slow path. Read once, when the instance is constructed
// (the flag below is a readonly field initializer).
private readonly bool _tier1CacheDisabled =
    string.Equals(
        Environment.GetEnvironmentVariable("ACDREAM_DISABLE_TIER1_CACHE"),
        "1",
        StringComparison.Ordinal);

/// <summary>
/// A.5 T22.5: gate for GL_SAMPLE_ALPHA_TO_COVERAGE around the opaque pass.
/// Default true matches T20 behavior. Set false for Low/Medium presets that
/// have MsaaSamples=0 (A2C is a no-op without MSAA, but turning it off
/// avoids the unnecessary GL state thrash and is cleaner diagnostics).
/// The setter is public, so the value can be toggled mid-session.
/// </summary>
public bool AlphaToCoverage { get; set; } = true;

// SSBO buffer ids
private uint _instanceSsbo;
private uint _batchSsbo;
private uint _indirectBuffer;

// Phase U.3: per-instance clip-slot SSBO (binding=3), parallel to
// _instanceSsbo. One uint per instance selecting its CellClip slot. In U.3
// this is ALL ZEROS (every instance → slot 0 → no-clip), so the render is
// identical to pre-U.3. U.4 populates real slot indices.
private uint _clipSlotSsbo;
private uint[] _clipSlotData = new uint[256];

// Phase U.3: the SHARED per-cell clip-region SSBO (binding=2), owned by the
// GameWindow-level ClipFrame and handed to us via SetClipRegionSsbo. When 0
// (not yet wired), we bind our OWN fallback no-clip region buffer below so the
// shader never reads an unbound SSBO. The fallback holds exactly slot 0
// (count 0 = pass-all), matching ClipFrame.NoClip's slot 0.
private uint _sharedClipRegionSsbo;
private uint _fallbackClipRegionSsbo;

// Phase U.4: per-frame clip-slot routing handed in via SetClipRouting before
// each Draw. When _clipRoutingActive is false (the U.3 path / outdoor root /
// no portal frame), every instance maps to slot 0 (no-clip) and no instance is
// culled — identical to U.3. When active, each instance's slot is resolved by
// ResolveEntitySlot per the U.4 policy (live-dynamic unclipped; cell statics to
// their cell slot; outdoor scenery to the OutsideView slot; non-visible culled).
private bool _clipRoutingActive;

// Full cell id → CellClip slot. Keyed by uint and valued by int because the
// resolver looks it up with a uint parent-cell id and reads an int slot.
private IReadOnlyDictionary<uint, int>? _cellIdToSlot;
private int _outdoorSlot;
private bool _outdoorVisible;

// Phase U.4: the clip slot of the entity currently being classified in Draw's
// per-entity loop. Set once per entity (before ClassifyBatches / ApplyCacheHit),
// read by the two matrix-append sites (AppendInstanceToGroup + ClassifyBatches)
// so every group's Slots[] stays in lockstep with its Matrices[]. Defaults to 0
// (no-clip) on the U.3 / outdoor path.
private uint _currentEntitySlot;

// Phase U.4: true when the current entity resolved to the CULL sentinel
// (cell not visible, or outdoor stab while no outdoors is visible). Persisted
// across the entity's tuples; the per-tuple body skips all instance emission.
private bool _currentEntityCulled;

// Per-frame scratch arrays — Tasks 9-10 fully wire these.
private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
private BatchData[] _batchData = new BatchData[256];
private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];
private CullMode[] _drawCullModes = new CullMode[256];
private int _opaqueDrawCount;
private int _transparentDrawCount;
private int _transparentByteOffset;

// std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
// at offset 8, uint Flags at offset 12. Total 16 bytes.
// Pack=8 (not 4) because std430's uvec2 requires 8-byte alignment — Pack=4
// works today by accident (TextureHandle is the first field, so offset 0 is
// always 8-byte aligned), but adding a 4-byte field before TextureHandle
// without bumping Pack would silently misalign the GPU struct.
[StructLayout(LayoutKind.Sequential, Pack = 8)]
private struct BatchData
{
    public ulong TextureHandle; // bindless handle (uvec2 in GLSL)
    public uint TextureLayer;
    public uint Flags;
}

// Per-frame scratch — reused across frames to avoid per-frame allocation.
// NOTE(review): the generic type arguments of _groups, _opaqueDraws,
// _translucentDraws and _populateScratch are missing from this copy of the file
// and cannot be inferred from the code visible here — restore them from
// version control before building.
private readonly Dictionary _groups = new();
private readonly List _opaqueDraws = new();
private readonly List _translucentDraws = new();

// A.5 T26 follow-up (Bug B): WalkEntities populates this scratch list
// instead of allocating a fresh List<(WorldEntity, int)> per frame. At
// ~10K entities × ~3 mesh refs = ~30K tuples × 16 bytes = ~480 KB / frame
// of GC pressure on the render thread under the original T17 shape.
private readonly List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> _walkScratch = new();

// Tier 1 cache (#53) — per-entity classification collector. Reused across
// frames; cleared at flush time when the per-entity loop crosses an entity
// boundary in _walkScratch (and once more at end-of-loop for the last
// entity). _walkScratch is in entity-order, so all MeshRefs of one entity
// are contiguous — accumulate them all before flushing one Populate call.
// Animated entities skip this scratch entirely (collector = null).
private readonly List _populateScratch = new();

// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
private const float PerEntityCullRadius = 5.0f;

private bool _disposed;

/// <summary>
/// Per-cell-entity last-log frame number for rate-limiting the
/// [indoor-walk] / [indoor-lookup] / [indoor-xform] / [indoor-cull]
/// probes. Defaults to 30 frames at 30Hz = 1 sec.
/// </summary>
/// <remarks>
/// Keyed by the ulong probe id and valued by the int frame counter, matching
/// how <see cref="ShouldEmitIndoorProbe"/> reads and writes it.
/// </remarks>
private readonly Dictionary<ulong, int> _lastIndoorProbeFrame = new();
private int _indoorProbeFrameCounter;
private const int IndoorProbeRateLimitFrames = 30;

/// <summary>
/// Returns true at most once per <see cref="IndoorProbeRateLimitFrames"/>
/// frames per cellId. Caller must already have checked that an indoor
/// probe flag is enabled.
/// </summary>
/// <param name="cellId">Rate-limit key; each distinct key is throttled independently.</param>
/// <returns>
/// True when the key has never emitted, or last emitted at least
/// <see cref="IndoorProbeRateLimitFrames"/> frames ago. In that case the current
/// frame counter is recorded as the key's new last-emit frame.
/// </returns>
private bool ShouldEmitIndoorProbe(ulong cellId)
{
    bool seenBefore = _lastIndoorProbeFrame.TryGetValue(cellId, out int lastFrame);
    if (seenBefore && _indoorProbeFrameCounter - lastFrame < IndoorProbeRateLimitFrames)
    {
        // Still inside the quiet window for this key.
        return false;
    }

    _lastIndoorProbeFrame[cellId] = _indoorProbeFrameCounter;
    return true;
}

// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
private int _entitiesSeen;
private int _entitiesDrawn;
private int _meshesMissing;
private int _drawsIssued;
private int _instancesIssued;
private long _lastLogTick;

// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
private readonly long[] _cpuSamples = new long[256]; // microseconds
private int _cpuSampleCursor;

// GPU timing uses a ring of 3 query-pair slots so the read of frame N-3's
// result lands when the GPU has finished (~50ms after issue on a typical
// 60fps frame). Ring of 3 is the vendor-neutral choice: NVIDIA drivers with
// triple-buffering+vsync can queue ~3 frames ahead, AMD typically 1-2,
// Intel iGPUs vary. ResultAvailable is the safety guard if the GPU is
// still working when we try to read.
private const int GpuQueryRingDepth = 3;
private readonly uint[] _gpuQueryOpaque = new uint[GpuQueryRingDepth];
private readonly uint[] _gpuQueryTransparent = new uint[GpuQueryRingDepth];
private int _gpuQueryFrameIndex;
private readonly long[] _gpuSamples = new long[256]; // microseconds
private int _gpuSampleCursor;
private bool _gpuQueriesInitialized;

// Constructor accessibility is internal because EntityClassificationCache
// is internal — a public ctor with an internal-typed parameter would be
// an inconsistent-accessibility error. The dispatcher is constructed
// exclusively from GameWindow (same assembly), so internal is fine.
internal WbDrawDispatcher(
    GL gl,
    Shader shader,
    TextureCache textures,
    WbMeshAdapter meshAdapter,
    EntitySpawnAdapter entitySpawnAdapter,
    BindlessSupport bindless,
    EntityClassificationCache classificationCache)
{
    // Validate every collaborator up front. The bindless check stays last so
    // the argument reported when several are null is the same as before.
    ArgumentNullException.ThrowIfNull(gl);
    ArgumentNullException.ThrowIfNull(shader);
    ArgumentNullException.ThrowIfNull(textures);
    ArgumentNullException.ThrowIfNull(meshAdapter);
    ArgumentNullException.ThrowIfNull(entitySpawnAdapter);
    ArgumentNullException.ThrowIfNull(classificationCache);
    ArgumentNullException.ThrowIfNull(bindless);

    _gl = gl;
    _shader = shader;
    _textures = textures;
    _meshAdapter = meshAdapter;
    _entitySpawnAdapter = entitySpawnAdapter;
    _cache = classificationCache;
    _bindless = bindless;

    // One GL buffer name per per-frame upload target, generated in a fixed order.
    _instanceSsbo = _gl.GenBuffer();
    _batchSsbo = _gl.GenBuffer();
    _indirectBuffer = _gl.GenBuffer();
    _clipSlotSsbo = _gl.GenBuffer(); // Phase U.3 binding=3
}

/// <summary>
/// Phase U.3: hand the dispatcher the SHARED per-cell clip-region SSBO
/// (binding=2) that the GameWindow-level ClipFrame created. The
/// dispatcher re-binds it to binding=2 immediately before each MDI so a
/// consumer that touched binding=2 in between can't leave it pointing
/// elsewhere. Pass 0 to fall back to the internal no-clip region buffer.
/// </summary>
/// <param name="sharedClipRegionSsbo">GL buffer id of the shared clip-region SSBO, or 0.</param>
public void SetClipRegionSsbo(uint sharedClipRegionSsbo)
{
    _sharedClipRegionSsbo = sharedClipRegionSsbo;
}

/// <summary>
/// Phase U.4: install the per-frame clip-slot routing for an INDOOR root.
/// Call once per frame BEFORE <c>Draw</c> when the camera's root cell is
/// non-null; the next <c>Draw</c> resolves each instance's binding=3
/// clip slot via the U.4 policy (live-dynamic unclipped, cell statics to their
/// cell slot, outdoor scenery to the OutsideView slot, non-visible culled).
/// Pair with <c>ClearClipRouting</c> on outdoor-root frames so the
/// dispatcher reverts to the U.3 no-clip-everything behavior.
/// </summary>
/// <param name="cellIdToSlot">cellId → CellClip slot. A cell absent from the map
/// is NOT visible → its cell-static instances are culled.</param>
/// <param name="outdoorSlot">Slot for outdoor scenery / building shells while
/// indoors (the OutsideView slot, or 0 for no-clip over-include).</param>
/// <param name="outdoorVisible">False ⇒ cull outdoor scenery / shells this frame
/// (the OutsideView is empty).</param>
/// <exception cref="ArgumentNullException">The cell-to-slot map is null.</exception>
public void SetClipRouting(IReadOnlyDictionary<uint, int> cellIdToSlot, int outdoorSlot, bool outdoorVisible)
{
    ArgumentNullException.ThrowIfNull(cellIdToSlot);

    _clipRoutingActive = true;
    _cellIdToSlot = cellIdToSlot;
    _outdoorSlot = outdoorSlot;
    _outdoorVisible = outdoorVisible;
}

/// <summary>
/// Phase U.4: revert to U.3 behavior — every instance maps to slot 0 (no-clip),
/// nothing is culled by clip routing. Call on outdoor-root frames (camera
/// outdoors) and any frame without a portal-visibility result.
/// </summary>
public void ClearClipRouting()
{
    _clipRoutingActive = false;
    _cellIdToSlot = null;
    _outdoorSlot = 0;
    _outdoorVisible = false;
}

// Phase U.4 CULL sentinel returned by ResolveEntitySlot: the entity's instances
// are dropped entirely (not emitted into the binding=0 instance buffer NOR the
// binding=3 slot buffer), matching the existing frustum / visible-cell cull.
// Internal (not private) so the clip-slot unit tests can assert against it
// directly — see WbDrawDispatcherClipSlotTests.
internal const int ClipSlotCull = -1;

/// <summary>
/// Phase U.4: resolve the clip slot for one entity per the slot/gate policy.
/// Returns <see cref="ClipSlotCull"/> to drop the entity's instances entirely.
/// <list type="bullet">
/// <item>ServerGuid != 0 (live dynamic: player / NPC / items / doors) ⇒ slot 0
/// (UNCLIPPED — retail draws live-dynamic unclipped; depth only).</item>
/// <item>ParentCellId != null (cell static) ⇒ the cell's slot, or CULL when the
/// cell isn't in the cell-to-slot map (not visible / nothing-visible).</item>
/// <item>ParentCellId == null (outdoor scenery / building shell) ⇒ the OutsideView
/// slot when the outdoor-visible flag is set, else CULL.</item>
/// </list>
/// Only called when _clipRoutingActive (indoor root). On the U.3 / outdoor
/// path every instance is slot 0 and nothing is culled — see
/// <c>ResolveSlotForFrame</c>, which gates on that flag.
/// </summary>
/// <remarks>
/// INVARIANT: <paramref name="parentCellId"/> and the keys of
/// <paramref name="cellIdToSlot"/> MUST live in the same FULL cell-id space
/// (lbMask | OtherCellId, e.g. 0xA9B40164). A bare-low-byte
/// ParentCellId (e.g. 0x64) would never match a full-id key and would
/// silently CULL every indoor stab — cf. the L.2e bare-low-byte finding in
/// CLAUDE.md where player CellId was tracked without its landblock prefix.
///
/// internal static + pure (reads no instance state) so the clip-slot
/// unit tests exercise every branch without a GL context. The caller hands in
/// the routing fields it would otherwise read from _cellIdToSlot etc.
/// </remarks>
/// <param name="serverGuid">Non-zero marks a live-dynamic entity; 0 means dat-hydrated.</param>
/// <param name="parentCellId">Owning cell id, or null for outdoor scenery / building shells.</param>
/// <param name="cellIdToSlot">Visible cell id → clip slot; only consulted for cell statics.</param>
/// <param name="outdoorSlot">Slot returned for outdoor entities when <paramref name="outdoorVisible"/> is true.</param>
/// <param name="outdoorVisible">Whether outdoor entities are drawn at all this frame.</param>
/// <returns>The clip slot, or <c>ClipSlotCull</c> when the entity must be dropped.</returns>
internal static int ResolveEntitySlot(
    uint serverGuid,
    uint? parentCellId,
    IReadOnlyDictionary<uint, int> cellIdToSlot,
    int outdoorSlot,
    bool outdoorVisible)
{
    // Live-dynamic entities render unclipped regardless of cell — retail draws
    // the player / NPCs / dropped items through the depth buffer without portal
    // clipping. ServerGuid is the live-dynamic marker (0 for dat-hydrated).
    if (serverGuid != 0)
    {
        return 0;
    }

    if (parentCellId is uint parentCell)
    {
        return cellIdToSlot.TryGetValue(parentCell, out int slot) ? slot : ClipSlotCull;
    }

    // Outdoor scenery / building shell (no ParentCellId). Indoor root: gate to
    // the OutsideView slot, or cull when nothing outdoors is visible.
    return outdoorVisible ? outdoorSlot : ClipSlotCull;
}

/// <summary>
/// Phase U.4: the call-site clip-slot decision for one entity, returning the
/// (Slot, Culled) pair the per-entity loop body consumes. Wraps
/// <see cref="ResolveEntitySlot"/> with the <paramref name="clipRoutingActive"/>
/// gate: when routing is INACTIVE (outdoor root / no portal frame), every entity
/// is slot 0 and nothing is clip-culled — the bit-identical-to-U.3 property, so
/// the resolver (and <paramref name="cellIdToSlot"/>) is bypassed entirely.
/// When active, a CULL sentinel maps to (0, culled=true) — the slot value
/// is never emitted for a culled entity.
/// </summary>
/// <remarks>
/// internal static + pure so the whole policy (including the routing-
/// inactive branch) is unit-testable — see WbDrawDispatcherClipSlotTests.
/// Any negative resolved slot is treated as culled, not only the sentinel: a
/// negative value can never be a valid slot index, and converting it to uint
/// would wrap to a huge index.
/// </remarks>
/// <param name="clipRoutingActive">False short-circuits to (0, not culled).</param>
/// <param name="serverGuid">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <param name="parentCellId">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <param name="cellIdToSlot">Must be non-null whenever routing is active and a cell static is resolved.</param>
/// <param name="outdoorSlot">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
/// <param name="outdoorVisible">Forwarded to <see cref="ResolveEntitySlot"/>.</param>
internal static (uint Slot, bool Culled) ResolveSlotForFrame(
    bool clipRoutingActive,
    uint serverGuid,
    uint? parentCellId,
    IReadOnlyDictionary<uint, int>? cellIdToSlot,
    int outdoorSlot,
    bool outdoorVisible)
{
    if (!clipRoutingActive)
    {
        return (0u, false);
    }

    int resolved = ResolveEntitySlot(serverGuid, parentCellId, cellIdToSlot!, outdoorSlot, outdoorVisible);

    // ClipSlotCull is -1; treating every negative value as cull keeps a stray
    // negative slot from being reinterpreted as an enormous unsigned index.
    bool culled = resolved < 0;
    return (culled ? 0u : (uint)resolved, culled);
}

/// <summary>
/// Composes a part's world matrix as <c>restPose * animOverride * entityWorld</c>.
/// Under System.Numerics' row-vector convention that applies the rest pose
/// first, then the animation override, then the entity's world transform.
/// </summary>
public static Matrix4x4 ComposePartWorldMatrix(
    Matrix4x4 entityWorld,
    Matrix4x4 animOverride,
    Matrix4x4 restPose)
    => restPose * animOverride * entityWorld;

/// <summary>
/// Entry for per-landblock iteration.
/// Mirrors the shape yielded by GpuWorldState.LandblockEntries.
/// </summary>
/// <param name="LandblockId">Id of the landblock.</param>
/// <param name="AabbMin">Minimum corner of the landblock's bounding box.</param>
/// <param name="AabbMax">Maximum corner of the landblock's bounding box.</param>
/// <param name="Entities">Every entity in the landblock.</param>
/// <param name="AnimatedById">Entity id → entity lookup used when the landblock is
/// frustum-culled; may be null.</param>
public readonly record struct LandblockEntry(
    uint LandblockId,
    Vector3 AabbMin,
    Vector3 AabbMax,
    IReadOnlyList<WorldEntity> Entities,
    IReadOnlyDictionary<uint, WorldEntity>? AnimatedById);

/// <summary>
/// Result of <c>WalkEntities</c> — the list of (entity, meshRef index)
/// pairs that passed all visibility filters, plus a diagnostic walk count.
/// </summary>
public struct WalkResult
{
    /// <summary>Number of entities that passed every filter.</summary>
    public int EntitiesWalked;

    /// <summary>Shell-scoped building shells that passed the visible-cell gate.</summary>
    public int BuildingShellAnchorPass;

    /// <summary>Shell-scoped building shells rejected by the visible-cell gate.</summary>
    public int BuildingShellAnchorReject;

    /// <summary>One tuple per (entity, MeshRef index), grouped by entity.</summary>
    public List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> ToDraw;
}

/// <summary>
/// Pure-CPU visibility filter over the supplied landblock entries.
/// Separated from <c>Draw</c> so tests can exercise it without GL state.
/// </summary>
/// <remarks>
/// A.5 T17 Change #1: when an LB is frustum-culled AND
/// the animated-entity id set is non-empty, the OLD path walked
/// every entity in the LB just to find the few animated ones. This helper
/// fixes that: if the LB is invisible, we iterate the animated ids
/// directly and look each up in
/// entry.AnimatedById (typically fewer than 50 animated, up to ~10K total).
///
/// A.5 T18 Change #2: per-entity AABB cull reads from the cached
/// entity AabbMin / AabbMax
/// (refreshed lazily if AabbDirty), instead of
/// recomputing Position±5 each frame.
///
/// Test-friendly overload that allocates a fresh ToDraw list per call.
/// Production code (<c>Draw</c>) uses the no-alloc overload below
/// with a caller-provided scratch list.
/// </remarks>
internal static WalkResult WalkEntities(
    IEnumerable<LandblockEntry> landblockEntries,
    FrustumPlanes? frustum,
    uint? neverCullLandblockId,
    HashSet<uint>? visibleCellIds,
    HashSet<uint>? animatedEntityIds)
{
    var scratch = new List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)>();
    var result = new WalkResult { ToDraw = scratch };

    WalkEntitiesInto(
        landblockEntries,
        frustum,
        neverCullLandblockId,
        visibleCellIds,
        animatedEntityIds,
        scratch,
        ref result);

    return result;
}

/// <summary>
/// No-alloc overload: clears + populates the caller-provided
/// <paramref name="scratch"/> list. <c>Draw</c> reuses a per-dispatcher scratch
/// field across frames to avoid the 480+ KB / frame GC pressure that the
/// test-friendly overload incurs.
/// Returns walk count via <paramref name="result"/>'s EntitiesWalked field.
/// </summary>
/// <remarks>
/// When <paramref name="indoorProbeState"/> is non-null the method emits
/// [indoor-cull] lines for cell entities rejected by the
/// visibleCellIds or frustum filters, and [indoor-walk] lines for
/// cell entities that pass all filters. Rate-limited by
/// <c>IndoorProbeState.ShouldEmit</c>. Pass null (the default)
/// to disable all probe emission — used by the test-friendly
/// <see cref="WalkEntities"/> overload.
///
/// All three counters on <paramref name="result"/> are reset on entry, so a
/// reused result never carries counts over from an earlier walk.
/// </remarks>
/// <param name="landblockEntries">Landblocks to walk, in emission order.</param>
/// <param name="frustum">Culling frustum; null disables both landblock and per-entity culling.</param>
/// <param name="neverCullLandblockId">Landblock that is always treated as visible, if any.</param>
/// <param name="visibleCellIds">Forwarded to the visible-cell gate; may be null.</param>
/// <param name="animatedEntityIds">Ids of animated entities; these bypass the per-entity AABB cull.</param>
/// <param name="scratch">Cleared, then filled with one tuple per (entity, MeshRef index).</param>
/// <param name="result">Receives the counters and a reference to <paramref name="scratch"/>.</param>
/// <param name="indoorProbeState">Rate limiter for the indoor probes, or null for no probes.</param>
/// <param name="set">Entity subset to walk.</param>
internal static void WalkEntitiesInto(
    IEnumerable<LandblockEntry> landblockEntries,
    FrustumPlanes? frustum,
    uint? neverCullLandblockId,
    HashSet<uint>? visibleCellIds,
    HashSet<uint>? animatedEntityIds,
    List<(WorldEntity Entity, int MeshRefIndex, uint LandblockId)> scratch,
    ref WalkResult result,
    IndoorProbeState? indoorProbeState = null,
    EntitySet set = EntitySet.All)
{
    scratch.Clear();
    result.EntitiesWalked = 0;
    result.BuildingShellAnchorPass = 0;
    result.BuildingShellAnchorReject = 0;
    result.ToDraw = scratch;

    foreach (var entry in landblockEntries)
    {
        bool landblockVisible = frustum is null
            || entry.LandblockId == neverCullLandblockId
            || FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);

        if (!landblockVisible)
        {
            // A.5 T17 Change #1: walk only animated entities, not all entities.
            // Avoids O(N_entities) scan when only O(N_animated) work is needed.
            if (animatedEntityIds is null || animatedEntityIds.Count == 0)
            {
                continue;
            }

            if (entry.AnimatedById is null)
            {
                continue;
            }

            foreach (var animatedId in animatedEntityIds)
            {
                if (!entry.AnimatedById.TryGetValue(animatedId, out var entity))
                {
                    continue;
                }

                // Phase A8: EntitySet partition for indoor/outdoor split passes.
                if (!EntityMatchesSet(entity, set))
                {
                    continue;
                }

                if (entity.MeshRefs.Count == 0)
                {
                    continue;
                }

                bool shellScoped = IsShellScopedSet(set)
                    && entity.IsBuildingShell
                    && visibleCellIds is not null;

                if (!EntityPassesVisibleCellGate(entity, visibleCellIds, set))
                {
                    if (shellScoped)
                    {
                        result.BuildingShellAnchorReject++;
                    }

                    continue;
                }

                if (shellScoped)
                {
                    result.BuildingShellAnchorPass++;
                }

                result.EntitiesWalked++;
                for (int i = 0; i < entity.MeshRefs.Count; i++)
                {
                    scratch.Add((entity, i, entry.LandblockId));
                }
            }

            continue;
        }

        foreach (var entity in entry.Entities)
        {
            // Phase A8: EntitySet partition for indoor/outdoor split passes.
            if (!EntityMatchesSet(entity, set))
            {
                continue;
            }

            if (entity.MeshRefs.Count == 0)
            {
                continue;
            }

            // Detect cell entity for indoor probes — first MeshRef.GfxObjId
            // is an EnvCell id (low 16 bits ≥ 0x0100). Cheap to compute;
            // result reused for all probe checks below.
            ulong cellProbeId = (ulong)entity.MeshRefs[0].GfxObjId;
            bool isCellEntity = indoorProbeState is not null
                && RenderingDiagnostics.IsEnvCellId(cellProbeId);

            bool shellScoped = IsShellScopedSet(set)
                && entity.IsBuildingShell
                && visibleCellIds is not null;

            bool cellInVis = EntityPassesVisibleCellGate(entity, visibleCellIds, set);
            if (!cellInVis)
            {
                if (shellScoped)
                {
                    result.BuildingShellAnchorReject++;
                }

                if (isCellEntity
                    && RenderingDiagnostics.ProbeIndoorCullEnabled
                    && indoorProbeState!.ShouldEmit(cellProbeId))
                {
                    // ParentCellId is coalesced (as in the [indoor-walk] probe) so a
                    // rejected entity without a parent cell logs 0 instead of throwing.
                    Console.WriteLine(
                        $"[indoor-cull] cellEnt=0x{entity.Id:X8} " +
                        $"reason=visibleCellIds-miss " +
                        $"parentCell=0x{(entity.ParentCellId ?? 0u):X8}");
                }

                continue;
            }

            if (shellScoped)
            {
                result.BuildingShellAnchorPass++;
            }

            // Per-entity AABB frustum cull (perf #3). Animated entities bypass —
            // they're tracked at landblock level + need per-frame work regardless.
            // A.5 T18 Change #2: read cached AABB, refresh lazily on AabbDirty.
            bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
            bool aabbVisible = true;
            if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
            {
                if (entity.AabbDirty)
                {
                    entity.RefreshAabb();
                }

                aabbVisible = FrustumCuller.IsAabbVisible(frustum.Value, entity.AabbMin, entity.AabbMax);
            }

            if (!aabbVisible)
            {
                if (isCellEntity
                    && RenderingDiagnostics.ProbeIndoorCullEnabled
                    && indoorProbeState!.ShouldEmit(cellProbeId))
                {
                    Console.WriteLine(
                        $"[indoor-cull] cellEnt=0x{entity.Id:X8} " +
                        $"reason=frustum " +
                        $"aabbMin=({entity.AabbMin.X:F1},{entity.AabbMin.Y:F1},{entity.AabbMin.Z:F1}) " +
                        $"aabbMax=({entity.AabbMax.X:F1},{entity.AabbMax.Y:F1},{entity.AabbMax.Z:F1})");
                }

                continue;
            }

            // Passed all filters — emit walk probe.
            if (isCellEntity
                && RenderingDiagnostics.ProbeIndoorWalkEnabled
                && indoorProbeState!.ShouldEmit(cellProbeId))
            {
                Console.WriteLine(
                    $"[indoor-walk] cellEnt=0x{entity.Id:X8} " +
                    $"pos=({entity.Position.X:F1},{entity.Position.Y:F1},{entity.Position.Z:F1}) " +
                    $"parentCell=0x{(entity.ParentCellId ?? 0u):X8} " +
                    $"meshRef0=0x{cellProbeId:X8} " +
                    $"meshRefCount={entity.MeshRefs.Count} " +
                    $"landblockVisible=true aabbVisible=true cellInVis=true");
            }

            result.EntitiesWalked++;
            for (int i = 0; i < entity.MeshRefs.Count; i++)
            {
                scratch.Add((entity, i, entry.LandblockId));
            }
        }
    }
}

// NOTE(review): the generic type arguments in the signature below look stripped
// (IReadOnlyList / IReadOnlyDictionary / HashSet) — confirm them against the
// parameter types of WalkEntitiesInto above.
public void Draw(
    ICamera camera,
    IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList Entities, IReadOnlyDictionary? AnimatedById)> landblockEntries,
    FrustumPlanes? frustum = null,
    uint? neverCullLandblockId = null,
    HashSet? visibleCellIds = null,
    HashSet?
animatedEntityIds = null, EntitySet set = EntitySet.All) { _shader.Use(); _indoorProbeFrameCounter++; var vp = camera.View * camera.Projection; _shader.SetMatrix4("uViewProjection", vp); bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal); if (diag && !_gpuQueriesInitialized) { for (int i = 0; i < GpuQueryRingDepth; i++) { _gpuQueryOpaque[i] = _gl.GenQuery(); _gpuQueryTransparent[i] = _gl.GenQuery(); } _gpuQueriesInitialized = true; } // Always run the CPU stopwatch — cheap; only logged under diag. _cpuStopwatch.Restart(); // Camera world-space position for front-to-back sort (perf #2). The view // matrix is the inverse of the camera's world transform, so the world // translation lives in the inverse's translation row. Vector3 camPos = Vector3.Zero; if (Matrix4x4.Invert(camera.View, out var invView)) camPos = invView.Translation; // ── Phase 1: clear groups, walk entities, build groups ────────────── foreach (var grp in _groups.Values) { grp.Matrices.Clear(); grp.Slots.Clear(); } var metaTable = _meshAdapter.MetadataTable; uint anyVao = 0; // Project the 5-tuple enumerable into LandblockEntry records for WalkEntities. static IEnumerable ToEntries( IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList Entities, IReadOnlyDictionary? AnimatedById)> src) { foreach (var e in src) yield return new LandblockEntry(e.LandblockId, e.AabbMin, e.AabbMax, e.Entities, e.AnimatedById); } // A.5 T26 follow-up (Bug B): use the no-alloc WalkEntitiesInto overload // that populates _walkScratch (a per-dispatcher field reused across frames) // instead of allocating a fresh List<(WorldEntity, int)> per frame. // // Pass an IndoorProbeState when any indoor probe is active so the static // WalkEntitiesInto can emit rate-limited [indoor-cull] / [indoor-walk] // lines without needing access to instance fields. Null = probes off. IndoorProbeState? 
probeState = null; if (RenderingDiagnostics.ProbeIndoorCullEnabled || RenderingDiagnostics.ProbeIndoorWalkEnabled) { // _currentFrame is snapped at construction time. Construct // once per Draw() call only — a second construction within // the same frame would stamp the dictionary with the // (already-advanced) counter value, suppressing the second // pass's emissions for IndoorProbeRateLimitFrames frames. // Today Draw() is called exactly once per frame; if a // future refactor adds a shadow / reflection / second pass, // this assumption needs revisiting. probeState = new IndoorProbeState(_lastIndoorProbeFrame, _indoorProbeFrameCounter); } var walkResult = default(WalkResult); WalkEntitiesInto( ToEntries(landblockEntries), frustum, neverCullLandblockId, visibleCellIds, animatedEntityIds, _walkScratch, ref walkResult, probeState, set); // Tier 1 cache (#53) flush-tracking locals. _walkScratch holds one tuple // per (entity, MeshRefIndex) and is in entity-order, so all MeshRefs of // a given entity are contiguous. We accumulate ALL of an entity's // batches into _populateScratch, then flush exactly once per entity: // either when the iteration crosses to a different entity, or at the // end of the loop for the last entity. Flushing per-tuple would // overwrite earlier MeshRefs (the cache is keyed by entity.Id), so // multi-part Setup-backed entities would only retain their LAST // MeshRef's batches — bug fixed in commit after 2f489a8. uint? populateEntityId = null; uint populateLandblockId = 0; // Tier 1 cache (#53) — fast-path one-shot tracker. The cache stores a // FLAT list of batches across all MeshRefs of an entity, so a single // ApplyCacheHit call already drew every batch. _walkScratch yields // one tuple per (entity, MeshRefIndex), so without this guard a // 3-MeshRef static entity on a frame-2 cache hit would call // ApplyCacheHit 3 times — appending all 6 batches × 3 = 18 instances // to _groups instead of 6. 
Result: severe Z-fighting + 3× perf hit // on every multi-part static entity (buildings, statues, multi-MeshRef // NPCs). The fast path must fire only on the FIRST tuple of each // entity; subsequent tuples skip via this tracker. uint? lastHitEntityId = null; // Tier 1 cache (#53) — incomplete-entity guard. When any MeshRef of // the current entity has _meshAdapter.TryGetRenderData return null // (mesh still async-decoding via ObjectMeshManager.PrepareMeshDataAsync), // we mark the entity incomplete and DROP the accumulated populate // scratch at entity boundary instead of writing it to the cache. // Otherwise the cache would hold a partial classification (some parts // missing), and frame-2 cache hits would persist that partial render // even after the missing mesh loads — every subsequent frame sees the // cache hit and skips re-classification, so the missing parts never // recover. User-visible symptom: the drudge statue on top of the // Foundry (multi-part Setup entity with AnimPartChange) renders with // some parts missing permanently. Reset on entity change. bool currentEntityIncomplete = false; // Per-tuple entity tracker used purely for entity-change detection. // Updated UNCONDITIONALLY at end of every tuple (including tuples that // skip via null renderData), so the flag-reset block below correctly // distinguishes "new entity" from "same entity, different tuple." // populateEntityId can't be used for this because it's only set after // a successful slow-path classification. uint? prevTupleEntityId = null; foreach (var (entity, partIdx, landblockId) in _walkScratch) { if (diag) _entitiesSeen++; // Skip subsequent tuples of an entity that already cache-hit on // its first tuple. ApplyCacheHit drew the full flat batch list; // re-firing here would N-multiply the instance count. Diag // _entitiesDrawn is bumped here to preserve per-tuple parity with // the previous counting semantics. 
if (lastHitEntityId == entity.Id) { if (diag) _entitiesDrawn++; continue; } // Reset the hit tracker on entity change so the next entity's // first tuple re-checks the cache. (When this iteration is the // FIRST tuple of a new entity after a cache-hit entity, we must // not retain the previous entity's id.) if (lastHitEntityId.HasValue && lastHitEntityId.Value != entity.Id) { lastHitEntityId = null; } // Tier 1 cache (#53) — drop the previous entity's accumulated // populate scratch BEFORE MaybeFlushOnEntityChange runs. If the // previous entity ended incomplete (≥1 null renderData), we MUST // NOT cache its partial classification: clear scratch and null // the tracker so MaybeFlushOnEntityChange sees the cleaned state // and no-ops for this entity. Reset the incomplete flag for the // new entity so each one gets a fresh measurement. // // CRITICAL: the flag reset must fire ONLY on entity change, not // every tuple. Resetting per-tuple within the same entity would // undo a null-renderData flag set by a previous tuple of the same // entity → if the missing MeshRef sits in the MIDDLE of the // entity's MeshRefs list, a later valid tuple's reset would // re-mark the entity "complete" and let partial data populate // the cache. Trees with [trunk valid, branches null, leaves // valid] hit this exactly — branches never recover. bool isNewEntity = !prevTupleEntityId.HasValue || prevTupleEntityId.Value != entity.Id; if (isNewEntity) { if (populateEntityId.HasValue && currentEntityIncomplete) { _populateScratch.Clear(); populateEntityId = null; } currentEntityIncomplete = false; // Phase U.4: resolve this entity's clip slot ONCE per entity // (constant across its tuples). On the U.3 / outdoor path // (_clipRoutingActive false) every entity is slot 0, never culled. // The whole decision (including the routing-active gate) lives in // the pure ResolveSlotForFrame helper so it's unit-testable. 
(_currentEntitySlot, _currentEntityCulled) = ResolveSlotForFrame( _clipRoutingActive, entity.ServerGuid, entity.ParentCellId, _cellIdToSlot, _outdoorSlot, _outdoorVisible); } prevTupleEntityId = entity.Id; // Flush-on-entity-change: if the previous entity accumulated any // batches AND this iteration is for a different entity, populate // its cache entry now and reset the scratch buffer. Runs for ALL // entities (including this-entity-culled) so the PREVIOUS entity's // cache always flushes at the boundary. (populateEntityId, populateLandblockId) = MaybeFlushOnEntityChange( populateEntityId, populateLandblockId, entity.Id, _cache, _populateScratch); // Phase U.4: a culled entity (cell not visible, or no outdoors visible // for an outdoor stab) contributes NO instances. Skip after the // boundary flush above so the previous entity still committed; the // next entity's isNewEntity logic is unaffected (prevTupleEntityId is // already updated). Matches the existing visible-cell / frustum cull: // nothing enters _groups, so neither binding=0 nor binding=3 sees it. if (_currentEntityCulled) continue; var entityWorld = Matrix4x4.CreateFromQuaternion(entity.Rotation) * Matrix4x4.CreateTranslation(entity.Position); bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true; // Cache-hit fast path (Task 10): static entity with a populated // cache entry skips classification entirely. Walk the cached // (GroupKey, RestPose) flat list and append cached.RestPose * // entityWorld to each matching group's matrices. Animated entities // bypass the cache (collector is set null below; their entries are // never populated in the first place). // // Placed AFTER the entity-change flush above so that, on a // hit, this iteration also finishes flushing any pending // populate state from a previous entity. Animated entities never // enter this branch — the !isAnimated guard makes that explicit. 
// // Fires ONCE per entity: the first tuple reaches here, runs // ApplyCacheHit, sets lastHitEntityId, and continues. Subsequent // tuples of the same entity short-circuit at the top of the loop // body via the lastHitEntityId == entity.Id check above. if (!isAnimated && !_tier1CacheDisabled && _cache.TryGet(entity.Id, landblockId, out var cachedEntry)) { ApplyCacheHit(cachedEntry!, entityWorld, AppendInstanceToGroup); // anyVao recovery: when the first visible entity in the frame // takes the fast path, no slow-path lookup has populated // anyVao yet. Look up THIS entity's first MeshRef once via // the mesh adapter — cheap dict lookup, not a re-classify. if (anyVao == 0) { var firstMeshRef = entity.MeshRefs[partIdx]; var firstRenderData = _meshAdapter.TryGetRenderData(firstMeshRef.GfxObjId); if (firstRenderData is not null) anyVao = firstRenderData.VAO; } if (diag) _entitiesDrawn++; lastHitEntityId = entity.Id; #if DEBUG // Cross-check guard: assert the membership predicate held at hit time. // The full re-classification cross-check (spec section 6.5) is a stretch // goal; this simpler assert catches the prior Tier 1 bug class — a // static entity that turns out to actually be animated would fire here. // // Structurally redundant with the `if (!isAnimated && ...)` branch // condition, but serves as a TRIPWIRE: a future refactor that // incorrectly relaxes the branch condition (e.g., removes // `!isAnimated` from the guard) would silently allow animated // entities into the fast path; the assert catches that immediately. System.Diagnostics.Debug.Assert( !isAnimated, $"EntityClassificationCache hit on animated entity {entity.Id} — invariant violated"); #endif continue; } // Compute palette-override hash ONCE per entity (perf #4). // Reused across every (part, batch) lookup so the FNV-1a fold // over SubPalettes runs once instead of N times. Zero when the // entity has no palette override (trees, scenery). 
ulong palHash = 0; if (entity.PaletteOverride is not null) palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride); // Note: GameWindow's spawn path already applies // AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix — // close-detail mesh swap for humanoids) to MeshRefs. We // trust MeshRefs as the source of truth here. AnimatedEntityState's // overrides become relevant only for hot-swap (0xF625 // ObjDescEvent) which today rebuilds MeshRefs anyway. var meshRef = entity.MeshRefs[partIdx]; ulong gfxObjId = meshRef.GfxObjId; var renderData = _meshAdapter.TryGetRenderData(gfxObjId); // [indoor-lookup] probe — emit once per cell entity per sec. // Fires BEFORE the null-renderData early-continue so a miss still // emits hit=false, distinguishing H2 (empty batches) from H6 // (dispatcher fails to traverse Setup). ulong lookupCellId = (ulong)gfxObjId; if (RenderingDiagnostics.IsEnvCellId(lookupCellId) && RenderingDiagnostics.ProbeIndoorLookupEnabled // Rate-limit in a separate namespace from [indoor-walk]/[indoor-cull] // (which key on the same gfxObjId). Without this, IndoorAll=1 would // silence the lookup probe whenever the walk probe fired first. && ShouldEmitIndoorProbe(lookupCellId | 0x8000_0000_0000_0000UL)) { bool hit = renderData is not null; bool isSetup = hit && renderData!.IsSetup; int partCount = isSetup ? 
renderData!.SetupParts.Count : 0; int partsHit = 0, partsMiss = 0; if (isSetup) { foreach (var (partId, _) in renderData!.SetupParts) { if (_meshAdapter.TryGetRenderData(partId) is not null) partsHit++; else partsMiss++; } } bool hasEnvCellGeom = isSetup && renderData!.SetupParts.Exists(t => (t.GfxObjId & 0x1_0000_0000UL) != 0); Console.WriteLine( $"[indoor-lookup] cellId=0x{lookupCellId:X8} " + $"hit={hit} isSetup={isSetup} partCount={partCount} " + $"hasEnvCellGeom={hasEnvCellGeom} partsHit={partsHit} partsMiss={partsMiss}"); } if (renderData is null) { // Tier 1 cache (#53): mesh data is still async-decoding via // WB's ObjectMeshManager.PrepareMeshDataAsync. Flag the entity // as incomplete so the entity-boundary check (or end-of-loop // check) drops the accumulated populate scratch instead of // caching a partial classification. The slow path retries on // the next frame; once all this entity's meshes have loaded, // the populate fires with the complete batch set. currentEntityIncomplete = true; if (diag) _meshesMissing++; continue; } if (anyVao == 0) anyVao = renderData.VAO; // Cache-miss path (animated entities skip cache entirely). // Static entities accumulate into _populateScratch across ALL // their MeshRefs; the flush at next-entity-boundary (or // end-of-loop) commits them as a single Populate call. var collector = isAnimated ? null : _populateScratch; bool drewAny = false; if (renderData.IsSetup && renderData.SetupParts.Count > 0) { foreach (var (partGfxObjId, partTransform) in renderData.SetupParts) { var partData = _meshAdapter.TryGetRenderData(partGfxObjId); if (partData is null) continue; var model = ComposePartWorldMatrix( entityWorld, meshRef.PartTransform, partTransform); // [indoor-xform] probe — only for the cell's synthetic // geometry part (bit 32 set, per WB's PrepareEnvCellMeshData // cellGeomId convention). One line per part per sec. // Disambiguates hypothesis H5 (transform double-apply — // composedT lands at 2 × cellOrigin). 
if ((partGfxObjId & 0x1_0000_0000UL) != 0 && RenderingDiagnostics.ProbeIndoorXformEnabled && ShouldEmitIndoorProbe(partGfxObjId)) { Console.WriteLine( $"[indoor-xform] cellGeomId=0x{partGfxObjId:X16} " + $"entityWorldT=({entityWorld.Translation.X:F2},{entityWorld.Translation.Y:F2},{entityWorld.Translation.Z:F2}) " + $"meshRefT=({meshRef.PartTransform.Translation.X:F2},{meshRef.PartTransform.Translation.Y:F2},{meshRef.PartTransform.Translation.Z:F2}) " + $"partT=({partTransform.Translation.X:F2},{partTransform.Translation.Y:F2},{partTransform.Translation.Z:F2}) " + $"composedT=({model.Translation.X:F2},{model.Translation.Y:F2},{model.Translation.Z:F2})"); } var restPose = partTransform * meshRef.PartTransform; ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable, restPose, collector); drewAny = true; } } else { var model = meshRef.PartTransform * entityWorld; ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable, restPose: meshRef.PartTransform, collector: collector); drewAny = true; } // Track THIS entity for the next iteration's flush check. Only // when collector is non-null (entity is static); animated entities // leave the tracker null so we don't try to flush them. if (collector is not null) { populateEntityId = entity.Id; populateLandblockId = landblockId; } if (diag && drewAny) _entitiesDrawn++; } // Tier 1 cache (#53) — drop the accumulated populate scratch if the // LAST entity in the loop ended incomplete (had ≥1 null renderData). // Same reason as the entity-boundary handling above: avoid caching a // partial classification. The slow path will retry on the next frame // and populate correctly once all meshes have loaded. if (currentEntityIncomplete) { _populateScratch.Clear(); populateEntityId = null; } // Final flush: the last entity in _walkScratch has no "next iteration" // to trigger the entity-change flush, so commit its accumulated batches // here. 
No-op when the last entity was animated (populateEntityId stays // null) or when no entities walked at all. FinalFlushPopulate(populateEntityId, populateLandblockId, _cache, _populateScratch); // Nothing visible — skip the GL pass entirely. if (anyVao == 0) { LastDrawStats = new DrawStats(set, walkResult.EntitiesWalked, _walkScratch.Count, 0, 0, 0, 0, 0, 0); _cpuStopwatch.Stop(); if (diag) MaybeFlushDiag(); return; } // ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ── int totalInstances = 0; foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; if (totalInstances == 0) { LastDrawStats = new DrawStats(set, walkResult.EntitiesWalked, _walkScratch.Count, 0, 0, 0, 0, 0, 0); _cpuStopwatch.Stop(); if (diag) MaybeFlushDiag(); return; } int needed = totalInstances * 16; if (_instanceData.Length < needed) _instanceData = new float[needed + 256 * 16]; // Phase U.4: size the per-instance clip-slot buffer to match the instance // count and lay it out in the SAME group order / cursor as _instanceData, // so instanceClipSlot[i] (binding=3) tracks Instances[i] (binding=0). On // the U.3 / outdoor path every Slots entry is 0 ⇒ identical to U.3. if (_clipSlotData.Length < totalInstances) _clipSlotData = new uint[totalInstances + 256]; _opaqueDraws.Clear(); _translucentDraws.Clear(); int cursor = 0; foreach (var grp in _groups.Values) { if (grp.Matrices.Count == 0) continue; grp.FirstInstance = cursor; grp.InstanceCount = grp.Matrices.Count; // Use the first instance's translation as the group's representative // position for front-to-back sort (perf #2). Cheap heuristic; works // well when instances of one group are spatially coherent // (typical for trees in one landblock area, NPCs at one spawn). 
var first = grp.Matrices[0]; var grpPos = new Vector3(first.M41, first.M42, first.M43); grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos); for (int i = 0; i < grp.Matrices.Count; i++) { WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]); // Slots[] is parallel to Matrices[] within the group; write the // slot at the same cursor so binding=3 stays aligned with binding=0. _clipSlotData[cursor] = grp.Slots[i]; cursor++; } if (IsOpaque(grp.Translucency)) _opaqueDraws.Add(grp); else _translucentDraws.Add(grp); } // Front-to-back sort within each cull mode. DrawIndirectRange must // split MDI calls whenever CullMode changes because GL state is not // part of an indirect command. Sorting by distance alone can turn a // stable 1k-draw live scene into hundreds of tiny MDI runs after a // landblock transition, which shows up as a GPU-command bottleneck // without a triangle-count spike. _opaqueDraws.Sort(CompareOpaqueSubmissionOrder); _translucentDraws.Sort(CompareTransparentSubmissionOrder); // ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent), // fill via BuildIndirectArrays ────────────────────────────────── int totalDraws = _opaqueDraws.Count + _translucentDraws.Count; if (_batchData.Length < totalDraws) _batchData = new BatchData[totalDraws + 64]; if (_indirectCommands.Length < totalDraws) _indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64]; if (_drawCullModes.Length < totalDraws) _drawCullModes = new CullMode[totalDraws + 64]; var groupInputs = new List(totalDraws); foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g)); foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g)); // Cast _batchData (private BatchData) to public-mirror BatchDataPublic for BuildIndirectArrays. // Layout is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test). 
var batchPublic = new BatchDataPublic[totalDraws]; var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic, _drawCullModes); long totalTriangles = 0; foreach (var input in groupInputs) totalTriangles += (long)(input.IndexCount / 3) * input.InstanceCount; int cullRuns = CountCullRuns(_drawCullModes, 0, layout.OpaqueCount) + CountCullRuns(_drawCullModes, layout.OpaqueCount, layout.TransparentCount); // Copy back into _batchData for (int i = 0; i < totalDraws; i++) { _batchData[i] = new BatchData { TextureHandle = batchPublic[i].TextureHandle, TextureLayer = batchPublic[i].TextureLayer, Flags = batchPublic[i].Flags, }; } _opaqueDrawCount = layout.OpaqueCount; _transparentDrawCount = layout.TransparentCount; _transparentByteOffset = layout.TransparentByteOffset; LastDrawStats = new DrawStats( set, walkResult.EntitiesWalked, _walkScratch.Count, totalInstances, totalDraws, cullRuns, _opaqueDrawCount, _transparentDrawCount, totalTriangles); // ── Phase 5: upload four buffers ──────────────────────────────────── fixed (float* ip = _instanceData) UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float)); fixed (BatchData* bp = _batchData) UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData)); // Phase U.4: per-instance clip-slot buffer (binding=3), one uint per // instance, laid out parallel to _instanceData in Phase 3's group loop so // instanceClipSlot[instanceIndex] tracks Instances[instanceIndex]. On the // U.3 / outdoor path every entry is 0 ⇒ slot 0 ⇒ no-clip (identical to // U.3); under indoor routing it holds the per-instance slot from // ResolveEntitySlot. No clear here — Phase 3 wrote exactly totalInstances // entries; only [0..totalInstances) is uploaded, so any stale tail is // never read by the shader (BaseInstance + gl_InstanceID < totalInstances). 
fixed (uint* sp = _clipSlotData) UploadSsbo(_clipSlotSsbo, 3, sp, totalInstances * sizeof(uint)); fixed (DrawElementsIndirectCommand* cp = _indirectCommands) { _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer); _gl.BufferData(BufferTargetARB.DrawIndirectBuffer, (nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw); } // Phase U.3: bind the SHARED per-cell clip-region SSBO (binding=2). The // GameWindow-level ClipFrame already uploaded + bound it this frame; we // re-bind defensively in case another consumer touched binding=2 since. // When no shared id is set (0), bind our own no-clip fallback so the // shader never reads an unbound SSBO at binding=2. BindClipRegionBinding2(); // ── Phase 6: bind global VAO once ─────────────────────────────────── _gl.BindVertexArray(anyVao); if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal)) _gl.Disable(EnableCap.CullFace); // GPU timing: compute this frame's ring slot. We read frame N-3's // result (the oldest data in the ring) before overwriting it with // frame N's queries. Hoisted to function scope so both the opaque // and transparent passes below can reference gpuQuerySlot. See spec // §3 Q1/Q2 + §4 in // docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md. int gpuQuerySlot = _gpuQueryFrameIndex % GpuQueryRingDepth; // diag is part of the gate so the read/issue/increment trio stays // symmetric — without it, toggling ACDREAM_WB_DIAG mid-session would // freeze the frame counter (gated by diag below) while the read kept // re-reading the same slot, producing duplicate stale samples. 
if (diag && _gpuQueriesInitialized && _gpuQueryFrameIndex >= GpuQueryRingDepth) { _gl.GetQueryObject(_gpuQueryOpaque[gpuQuerySlot], QueryObjectParameterName.ResultAvailable, out int avail); if (avail != 0) { _gl.GetQueryObject(_gpuQueryOpaque[gpuQuerySlot], QueryObjectParameterName.Result, out ulong opaqueNs); _gl.GetQueryObject(_gpuQueryTransparent[gpuQuerySlot], QueryObjectParameterName.Result, out ulong transNs); long gpuUs = (long)((opaqueNs + transNs) / 1000UL); _gpuSamples[_gpuSampleCursor] = gpuUs; _gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length; } // If avail==0 the sample is dropped silently. MedianMicros // computes over the non-zero subset, so dropped samples don't // poison the median. } // ── Phase 7: opaque pass ───────────────────────────────────────────── if (_opaqueDrawCount > 0) { _gl.Disable(EnableCap.Blend); _gl.DepthMask(true); // A.5 T20: enable A2C for ClipMap foliage — GPU derives sample mask // from the alpha written by mesh_modern.frag so foliage edges are // smooth under MSAA 4x. A no-op for fully-opaque (α=1) batches. // A.5 T22.5: gated by AlphaToCoverage property so Low/Medium presets // (no MSAA) skip the unnecessary GL state change. if (AlphaToCoverage) _gl.Enable(EnableCap.SampleAlphaToCoverage); _shader.SetInt("uRenderPass", 0); // Phase Post-A.5 (ISSUE #52, 2026-05-10): opaque section of // Batches[] starts at index 0. See uDrawIDOffset comment in // mesh_modern.vert for why this is needed. 
_shader.SetInt("uDrawIDOffset", 0); _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer); if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque[gpuQuerySlot]); DrawIndirectRange(0, _opaqueDrawCount); if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed); if (AlphaToCoverage) _gl.Disable(EnableCap.SampleAlphaToCoverage); } // ── Phase 8: transparent pass ──────────────────────────────────────── if (_transparentDrawCount > 0) { _gl.Enable(EnableCap.Blend); _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha); _gl.DepthMask(false); // Phase Post-A.5 (ISSUE #52, 2026-05-10): transparent section of // Batches[] starts at index _opaqueDrawCount. Without this offset, // each transparent draw reads BatchData[0..transparentCount) — the // OPAQUE section — and the lifestone crystal's apparent texture // flickers to whatever opaque batch sorted first that frame. See // uDrawIDOffset comment in mesh_modern.vert. _shader.SetInt("uDrawIDOffset", _opaqueDrawCount); // Closed-shell translucent meshes still need culling, but the // cull side must come from each dat batch just like the opaque // section. BuildIndirectArrays preserves CullMode in _drawCullModes. 
_gl.FrontFace(FrontFaceDirection.CW); _shader.SetInt("uRenderPass", 1); if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent[gpuQuerySlot]); DrawIndirectRange(_opaqueDrawCount, _transparentDrawCount); if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed); _gl.DepthMask(true); _gl.Disable(EnableCap.Blend); } _gl.Disable(EnableCap.CullFace); _gl.BindVertexArray(0); _cpuStopwatch.Stop(); if (diag) { long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency; _cpuSamples[_cpuSampleCursor] = cpuUs; _cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length; // GPU sample read happens BEFORE issuing the next frame's queries // (see step 1.3 above). Increment the frame counter here so the // next call computes a fresh slot. if (_gpuQueriesInitialized) _gpuQueryFrameIndex++; _drawsIssued += _opaqueDrawCount + _transparentDrawCount; _instancesIssued += totalInstances; MaybeFlushDiag(); } } /// /// Phase A8 RR5 (2026-05-26): per-building draw overload. Walks only /// entities whose ParentCellId is in , plus /// outdoor-style entities matching the EntitySet partition. Used by /// the indoor render branch to scope rendering to the camera-buildings' /// cells. /// /// Mirrors the existing visibleCellIds-based Draw but with an /// explicit cell list (not the BFS-derived visibility set). The semantic /// difference is at the caller: cellIds = the camera-buildings' EnvCellIds, /// not the portal BFS result. The dispatcher's internal logic is identical /// — it filters indoor entities by membership in the provided set. /// public void Draw( ICamera camera, IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList Entities, IReadOnlyDictionary? AnimatedById)> landblockEntries, IReadOnlyCollection cellIds, FrustumPlanes? frustum = null, uint? neverCullLandblockId = null, HashSet? 
animatedEntityIds = null, EntitySet set = EntitySet.All) { // Adapt IReadOnlyCollection → HashSet for the existing path. // If the caller already passed a HashSet, avoid re-wrapping. HashSet cellIdSet = cellIds is HashSet hs ? hs : new HashSet(cellIds); Draw(camera, landblockEntries, frustum: frustum, neverCullLandblockId: neverCullLandblockId, visibleCellIds: cellIdSet, animatedEntityIds: animatedEntityIds, set: set); } private static IndirectGroupInput ToInput(InstanceGroup g) => new( IndexCount: g.IndexCount, FirstIndex: g.FirstIndex, BaseVertex: g.BaseVertex, InstanceCount: g.InstanceCount, FirstInstance: g.FirstInstance, TextureHandle: g.BindlessTextureHandle, TextureLayer: g.TextureLayer, Translucency: g.Translucency, CullMode: g.CullMode); private static int CompareOpaqueSubmissionOrder(InstanceGroup a, InstanceGroup b) { int cull = a.CullMode.CompareTo(b.CullMode); return cull != 0 ? cull : a.SortDistance.CompareTo(b.SortDistance); } private static int CompareTransparentSubmissionOrder(InstanceGroup a, InstanceGroup b) { int cull = a.CullMode.CompareTo(b.CullMode); return cull != 0 ? cull : b.SortDistance.CompareTo(a.SortDistance); } private static int CountCullRuns(CullMode[] modes, int startCommand, int commandCount) { if (commandCount <= 0) return 0; int end = startCommand + commandCount; int runs = 1; var previous = modes[startCommand]; for (int i = startCommand + 1; i < end; i++) { var current = modes[i]; if (current == previous) continue; runs++; previous = current; } return runs; } private unsafe void DrawIndirectRange(int startCommand, int commandCount) { int end = startCommand + commandCount; int command = startCommand; while (command < end) { var cullMode = _drawCullModes[command]; ApplyCullMode(cullMode); int runCount = 1; while (command + runCount < end && _drawCullModes[command + runCount] == cullMode) runCount++; // Each glMultiDrawElementsIndirect call restarts gl_DrawID at 0. 
// Because this method splits one logical opaque/transparent pass // into CullMode runs, the shader must receive the absolute command // index for this run or it will read BatchData[0] again and bind // the wrong texture for later runs. _shader.SetInt("uDrawIDOffset", command); _gl.MultiDrawElementsIndirect( PrimitiveType.Triangles, DrawElementsType.UnsignedShort, (void*)(command * DrawCommandStride), (uint)runCount, (uint)DrawCommandStride); command += runCount; } } private void ApplyCullMode(CullMode mode) { // WB BaseObjectRenderManager.cs:850-866 applies CullMode per MDI group. // WB GameScene.cs:843 sets FrontFace(CW) globally; SetCullMode then // only chooses front/back culling. Keep the same convention here so // splitting MDI commands by CullMode cannot resurrect stale CCW state. _gl.FrontFace(FrontFaceDirection.CW); switch (mode) { case CullMode.None: _gl.Disable(EnableCap.CullFace); break; case CullMode.Clockwise: _gl.Enable(EnableCap.CullFace); _gl.CullFace(TriangleFace.Front); break; case CullMode.CounterClockwise: case CullMode.Landblock: _gl.Enable(EnableCap.CullFace); _gl.CullFace(TriangleFace.Back); break; } } private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount) { _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, ssbo); _gl.BufferData(BufferTargetARB.ShaderStorageBuffer, (nuint)byteCount, data, BufferUsageARB.DynamicDraw); _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer, binding, ssbo); } /// /// Phase U.3: bind the per-cell clip-region SSBO to binding=2. Prefers the /// shared buffer (set via ); /// otherwise lazily creates + binds a one-slot no-clip fallback so the shader /// never reads an unbound SSBO. The fallback's single slot has count 0 /// (pass-all), matching 's slot 0. 
///
private unsafe void BindClipRegionBinding2()
{
    if (_sharedClipRegionSsbo != 0)
    {
        _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
            ClipFrame.MeshClipSsboBinding, _sharedClipRegionSsbo);
        return;
    }

    if (_fallbackClipRegionSsbo == 0)
    {
        _fallbackClipRegionSsbo = _gl.GenBuffer();

        // One CellClip slot, all zeros: count 0 ⇒ shader passes every plane.
        // Zeroed explicitly rather than relying on stackalloc's initial contents.
        byte* zero = stackalloc byte[ClipFrame.CellClipStrideBytes];
        new Span<byte>(zero, ClipFrame.CellClipStrideBytes).Clear();

        _gl.BindBuffer(BufferTargetARB.ShaderStorageBuffer, _fallbackClipRegionSsbo);
        _gl.BufferData(BufferTargetARB.ShaderStorageBuffer,
            (nuint)ClipFrame.CellClipStrideBytes, zero, BufferUsageARB.DynamicDraw);
    }

    _gl.BindBufferBase(BufferTargetARB.ShaderStorageBuffer,
        ClipFrame.MeshClipSsboBinding, _fallbackClipRegionSsbo);
}

/// <summary>
/// Emits one <c>[WB-DIAG]</c> line and zeroes the interval counters when more
/// than 5000 ms have elapsed since the previous line; otherwise does nothing.
/// </summary>
private void MaybeFlushDiag()
{
    long now = Environment.TickCount64;
    if (now - _lastLogTick <= 5000)
    {
        return;
    }

    long cpuMed = MedianMicros(_cpuSamples);
    long cpuP95 = Percentile95Micros(_cpuSamples);
    long gpuMed = MedianMicros(_gpuSamples);
    long gpuP95 = Percentile95Micros(_gpuSamples);

    // A.5 T23: flag when entity dispatcher median exceeds 2.0ms budget
    // (Phase A.5 spec §2 acceptance criterion 6). Grep-friendly prefix.
    const long BudgetUs = 2000;
    string budgetFlag = cpuMed > BudgetUs ? " BUDGET_OVER" : "";

    Console.WriteLine(
        $"[WB-DIAG]{budgetFlag} entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " +
        $"cpu_us={cpuMed}m/{cpuP95}p95 gpu_us={gpuMed}m/{gpuP95}p95");

    _entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
    _lastLogTick = now;

    // Don't reset the sample buffers — they're a moving window of the
    // last 256 frames; clearing per 5s flush would lose recent history.
}

/// <summary>
/// Returns an ascending-sorted copy of <paramref name="samples"/> together
/// with the number of strictly positive entries. The input is not modified.
/// </summary>
private static (long[] Sorted, int PositiveCount) SortSamplesAscending(long[] samples)
{
    var sorted = (long[])samples.Clone();
    Array.Sort(sorted);

    int positiveCount = 0;
    foreach (long value in sorted)
    {
        if (value > 0)
        {
            positiveCount++;
        }
    }

    return (sorted, positiveCount);
}

/// <summary>
/// Median of the strictly positive entries of <paramref name="samples"/>
/// (the upper of the two middle values for an even count). Entries that are
/// zero or negative are treated as empty slots.
/// </summary>
/// <returns>The median, or 0 when there is no positive entry.</returns>
private static long MedianMicros(long[] samples)
{
    var (sorted, positiveCount) = SortSamplesAscending(samples);
    if (positiveCount == 0)
    {
        return 0;
    }

    // After an ascending sort the positive entries fill the LAST
    // positiveCount slots, so that window starts at Length - positiveCount.
    // Indexing Length - positiveCount / 2 instead runs one past the end when
    // there is exactly one sample and sits one slot too high for odd counts.
    int windowStart = sorted.Length - positiveCount;
    return sorted[windowStart + positiveCount / 2];
}

/// <summary>
/// Approximate 95th percentile of the strictly positive entries of
/// <paramref name="samples"/>: steps back <c>(int)(count * 0.05)</c> slots
/// from the largest value in ascending order.
/// </summary>
/// <returns>The percentile value, or 0 when there is no positive entry.</returns>
private static long Percentile95Micros(long[] samples)
{
    var (sorted, positiveCount) = SortSamplesAscending(samples);
    if (positiveCount == 0)
    {
        return 0;
    }

    int idx = sorted.Length - 1 - (int)(positiveCount * 0.05);
    return sorted[idx];
}

// ── Tier 1 cache (#53) helpers extracted for testability ─────────────────
//
// Three pure-CPU static helpers carved out of Draw's per-entity loop so
// unit tests can exercise the populate/flush algorithm + cache-hit fast
// path without needing a real GL context. Production code (Draw) calls
// these helpers; the dispatcher integration tests in
// WbDrawDispatcherBucketingTests use them to drive the same algorithm
// through deterministic inputs.

/// <summary>
/// Apply a cache hit's batches into the per-frame group dictionary by
/// composing <c>cached.RestPose * entityWorld</c> per batch and routing
/// the result through <paramref name="appendInstance"/>. The delegate
/// abstracts over <c>AppendInstanceToGroup</c> so this helper stays
/// GL-free and unit-testable.
/// </summary>
/// <remarks>
/// Matrix multiplication is non-commutative: it MUST be
/// <c>RestPose * entityWorld</c>, not the reverse. See
/// <c>ComposePartWorldMatrix</c> for the full part-world product.
/// </remarks>
/// <param name="entry">Cache entry whose <c>Batches</c> are replayed.</param>
/// <param name="entityWorld">The entity's world matrix for this frame.</param>
/// <param name="appendInstance">Receives each batch key with its composed matrix.</param>
internal static void ApplyCacheHit(
    EntityCacheEntry entry,
    Matrix4x4 entityWorld,
    Action appendInstance)
{
    foreach (var cached in entry.Batches)
    {
        appendInstance(cached.Key, cached.RestPose * entityWorld);
    }
}

/// <summary>
/// Per-tuple flush check. If <paramref name="populateEntityId"/> is set
/// AND differs from <paramref name="currentEntityId"/>, the previous
/// entity's accumulated batches are committed to <paramref name="cache"/>
/// and <paramref name="populateScratch"/> is cleared. Returns the
/// updated tracker tuple — pass these back into the field locals in the
/// caller's loop.
/// </summary>
/// <remarks>
/// This is the bug-fix structure from commit 00fa8ae (per-MeshRef
/// Populate would overwrite earlier MeshRefs because the cache is
/// keyed by entity.Id; flushing only on entity boundary preserves all
/// MeshRefs' batches). _walkScratch is in entity-order so all MeshRefs
/// of one entity arrive contiguously.
/// </remarks>
/// <returns>
/// <c>(null, 0)</c> after an entity boundary was handled; otherwise the
/// incoming tracker values unchanged.
/// </returns>
internal static (uint? PopulateEntityId, uint PopulateLandblockId) MaybeFlushOnEntityChange(
    uint? populateEntityId,
    uint populateLandblockId,
    uint currentEntityId,
    EntityClassificationCache cache,
    List populateScratch)
{
    if (populateEntityId.HasValue && populateEntityId.Value != currentEntityId)
    {
        // An empty scratch means nothing was classified for the previous
        // entity, so there is nothing to commit — but the tracker still resets.
        if (populateScratch.Count > 0)
        {
            cache.Populate(populateEntityId.Value, populateLandblockId, populateScratch.ToArray());
        }

        populateScratch.Clear();
        return (null, 0u);
    }

    return (populateEntityId, populateLandblockId);
}

/// <summary>
/// End-of-loop final flush. The last entity in <c>_walkScratch</c> has
/// no next-iteration to trigger <see cref="MaybeFlushOnEntityChange"/>,
/// so commit its accumulated batches here. No-op when no populate is
/// pending (the last entity was animated, or the scratch is empty).
/// </summary>
/// <remarks>
/// End-of-loop only — does NOT reset the caller's tracker locals
/// (intentional, since they go out of scope immediately after).
/// </remarks>
internal static void FinalFlushPopulate(
    uint? populateEntityId,
    uint populateLandblockId,
    EntityClassificationCache cache,
    List populateScratch)
{
    if (populateEntityId.HasValue && populateScratch.Count > 0)
    {
        cache.Populate(populateEntityId.Value, populateLandblockId, populateScratch.ToArray());
        populateScratch.Clear();
    }
}

///
/// Instance-side helper used by ApplyCacheHit. Looks up or
/// creates an InstanceGroup for the given key in
/// _groups and appends the per-instance world matrix.
///
private void AppendInstanceToGroup(GroupKey key, Matrix4x4 model)
{
    if (!_groups.TryGetValue(key, out var grp))
    {
        // First instance for this key this frame: seed the group's draw
        // parameters from the key itself.
        grp = new InstanceGroup
        {
            Ibo = key.Ibo,
            FirstIndex = key.FirstIndex,
            BaseVertex = key.BaseVertex,
            IndexCount = key.IndexCount,
            BindlessTextureHandle = key.BindlessTextureHandle,
            TextureLayer = key.TextureLayer,
            Translucency = key.Translucency,
            CullMode = key.CullMode,
        };
        _groups[key] = grp;
    }

    grp.Matrices.Add(model);
    grp.Slots.Add(_currentEntitySlot); // Phase U.4 — parallel to Matrices
}

/// <summary>
/// Slow-path classification for one mesh: for every batch of
/// <paramref name="renderData"/>, resolves translucency and texture, builds
/// the batch's <c>GroupKey</c>, and appends <paramref name="model"/> to the
/// matching instance group.
/// </summary>
/// <remarks>
/// Batches whose texture resolves to handle 0 are skipped: nothing is drawn
/// and nothing is recorded in <paramref name="collector"/> for them.
/// Group lookup/creation is delegated to <see cref="AppendInstanceToGroup"/>
/// — the same helper the cache-hit path uses — so both paths create groups
/// from the key in exactly one place.
/// </remarks>
/// <param name="renderData">Mesh whose batches are classified.</param>
/// <param name="gfxObjId">Id used for the surface-metadata lookup.</param>
/// <param name="model">World matrix appended for every accepted batch.</param>
/// <param name="entity">Owning entity; supplies the palette override for texture resolution.</param>
/// <param name="meshRef">Mesh reference; supplies surface overrides for texture resolution.</param>
/// <param name="palHash">Palette-override hash computed once per entity by the caller.</param>
/// <param name="metaTable">Preferred source of per-batch translucency.</param>
/// <param name="restPose">Entity-local transform stored with each cached batch.</param>
/// <param name="collector">
/// When non-null, receives one <c>CachedBatch</c> per accepted batch.
/// </param>
private void ClassifyBatches(
    ObjectRenderData renderData,
    ulong gfxObjId,
    Matrix4x4 model,
    WorldEntity entity,
    MeshRef meshRef,
    ulong palHash,
    AcSurfaceMetadataTable metaTable,
    Matrix4x4 restPose,
    List? collector = null)
{
    for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
    {
        var batch = renderData.Batches[batchIdx];

        // Prefer the metadata table; fall back to the batch's own flags.
        TranslucencyKind translucency;
        if (metaTable.TryLookup(gfxObjId, batchIdx, out var meta))
        {
            translucency = meta.Translucency;
        }
        else
        {
            translucency = batch.IsAdditive ? TranslucencyKind.Additive
                : batch.IsTransparent ? TranslucencyKind.AlphaBlend
                : TranslucencyKind.Opaque;
        }

        ulong texHandle = ResolveTexture(entity, meshRef, batch, palHash);
        if (texHandle == 0)
        {
            continue;
        }

        // TextureLayer is always 0 for per-instance composites; non-zero when
        // WB atlas is adopted in N.6+ and batches reference a shared atlas layer.
        uint texLayer = 0;

        var key = new GroupKey(
            batch.IBO,
            batch.FirstIndex,
            (int)batch.BaseVertex,
            batch.IndexCount,
            texHandle,
            texLayer,
            translucency,
            batch.CullMode);

        // Appends both the matrix and the current entity's clip slot.
        AppendInstanceToGroup(key, model);

        collector?.Add(new CachedBatch(key, texHandle, restPose));
    }
}

/// <summary>
/// Resolves the bindless texture handle for one batch, choosing the texture
/// cache entry point from the overrides that apply: palette override first,
/// then an original-texture surface override, then the plain surface.
/// </summary>
/// <returns>
/// The handle returned by the texture cache, or 0 when the batch's surface id
/// is 0 or 0xFFFFFFFF.
/// </returns>
private ulong ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
{
    uint surfaceId = batch.Key.SurfaceId;
    if (surfaceId == 0 || surfaceId == 0xFFFFFFFF)
    {
        return 0;
    }

    uint overrideOrigTex = 0;
    bool hasOrigTexOverride = meshRef.SurfaceOverrides is not null
        && meshRef.SurfaceOverrides.TryGetValue(surfaceId, out overrideOrigTex);

    if (entity.PaletteOverride is not null)
    {
        // The palette path still receives the surface override, when present.
        uint? origTexOverride = hasOrigTexOverride ? overrideOrigTex : (uint?)null;
        return _textures.GetOrUploadWithPaletteOverrideBindless(
            surfaceId, origTexOverride, entity.PaletteOverride, palHash);
    }

    if (hasOrigTexOverride)
    {
        return _textures.GetOrUploadWithOrigTextureOverrideBindless(surfaceId, overrideOrigTex);
    }

    return _textures.GetOrUploadBindless(surfaceId);
}

/// <summary>
/// Writes the 16 components of <paramref name="m"/> into
/// <paramref name="buf"/> starting at <paramref name="offset"/>, in
/// M11, M12, …, M44 order.
/// </summary>
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
    buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;
    buf[offset + 4] = m.M21; buf[offset + 5] = m.M22; buf[offset + 6] = m.M23; buf[offset + 7] = m.M24;
    buf[offset + 8] = m.M31; buf[offset + 9] = m.M32; buf[offset + 10] = m.M33; buf[offset + 11] = m.M34;
    buf[offset + 12] = m.M41; buf[offset + 13] = m.M42; buf[offset + 14] = m.M43; buf[offset + 15] = m.M44;
}

///
/// Entity-set membership test.
/// Phase U.1 (2026-05-30): with the
/// two-pipe partition deleted, the sole EntitySet.All
/// member matches every entity. Retained as a seam for the unified
/// pass to re-introduce partitioning.
///
private static bool EntityMatchesSet(WorldEntity entity, EntitySet set)
{
    return true;
}

/// <summary>
/// Decides whether <paramref name="entity"/> survives the visible-cell filter.
/// </summary>
/// <returns>
/// <c>true</c> when no filter is supplied; membership of the entity's
/// ParentCellId in <paramref name="visibleCellIds"/> when it has one; for a
/// building shell under a shell-scoped set, membership of its anchor cell
/// (<c>false</c> if it has no anchor); <c>true</c> in every other case.
/// </returns>
internal static bool EntityPassesVisibleCellGate(
    WorldEntity entity,
    HashSet? visibleCellIds,
    EntitySet set)
{
    // A null set means "no cell filtering at all".
    if (visibleCellIds is null)
    {
        return true;
    }

    if (entity.ParentCellId is { } parentCellId)
    {
        return visibleCellIds.Contains(parentCellId);
    }

    bool gatedByAnchor = IsShellScopedSet(set) && entity.IsBuildingShell;
    if (!gatedByAnchor)
    {
        return true;
    }

    return entity.BuildingShellAnchorCellId is uint anchorCellId
        && visibleCellIds.Contains(anchorCellId);
}

// Phase U.1 (2026-05-30): the shell-scoped sets (IndoorPass / BuildingShells)
// were deleted with the two-pipe machinery. EntitySet.All is never shell-scoped.
private static bool IsShellScopedSet(EntitySet set)
{
    return false;
}

/// <summary>
/// Deletes the GL buffers and timer queries owned by this dispatcher.
/// Calls after the first are no-ops.
/// </summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    _gl.DeleteBuffer(_instanceSsbo);
    _gl.DeleteBuffer(_batchSsbo);
    _gl.DeleteBuffer(_indirectBuffer);

    // Phase U.3: these two are only deleted when their id is non-zero.
    if (_clipSlotSsbo != 0)
    {
        _gl.DeleteBuffer(_clipSlotSsbo);
    }

    if (_fallbackClipRegionSsbo != 0)
    {
        _gl.DeleteBuffer(_fallbackClipRegionSsbo);
    }

    if (!_gpuQueriesInitialized)
    {
        return;
    }

    for (int slot = 0; slot < GpuQueryRingDepth; slot++)
    {
        _gl.DeleteQuery(_gpuQueryOpaque[slot]);
        _gl.DeleteQuery(_gpuQueryTransparent[slot]);
    }
}

// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
//
// These are public so the pure-CPU unit tests in AcDream.Core.Tests can
// exercise BuildIndirectArrays without needing a GL context.

/// <summary>
/// Stride in bytes of <c>DrawElementsIndirectCommand</c> in the indirect buffer.
/// 5 × uint = 20 bytes. Tests and callers reference this symbolically
/// rather than hard-coding 20 so a layout change produces a compile error.
/// </summary>
public const int DrawCommandStride = 5 * sizeof(uint); // sizeof(DrawElementsIndirectCommand): 5 × uint

///
/// Public view of the per-group inputs to BuildIndirectArrays — used in tests.
///
    public readonly record struct IndirectGroupInput(
        int IndexCount,
        uint FirstIndex,
        int BaseVertex,
        int InstanceCount,
        int FirstInstance,
        ulong TextureHandle,
        uint TextureLayer,
        TranslucencyKind Translucency,
        CullMode CullMode = CullMode.CounterClockwise);

    /// <summary>
    /// Public mirror of the per-group BatchData uploaded to the SSBO.
    /// Tests verify the layout. Same field shape as the private BatchData.
    /// </summary>
    [StructLayout(LayoutKind.Sequential, Pack = 8)]
    public struct BatchDataPublic
    {
        public ulong TextureHandle;
        public uint TextureLayer;
        public uint Flags;
    }

    /// <summary>Result of <see cref="BuildIndirectArrays"/>.</summary>
    /// <param name="OpaqueCount">Number of groups written to the opaque section.</param>
    /// <param name="TransparentCount">Number of groups written to the transparent section.</param>
    /// <param name="TransparentByteOffset">
    /// OpaqueCount × <see cref="DrawCommandStride"/>: the byte position of the
    /// first transparent command within the indirect command array.
    /// </param>
    public readonly record struct IndirectLayoutResult(
        int OpaqueCount,
        int TransparentCount,
        int TransparentByteOffset);

    /// <summary>
    /// Lays out the indirect commands + parallel BatchData array contiguously:
    /// opaque section first (caller sorts before calling), transparent section second.
    /// Pure CPU, no GL state. Caller passes pre-sized scratch arrays.
    /// </summary>
    /// <remarks>
    /// Classification: Opaque + ClipMap → opaque pass (ClipMap uses discard, not
    /// blending). Everything else (AlphaBlend, Additive, InvAlpha) → transparent pass.
    /// Within each section, groups keep their relative order from
    /// <paramref name="groups"/>.
    /// </remarks>
    /// <param name="groups">Per-group inputs, in the order they should appear within their section.</param>
    /// <param name="indirectScratch">Receives one command per group; needs at least groups.Count elements.</param>
    /// <param name="batchScratch">Receives one BatchData entry per group, at the same index as its command.</param>
    /// <param name="cullScratch">Optional; when non-null receives each group's cull mode at the same index.</param>
    /// <returns>Section sizes and the byte offset of the transparent section.</returns>
    public static IndirectLayoutResult BuildIndirectArrays(
        IReadOnlyList<IndirectGroupInput> groups,
        DrawElementsIndirectCommand[] indirectScratch,
        BatchDataPublic[] batchScratch,
        CullMode[]? cullScratch = null)
    {
        // Pass 1: count both sections so the transparent write cursor can
        // start immediately after the opaque section.
        int opaqueCount = 0;
        int transparentCount = 0;
        foreach (var g in groups)
        {
            if (IsOpaque(g.Translucency))
            {
                opaqueCount++;
            }
            else
            {
                transparentCount++;
            }
        }

        int oi = 0;           // opaque write cursor (fills [0..opaqueCount))
        int ti = opaqueCount; // transparent write cursor (fills [opaqueCount..end))

        // Pass 2: build each command and write it at its section's cursor.
        foreach (var g in groups)
        {
            var dec = new DrawElementsIndirectCommand
            {
                Count = (uint)g.IndexCount,
                InstanceCount = (uint)g.InstanceCount,
                FirstIndex = g.FirstIndex,
                BaseVertex = g.BaseVertex,
                BaseInstance = (uint)g.FirstInstance,
            };
            var bd = new BatchDataPublic
            {
                TextureHandle = g.TextureHandle,
                TextureLayer = g.TextureLayer,
                Flags = 0,
            };

            int slot = IsOpaque(g.Translucency) ? oi++ : ti++;

            indirectScratch[slot] = dec;
            batchScratch[slot] = bd;
            if (cullScratch is not null)
            {
                cullScratch[slot] = g.CullMode;
            }
        }

        return new IndirectLayoutResult(opaqueCount, transparentCount, opaqueCount * DrawCommandStride);
    }

    /// <summary>
    /// Public test shim for <see cref="IsOpaque"/>. Locks in the N.5 Decision 2
    /// translucency partition: Opaque + ClipMap → opaque indirect; AlphaBlend +
    /// Additive + InvAlpha → transparent indirect.
    /// </summary>
    public static bool IsOpaquePublic(TranslucencyKind t) => IsOpaque(t);

    /// <summary>
    /// True for the kinds drawn in the opaque pass: Opaque and ClipMap.
    /// </summary>
    private static bool IsOpaque(TranslucencyKind t) =>
        t == TranslucencyKind.Opaque || t == TranslucencyKind.ClipMap;

    // ────────────────────────────────────────────────────────────────────────

    ///
    /// Thin wrapper around an instance's rate-limit dictionary + frame
    /// counter, passed into the static
    /// overload so it can emit rate-limited probe lines without access
    /// to instance fields. Null = probes disabled (test-friendly overload).
///
    internal sealed class IndoorProbeState
    {
        // NOTE(review): `Dictionary` appears here without type arguments. ShouldEmit
        // looks it up with a ulong key and reads an int value, which suggests
        // Dictionary<ulong, int> — confirm against the original file.
        private readonly Dictionary _lastFrame;

        // Frame number captured at construction; it does not change for the
        // lifetime of this wrapper.
        private readonly int _currentFrame;

        // Minimum number of frames between two emits for the same cell.
        private const int RateLimit = IndoorProbeRateLimitFrames;

        // Stores the caller's dictionary by reference (no copy), so stamps written
        // by ShouldEmit are visible to whoever owns the dictionary.
        internal IndoorProbeState(Dictionary lastFrame, int currentFrame)
        {
            _lastFrame = lastFrame;
            _currentFrame = currentFrame;
        }

        ///
        /// Returns true when the cell has no recorded frame yet, or when at least
        /// RateLimit frames separate the recorded frame from the current one.
        /// Side-effect: stamps the current frame number into the dictionary
        /// on success.
        ///
        internal bool ShouldEmit(ulong cellId)
        {
            if (!_lastFrame.TryGetValue(cellId, out int last) || _currentFrame - last >= RateLimit)
            {
                _lastFrame[cellId] = _currentFrame;
                return true;
            }
            return false;
        }
    }

    // Mutable accumulator for one draw group: the batch identity (index range,
    // texture, translucency, cull mode) plus the per-instance data collected
    // for it.
    private sealed class InstanceGroup
    {
        public uint Ibo;
        public uint FirstIndex;
        public int BaseVertex;
        public int IndexCount;
        public ulong BindlessTextureHandle; // 64-bit (was uint TextureHandle in N.4)
        public uint TextureLayer; // 0 for per-instance composites; non-zero when WB atlas is adopted in N.6+
        public TranslucencyKind Translucency;
        public CullMode CullMode;
        public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
        public int InstanceCount;
        public float SortDistance; // squared distance from camera to first instance, for opaque sort

        // NOTE(review): `List` appears without a type argument on both fields
        // below. Matrices presumably holds Matrix4x4 values (the element type
        // WriteMatrix consumes); the element type of Slots cannot be determined
        // from this section — confirm both against the original file.
        public readonly List Matrices = new();

        // Phase U.4: per-instance clip-slot index, parallel to Matrices (Slots[i]
        // is the binding=2 CellClip slot for the instance whose matrix is
        // Matrices[i]). At layout time the dispatcher writes Slots[i] into
        // _clipSlotData at the same cursor it writes Matrices[i] into _instanceData,
        // so the binding=3 instanceClipSlot[] tracks the binding=0 instance.
        public readonly List Slots = new();
    }
}