using System;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.InteropServices;
using AcDream.Core.Meshing;
using AcDream.Core.Terrain;
using AcDream.Core.World;
using Chorizite.OpenGLSDLBackend.Lib;
using Silk.NET.OpenGL;
namespace AcDream.App.Rendering.Wb;
/// <summary>
/// Draws entities using WB's shared mesh buffers (a single global
/// VAO/VBO/IBO under modern rendering) with acdream's
/// <see cref="TextureCache"/> for bindless texture resolution and
/// <see cref="AcSurfaceMetadataTable"/> for translucency classification.
/// </summary>
/// <remarks>
/// <para>
/// <b>Atlas-tier</b> entities (ServerGuid == 0): mesh data comes from WB's
/// render data via <c>WbMeshAdapter.TryGetRenderData</c>.
/// Textures resolve through the bindless-suffixed
/// <see cref="TextureCache"/> variants, returning 64-bit
/// resident handles stored in the per-group SSBO.
/// </para>
/// <para>
/// <b>Per-instance-tier</b> entities (ServerGuid != 0): mesh data also from
/// WB, but textures resolve through
/// <c>TextureCache.GetOrUploadWithPaletteOverrideBindless</c> with palette
/// and surface overrides applied. <c>AnimatedEntityState</c> is currently
/// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges +
/// GfxObjDegradeResolver (Issue #47 close-detail mesh) into MeshRefs.
/// </para>
/// <para>
/// <b>GL strategy (N.5 — mandatory):</b> <c>glMultiDrawElementsIndirect</c> with SSBOs
/// and <c>GL_ARB_bindless_texture</c> + <c>GL_ARB_shader_draw_parameters</c>.
/// All visible (entity, batch) pairs are bucketed by <c>GroupKey</c>;
/// each group becomes one <c>DrawElementsIndirectCommand</c>. Three GPU buffers
/// are uploaded per frame: instance matrices (SSBO binding 0), per-group batch
/// metadata/texture handles (SSBO binding 1), and the indirect draw commands.
/// Two <c>glMultiDrawElementsIndirect</c> calls cover the opaque and transparent
/// passes respectively — one GL call per pass regardless of group count.
/// </para>
/// <para>
/// <b>Shader:</b> <c>mesh_modern</c> (bindless + <c>gl_DrawIDARB</c> /
/// <c>gl_BaseInstanceARB</c>). Missing bindless/draw-parameters support throws
/// at startup — there is no legacy fallback.
/// </para>
/// <para>
/// <b>Modern rendering assumption:</b> WB's <c>_useModernRendering</c> path (GL
/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses
/// <c>FirstIndex</c> + <c>BaseVertex</c> per batch. The dispatcher honors those
/// offsets inside each <c>DrawElementsIndirectCommand</c> via
/// <c>glMultiDrawElementsIndirect</c>.
/// </para>
/// </remarks>
public sealed unsafe class WbDrawDispatcher : IDisposable
{
// Collaborators supplied to the constructor (all validated non-null there).
private readonly GL _gl;
private readonly Shader _shader;
private readonly TextureCache _textures;
private readonly WbMeshAdapter _meshAdapter;
// Stored by the constructor; not read by any member visible in this class.
private readonly EntitySpawnAdapter _entitySpawnAdapter;
// Stored by the constructor; not read by any member visible in this class.
private readonly BindlessSupport _bindless;
// GL buffer object names: generated in the constructor, deleted in Dispose.
// _instanceSsbo is bound to SSBO binding 0 and _batchSsbo to binding 1 by Draw.
private uint _instanceSsbo;
private uint _batchSsbo;
private uint _indirectBuffer;
// Per-frame CPU staging arrays. Draw replaces each with a larger array when
// a frame needs more room; they are never shrunk.
private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
private BatchData[] _batchData = new BatchData[256];
private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];
// Layout of the indirect buffer for the current frame, copied from the
// IndirectLayoutResult returned by BuildIndirectArrays.
private int _opaqueDrawCount;
private int _transparentDrawCount;
private int _transparentByteOffset;
// Per-draw record uploaded to the batch SSBO (binding 1); one element per
// indirect draw command, in the same order as the commands.
//
// std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
// at offset 8, uint Flags at offset 12. Total 16 bytes.
// Pack=8 (not 4) because std430's uvec2 requires 8-byte alignment — Pack=4
// works today by accident (TextureHandle is the first field, so offset 0 is
// always 8-byte aligned), but adding a 4-byte field before TextureHandle
// without bumping Pack would silently misalign the GPU struct.
[StructLayout(LayoutKind.Sequential, Pack = 8)]
private struct BatchData
{
public ulong TextureHandle; // bindless handle (uvec2 in GLSL)
public uint TextureLayer; // copied from the group's TextureLayer
public uint Flags; // BuildIndirectArrays currently always writes 0
}
// Per-frame scratch — reused across frames to avoid per-frame allocation.
// _groups keeps its entries between frames; only each group's matrix list is
// cleared at the start of Draw.
private readonly Dictionary _groups = new();
private readonly List _opaqueDraws = new();
private readonly List _translucentDraws = new();
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
private const float PerEntityCullRadius = 5.0f;
// Set by the first Dispose call so later calls are no-ops.
private bool _disposed;
// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
// MaybeFlushDiag resets them to zero after each log line.
private int _entitiesSeen;
private int _entitiesDrawn;
private int _meshesMissing;
private int _drawsIssued;
private int _instancesIssued;
private long _lastLogTick; // Environment.TickCount64 at the last flush
// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
// Both sample arrays are ring buffers indexed by their cursor; a slot value of
// 0 is treated as "no sample" by the median/percentile helpers.
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
private readonly long[] _cpuSamples = new long[256]; // microseconds
private int _cpuSampleCursor;
private uint _gpuQueryOpaque;
private uint _gpuQueryTransparent;
private readonly long[] _gpuSamples = new long[256]; // microseconds
private int _gpuSampleCursor;
// True once the two GL query objects have been generated (lazily, on the
// first Draw that runs with diagnostics enabled).
private bool _gpuQueriesInitialized;
/// <summary>
/// Creates the dispatcher and generates its three GL buffer objects
/// (instance SSBO, batch SSBO, indirect command buffer).
/// </summary>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public WbDrawDispatcher(
    GL gl,
    Shader shader,
    TextureCache textures,
    WbMeshAdapter meshAdapter,
    EntitySpawnAdapter entitySpawnAdapter,
    BindlessSupport bindless)
{
    // Validate everything up front, in parameter order, before touching GL.
    ArgumentNullException.ThrowIfNull(gl);
    ArgumentNullException.ThrowIfNull(shader);
    ArgumentNullException.ThrowIfNull(textures);
    ArgumentNullException.ThrowIfNull(meshAdapter);
    ArgumentNullException.ThrowIfNull(entitySpawnAdapter);
    ArgumentNullException.ThrowIfNull(bindless);

    _gl = gl;
    _shader = shader;
    _textures = textures;
    _meshAdapter = meshAdapter;
    _entitySpawnAdapter = entitySpawnAdapter;
    _bindless = bindless;

    // Buffer names only; storage is (re)allocated by the per-frame uploads.
    _instanceSsbo = gl.GenBuffer();
    _batchSsbo = gl.GenBuffer();
    _indirectBuffer = gl.GenBuffer();
}
/// <summary>
/// Builds the world matrix of one setup part by chaining, left to right,
/// the part's rest pose, its animation override and the owning entity's
/// world transform.
/// </summary>
/// <param name="entityWorld">World transform of the owning entity (applied last).</param>
/// <param name="animOverride">Animation/part transform (applied second).</param>
/// <param name="restPose">Rest-pose transform of the part (applied first).</param>
/// <returns><c>restPose * animOverride * entityWorld</c>.</returns>
public static Matrix4x4 ComposePartWorldMatrix(
    Matrix4x4 entityWorld,
    Matrix4x4 animOverride,
    Matrix4x4 restPose)
{
    Matrix4x4 partLocal = restPose * animOverride;
    return partLocal * entityWorld;
}
/// <summary>
/// Renders one frame's worth of entities: culls them, buckets every
/// (entity, batch) pair into instance groups, uploads the instance matrices,
/// per-draw batch data and indirect commands, then issues at most one
/// indirect multi-draw for the opaque pass and one for the transparent pass.
/// </summary>
/// <param name="camera">Supplies the View and Projection matrices; the camera
/// position used for sorting is taken from the inverted view matrix.</param>
/// <param name="landblockEntries">Landblocks to consider, each with its id,
/// AABB and entity list.</param>
/// <param name="frustum">Culling frustum; when <c>null</c> no frustum culling
/// is performed.</param>
/// <param name="neverCullLandblockId">Landblock that is exempt from both the
/// landblock-level and the per-entity frustum test.</param>
/// <param name="visibleCellIds">When non-null, entities that have a
/// ParentCellId not contained in this set are skipped.</param>
/// <param name="animatedEntityIds">Entities in this set are still processed
/// when their landblock is culled and skip the per-entity frustum test.</param>
public void Draw(
ICamera camera,
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList Entities)> landblockEntries,
FrustumPlanes? frustum = null,
uint? neverCullLandblockId = null,
HashSet? visibleCellIds = null,
HashSet? animatedEntityIds = null)
{
_shader.Use();
var vp = camera.View * camera.Projection;
_shader.SetMatrix4("uViewProjection", vp);
// Diagnostics are opt-in via environment variable, re-read every frame.
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
// GPU timer queries are created lazily the first time diagnostics are on.
if (diag && !_gpuQueriesInitialized)
{
_gpuQueryOpaque = _gl.GenQuery();
_gpuQueryTransparent = _gl.GenQuery();
_gpuQueriesInitialized = true;
}
// Always run the CPU stopwatch — cheap; only logged under diag.
_cpuStopwatch.Restart();
// Camera world-space position for front-to-back sort (perf #2). The view
// matrix is the inverse of the camera's world transform, so the world
// translation lives in the inverse's translation row.
// If the view matrix is not invertible the origin is used instead.
Vector3 camPos = Vector3.Zero;
if (Matrix4x4.Invert(camera.View, out var invView))
camPos = invView.Translation;
// ── Phase 1: clear groups, walk entities, build groups ──────────────
// Group objects themselves are kept; only their matrix lists are emptied.
foreach (var grp in _groups.Values) grp.Matrices.Clear();
var metaTable = _meshAdapter.MetadataTable;
// VAO of the first render data found; 0 doubles as "nothing to draw".
uint anyVao = 0;
foreach (var entry in landblockEntries)
{
bool landblockVisible = frustum is null
|| entry.LandblockId == neverCullLandblockId
|| FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);
// A culled landblock is only walked when there are animated entities
// that might live in it.
if (!landblockVisible && (animatedEntityIds is null || animatedEntityIds.Count == 0))
continue;
foreach (var entity in entry.Entities)
{
if (entity.MeshRefs.Count == 0) continue;
bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
if (!landblockVisible && !isAnimated) continue;
// Cell visibility filter: applies only to entities that have a parent
// cell, and only when the caller supplied a visible-cell set.
if (entity.ParentCellId.HasValue && visibleCellIds is not null
&& !visibleCellIds.Contains(entity.ParentCellId.Value))
continue;
// Per-entity AABB frustum cull (perf #3). Skips work for distant
// entities even when their landblock is visible. Animated
// entities bypass — they're tracked at landblock level + need
// per-frame work for animation regardless. Conservative 5m
// radius covers typical entity bounds.
if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
{
var p = entity.Position;
var aMin = new Vector3(p.X - PerEntityCullRadius, p.Y - PerEntityCullRadius, p.Z - PerEntityCullRadius);
var aMax = new Vector3(p.X + PerEntityCullRadius, p.Y + PerEntityCullRadius, p.Z + PerEntityCullRadius);
if (!FrustumCuller.IsAabbVisible(frustum.Value, aMin, aMax))
continue;
}
if (diag) _entitiesSeen++;
// Rotation first, then translation.
var entityWorld =
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position);
// Compute palette-override hash ONCE per entity (perf #4).
// Reused across every (part, batch) lookup so the FNV-1a fold
// over SubPalettes runs once instead of N times. Zero when the
// entity has no palette override (trees, scenery).
ulong palHash = 0;
if (entity.PaletteOverride is not null)
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
bool drewAny = false;
for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++)
{
// Note: GameWindow's spawn path already applies
// AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix —
// close-detail mesh swap for humanoids) to MeshRefs. We
// trust MeshRefs as the source of truth here. AnimatedEntityState's
// overrides become relevant only for hot-swap (0xF625
// ObjDescEvent) which today rebuilds MeshRefs anyway.
var meshRef = entity.MeshRefs[partIdx];
ulong gfxObjId = meshRef.GfxObjId;
var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
if (renderData is null)
{
if (diag) _meshesMissing++;
continue;
}
// Set as soon as render data exists, even if every batch is later
// skipped for lack of a texture handle.
drewAny = true;
if (anyVao == 0) anyVao = renderData.VAO;
if (renderData.IsSetup && renderData.SetupParts.Count > 0)
{
// Setup: each sub-part has its own render data and rest transform.
// Sub-parts whose render data is missing are skipped silently
// (not counted in _meshesMissing).
foreach (var (partGfxObjId, partTransform) in renderData.SetupParts)
{
var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
if (partData is null) continue;
var model = ComposePartWorldMatrix(
entityWorld, meshRef.PartTransform, partTransform);
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable);
}
}
else
{
var model = meshRef.PartTransform * entityWorld;
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable);
}
}
if (diag && drewAny) _entitiesDrawn++;
}
}
// Nothing visible — skip the GL pass entirely.
// (No CPU timing sample is recorded on this path.)
if (anyVao == 0)
{
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag();
return;
}
// ── Phase 3: assign FirstInstance per group, lay matrices contiguously, sort opaque ──
int totalInstances = 0;
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
// Render data was found but no batch produced an instance.
if (totalInstances == 0)
{
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag();
return;
}
// 16 floats per instance matrix; grow with some headroom when too small.
int needed = totalInstances * 16;
if (_instanceData.Length < needed)
_instanceData = new float[needed + 256 * 16];
_opaqueDraws.Clear();
_translucentDraws.Clear();
// cursor counts instances (not floats) written to _instanceData so far.
int cursor = 0;
foreach (var grp in _groups.Values)
{
// Groups left over from earlier frames stay in the dictionary with an
// empty matrix list; they produce no draw.
if (grp.Matrices.Count == 0) continue;
grp.FirstInstance = cursor;
grp.InstanceCount = grp.Matrices.Count;
// Use the first instance's translation as the group's representative
// position for front-to-back sort (perf #2). Cheap heuristic; works
// well when instances of one group are spatially coherent
// (typical for trees in one landblock area, NPCs at one spawn).
var first = grp.Matrices[0];
var grpPos = new Vector3(first.M41, first.M42, first.M43);
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
for (int i = 0; i < grp.Matrices.Count; i++)
{
WriteMatrix(_instanceData, cursor * 16, grp.Matrices[i]);
cursor++;
}
if (IsOpaque(grp.Translucency))
_opaqueDraws.Add(grp);
else
_translucentDraws.Add(grp);
}
// Front-to-back sort for opaque pass: nearer groups draw first so the
// depth test rejects fragments hidden behind them, reducing fragment
// shader cost from overdraw on dense scenes (Holtburg courtyard,
// Foundry interior).
// Translucent groups are left in dictionary enumeration order.
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));
// ── Phase 4: build IndirectGroupInput list (opaque sorted, then translucent),
// fill via BuildIndirectArrays ──────────────────────────────────
int totalDraws = _opaqueDraws.Count + _translucentDraws.Count;
if (_batchData.Length < totalDraws)
_batchData = new BatchData[totalDraws + 64];
if (_indirectCommands.Length < totalDraws)
_indirectCommands = new DrawElementsIndirectCommand[totalDraws + 64];
// Note: this list and batchPublic below are allocated every frame.
var groupInputs = new List(totalDraws);
foreach (var g in _opaqueDraws) groupInputs.Add(ToInput(g));
foreach (var g in _translucentDraws) groupInputs.Add(ToInput(g));
// Cast _batchData (private BatchData) to public-mirror BatchDataPublic for BuildIndirectArrays.
// Layout is asserted at test time (BatchDataPublic_LayoutMatchesPrivateBatchData test).
var batchPublic = new BatchDataPublic[totalDraws];
var layout = BuildIndirectArrays(groupInputs, _indirectCommands, batchPublic);
// Copy back into _batchData
for (int i = 0; i < totalDraws; i++)
{
_batchData[i] = new BatchData
{
TextureHandle = batchPublic[i].TextureHandle,
TextureLayer = batchPublic[i].TextureLayer,
Flags = batchPublic[i].Flags,
};
}
_opaqueDrawCount = layout.OpaqueCount;
_transparentDrawCount = layout.TransparentCount;
_transparentByteOffset = layout.TransparentByteOffset;
// ── Phase 5: upload three buffers ───────────────────────────────────
// Only the used prefix of each staging array is uploaded.
fixed (float* ip = _instanceData)
UploadSsbo(_instanceSsbo, 0, ip, totalInstances * 16 * sizeof(float));
fixed (BatchData* bp = _batchData)
UploadSsbo(_batchSsbo, 1, bp, totalDraws * sizeof(BatchData));
fixed (DrawElementsIndirectCommand* cp = _indirectCommands)
{
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
_gl.BufferData(BufferTargetARB.DrawIndirectBuffer,
(nuint)(totalDraws * sizeof(DrawElementsIndirectCommand)), cp, BufferUsageARB.DynamicDraw);
}
// ── Phase 6: bind global VAO once ───────────────────────────────────
_gl.BindVertexArray(anyVao);
// Debug switch: ACDREAM_NO_CULL=1 turns back-face culling off for the passes.
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
_gl.Disable(EnableCap.CullFace);
// ── Phase 7: opaque pass ─────────────────────────────────────────────
if (_opaqueDrawCount > 0)
{
_gl.Disable(EnableCap.Blend);
_gl.DepthMask(true);
_shader.SetInt("uRenderPass", 0);
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque);
// Opaque commands start at byte offset 0 of the indirect buffer.
_gl.MultiDrawElementsIndirect(
PrimitiveType.Triangles,
DrawElementsType.UnsignedShort,
(void*)0,
(uint)_opaqueDrawCount,
(uint)DrawCommandStride);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
}
// ── Phase 8: transparent pass ────────────────────────────────────────
if (_transparentDrawCount > 0)
{
// Blended, depth-tested but not depth-written.
_gl.Enable(EnableCap.Blend);
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
_gl.DepthMask(false);
_shader.SetInt("uRenderPass", 1);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent);
// Transparent commands follow the opaque ones in the same indirect buffer
// (still bound from the Phase 5 upload).
_gl.MultiDrawElementsIndirect(
PrimitiveType.Triangles,
DrawElementsType.UnsignedShort,
(void*)_transparentByteOffset,
(uint)_transparentDrawCount,
(uint)DrawCommandStride);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
// Restore depth writes and blending for whoever draws next.
_gl.DepthMask(true);
_gl.Disable(EnableCap.Blend);
}
// NOTE(review): culling is disabled unconditionally on exit, regardless of
// ACDREAM_NO_CULL — presumably the state later passes expect; confirm.
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
_cpuStopwatch.Stop();
if (diag)
{
// Stopwatch ticks → microseconds.
long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency;
_cpuSamples[_cpuSampleCursor] = cpuUs;
_cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length;
// Read GPU samples non-blocking; the result for the previous frame's
// queries should be ready by now. If not, drop the sample (don't stall
// the CPU waiting for the GPU).
// NOTE(review): availability is checked for the opaque query only; the
// transparent result is then read unconditionally. Either query may not
// have been issued this frame when its pass had zero draws — confirm this
// is acceptable for a diagnostics-only path.
if (_gpuQueriesInitialized)
{
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.ResultAvailable, out int avail);
if (avail != 0)
{
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.Result, out ulong opaqueNs);
_gl.GetQueryObject(_gpuQueryTransparent, QueryObjectParameterName.Result, out ulong transNs);
// Query results are divided by 1000 to store microseconds.
long gpuUs = (long)((opaqueNs + transNs) / 1000UL);
_gpuSamples[_gpuSampleCursor] = gpuUs;
_gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length;
}
}
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
_instancesIssued += totalInstances;
MaybeFlushDiag();
}
}
/// <summary>
/// Projects an internal <c>InstanceGroup</c> onto the public
/// <c>IndirectGroupInput</c> record consumed by <c>BuildIndirectArrays</c>.
/// </summary>
private static IndirectGroupInput ToInput(InstanceGroup g)
{
    // Positional arguments follow the record's declared parameter order.
    return new IndirectGroupInput(
        g.IndexCount,
        g.FirstIndex,
        g.BaseVertex,
        g.InstanceCount,
        g.FirstInstance,
        g.BindlessTextureHandle,
        g.TextureLayer,
        g.Translucency);
}
/// <summary>
/// Replaces the contents of a shader-storage buffer and attaches it to an
/// indexed SSBO binding point.
/// </summary>
/// <param name="ssbo">GL buffer name to fill.</param>
/// <param name="binding">Indexed shader-storage binding point to attach to.</param>
/// <param name="data">Pointer to the first byte to upload.</param>
/// <param name="byteCount">Number of bytes to upload.</param>
private unsafe void UploadSsbo(uint ssbo, uint binding, void* data, int byteCount)
{
    const BufferTargetARB target = BufferTargetARB.ShaderStorageBuffer;
    nuint size = (nuint)byteCount;

    _gl.BindBuffer(target, ssbo);
    // BufferData re-specifies the whole data store on every call.
    _gl.BufferData(target, size, data, BufferUsageARB.DynamicDraw);
    _gl.BindBufferBase(target, binding, ssbo);
}
/// <summary>
/// Writes one <c>[WB-DIAG]</c> line to the console when more than five seconds
/// have elapsed since the previous one, then zeroes the per-interval counters.
/// </summary>
private void MaybeFlushDiag()
{
    long tickNow = Environment.TickCount64;
    if (tickNow - _lastLogTick <= 5000)
    {
        return;
    }

    long cpuMedian = MedianMicros(_cpuSamples);
    long cpuTail = Percentile95Micros(_cpuSamples);
    long gpuMedian = MedianMicros(_gpuSamples);
    long gpuTail = Percentile95Micros(_gpuSamples);

    string counters =
        $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} ";
    string timings =
        $"cpu_us={cpuMedian}m/{cpuTail}p95 gpu_us={gpuMedian}m/{gpuTail}p95";
    Console.WriteLine(counters + timings);

    _entitiesSeen = 0;
    _entitiesDrawn = 0;
    _meshesMissing = 0;
    _drawsIssued = 0;
    _instancesIssued = 0;
    _lastLogTick = tickNow;

    // The timing sample buffers are deliberately left alone: they form a
    // moving window over the last 256 frames, and clearing them on each
    // flush would throw away recent history.
}
/// <summary>
/// Returns the median of the positive entries of <paramref name="samples"/>.
/// Entries that are zero (unused ring-buffer slots) are ignored.
/// </summary>
/// <param name="samples">Timing samples in microseconds; not modified.</param>
/// <returns>
/// The median positive sample (the upper of the two middle values when the
/// count is even), or 0 when there is no positive sample.
/// </returns>
private static long MedianMicros(long[] samples)
{
    var copy = (long[])samples.Clone();
    Array.Sort(copy);

    int nz = 0;
    foreach (var v in copy)
    {
        if (v > 0) nz++;
    }
    if (nz == 0) return 0;

    // After an ascending sort the nz positive samples occupy the tail
    // [Length - nz, Length - 1]; the median sits nz / 2 into that tail.
    // (The previous index, Length - nz / 2, ran past the end of the array
    // when nz == 1 and pointed at the top of the range for small windows.)
    int firstPositive = copy.Length - nz;
    return copy[firstPositive + nz / 2];
}
/// <summary>
/// Returns the 95th-percentile value among the positive entries of
/// <paramref name="samples"/>; zero entries are treated as empty slots.
/// </summary>
/// <param name="samples">Timing samples in microseconds; not modified.</param>
/// <returns>The percentile value, or 0 when there is no positive sample.</returns>
private static long Percentile95Micros(long[] samples)
{
    long[] sorted = (long[])samples.Clone();
    Array.Sort(sorted);

    int positiveCount = 0;
    for (int i = 0; i < sorted.Length; i++)
    {
        if (sorted[i] > 0)
        {
            positiveCount++;
        }
    }

    if (positiveCount == 0)
    {
        return 0;
    }

    // Step down from the largest sample by 5% of the positive count.
    int skippedFromTop = (int)(positiveCount * 0.05);
    int lastIndex = sorted.Length - 1;
    return sorted[lastIndex - skippedFromTop];
}
/// <summary>
/// Adds one instance of <paramref name="model"/> to the instance group of
/// every batch in <paramref name="renderData"/>, creating groups on demand.
/// Batches whose texture cannot be resolved (handle 0) are skipped.
/// </summary>
/// <param name="renderData">Render data whose batches are classified.</param>
/// <param name="gfxObjId">Id used to look up per-batch surface metadata.</param>
/// <param name="model">World matrix recorded for each accepted batch.</param>
/// <param name="entity">Owning entity (source of the palette override).</param>
/// <param name="meshRef">Mesh reference (source of surface overrides).</param>
/// <param name="palHash">Precomputed palette-override hash for the entity.</param>
/// <param name="metaTable">Surface metadata table consulted for translucency.</param>
private void ClassifyBatches(
    ObjectRenderData renderData,
    ulong gfxObjId,
    Matrix4x4 model,
    WorldEntity entity,
    MeshRef meshRef,
    ulong palHash,
    AcSurfaceMetadataTable metaTable)
{
    for (int i = 0; i < renderData.Batches.Count; i++)
    {
        var current = renderData.Batches[i];

        // Prefer the metadata table; fall back to the batch's own flags.
        TranslucencyKind kind;
        if (metaTable.TryLookup(gfxObjId, i, out var surfaceMeta))
        {
            kind = surfaceMeta.Translucency;
        }
        else if (current.IsAdditive)
        {
            kind = TranslucencyKind.Additive;
        }
        else if (current.IsTransparent)
        {
            kind = TranslucencyKind.AlphaBlend;
        }
        else
        {
            kind = TranslucencyKind.Opaque;
        }

        ulong handle = ResolveTexture(entity, meshRef, current, palHash);
        if (handle == 0)
        {
            continue;
        }

        // TextureLayer is always 0 for per-instance composites; non-zero when
        // WB atlas is adopted in N.6+ and batches reference a shared atlas layer.
        const uint layer = 0;
        int baseVertex = (int)current.BaseVertex;

        var groupKey = new GroupKey(
            current.IBO, current.FirstIndex, baseVertex,
            current.IndexCount, handle, layer, kind);

        if (!_groups.TryGetValue(groupKey, out var bucket))
        {
            bucket = new InstanceGroup
            {
                Ibo = current.IBO,
                FirstIndex = current.FirstIndex,
                BaseVertex = baseVertex,
                IndexCount = current.IndexCount,
                BindlessTextureHandle = handle,
                TextureLayer = layer,
                Translucency = kind,
            };
            _groups.Add(groupKey, bucket);
        }

        bucket.Matrices.Add(model);
    }
}
/// <summary>
/// Resolves the bindless texture handle for one batch, honouring the
/// entity's palette override and the mesh reference's surface override.
/// </summary>
/// <returns>
/// The handle returned by the texture cache, or 0 when the batch's surface
/// id is 0 or 0xFFFFFFFF.
/// </returns>
private ulong ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
{
    uint surfaceId = batch.Key.SurfaceId;
    if (surfaceId == 0 || surfaceId == 0xFFFFFFFF)
    {
        return 0;
    }

    // Look up a per-surface replacement texture, if the mesh ref carries any.
    uint replacementTex = 0;
    bool hasReplacement = false;
    if (meshRef.SurfaceOverrides is not null)
    {
        hasReplacement = meshRef.SurfaceOverrides.TryGetValue(surfaceId, out replacementTex);
    }

    // The palette override takes precedence and carries the optional
    // replacement texture along with it.
    if (entity.PaletteOverride is not null)
    {
        uint? replacementOrNull = hasReplacement ? replacementTex : (uint?)null;
        return _textures.GetOrUploadWithPaletteOverrideBindless(
            surfaceId, replacementOrNull, entity.PaletteOverride, palHash);
    }

    if (hasReplacement)
    {
        return _textures.GetOrUploadWithOrigTextureOverrideBindless(surfaceId, replacementTex);
    }

    return _textures.GetOrUploadBindless(surfaceId);
}
/// <summary>
/// Writes the 16 components of <paramref name="m"/> into
/// <paramref name="buf"/> starting at <paramref name="offset"/>, in
/// M11, M12, …, M44 order (one matrix row after another).
/// </summary>
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
    int at = offset;

    // Row 1
    buf[at++] = m.M11;
    buf[at++] = m.M12;
    buf[at++] = m.M13;
    buf[at++] = m.M14;

    // Row 2
    buf[at++] = m.M21;
    buf[at++] = m.M22;
    buf[at++] = m.M23;
    buf[at++] = m.M24;

    // Row 3
    buf[at++] = m.M31;
    buf[at++] = m.M32;
    buf[at++] = m.M33;
    buf[at++] = m.M34;

    // Row 4 (translation in M41..M43)
    buf[at++] = m.M41;
    buf[at++] = m.M42;
    buf[at++] = m.M43;
    buf[at] = m.M44;
}
/// <summary>
/// Deletes the GL buffers owned by the dispatcher and, when they were
/// created, the two GPU timer queries. Safe to call more than once.
/// </summary>
public void Dispose()
{
    if (!_disposed)
    {
        _disposed = true;

        _gl.DeleteBuffer(_instanceSsbo);
        _gl.DeleteBuffer(_batchSsbo);
        _gl.DeleteBuffer(_indirectBuffer);

        // Queries exist only if a diagnostics-enabled frame was ever drawn.
        if (!_gpuQueriesInitialized)
        {
            return;
        }

        _gl.DeleteQuery(_gpuQueryOpaque);
        _gl.DeleteQuery(_gpuQueryTransparent);
    }
}
// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
//
// These are public so the pure-CPU unit tests in AcDream.Core.Tests can
// exercise BuildIndirectArrays without needing a GL context.
/// <summary>
/// Stride in bytes of <c>DrawElementsIndirectCommand</c> in the indirect buffer.
/// 5 × uint = 20 bytes. Tests and callers reference this symbolically
/// rather than hard-coding 20.
/// </summary>
/// <remarks>
/// NOTE(review): the value is a literal, not derived from the struct, so a
/// change to the command layout will not be detected here automatically —
/// keep it in sync by hand (or with a test).
/// </remarks>
public const int DrawCommandStride = 20; // sizeof(DrawElementsIndirectCommand): 5 × uint
/// <summary>
/// Public view of the per-group inputs to <c>BuildIndirectArrays</c> — used in tests.
/// One value describes one instance group, i.e. one indirect draw command.
/// </summary>
/// <param name="IndexCount">Number of indices to draw (becomes the command's Count).</param>
/// <param name="FirstIndex">First index of the batch (becomes the command's FirstIndex).</param>
/// <param name="BaseVertex">Base vertex of the batch (becomes the command's BaseVertex).</param>
/// <param name="InstanceCount">Number of instances in the group.</param>
/// <param name="FirstInstance">Index of the group's first instance (becomes the command's BaseInstance).</param>
/// <param name="TextureHandle">64-bit bindless texture handle written to the batch data.</param>
/// <param name="TextureLayer">Texture layer written to the batch data.</param>
/// <param name="Translucency">Decides whether the group goes to the opaque or the transparent section.</param>
public readonly record struct IndirectGroupInput(
int IndexCount,
uint FirstIndex,
int BaseVertex,
int InstanceCount,
int FirstInstance,
ulong TextureHandle,
uint TextureLayer,
TranslucencyKind Translucency);
/// <summary>
/// Public mirror of the per-group <c>BatchData</c> uploaded to the SSBO.
/// Tests verify the layout. Same field shape as the private BatchData:
/// sequential layout, Pack = 8, fields in the same order.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 8)]
public struct BatchDataPublic
{
public ulong TextureHandle; // bindless texture handle
public uint TextureLayer; // texture layer for the draw
public uint Flags; // BuildIndirectArrays currently always writes 0
}
/// <summary>Result of <c>BuildIndirectArrays</c>.</summary>
/// <param name="OpaqueCount">Number of commands in the opaque section (starts at index 0).</param>
/// <param name="TransparentCount">Number of commands in the transparent section.</param>
/// <param name="TransparentByteOffset">Byte offset of the first transparent command
/// (<c>OpaqueCount * DrawCommandStride</c>).</param>
public readonly record struct IndirectLayoutResult(
int OpaqueCount,
int TransparentCount,
int TransparentByteOffset);
/// <summary>
/// Lays out the indirect commands + parallel BatchData array contiguously:
/// opaque section first (caller sorts before calling), transparent section second.
/// Pure CPU, no GL state. Caller passes pre-sized scratch arrays.
/// </summary>
/// <remarks>
/// Classification: Opaque + ClipMap → opaque pass (ClipMap uses discard, not
/// blending). Everything else (AlphaBlend, Additive, InvAlpha) → transparent pass.
/// Within each section the relative order of <paramref name="groups"/> is kept.
/// </remarks>
/// <param name="groups">Per-group inputs, one per draw command.</param>
/// <param name="indirectScratch">Receives the commands; needs at least <c>groups.Count</c> elements.</param>
/// <param name="batchScratch">Receives the matching batch data; needs at least <c>groups.Count</c> elements.</param>
/// <returns>Section sizes and the byte offset of the transparent section.</returns>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
/// <exception cref="ArgumentException">A scratch array is shorter than <c>groups.Count</c>.</exception>
public static IndirectLayoutResult BuildIndirectArrays(
    IReadOnlyList<IndirectGroupInput> groups,
    DrawElementsIndirectCommand[] indirectScratch,
    BatchDataPublic[] batchScratch)
{
    ArgumentNullException.ThrowIfNull(groups);
    ArgumentNullException.ThrowIfNull(indirectScratch);
    ArgumentNullException.ThrowIfNull(batchScratch);

    // Fail before writing anything rather than part-way through the fill.
    int total = groups.Count;
    if (indirectScratch.Length < total)
    {
        throw new ArgumentException(
            $"Scratch holds {indirectScratch.Length} commands but {total} are required.",
            nameof(indirectScratch));
    }
    if (batchScratch.Length < total)
    {
        throw new ArgumentException(
            $"Scratch holds {batchScratch.Length} batch entries but {total} are required.",
            nameof(batchScratch));
    }

    // First pass: size the opaque section so the transparent write cursor
    // can start right behind it.
    int opaqueCount = 0;
    for (int i = 0; i < total; i++)
    {
        if (IsOpaque(groups[i].Translucency)) opaqueCount++;
    }
    int transparentCount = total - opaqueCount;

    int oi = 0;           // opaque write cursor (fills [0..opaqueCount))
    int ti = opaqueCount; // transparent write cursor (fills [opaqueCount..total))
    for (int i = 0; i < total; i++)
    {
        var g = groups[i];
        var dec = new DrawElementsIndirectCommand
        {
            Count = (uint)g.IndexCount,
            InstanceCount = (uint)g.InstanceCount,
            FirstIndex = g.FirstIndex,
            BaseVertex = g.BaseVertex,
            BaseInstance = (uint)g.FirstInstance,
        };
        var bd = new BatchDataPublic
        {
            TextureHandle = g.TextureHandle,
            TextureLayer = g.TextureLayer,
            Flags = 0,
        };

        // Command and batch data share the same slot so the two arrays stay
        // index-aligned.
        int slot = IsOpaque(g.Translucency) ? oi++ : ti++;
        indirectScratch[slot] = dec;
        batchScratch[slot] = bd;
    }

    return new IndirectLayoutResult(opaqueCount, transparentCount, opaqueCount * DrawCommandStride);
}
/// <summary>
/// Public test shim for <c>IsOpaque</c>. Locks in the N.5 Decision 2
/// translucency partition: Opaque + ClipMap → opaque indirect; AlphaBlend +
/// Additive + InvAlpha → transparent indirect.
/// </summary>
public static bool IsOpaquePublic(TranslucencyKind t)
{
    return IsOpaque(t);
}
/// <summary>
/// True for the translucency kinds drawn in the opaque pass
/// (<c>Opaque</c> and <c>ClipMap</c>); false for every other kind.
/// </summary>
private static bool IsOpaque(TranslucencyKind t)
{
    return t is TranslucencyKind.Opaque or TranslucencyKind.ClipMap;
}
// ────────────────────────────────────────────────────────────────────────
// Dictionary key identifying one instance group. Two batches land in the same
// group (and therefore the same indirect draw command) only when every
// component below is equal; record-struct value equality does the comparison.
private readonly record struct GroupKey(
uint Ibo,
uint FirstIndex,
int BaseVertex,
int IndexCount,
ulong BindlessTextureHandle,
uint TextureLayer,
TranslucencyKind Translucency);
// Mutable per-group state: the draw parameters copied from the batch when the
// group is first created, plus the instance matrices collected for the
// current frame and the values Draw derives from them.
private sealed class InstanceGroup
{
public uint Ibo;
public uint FirstIndex;
public int BaseVertex;
public int IndexCount;
public ulong BindlessTextureHandle; // 64-bit (was uint TextureHandle in N.4)
public uint TextureLayer; // 0 for per-instance composites; non-zero when WB atlas is adopted in N.6+
public TranslucencyKind Translucency;
public int FirstInstance; // offset into the per-frame instance-matrix buffer (in instances, not bytes)
public int InstanceCount; // set by Draw to Matrices.Count for the frame
public float SortDistance; // squared distance from camera to first instance, for opaque sort
// Instance world matrices for the current frame; cleared at the start of each Draw.
public readonly List Matrices = new();
}
}