using System; using System.Collections.Generic; using System.Numerics; using System.Runtime.InteropServices; using AcDream.Core.Meshing; using AcDream.Core.Terrain; using AcDream.Core.World; using Chorizite.OpenGLSDLBackend.Lib; using Silk.NET.OpenGL; namespace AcDream.App.Rendering.Wb;
/// <summary>
/// Draws entities using WB's mesh buffers (a single global
/// VAO/VBO/IBO under modern rendering) with acdream's <see cref="TextureCache"/>
/// for texture resolution and <see cref="AcSurfaceMetadataTable"/> for
/// translucency classification.
///
/// <para>
/// <b>Atlas-tier</b> entities (<c>ServerGuid == 0</c>): mesh data comes from WB's
/// <c>ObjectMeshManager</c> via <see cref="WbMeshAdapter"/>.
/// Textures resolve through <see cref="TextureCache"/> using the batch's
/// <c>SurfaceId</c>.
/// </para>
///
/// <para>
/// <b>Per-instance-tier</b> entities (<c>ServerGuid != 0</c>): mesh data also from
/// WB, but textures resolve through <see cref="TextureCache"/> with palette and
/// surface overrides applied. <c>AnimatedEntityState</c> is currently
/// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges +
/// GfxObjDegradeResolver (Issue #47 close-detail mesh) into MeshRefs.
/// </para>
///
/// <para>
/// <b>GL strategy:</b> GROUPED instanced drawing. All visible (entity, batch)
/// pairs are bucketed by <c>GroupKey</c>; within a group a single
/// <c>glDrawElementsInstancedBaseVertexBaseInstance</c> renders all instances.
/// All matrices for the frame land in one shared instance VBO via a single
/// <c>BufferData</c> upload. This drops draw calls from O(entities×batches)
/// to O(unique GfxObj×batch×texture) — typically two orders of magnitude fewer.
/// </para>
///
/// <para>
/// <b>Shader:</b> reuses <c>mesh_instanced</c> (vert locations 0-2 = Position/
/// Normal/UV from WB's <c>VertexPositionNormalTexture</c>; locations 3-6 = instance
/// matrix from our VBO). WB's 32-byte vertex stride is compatible.
/// </para>
///
/// <para>
/// <b>Modern rendering assumption:</b> WB's <c>_useModernRendering</c> path (GL
/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses
/// <c>FirstIndex</c> + <c>BaseVertex</c> per batch. The dispatcher honors those
/// offsets via <c>DrawElementsInstancedBaseVertex(BaseInstance)</c>. The legacy
/// per-mesh-VAO path also works since FirstIndex/BaseVertex are zero there.
/// </para>
/// </summary>
/// <remarks>
/// Owns three GL buffer objects that are created in the constructor and
/// deleted by <see cref="Dispose"/>. Only the instance buffer is used by the
/// current draw path; the batch and indirect buffers are allocated for the
/// indirect-draw rewrite referred to as "Tasks 9-10" in the comments below.
/// After <see cref="Dispose"/>, <see cref="Draw"/> is a no-op.
/// </remarks>
public sealed unsafe class WbDrawDispatcher : IDisposable
{
    // Environment switches. Both are compared ordinally against "1".
    private const string DiagEnvVar = "ACDREAM_WB_DIAG";
    private const string NoCullEnvVar = "ACDREAM_NO_CULL";

    // Per-entity-cull AABB radius. Conservative — covers most entities; large
    // outliers (long banners, tall columns) are still landblock-culled.
    private const float PerEntityCullRadius = 5.0f;

    private readonly GL _gl;
    private readonly Shader _shader;
    private readonly TextureCache _textures;
    private readonly WbMeshAdapter _meshAdapter;
    private readonly EntitySpawnAdapter _entitySpawnAdapter;
    private readonly BindlessSupport _bindless;

    // SSBO buffer ids
    private uint _instanceSsbo;
    private uint _batchSsbo;
    private uint _indirectBuffer;

    // Per-frame scratch arrays — Tasks 9-10 fully wire these.
    private float[] _instanceData = new float[256 * 16]; // mat4 floats per instance
    private BatchData[] _batchData = new BatchData[256];
    private DrawElementsIndirectCommand[] _indirectCommands = new DrawElementsIndirectCommand[256];

#pragma warning disable CS0169 // Tasks 9-10 wire these counters
    private int _opaqueDrawCount;
    private int _transparentDrawCount;
    private int _transparentByteOffset;
#pragma warning restore CS0169

    // std430 layout: ulong TextureHandle (uvec2) at offset 0, uint TextureLayer
    // at offset 8, uint Flags at offset 12. Total 16 bytes.
    // Pack=8 (not 4) because std430's uvec2 requires 8-byte alignment — Pack=4
    // works today by accident (TextureHandle is the first field, so offset 0 is
    // always 8-byte aligned), but adding a 4-byte field before TextureHandle
    // without bumping Pack would silently misalign the GPU struct.
    [StructLayout(LayoutKind.Sequential, Pack = 8)]
    private struct BatchData
    {
        public ulong TextureHandle; // bindless handle (uvec2 in GLSL)
        public uint TextureLayer;
        public uint Flags;
    }

    // VAOs that already have the per-instance matrix attributes (locations 3-6) wired.
    private readonly HashSet<uint> _patchedVaos = new();

    // Per-frame scratch — reused across frames to avoid per-frame allocation.
    // Groups are never removed; only their matrix lists are cleared each frame.
    private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
    private readonly List<InstanceGroup> _opaqueDraws = new();
    private readonly List<InstanceGroup> _translucentDraws = new();
    private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink

    private bool _disposed;

    // Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
    private int _entitiesSeen;
    private int _entitiesDrawn;
    private int _meshesMissing;
    private int _drawsIssued;
    private int _instancesIssued;
    private long _lastLogTick;

    /// <summary>
    /// Stores the collaborators and generates the three GL buffer objects
    /// this dispatcher owns.
    /// </summary>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public WbDrawDispatcher(
        GL gl,
        Shader shader,
        TextureCache textures,
        WbMeshAdapter meshAdapter,
        EntitySpawnAdapter entitySpawnAdapter,
        BindlessSupport bindless)
    {
        // Validate everything before touching GL so a bad argument cannot leak buffers.
        ArgumentNullException.ThrowIfNull(gl);
        ArgumentNullException.ThrowIfNull(shader);
        ArgumentNullException.ThrowIfNull(textures);
        ArgumentNullException.ThrowIfNull(meshAdapter);
        ArgumentNullException.ThrowIfNull(entitySpawnAdapter);
        ArgumentNullException.ThrowIfNull(bindless);

        _gl = gl;
        _shader = shader;
        _textures = textures;
        _meshAdapter = meshAdapter;
        _entitySpawnAdapter = entitySpawnAdapter;
        _bindless = bindless;

        _instanceSsbo = _gl.GenBuffer();
        _batchSsbo = _gl.GenBuffer();
        _indirectBuffer = _gl.GenBuffer();
    }

    /// <summary>
    /// Composes a part's world matrix as <c>restPose * animOverride * entityWorld</c>.
    /// With System.Numerics' row-vector convention the left-most factor is
    /// applied to a vertex first.
    /// </summary>
    /// <param name="entityWorld">Entity-to-world transform.</param>
    /// <param name="animOverride">The mesh ref's part transform.</param>
    /// <param name="restPose">The part's transform within its setup.</param>
    /// <returns>The product <c>restPose * animOverride * entityWorld</c>.</returns>
    public static Matrix4x4 ComposePartWorldMatrix(
        Matrix4x4 entityWorld,
        Matrix4x4 animOverride,
        Matrix4x4 restPose)
        => restPose * animOverride * entityWorld;

    /// <summary>
    /// Culls, groups and draws every entity in <paramref name="landblockEntries"/>:
    /// an opaque/clip-map pass sorted front-to-back, then a blended translucent pass.
    /// Does nothing once the dispatcher has been disposed.
    /// </summary>
    /// <param name="camera">Supplies the view and projection matrices.</param>
    /// <param name="landblockEntries">
    /// One tuple per landblock: its id, its AABB, and the entities it contains.
    /// Enumerated once.
    /// </param>
    /// <param name="frustum">Culling frustum; <c>null</c> disables frustum culling.</param>
    /// <param name="neverCullLandblockId">
    /// Landblock that is exempt from both the landblock and per-entity frustum tests.
    /// </param>
    /// <param name="visibleCellIds">
    /// When non-null, an entity that has a parent cell is skipped unless that
    /// cell id is in this set.
    /// </param>
    /// <param name="animatedEntityIds">
    /// Entities in this set bypass the landblock and per-entity frustum tests
    /// (the cell-visibility test still applies).
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="camera"/> or <paramref name="landblockEntries"/> is <c>null</c>.
    /// </exception>
    // NOTE(review): the generic arguments on this signature (WorldEntity, uint)
    // were reconstructed from how the values are used in the body — confirm
    // the HashSet element type against WorldEntity.Id / ParentCellId.
    public void Draw(
        ICamera camera,
        IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
        FrustumPlanes? frustum = null,
        uint? neverCullLandblockId = null,
        HashSet<uint>? visibleCellIds = null,
        HashSet<uint>? animatedEntityIds = null)
    {
        ArgumentNullException.ThrowIfNull(camera);
        ArgumentNullException.ThrowIfNull(landblockEntries);

        // The buffers are deleted in Dispose; binding their stale names and
        // uploading would touch whatever buffer happens to be bound instead.
        if (_disposed)
            return;

        _shader.Use();
        var vp = camera.View * camera.Projection;
        _shader.SetMatrix4("uViewProjection", vp);

        bool diag = IsEnvFlagSet(DiagEnvVar);

        // Camera world-space position for front-to-back sort (perf #2). The view
        // matrix is the inverse of the camera's world transform, so the world
        // translation lives in the inverse's translation row.
        Vector3 camPos = Vector3.Zero;
        if (Matrix4x4.Invert(camera.View, out var invView))
            camPos = invView.Translation;

        // ── Phase 1: clear groups, walk entities, build groups ──────────────
        foreach (var grp in _groups.Values)
            grp.Matrices.Clear();

        var metaTable = _meshAdapter.MetadataTable;
        uint anyVao = 0;

        foreach (var entry in landblockEntries)
        {
            bool landblockVisible = frustum is null
                || entry.LandblockId == neverCullLandblockId
                || FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);

            // A culled landblock is only worth walking if some entity could be animated.
            if (!landblockVisible && (animatedEntityIds is null || animatedEntityIds.Count == 0))
                continue;

            foreach (var entity in entry.Entities)
            {
                if (entity.MeshRefs.Count == 0)
                    continue;

                bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
                if (!landblockVisible && !isAnimated)
                    continue;

                if (entity.ParentCellId.HasValue
                    && visibleCellIds is not null
                    && !visibleCellIds.Contains(entity.ParentCellId.Value))
                    continue;

                // Per-entity AABB frustum cull (perf #3). Skips work for distant
                // entities even when their landblock is visible. Animated
                // entities bypass — they're tracked at landblock level + need
                // per-frame work for animation regardless. Conservative 5m
                // radius covers typical entity bounds.
                if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
                {
                    var pos = entity.Position;
                    var aMin = new Vector3(
                        pos.X - PerEntityCullRadius,
                        pos.Y - PerEntityCullRadius,
                        pos.Z - PerEntityCullRadius);
                    var aMax = new Vector3(
                        pos.X + PerEntityCullRadius,
                        pos.Y + PerEntityCullRadius,
                        pos.Z + PerEntityCullRadius);
                    if (!FrustumCuller.IsAabbVisible(frustum.Value, aMin, aMax))
                        continue;
                }

                if (diag) _entitiesSeen++;

                var entityWorld =
                    Matrix4x4.CreateFromQuaternion(entity.Rotation)
                    * Matrix4x4.CreateTranslation(entity.Position);

                // Compute palette-override hash ONCE per entity (perf #4).
                // Reused across every (part, batch) lookup so the FNV-1a fold
                // over SubPalettes runs once instead of N times. Zero when the
                // entity has no palette override (trees, scenery).
                ulong palHash = 0;
                if (entity.PaletteOverride is not null)
                    palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);

                bool drewAny = false;

                for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++)
                {
                    // Note: GameWindow's spawn path already applies
                    // AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix —
                    // close-detail mesh swap for humanoids) to MeshRefs. We
                    // trust MeshRefs as the source of truth here. AnimatedEntityState's
                    // overrides become relevant only for hot-swap (0xF625
                    // ObjDescEvent) which today rebuilds MeshRefs anyway.
                    var meshRef = entity.MeshRefs[partIdx];
                    ulong gfxObjId = meshRef.GfxObjId;

                    var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
                    if (renderData is null)
                    {
                        if (diag) _meshesMissing++;
                        continue;
                    }

                    drewAny = true;
                    if (anyVao == 0)
                        anyVao = renderData.VAO;

                    if (renderData.IsSetup && renderData.SetupParts.Count > 0)
                    {
                        foreach (var (partGfxObjId, partTransform) in renderData.SetupParts)
                        {
                            var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
                            if (partData is null)
                            {
                                // Count missing sub-parts too, so the diagnostic
                                // reflects every mesh that failed to resolve.
                                if (diag) _meshesMissing++;
                                continue;
                            }

                            // Fall back to the part's VAO so queued instances are
                            // still drawn when the setup's own render data has none.
                            // NOTE(review): presumably a setup carries no geometry
                            // of its own — confirm against WB's ObjectRenderData.
                            if (anyVao == 0)
                                anyVao = partData.VAO;

                            var model = ComposePartWorldMatrix(
                                entityWorld, meshRef.PartTransform, partTransform);

                            ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable);
                        }
                    }
                    else
                    {
                        var model = meshRef.PartTransform * entityWorld;
                        ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable);
                    }
                }

                if (diag && drewAny) _entitiesDrawn++;
            }
        }

        // Nothing visible — skip the GL pass entirely.
        if (anyVao == 0)
        {
            if (diag) MaybeFlushDiag();
            return;
        }

        // ── Phase 2: lay matrices out contiguously, assign per-group offsets,
        //            split into opaque/translucent + compute sort keys ─────────
        int totalInstances = 0;
        foreach (var grp in _groups.Values)
            totalInstances += grp.Matrices.Count;

        if (totalInstances == 0)
        {
            if (diag) MaybeFlushDiag();
            return;
        }

        int needed = totalInstances * 16;
        if (_instanceBuffer.Length < needed)
            _instanceBuffer = new float[needed + 256 * 16]; // headroom

        _opaqueDraws.Clear();
        _translucentDraws.Clear();

        int cursor = 0;
        foreach (var grp in _groups.Values)
        {
            if (grp.Matrices.Count == 0)
                continue;

            grp.FirstInstance = cursor;
            grp.InstanceCount = grp.Matrices.Count;

            // Use the first instance's translation as the group's representative
            // position for front-to-back sort (perf #2). Cheap heuristic; works
            // well when instances of one group are spatially coherent
            // (typical for trees in one landblock area, NPCs at one spawn).
            var firstM = grp.Matrices[0];
            var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43);
            grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);

            for (int i = 0; i < grp.Matrices.Count; i++)
            {
                WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]);
                cursor++;
            }

            if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
                _opaqueDraws.Add(grp);
            else
                _translucentDraws.Add(grp);
        }

        // Front-to-back sort for opaque pass: nearer groups draw first so the
        // depth test rejects fragments hidden behind them, reducing fragment
        // shader cost from overdraw on dense scenes (Holtburg courtyard,
        // Foundry interior).
        _opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));

        // ── Phase 3: one upload of all matrices ─────────────────────────────
        // NOTE: _instanceSsbo is temporarily bound as ArrayBuffer for compile
        // compatibility. Tasks 9-10 rewrite this to BindBufferBase(SSBO) +
        // glMultiDrawElementsIndirect.
        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo);
        fixed (float* src = _instanceBuffer)
        {
            _gl.BufferData(
                BufferTargetARB.ArrayBuffer,
                (nuint)(totalInstances * 16 * sizeof(float)),
                src,
                BufferUsageARB.DynamicDraw);
        }

        // ── Phase 4: bind VAO once (modern rendering shares one global VAO) ──
        // NOTE(review): only this one VAO is bound for the whole pass, so every
        // group is drawn with its vertex bindings — confirm that holds for any
        // path where meshes do not share a VAO.
        EnsureInstanceAttribs(anyVao);
        _gl.BindVertexArray(anyVao);

        // Read the switch once per draw; it gates culling in both passes.
        bool noCull = IsEnvFlagSet(NoCullEnvVar);

        // ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ───────────
        if (noCull)
            _gl.Disable(EnableCap.CullFace);

        foreach (var grp in _opaqueDraws)
        {
            _shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
            DrawGroup(grp);
        }

        // ── Phase 6: translucent pass ───────────────────────────────────────
        _gl.Enable(EnableCap.Blend);
        _gl.DepthMask(false);

        if (noCull)
        {
            _gl.Disable(EnableCap.CullFace);
        }
        else
        {
            _gl.Enable(EnableCap.CullFace);
            _gl.CullFace(TriangleFace.Back);
            _gl.FrontFace(FrontFaceDirection.Ccw);
        }

        foreach (var grp in _translucentDraws)
        {
            switch (grp.Translucency)
            {
                case TranslucencyKind.Additive:
                    _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
                    break;
                case TranslucencyKind.InvAlpha:
                    _gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
                    break;
                default:
                    _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
                    break;
            }

            _shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
            DrawGroup(grp);
        }

        // Restore the state this method changed.
        _gl.DepthMask(true);
        _gl.Disable(EnableCap.Blend);
        _gl.Disable(EnableCap.CullFace);
        _gl.BindVertexArray(0);

        if (diag)
        {
            _drawsIssued += _opaqueDraws.Count + _translucentDraws.Count;
            _instancesIssued += totalInstances;
            MaybeFlushDiag();
        }
    }

    /// <summary>Returns true when the named environment variable is exactly "1".</summary>
    private static bool IsEnvFlagSet(string name)
        => string.Equals(Environment.GetEnvironmentVariable(name), "1", StringComparison.Ordinal);

    /// <summary>
    /// Binds the group's texture and index buffer and issues one instanced
    /// draw covering all of the group's instances.
    /// </summary>
    private void DrawGroup(InstanceGroup grp)
    {
        _gl.ActiveTexture(TextureUnit.Texture0);
        _gl.BindTexture(TextureTarget.Texture2D, grp.TextureHandle);
        _gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, grp.Ibo);

        // BaseInstance offsets the per-instance attribute fetches into our
        // shared instance VBO so each group reads its own slice. Requires
        // GL_ARB_base_instance (GL 4.2+); WB requires 4.3 so this is available.
        _gl.DrawElementsInstancedBaseVertexBaseInstance(
            PrimitiveType.Triangles,
            (uint)grp.IndexCount,
            DrawElementsType.UnsignedShort,
            (void*)(grp.FirstIndex * sizeof(ushort)), // byte offset into the IBO
            (uint)grp.InstanceCount,
            grp.BaseVertex,
            (uint)grp.FirstInstance);
    }

    /// <summary>
    /// Writes the diagnostic counters to the console and resets them, at most
    /// once every 5000 ms of <see cref="Environment.TickCount64"/>.
    /// </summary>
    private void MaybeFlushDiag()
    {
        long now = Environment.TickCount64;
        if (now - _lastLogTick > 5000)
        {
            Console.WriteLine(
                $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}");
            _entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
            _lastLogTick = now;
        }
    }

    /// <summary>
    /// Appends <paramref name="model"/> to the instance group of every batch in
    /// <paramref name="renderData"/>, creating groups on first use. Batches
    /// whose texture resolves to handle 0 are skipped.
    /// </summary>
    private void ClassifyBatches(
        ObjectRenderData renderData,
        ulong gfxObjId,
        Matrix4x4 model,
        WorldEntity entity,
        MeshRef meshRef,
        ulong palHash,
        AcSurfaceMetadataTable metaTable)
    {
        for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
        {
            var batch = renderData.Batches[batchIdx];

            // Prefer the metadata table; fall back to the batch's own flags.
            TranslucencyKind translucency;
            if (metaTable.TryLookup(gfxObjId, batchIdx, out var meta))
            {
                translucency = meta.Translucency;
            }
            else
            {
                translucency = batch.IsAdditive ? TranslucencyKind.Additive
                    : batch.IsTransparent ? TranslucencyKind.AlphaBlend
                    : TranslucencyKind.Opaque;
            }

            uint texHandle = ResolveTexture(entity, meshRef, batch, palHash);
            if (texHandle == 0)
                continue;

            var key = new GroupKey(
                batch.IBO,
                batch.FirstIndex,
                (int)batch.BaseVertex,
                batch.IndexCount,
                texHandle,
                translucency);

            if (!_groups.TryGetValue(key, out var grp))
            {
                grp = new InstanceGroup
                {
                    Ibo = batch.IBO,
                    FirstIndex = batch.FirstIndex,
                    BaseVertex = (int)batch.BaseVertex,
                    IndexCount = batch.IndexCount,
                    TextureHandle = texHandle,
                    Translucency = translucency,
                };
                _groups[key] = grp;
            }

            grp.Matrices.Add(model);
        }
    }

    /// <summary>
    /// Resolves the GL texture for a batch, applying the entity's palette
    /// override and the mesh ref's surface override when present.
    /// </summary>
    /// <returns>
    /// The texture handle from <see cref="TextureCache"/>, or 0 when the batch's
    /// surface id is 0 or 0xFFFFFFFF.
    /// </returns>
    private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
    {
        // WB stores the surface id on batch.Key.SurfaceId (TextureKey struct);
        // batch.SurfaceId is unset (zero) for batches built by ObjectMeshManager.
        uint surfaceId = batch.Key.SurfaceId;
        if (surfaceId == 0 || surfaceId == 0xFFFFFFFF)
            return 0;

        uint overrideOrigTex = 0;
        bool hasOrigTexOverride = meshRef.SurfaceOverrides is not null
            && meshRef.SurfaceOverrides.TryGetValue(surfaceId, out overrideOrigTex);
        uint? origTexOverride = hasOrigTexOverride ? overrideOrigTex : (uint?)null;

        if (entity.PaletteOverride is not null)
        {
            // perf #4: pass the entity-precomputed palette hash so TextureCache
            // can skip its internal HashPaletteOverride for repeat lookups
            // within the same character.
            return _textures.GetOrUploadWithPaletteOverride(
                surfaceId, origTexOverride, entity.PaletteOverride, palHash);
        }

        if (hasOrigTexOverride)
            return _textures.GetOrUploadWithOrigTextureOverride(surfaceId, overrideOrigTex);

        return _textures.GetOrUpload(surfaceId);
    }

    /// <summary>
    /// Wires the instance matrix (four vec4 rows at locations 3-6, divisor 1)
    /// into <paramref name="vao"/> the first time that VAO is seen. Leaves the
    /// VAO and the instance buffer bound when it does any work.
    /// </summary>
    private void EnsureInstanceAttribs(uint vao)
    {
        if (!_patchedVaos.Add(vao))
            return;

        _gl.BindVertexArray(vao);

        // NOTE: temporarily binding _instanceSsbo as ArrayBuffer for compile
        // compatibility. Tasks 9-10 replace with BindBufferBase(SSBO).
        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceSsbo);

        for (uint row = 0; row < 4; row++)
        {
            uint loc = 3 + row;
            _gl.EnableVertexAttribArray(loc);
            // Stride 64 = one mat4; each row starts 16 bytes after the previous.
            _gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
            _gl.VertexAttribDivisor(loc, 1);
        }
    }

    /// <summary>
    /// Copies the 16 components of <paramref name="m"/> into
    /// <paramref name="buf"/> starting at <paramref name="offset"/>, in
    /// M11..M14, M21..M24, M31..M34, M41..M44 order.
    /// </summary>
    private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
    {
        buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;
        buf[offset + 4] = m.M21; buf[offset + 5] = m.M22; buf[offset + 6] = m.M23; buf[offset + 7] = m.M24;
        buf[offset + 8] = m.M31; buf[offset + 9] = m.M32; buf[offset + 10] = m.M33; buf[offset + 11] = m.M34;
        buf[offset + 12] = m.M41; buf[offset + 13] = m.M42; buf[offset + 14] = m.M43; buf[offset + 15] = m.M44;
    }

    /// <summary>
    /// Deletes the three GL buffers created by the constructor. Safe to call
    /// more than once; only the first call does any work.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
            return;
        _disposed = true;

        _gl.DeleteBuffer(_instanceSsbo);
        _gl.DeleteBuffer(_batchSsbo);
        _gl.DeleteBuffer(_indirectBuffer);
    }

    // Identity of one draw call: same index range, base vertex, texture and
    // translucency means the instances can share a single instanced draw.
    private readonly record struct GroupKey(
        uint Ibo,
        uint FirstIndex,
        int BaseVertex,
        int IndexCount,
        uint TextureHandle,
        TranslucencyKind Translucency);

    // Mutable per-group state, reused across frames.
    private sealed class InstanceGroup
    {
        public uint Ibo;
        public uint FirstIndex;
        public int BaseVertex;
        public int IndexCount;
        public uint TextureHandle;
        public TranslucencyKind Translucency;
        public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
        public int InstanceCount;
        public float SortDistance; // squared distance from camera to first instance, for opaque sort
        public readonly List<Matrix4x4> Matrices = new();
    }
}