// src/AcDream.App/Rendering/InstancedMeshRenderer.cs
//
// True instanced rendering for static-object meshes.
// Groups (entity, meshRef) pairs by (GfxObjId, texture signature). All instance
// model matrices are written into a single shared instance VBO once per frame.
// Each sub-mesh is drawn with DrawElementsInstanced — one GL draw call per
// (group × sub-mesh) instead of one per entity.
//
// Matrix layout:
//   System.Numerics.Matrix4x4 is row-major. Written to the float[] buffer in
//   natural memory order (M11..M44). The GLSL shader reads 4 vec4 attributes
//   (aInstanceRow0-3) and constructs mat4(row0, row1, row2, row3). Because
//   GLSL mat4() takes column vectors, the rows of the C# matrix become the
//   columns of the GLSL mat4 — which is the same transpose that UniformMatrix4
//   with transpose=false produces. Visual result is identical to the old
//   SetMatrix4("uModel", ...) path.
//
// Architecture note: public API matches StaticMeshRenderer so GameWindow only
// needs to update the shader and uniform setup at the call sites.
using System.Numerics;
using System.Runtime.InteropServices;
using AcDream.Core.Meshing;
using AcDream.Core.Terrain;
using AcDream.Core.World;
using Silk.NET.OpenGL;

namespace AcDream.App.Rendering;

/// <summary>
/// Draws static-object meshes with one instanced draw call per
/// (instance group × sub-mesh). Owns one VAO/VBO/EBO per uploaded sub-mesh
/// plus a single shared per-instance matrix VBO that is refilled every frame.
/// </summary>
public sealed unsafe class InstancedMeshRenderer : IDisposable
{
    /// <summary>Floats per instance: one 4×4 model matrix.</summary>
    private const int FloatsPerInstance = 16;

    /// <summary>Bytes per instance in the shared VBO (16 floats × 4 bytes).</summary>
    private const uint InstanceStrideBytes = 64;

    /// <summary>Bytes per matrix row (one vec4 attribute).</summary>
    private const uint InstanceRowBytes = 16;

    /// <summary>First attribute location of the per-instance mat4 (occupies 3..6).</summary>
    private const uint FirstInstanceAttribLocation = 3;

    /// <summary>Initial scratch capacity and grow headroom, in instances.</summary>
    private const int InstanceHeadroom = 256;

    // FNV-1a 64-bit parameters used by the texture-signature hashes.
    private const ulong FnvOffsetBasis = 0xCBF29CE484222325UL;
    private const ulong FnvPrime = 0x100000001B3UL;

    private readonly GL _gl;
    private readonly Shader _shader;
    private readonly TextureCache _textures;

    // One GPU bundle per unique GfxObj id. Each GfxObj can have multiple sub-meshes.
    private readonly Dictionary<uint, List<SubMeshGpu>> _gpuByGfxObj = new();

    // Shared instance VBO — filled every frame with all instance model matrices.
    private readonly uint _instanceVbo;

    // Per-frame scratch: reused float buffer for instance matrix data.
    // 16 floats per mat4. Grown on demand; never shrunk.
    private float[] _instanceBuffer = new float[InstanceHeadroom * FloatsPerInstance];

    // ── Instance grouping scratch ─────────────────────────────────────────────
    //
    // Reused every frame to avoid per-frame allocation.
    //
    // **Group key = (GfxObjId, TextureSignature).**
    //
    // An earlier implementation grouped on GfxObjId alone and resolved
    // the per-sub-mesh texture from the first instance in the group — which
    // is fine for scenery where every tree shares the same palette, but
    // utterly broken for NPCs: every humanoid uses the same base body
    // GfxObjs and they all piled into one group, so the first NPC's palette
    // was used for every NPC in the frame. Frustum culling + iteration
    // order meant that "first NPC" changed as the camera turned — producing
    // the "NPC clothing changes when I turn" symptom.
    //
    // Now the key also carries a signature derived from the entity's
    // PaletteOverride and the MeshRef's SurfaceOverrides, so only entries
    // that decode to the SAME texture for every sub-mesh share a batch.
    // Entities with unique appearance fall to single-instance groups
    // (still correct, marginally slower than true instancing).
    //
    // NOTE(review): groups are only emptied between frames, never removed
    // (except in Dispose), so this map grows with every distinct key seen.
    private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();

    private bool _disposed;

    private readonly record struct GroupKey(uint GfxObjId, ulong TextureSignature);

    /// <summary>
    /// Creates the renderer and allocates the shared per-instance VBO.
    /// </summary>
    /// <param name="gl">GL context used for every call made by this renderer.</param>
    /// <param name="shader">Instanced mesh shader; activated at the start of <see cref="Draw"/>.</param>
    /// <param name="textures">Cache used to resolve surface ids to GL texture handles.</param>
    public InstancedMeshRenderer(GL gl, Shader shader, TextureCache textures)
    {
        _gl = gl;
        _shader = shader;
        _textures = textures;
        _instanceVbo = _gl.GenBuffer();
    }

    // ── Upload ────────────────────────────────────────────────────────────────

    /// <summary>
    /// Uploads the sub-meshes of <paramref name="gfxObjId"/> to the GPU the first
    /// time the id is seen. Later calls with the same id are no-ops.
    /// </summary>
    /// <param name="gfxObjId">GfxObj id the sub-meshes belong to.</param>
    /// <param name="subMeshes">CPU-side sub-meshes to upload.</param>
    public void EnsureUploaded(uint gfxObjId, IReadOnlyList<GfxObjSubMesh> subMeshes)
    {
        if (_gpuByGfxObj.ContainsKey(gfxObjId))
        {
            return;
        }

        var uploaded = new List<SubMeshGpu>(subMeshes.Count);
        foreach (var subMesh in subMeshes)
        {
            uploaded.Add(UploadSubMesh(subMesh));
        }

        _gpuByGfxObj[gfxObjId] = uploaded;
    }

    /// <summary>
    /// Creates the VAO, vertex buffer and index buffer for one sub-mesh and wires
    /// attribute locations 3-6 to the shared instance VBO with divisor 1.
    /// </summary>
    private SubMeshGpu UploadSubMesh(GfxObjSubMesh sm)
    {
        uint vao = _gl.GenVertexArray();
        _gl.BindVertexArray(vao);

        // ── Vertex buffer (positions, normals, UVs) ───────────────────────────
        uint vbo = _gl.GenBuffer();
        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, vbo);
        fixed (void* p = sm.Vertices)
        {
            _gl.BufferData(BufferTargetARB.ArrayBuffer,
                (nuint)(sm.Vertices.Length * sizeof(Vertex)), p, BufferUsageARB.StaticDraw);
        }

        uint stride = (uint)sizeof(Vertex);
        _gl.EnableVertexAttribArray(0);
        _gl.VertexAttribPointer(0, 3, VertexAttribPointerType.Float, false, stride, (void*)0);
        _gl.EnableVertexAttribArray(1);
        _gl.VertexAttribPointer(1, 3, VertexAttribPointerType.Float, false, stride, (void*)(3 * sizeof(float)));
        _gl.EnableVertexAttribArray(2);
        _gl.VertexAttribPointer(2, 2, VertexAttribPointerType.Float, false, stride, (void*)(6 * sizeof(float)));
        // Note: location 3 (uint TerrainLayer) is NOT used by mesh_instanced.vert;
        // that slot is reserved for per-instance mat4 row 0 from the instance VBO.

        // ── Index buffer ──────────────────────────────────────────────────────
        uint ebo = _gl.GenBuffer();
        _gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, ebo);
        fixed (void* p = sm.Indices)
        {
            _gl.BufferData(BufferTargetARB.ElementArrayBuffer,
                (nuint)(sm.Indices.Length * sizeof(uint)), p, BufferUsageARB.StaticDraw);
        }

        // ── Per-instance model matrix (locations 3-6) ─────────────────────────
        // Bind the shared instance VBO. The VAO captures this binding at each
        // attribute location. At draw time we re-call VertexAttribPointer with
        // the per-group byte offset (to address different groups in the VBO
        // without DrawElementsInstancedBaseInstance).
        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);

        // mat4 = 4 × vec4, stride = 64 bytes, divisor = 1 (advance once per instance)
        for (uint row = 0; row < 4; row++)
        {
            uint loc = FirstInstanceAttribLocation + row;
            _gl.EnableVertexAttribArray(loc);
            _gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false,
                InstanceStrideBytes, (void*)(row * InstanceRowBytes));
            _gl.VertexAttribDivisor(loc, 1);
        }

        _gl.BindVertexArray(0);

        return new SubMeshGpu
        {
            Vao = vao,
            Vbo = vbo,
            Ebo = ebo,
            IndexCount = sm.Indices.Length,
            SurfaceId = sm.SurfaceId,
            Translucency = sm.Translucency,
        };
    }

    // ── Draw ──────────────────────────────────────────────────────────────────

    /// <summary>
    /// Collects the visible instances, uploads their model matrices in one
    /// buffer update and draws them in two passes: Opaque/ClipMap sub-meshes
    /// first, then blended sub-meshes with depth writes disabled.
    /// </summary>
    /// <param name="camera">Supplies the view and projection matrices.</param>
    /// <param name="landblockEntries">Loaded landblocks with their AABB and entities.</param>
    /// <param name="frustum">Frustum used for the landblock-level cull; null disables culling.</param>
    /// <param name="neverCullLandblockId">Landblock that is always treated as visible.</param>
    /// <param name="visibleCellIds">
    /// When non-null, entities with a ParentCellId outside this set are skipped.
    /// </param>
    /// <param name="animatedEntityIds">
    /// L-fix1 (2026-04-28): entity ids that bypass the landblock-level frustum
    /// cull. Animated entities (other players, NPCs, monsters) are always
    /// rendered if their landblock is loaded — without this they vanish
    /// whenever the camera rotates away from their landblock. Pass null /
    /// empty to keep the previous "cull everything by landblock" behavior.
    /// </param>
    public void Draw(
        ICamera camera,
        IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
        FrustumPlanes? frustum = null,
        uint? neverCullLandblockId = null,
        HashSet<uint>? visibleCellIds = null,
        HashSet<uint>? animatedEntityIds = null)
    {
        _shader.Use();
        var vp = camera.View * camera.Projection;
        _shader.SetMatrix4("uViewProjection", vp);

        // Phase G: lighting + ambient + fog are owned by the
        // SceneLighting UBO (binding=1) uploaded once per frame by
        // GameWindow. The instanced mesh fragment shader reads it
        // directly — no per-draw uniform uploads needed.

        // ── Collect and group instances ───────────────────────────────────────
        CollectGroups(landblockEntries, frustum, neverCullLandblockId, visibleCellIds, animatedEntityIds);

        // ── Build and upload the instance buffer ──────────────────────────────
        int totalInstances = 0;
        foreach (var group in _groups.Values)
        {
            totalInstances += group.Count;
        }

        // Grow the scratch buffer if needed (with extra headroom).
        int neededFloats = totalInstances * FloatsPerInstance;
        if (_instanceBuffer.Length < neededFloats)
        {
            _instanceBuffer = new float[neededFloats + InstanceHeadroom * FloatsPerInstance];
        }

        // Write all groups contiguously. Record each group's starting offset
        // (in units of instances, not bytes) so we can address them at draw time.
        int cursor = 0;
        foreach (var group in _groups.Values)
        {
            group.BufferOffset = cursor;
            foreach (ref readonly var inst in CollectionsMarshal.AsSpan(group.Entries))
            {
                WriteMatrix(_instanceBuffer, cursor * FloatsPerInstance, inst.Model);
                cursor++;
            }
        }

        // Upload all instance data in a single DynamicDraw call.
        if (totalInstances > 0)
        {
            _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
            fixed (void* p = _instanceBuffer)
            {
                _gl.BufferData(BufferTargetARB.ArrayBuffer,
                    (nuint)(totalInstances * FloatsPerInstance * sizeof(float)), p, BufferUsageARB.DynamicDraw);
            }
        }

        // Diagnostic: ACDREAM_NO_CULL=1 disables backface culling (used 2026-05-01
        // to test if our mesh winding (0,i,i+1) vs ACME's (i+1,i,0) is causing
        // visible polygons to be culled, especially around the neck/coat seam).
        // Read once per Draw; both passes use the same answer.
        bool noCull = string.Equals(
            Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal);

        // ── Pass 1: Opaque + ClipMap ──────────────────────────────────────────
        // Culling is only force-disabled here; when the diagnostic is off the
        // cull state is whatever was current when Draw was called.
        if (noCull)
        {
            _gl.Disable(EnableCap.CullFace);
        }

        foreach (var (key, group) in _groups)
        {
            // Groups persist across frames; skip the ones nothing landed in
            // before touching any GL state.
            if (group.Count == 0)
            {
                continue;
            }

            if (!_gpuByGfxObj.TryGetValue(key.GfxObjId, out var subMeshes))
            {
                continue;
            }

            // Instance data for this group starts at BufferOffset in the VBO.
            uint byteOffset = (uint)group.BufferOffset * InstanceStrideBytes;

            foreach (var sub in subMeshes)
            {
                if (!IsOpaqueKind(sub.Translucency))
                {
                    continue;
                }

                DrawGroupSubMesh(group, sub, byteOffset);
            }
        }

        // ── Pass 2: Translucent (AlphaBlend, Additive, InvAlpha) ─────────────
        _gl.Enable(EnableCap.Blend);
        _gl.DepthMask(false);

        if (noCull)
        {
            _gl.Disable(EnableCap.CullFace);
        }
        else
        {
            _gl.Enable(EnableCap.CullFace);
            _gl.CullFace(TriangleFace.Back);
            _gl.FrontFace(FrontFaceDirection.Ccw);
        }

        foreach (var (key, group) in _groups)
        {
            if (group.Count == 0)
            {
                continue;
            }

            if (!_gpuByGfxObj.TryGetValue(key.GfxObjId, out var subMeshes))
            {
                continue;
            }

            uint byteOffset = (uint)group.BufferOffset * InstanceStrideBytes;

            foreach (var sub in subMeshes)
            {
                if (IsOpaqueKind(sub.Translucency))
                {
                    continue;
                }

                switch (sub.Translucency)
                {
                    case TranslucencyKind.Additive:
                        _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
                        break;
                    case TranslucencyKind.InvAlpha:
                        _gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
                        break;
                    default: // AlphaBlend
                        _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
                        break;
                }

                DrawGroupSubMesh(group, sub, byteOffset);
            }
        }

        // Restore default GL state.
        _gl.DepthMask(true);
        _gl.Disable(EnableCap.Blend);
        _gl.Disable(EnableCap.CullFace);
        _gl.BindVertexArray(0);
    }

    /// <summary>True for the kinds drawn in the first (non-blended) pass.</summary>
    private static bool IsOpaqueKind(TranslucencyKind kind) =>
        kind == TranslucencyKind.Opaque || kind == TranslucencyKind.ClipMap;

    /// <summary>
    /// Issues one instanced draw of <paramref name="sub"/> for every entry in
    /// <paramref name="group"/>. The caller must have set blend state and must
    /// only pass non-empty groups.
    /// </summary>
    /// <param name="group">Non-empty instance group to draw.</param>
    /// <param name="sub">Sub-mesh whose VAO and index buffer are used.</param>
    /// <param name="instanceByteOffset">Byte offset of the group's first matrix in the instance VBO.</param>
    private void DrawGroupSubMesh(InstanceGroup group, SubMeshGpu sub, uint instanceByteOffset)
    {
        _shader.SetInt("uTranslucencyKind", (int)sub.Translucency);

        // Bind VAO + re-point instance attributes to the group's slice
        // in the shared VBO. This updates the VAO's stored offset for
        // locations 3-6 without touching the vertex or index bindings.
        _gl.BindVertexArray(sub.Vao);
        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
        for (uint row = 0; row < 4; row++)
        {
            _gl.VertexAttribPointer(FirstInstanceAttribLocation + row, 4, VertexAttribPointerType.Float, false,
                InstanceStrideBytes, (void*)(instanceByteOffset + row * InstanceRowBytes));
        }

        // Every entry in a group was filed under the same texture signature
        // (palette override + surface overrides), so the first entry is used
        // to resolve the texture for the whole batch.
        var first = group.Entries[0];
        uint tex = ResolveTex(first.Entity, first.MeshRef, sub);
        _gl.ActiveTexture(TextureUnit.Texture0);
        _gl.BindTexture(TextureTarget.Texture2D, tex);

        _gl.DrawElementsInstanced(PrimitiveType.Triangles,
            (uint)sub.IndexCount, DrawElementsType.UnsignedInt, (void*)0, (uint)group.Count);
    }

    // ── Grouping ──────────────────────────────────────────────────────────────

    /// <summary>
    /// Iterates all landblock entries and files every visible (entity, meshRef)
    /// pair under its (GfxObjId, texture signature) group. Empties the previous
    /// frame's groups before filling; the group objects themselves are reused.
    /// </summary>
    private void CollectGroups(
        IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
        FrustumPlanes? frustum,
        uint? neverCullLandblockId,
        HashSet<uint>? visibleCellIds,
        HashSet<uint>? animatedEntityIds)
    {
        foreach (var group in _groups.Values)
        {
            group.Entries.Clear();
        }

        bool hasAnimated = animatedEntityIds is not null && animatedEntityIds.Count > 0;

        foreach (var entry in landblockEntries)
        {
            // L-fix1 (2026-04-28): the landblock cull decision is now a
            // PER-LANDBLOCK boolean, not a continue. We still need to
            // walk the entity list because animated entities (in
            // animatedEntityIds) bypass the cull and render anyway.
            bool landblockVisible = frustum is null
                || entry.LandblockId == neverCullLandblockId
                || FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);

            // Fast path: no animated entities globally → if landblock is
            // culled, skip the whole entity list (preserves the original
            // O(visible-landblocks) cost when the caller doesn't care
            // about animated bypass).
            if (!landblockVisible && !hasAnimated)
            {
                continue;
            }

            foreach (var entity in entry.Entities)
            {
                if (entity.MeshRefs.Count == 0)
                {
                    continue;
                }

                // L-fix1: when the landblock is frustum-culled, only
                // render entities flagged as animated. This keeps
                // remote players / NPCs / monsters visible even when
                // their landblock rotates out of the view frustum.
                bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
                if (!landblockVisible && !isAnimated)
                {
                    continue;
                }

                // Step 4: portal visibility filter. If we have a visible cell set,
                // skip interior entities whose parent cell isn't visible.
                // visibleCellIds == null means camera is outdoors → show all interiors.
                if (entity.ParentCellId.HasValue && visibleCellIds is not null
                    && !visibleCellIds.Contains(entity.ParentCellId.Value))
                {
                    continue;
                }

                var entityRoot =
                    Matrix4x4.CreateFromQuaternion(entity.Rotation) *
                    Matrix4x4.CreateTranslation(entity.Position);

                // Hash the entity's PaletteOverride once — shared by every
                // MeshRef on this entity, so we compute it outside the loop.
                ulong palHash = HashPaletteOverride(entity.PaletteOverride);

                foreach (var meshRef in entity.MeshRefs)
                {
                    if (!_gpuByGfxObj.ContainsKey(meshRef.GfxObjId))
                    {
                        continue;
                    }

                    var model = meshRef.PartTransform * entityRoot;

                    // Texture signature = palette hash ^ surface-overrides hash.
                    // Two instances can share a batch only when their ResolveTex
                    // would return identical handles for every sub-mesh — that
                    // means identical palette AND identical surface overrides.
                    ulong surfHash = HashSurfaceOverrides(meshRef.SurfaceOverrides);
                    var key = new GroupKey(meshRef.GfxObjId, palHash ^ surfHash);

                    if (!_groups.TryGetValue(key, out var group))
                    {
                        group = new InstanceGroup();
                        _groups[key] = group;
                    }

                    group.Entries.Add(new InstanceEntry(model, entity, meshRef));
                }
            }
        }
    }

    /// <summary>
    /// FNV-1a style hash over the base palette id and every sub-palette's
    /// (id, offset, length), in list order. Returns 0 for a null override.
    /// </summary>
    private static ulong HashPaletteOverride(AcDream.Core.World.PaletteOverride? p)
    {
        if (p is null)
        {
            return 0UL;
        }

        unchecked
        {
            ulong h = FnvOffsetBasis;
            h = (h ^ p.BasePaletteId) * FnvPrime;
            foreach (var sp in p.SubPalettes)
            {
                h = (h ^ sp.SubPaletteId) * FnvPrime;
                h = (h ^ sp.Offset) * FnvPrime;
                h = (h ^ sp.Length) * FnvPrime;
            }

            return h;
        }
    }

    /// <summary>
    /// Order-independent hash of a SurfaceOverrides dictionary. Each
    /// (key, value) pair is scrambled on its own and the scrambled values are
    /// summed, so the result does not depend on Dictionary iteration order.
    /// Returns 0 for a null or empty map.
    /// </summary>
    /// <remarks>
    /// The pairs must be scrambled BEFORE they are combined. Combining the raw
    /// packed pairs (e.g. by XOR) is linear: {1→2, 3→4} and {1→4, 3→2} would
    /// produce the same accumulator, putting differently-textured instances
    /// into one batch.
    /// </remarks>
    private static ulong HashSurfaceOverrides(IReadOnlyDictionary<uint, uint>? overrides)
    {
        if (overrides is null || overrides.Count == 0)
        {
            return 0UL;
        }

        unchecked
        {
            ulong acc = 0UL;
            foreach (var kvp in overrides)
            {
                ulong pair = ((ulong)kvp.Key << 32) | kvp.Value;
                acc += MixPair(pair);
            }

            // Final fold so a non-empty map whose accumulator happens to be
            // zero still differs from the "no overrides" result of 0.
            return (acc ^ FnvOffsetBasis) * FnvPrime;
        }
    }

    /// <summary>
    /// SplitMix64 step: adds the golden-ratio increment, then applies the
    /// 64-bit finalizer. The increment keeps an all-zero pair from mapping to 0.
    /// </summary>
    private static ulong MixPair(ulong value)
    {
        unchecked
        {
            ulong z = value + 0x9E3779B97F4A7C15UL;
            z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9UL;
            z = (z ^ (z >> 27)) * 0x94D049BB133111EBUL;
            return z ^ (z >> 31);
        }
    }

    // ── Matrix write ──────────────────────────────────────────────────────────

    /// <summary>
    /// Writes a System.Numerics Matrix4x4 into <paramref name="buf"/> starting
    /// at <paramref name="offset"/> as 16 consecutive floats in row-major order
    /// (the C# natural memory layout). The GLSL shader reads each 4-float row
    /// as a column of the mat4 — identical to what UniformMatrix4(transpose=false)
    /// produces for the uniform path.
    /// </summary>
    private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
    {
        buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;
        buf[offset + 4] = m.M21; buf[offset + 5] = m.M22; buf[offset + 6] = m.M23; buf[offset + 7] = m.M24;
        buf[offset + 8] = m.M31; buf[offset + 9] = m.M32; buf[offset + 10] = m.M33; buf[offset + 11] = m.M34;
        buf[offset + 12] = m.M41; buf[offset + 13] = m.M42; buf[offset + 14] = m.M43; buf[offset + 15] = m.M44;
    }

    // ── Texture resolution ────────────────────────────────────────────────────

    /// <summary>
    /// Resolves the GL texture for <paramref name="sub"/> as seen by one entry:
    /// palette override first (optionally with the per-surface original-texture
    /// override), then the original-texture override alone, then the plain surface.
    /// </summary>
    private uint ResolveTex(WorldEntity entity, MeshRef meshRef, SubMeshGpu sub)
    {
        uint overrideOrigTex = 0;
        bool hasOrigTexOverride = meshRef.SurfaceOverrides is not null
            && meshRef.SurfaceOverrides.TryGetValue(sub.SurfaceId, out overrideOrigTex);
        uint? origTexOverride = hasOrigTexOverride ? overrideOrigTex : (uint?)null;

        if (entity.PaletteOverride is not null)
        {
            return _textures.GetOrUploadWithPaletteOverride(
                sub.SurfaceId, origTexOverride, entity.PaletteOverride);
        }

        if (hasOrigTexOverride)
        {
            return _textures.GetOrUploadWithOrigTextureOverride(sub.SurfaceId, overrideOrigTex);
        }

        return _textures.GetOrUpload(sub.SurfaceId);
    }

    // ── Disposal ──────────────────────────────────────────────────────────────

    /// <summary>
    /// Deletes every GL object this renderer created. Safe to call more than
    /// once; only the first call touches GL.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        foreach (var subs in _gpuByGfxObj.Values)
        {
            foreach (var sub in subs)
            {
                _gl.DeleteBuffer(sub.Vbo);
                _gl.DeleteBuffer(sub.Ebo);
                _gl.DeleteVertexArray(sub.Vao);
            }
        }

        _gl.DeleteBuffer(_instanceVbo);
        _gpuByGfxObj.Clear();
        _groups.Clear();
    }

    // ── Private types ─────────────────────────────────────────────────────────

    /// <summary>GL handles and draw metadata for one uploaded sub-mesh.</summary>
    private sealed class SubMeshGpu
    {
        public uint Vao;
        public uint Vbo;
        public uint Ebo;
        public int IndexCount;
        public uint SurfaceId;
        public TranslucencyKind Translucency;
    }

    /// <summary>
    /// All instances filed under one (GfxObjId, texture signature) key for this
    /// frame, plus their starting offset in the shared instance VBO (in units
    /// of instances, not bytes).
    /// </summary>
    private sealed class InstanceGroup
    {
        public readonly List<InstanceEntry> Entries = new();
        public int BufferOffset;
        public int Count => Entries.Count;
    }

    /// <summary>One instance: its model matrix and the entity/meshRef it came from.</summary>
    private readonly struct InstanceEntry
    {
        public readonly Matrix4x4 Model;
        public readonly WorldEntity Entity;
        public readonly MeshRef MeshRef;

        public InstanceEntry(Matrix4x4 model, WorldEntity entity, MeshRef meshRef)
        {
            Model = model;
            Entity = entity;
            MeshRef = meshRef;
        }
    }
}