// src/AcDream.App/Rendering/InstancedMeshRenderer.cs
//
// True instanced rendering for static-object meshes.
// Groups entities by GfxObjId. All instance model matrices are written into
// a single shared instance VBO once per frame. Each sub-mesh is drawn with
// DrawElementsInstanced — one GL draw call per (GfxObj × sub-mesh) instead
// of one per entity. For a scene with N unique GfxObjs and M total entities
// this reduces draw calls from M*subMeshes to N*subMeshes.
//
// Matrix layout:
// System.Numerics.Matrix4x4 is row-major. Written to the float[] buffer in
// natural memory order (M11..M44). The GLSL shader reads 4 vec4 attributes
// (aInstanceRow0-3) and constructs mat4(row0, row1, row2, row3). Because
// GLSL mat4() takes column vectors, the rows of the C# matrix become the
// columns of the GLSL mat4 — which is the same transpose that UniformMatrix4
// with transpose=false produces. Visual result is identical to the old
// SetMatrix4("uModel", ...) path.
//
// Architecture note: public API matches StaticMeshRenderer so GameWindow only
// needs to update the shader and uniform setup at the call sites.
using System.Numerics;
using System.Runtime.InteropServices;
using AcDream.App.Rendering.Wb;
using AcDream.Core.Meshing;
using AcDream.Core.Terrain;
using AcDream.Core.World;
using Silk.NET.OpenGL;
namespace AcDream.App.Rendering;
public sealed unsafe class InstancedMeshRenderer : IDisposable
{
private readonly GL _gl;
private readonly Shader _shader;
private readonly TextureCache _textures;
///
/// Optional WB adapter. Held but currently unused — Phase N.4 Adjustment 2
/// (2026-05-08) reverted Task 9's renderer-level routing. Tier-routing decisions
/// (atlas vs per-instance) belong at the spawn-callback layer (Task 11
/// LandblockSpawnAdapter for atlas-tier; Task 17 EntitySpawnAdapter for
/// per-instance), not in the renderer which is intentionally tier-blind. The
/// constructor parameter is preserved so GameWindow's wire-up doesn't shift
/// when later tasks need adapter access.
///
private readonly WbMeshAdapter? _wbMeshAdapter;
// One GPU bundle per unique GfxObj id. Each GfxObj can have multiple sub-meshes.
private readonly Dictionary> _gpuByGfxObj = new();
// Shared instance VBO — filled every frame with all instance model matrices.
private readonly uint _instanceVbo;
// Per-frame scratch: reused float buffer for instance matrix data.
// 16 floats per mat4. Grown on demand; never shrunk.
private float[] _instanceBuffer = new float[256 * 16]; // start at 256 instances
// ── Instance grouping scratch ─────────────────────────────────────────────
//
// Reused every frame to avoid per-frame allocation.
//
// **Group key = (GfxObjId, PaletteOverrideHash, SurfaceOverridesHash).**
//
// An earlier implementation grouped on GfxObjId alone and resolved
// the per-sub-mesh texture from the first instance in the group — which
// is fine for scenery where every tree shares the same palette, but
// utterly broken for NPCs: every humanoid uses the same base body
// GfxObjs and they all piled into one group, so the first NPC's palette
// was used for every NPC in the frame. Frustum culling + iteration
// order meant that "first NPC" changed as the camera turned — producing
// the "NPC clothing changes when I turn" symptom.
//
// Now we also key by the entity's PaletteOverride + per-MeshRef
// SurfaceOverrides signature so only entities that decode to the
// SAME texture for every sub-mesh can share a batch. Entities with
// unique appearance fall to single-instance groups (still correct,
// marginally slower than true instancing).
private readonly Dictionary _groups = new();
private readonly record struct GroupKey(uint GfxObjId, ulong TextureSignature);
public InstancedMeshRenderer(GL gl, Shader shader, TextureCache textures,
WbMeshAdapter? wbMeshAdapter = null)
{
_gl = gl;
_shader = shader;
_textures = textures;
_wbMeshAdapter = wbMeshAdapter;
_instanceVbo = _gl.GenBuffer();
}
// ── Upload ────────────────────────────────────────────────────────────────
public void EnsureUploaded(uint gfxObjId, IReadOnlyList subMeshes)
{
if (_gpuByGfxObj.ContainsKey(gfxObjId))
return;
// Phase N.4 Adjustment 2 (2026-05-08): renderer is tier-blind. Tier-routing
// (atlas vs per-instance) lives at the spawn-callback layer (Tasks 11 + 17),
// not here. Smoke-test of the original Task 9 routing showed it caught
// characters / NPCs (server-spawned, per-instance tier) along with static
// scenery, because EnsureUploaded is called from both spawn paths.
var list = new List(subMeshes.Count);
foreach (var sm in subMeshes)
list.Add(UploadSubMesh(sm));
_gpuByGfxObj[gfxObjId] = list;
}
private SubMeshGpu UploadSubMesh(GfxObjSubMesh sm)
{
uint vao = _gl.GenVertexArray();
_gl.BindVertexArray(vao);
// ── Vertex buffer (positions, normals, UVs) ───────────────────────────
uint vbo = _gl.GenBuffer();
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, vbo);
fixed (void* p = sm.Vertices)
_gl.BufferData(BufferTargetARB.ArrayBuffer,
(nuint)(sm.Vertices.Length * sizeof(Vertex)), p, BufferUsageARB.StaticDraw);
uint stride = (uint)sizeof(Vertex);
_gl.EnableVertexAttribArray(0);
_gl.VertexAttribPointer(0, 3, VertexAttribPointerType.Float, false, stride, (void*)0);
_gl.EnableVertexAttribArray(1);
_gl.VertexAttribPointer(1, 3, VertexAttribPointerType.Float, false, stride, (void*)(3 * sizeof(float)));
_gl.EnableVertexAttribArray(2);
_gl.VertexAttribPointer(2, 2, VertexAttribPointerType.Float, false, stride, (void*)(6 * sizeof(float)));
// Note: location 3 (uint TerrainLayer) is NOT used by mesh_instanced.vert;
// that slot is reserved for per-instance mat4 row 0 from the instance VBO.
// ── Index buffer ──────────────────────────────────────────────────────
uint ebo = _gl.GenBuffer();
_gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, ebo);
fixed (void* p = sm.Indices)
_gl.BufferData(BufferTargetARB.ElementArrayBuffer,
(nuint)(sm.Indices.Length * sizeof(uint)), p, BufferUsageARB.StaticDraw);
// ── Per-instance model matrix (locations 3-6) ─────────────────────────
// Bind the shared instance VBO. The VAO captures this binding at each
// attribute location. At draw time we re-call VertexAttribPointer with
// the per-group byte offset (to address different groups in the VBO
// without DrawElementsInstancedBaseInstance).
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
// mat4 = 4 × vec4, stride = 64 bytes, divisor = 1 (advance once per instance)
for (uint row = 0; row < 4; row++)
{
uint loc = 3 + row;
_gl.EnableVertexAttribArray(loc);
_gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
_gl.VertexAttribDivisor(loc, 1);
}
_gl.BindVertexArray(0);
return new SubMeshGpu
{
Vao = vao,
Vbo = vbo,
Ebo = ebo,
IndexCount = sm.Indices.Length,
SurfaceId = sm.SurfaceId,
Translucency = sm.Translucency,
};
}
// ── Draw ──────────────────────────────────────────────────────────────────
public void Draw(ICamera camera,
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList Entities)> landblockEntries,
FrustumPlanes? frustum = null,
uint? neverCullLandblockId = null,
HashSet? visibleCellIds = null,
// L-fix1 (2026-04-28): set of entity ids that should bypass the
// landblock-level frustum cull. Animated entities (other
// players, NPCs, monsters) are always rendered if their
// landblock is loaded — without this they vanish whenever the
// camera rotates away from their landblock, even though
// they're within visible distance of the player. Pass null /
// empty to keep the previous "cull everything by landblock"
// behavior.
HashSet? animatedEntityIds = null)
{
_shader.Use();
var vp = camera.View * camera.Projection;
_shader.SetMatrix4("uViewProjection", vp);
// Phase G: lighting + ambient + fog are owned by the
// SceneLighting UBO (binding=1) uploaded once per frame by
// GameWindow. The instanced mesh fragment shader reads it
// directly — no per-draw uniform uploads needed.
// ── Collect and group instances ───────────────────────────────────────
CollectGroups(landblockEntries, frustum, neverCullLandblockId, visibleCellIds, animatedEntityIds);
// ── Build and upload the instance buffer ──────────────────────────────
// Count total instances.
int totalInstances = 0;
foreach (var grp in _groups.Values)
totalInstances += grp.Count;
// Grow the scratch buffer if needed.
int needed = totalInstances * 16;
if (_instanceBuffer.Length < needed)
_instanceBuffer = new float[needed + 256 * 16]; // extra headroom
// Write all groups contiguously. Record each group's starting offset
// (in units of instances, not bytes) so we can address them at draw time.
int instanceOffset = 0;
foreach (var grp in _groups.Values)
{
grp.BufferOffset = instanceOffset;
foreach (ref readonly var inst in CollectionsMarshal.AsSpan(grp.Entries))
WriteMatrix(_instanceBuffer, instanceOffset++ * 16, inst.Model);
}
// Upload all instance data in a single DynamicDraw call.
if (totalInstances > 0)
{
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
fixed (void* p = _instanceBuffer)
_gl.BufferData(BufferTargetARB.ArrayBuffer,
(nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw);
}
// ── Pass 1: Opaque + ClipMap ──────────────────────────────────────────
// Diagnostic: ACDREAM_NO_CULL=1 disables backface culling entirely.
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
{
_gl.Disable(EnableCap.CullFace);
}
foreach (var (key, grp) in _groups)
{
if (!_gpuByGfxObj.TryGetValue(key.GfxObjId, out var subMeshes))
continue;
bool hasOpaqueSubMesh = false;
foreach (var sub in subMeshes)
{
if (sub.Translucency == TranslucencyKind.Opaque ||
sub.Translucency == TranslucencyKind.ClipMap)
{
hasOpaqueSubMesh = true;
break;
}
}
if (!hasOpaqueSubMesh) continue;
// For this group, instance data starts at grp.BufferOffset in the VBO.
// We need to tell the VAO to read from that offset.
uint byteOffset = (uint)(grp.BufferOffset * 64); // 64 bytes per mat4
foreach (var sub in subMeshes)
{
if (sub.Translucency != TranslucencyKind.Opaque &&
sub.Translucency != TranslucencyKind.ClipMap)
continue;
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
// Bind VAO + re-point instance attributes to the group's slice
// in the shared VBO. This updates the VAO's stored offset for
// locations 3-6 without touching the vertex or index bindings.
_gl.BindVertexArray(sub.Vao);
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
for (uint row = 0; row < 4; row++)
{
_gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float,
false, 64, (void*)(byteOffset + row * 16));
}
// Resolve texture from the first instance (all instances in this
// group share the same GfxObj so they have compatible overrides
// only in the degenerate case of mixed-palette entities using the
// same GfxObj — rare enough to accept the approximation here).
if (grp.Count == 0) continue;
var firstEntry = grp.Entries[0];
uint tex = ResolveTex(firstEntry.Entity, firstEntry.MeshRef, sub);
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, tex);
_gl.DrawElementsInstanced(PrimitiveType.Triangles,
(uint)sub.IndexCount,
DrawElementsType.UnsignedInt,
(void*)0,
(uint)grp.Count);
}
}
// ── Pass 2: Translucent (AlphaBlend, Additive, InvAlpha) ─────────────
_gl.Enable(EnableCap.Blend);
_gl.DepthMask(false);
// Diagnostic: ACDREAM_NO_CULL=1 disables backface culling (used 2026-05-01
// to test if our mesh winding (0,i,i+1) vs ACME's (i+1,i,0) is causing
// visible polygons to be culled, especially around the neck/coat seam).
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
{
_gl.Disable(EnableCap.CullFace);
}
else
{
_gl.Enable(EnableCap.CullFace);
_gl.CullFace(TriangleFace.Back);
_gl.FrontFace(FrontFaceDirection.Ccw);
}
foreach (var (key, grp) in _groups)
{
if (!_gpuByGfxObj.TryGetValue(key.GfxObjId, out var subMeshes))
continue;
bool hasTranslucentSubMesh = false;
foreach (var sub in subMeshes)
{
if (sub.Translucency != TranslucencyKind.Opaque &&
sub.Translucency != TranslucencyKind.ClipMap)
{
hasTranslucentSubMesh = true;
break;
}
}
if (!hasTranslucentSubMesh) continue;
uint byteOffset = (uint)(grp.BufferOffset * 64);
foreach (var sub in subMeshes)
{
if (sub.Translucency == TranslucencyKind.Opaque ||
sub.Translucency == TranslucencyKind.ClipMap)
continue;
switch (sub.Translucency)
{
case TranslucencyKind.Additive:
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
break;
case TranslucencyKind.InvAlpha:
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
break;
default: // AlphaBlend
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
break;
}
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
_gl.BindVertexArray(sub.Vao);
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
for (uint row = 0; row < 4; row++)
{
_gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float,
false, 64, (void*)(byteOffset + row * 16));
}
if (grp.Count == 0) continue;
var firstEntry = grp.Entries[0];
uint tex = ResolveTex(firstEntry.Entity, firstEntry.MeshRef, sub);
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, tex);
_gl.DrawElementsInstanced(PrimitiveType.Triangles,
(uint)sub.IndexCount,
DrawElementsType.UnsignedInt,
(void*)0,
(uint)grp.Count);
}
}
// Restore default GL state.
_gl.DepthMask(true);
_gl.Disable(EnableCap.Blend);
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
}
// ── Grouping ──────────────────────────────────────────────────────────────
///
/// Iterates all visible landblock entries and groups every (entity, meshRef)
/// pair by GfxObjId. Clears previous frame's groups before filling.
///
private void CollectGroups(
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList Entities)> landblockEntries,
FrustumPlanes? frustum,
uint? neverCullLandblockId,
HashSet? visibleCellIds,
HashSet? animatedEntityIds)
{
foreach (var grp in _groups.Values)
grp.Entries.Clear();
foreach (var entry in landblockEntries)
{
// L-fix1 (2026-04-28): the landblock cull decision is now
// PER-LANDBLOCK boolean, not a continue. We still need to
// walk the entity list because animated entities (in
// animatedEntityIds) bypass the cull and render anyway.
bool landblockVisible = frustum is null
|| entry.LandblockId == neverCullLandblockId
|| FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);
// Fast path: no animated entities globally → if landblock is
// culled, skip the whole entity list (preserves the original
// O(visible-landblocks) cost when the caller doesn't care
// about animated bypass).
if (!landblockVisible && (animatedEntityIds is null || animatedEntityIds.Count == 0))
continue;
foreach (var entity in entry.Entities)
{
if (entity.MeshRefs.Count == 0)
continue;
// L-fix1: when the landblock is frustum-culled, only
// render entities flagged as animated. This keeps
// remote players / NPCs / monsters visible even when
// their landblock rotates out of the view frustum.
bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
if (!landblockVisible && !isAnimated)
continue;
// Step 4: portal visibility filter. If we have a visible cell set,
// skip interior entities whose parent cell isn't visible.
// visibleCellIds == null means camera is outdoors → show all interiors.
if (entity.ParentCellId.HasValue && visibleCellIds is not null
&& !visibleCellIds.Contains(entity.ParentCellId.Value))
continue;
var entityRoot =
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position);
// Hash the entity's PaletteOverride once — shared by every
// MeshRef on this entity, so we compute it outside the loop.
ulong palHash = HashPaletteOverride(entity.PaletteOverride);
foreach (var meshRef in entity.MeshRefs)
{
if (!_gpuByGfxObj.TryGetValue(meshRef.GfxObjId, out var cachedMeshes))
continue;
var model = meshRef.PartTransform * entityRoot;
// Texture signature = palette hash ^ surface-overrides hash.
// Two instances can share a batch only when their ResolveTex
// would return identical handles for every sub-mesh — that
// means identical palette AND identical surface overrides.
ulong surfHash = HashSurfaceOverrides(meshRef.SurfaceOverrides);
ulong texSig = palHash ^ surfHash;
var key = new GroupKey(meshRef.GfxObjId, texSig);
if (!_groups.TryGetValue(key, out var group))
{
group = new InstanceGroup();
_groups[key] = group;
}
group.Entries.Add(new InstanceEntry(model, entity, meshRef));
}
}
}
}
private static ulong HashPaletteOverride(AcDream.Core.World.PaletteOverride? p)
{
if (p is null) return 0UL;
ulong h = 0xCBF29CE484222325UL;
const ulong prime = 0x100000001B3UL;
h = (h ^ p.BasePaletteId) * prime;
foreach (var sp in p.SubPalettes)
{
h = (h ^ sp.SubPaletteId) * prime;
h = (h ^ sp.Offset) * prime;
h = (h ^ sp.Length) * prime;
}
return h;
}
///
/// Order-independent hash of a SurfaceOverrides dictionary. XOR of each
/// (key, value) pair keeps the result stable regardless of Dictionary
/// iteration order, so two instances whose override maps contain the
/// same pairs will hash identically.
///
private static ulong HashSurfaceOverrides(IReadOnlyDictionary? overrides)
{
if (overrides is null || overrides.Count == 0) return 0UL;
ulong acc = 0UL;
foreach (var kvp in overrides)
{
ulong pair = ((ulong)kvp.Key << 32) | kvp.Value;
acc ^= pair;
}
// Fold with a prime so the zero case doesn't collide with "empty".
return (acc ^ 0xCBF29CE484222325UL) * 0x100000001B3UL;
}
// ── Matrix write ──────────────────────────────────────────────────────────
///
/// Writes a System.Numerics Matrix4x4 into starting
/// at as 16 consecutive floats in row-major order
/// (the C# natural memory layout). The GLSL shader reads each 4-float row
/// as a column of the mat4 — identical to what UniformMatrix4(transpose=false)
/// produces for the uniform path.
///
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;
buf[offset + 4] = m.M21; buf[offset + 5] = m.M22; buf[offset + 6] = m.M23; buf[offset + 7] = m.M24;
buf[offset + 8] = m.M31; buf[offset + 9] = m.M32; buf[offset + 10] = m.M33; buf[offset + 11] = m.M34;
buf[offset + 12] = m.M41; buf[offset + 13] = m.M42; buf[offset + 14] = m.M43; buf[offset + 15] = m.M44;
}
// ── Texture resolution ────────────────────────────────────────────────────
private uint ResolveTex(WorldEntity entity, MeshRef meshRef, SubMeshGpu sub)
{
uint overrideOrigTex = 0;
bool hasOrigTexOverride = meshRef.SurfaceOverrides is not null
&& meshRef.SurfaceOverrides.TryGetValue(sub.SurfaceId, out overrideOrigTex);
uint? origTexOverride = hasOrigTexOverride ? overrideOrigTex : (uint?)null;
if (entity.PaletteOverride is not null)
{
return _textures.GetOrUploadWithPaletteOverride(
sub.SurfaceId, origTexOverride, entity.PaletteOverride);
}
else if (hasOrigTexOverride)
{
return _textures.GetOrUploadWithOrigTextureOverride(sub.SurfaceId, overrideOrigTex);
}
else
{
return _textures.GetOrUpload(sub.SurfaceId);
}
}
// ── Disposal ──────────────────────────────────────────────────────────────
public void Dispose()
{
foreach (var subs in _gpuByGfxObj.Values)
{
foreach (var sub in subs)
{
_gl.DeleteBuffer(sub.Vbo);
_gl.DeleteBuffer(sub.Ebo);
_gl.DeleteVertexArray(sub.Vao);
}
}
_gl.DeleteBuffer(_instanceVbo);
_gpuByGfxObj.Clear();
_groups.Clear();
}
// ── Private types ─────────────────────────────────────────────────────────
private sealed class SubMeshGpu
{
public uint Vao;
public uint Vbo;
public uint Ebo;
public int IndexCount;
public uint SurfaceId;
public TranslucencyKind Translucency;
}
///
/// All instances of one GfxObj for this frame, plus their starting offset
/// in the shared instance VBO (in units of instances, not bytes).
///
private sealed class InstanceGroup
{
public readonly List Entries = new();
public int BufferOffset;
public int Count => Entries.Count;
}
private readonly struct InstanceEntry
{
public readonly Matrix4x4 Model;
public readonly WorldEntity Entity;
public readonly MeshRef MeshRef;
public InstanceEntry(Matrix4x4 model, WorldEntity entity, MeshRef meshRef)
{
Model = model;
Entity = entity;
MeshRef = meshRef;
}
}
}