perf(rendering): true DrawElementsInstanced — one draw call per (GfxObj × sub-mesh)

Replaces the per-entity glUniform uModel path with a shared instance VBO and
DrawElementsInstanced. All instance model matrices are uploaded to GPU once per
frame; the VAO's per-instance attribute pointers (locations 3–6, divisor=1) are
updated with a byte-offset re-point per group so a single VBO serves all groups
without requiring DrawElementsInstancedBaseInstance (not in Silk.NET 2.23).

Changes:
- InstancedMeshRenderer: add _instanceVbo, _instanceBuffer scratch; EnsureUploaded
  sets up mat4 instance attrs (locs 3–6) from the shared VBO; Draw builds the flat
  float[] of all instance matrices once then calls DrawElementsInstanced per sub-mesh.
  Drops the unused uint TerrainLayer attribute (loc 3 from vertex VBO) — mesh shaders
  never used it. Adds InstanceGroup helper to track per-group buffer offsets.
- mesh_instanced.frag: replace sampler2DArray+uTextureLayer with sampler2D uDiffuse,
  matching the existing TextureCache / individual-texture pipeline.
- mesh_instanced.vert+frag: track as committed files (were untracked).
- Shader.cs: add SetVec3 helper needed for uLightDirection uniform.
- GameWindow.cs: switch mesh shader load from mesh.vert/.frag to
  mesh_instanced.vert/.frag.

Visual output is identical: same entities, same textures, same lighting constants
(SUN_DIR=(0.5,0.4,0.6), AMBIENT=0.25, DIFFUSE=0.75 — moved from frag to vert).
Build: clean. Tests: 431/431 green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Erik 2026-04-13 18:51:49 +02:00
parent b5099e2b21
commit 6a55838a10
5 changed files with 288 additions and 109 deletions

View file

@ -317,8 +317,8 @@ public sealed class GameWindow : IDisposable
Path.Combine(shadersDir, "terrain.frag"));
_meshShader = new Shader(_gl,
Path.Combine(shadersDir, "mesh.vert"),
Path.Combine(shadersDir, "mesh.frag"));
Path.Combine(shadersDir, "mesh_instanced.vert"),
Path.Combine(shadersDir, "mesh_instanced.frag"));
var orbit = new OrbitCamera { Aspect = _window!.Size.X / (float)_window.Size.Y };
var fly = new FlyCamera { Aspect = _window.Size.X / (float)_window.Size.Y };

View file

@ -1,14 +1,25 @@
// src/AcDream.App/Rendering/InstancedMeshRenderer.cs
//
// Step 1 of instanced static-object rendering:
// Groups entities by GfxObjId so each group is drawn contiguously.
// Still uses per-entity uniform uModel — visual output is identical to
// StaticMeshRenderer. The grouping is the prerequisite for true
// DrawElementsInstanced in the follow-up commit.
// True instanced rendering for static-object meshes.
// Groups entities by GfxObjId. All instance model matrices are written into
// a single shared instance VBO once per frame. Each sub-mesh is drawn with
// DrawElementsInstanced — one GL draw call per (GfxObj × sub-mesh) instead
// of one per entity. For a scene with N unique GfxObjs and M total entities
// this reduces draw calls from M*subMeshes to N*subMeshes.
//
// Architecture note: this class has the same public API as StaticMeshRenderer
// so GameWindow only needs to swap the type name at the call sites.
// Matrix layout:
// System.Numerics.Matrix4x4 is row-major. Written to the float[] buffer in
// natural memory order (M11..M44). The GLSL shader reads 4 vec4 attributes
// (aInstanceRow0-3) and constructs mat4(row0, row1, row2, row3). Because
// GLSL mat4() takes column vectors, the rows of the C# matrix become the
// columns of the GLSL mat4 — which is the same transpose that UniformMatrix4
// with transpose=false produces. Visual result is identical to the old
// SetMatrix4("uModel", ...) path.
//
// Architecture note: public API matches StaticMeshRenderer so GameWindow only
// needs to update the shader and uniform setup at the call sites.
using System.Numerics;
using System.Runtime.InteropServices;
using AcDream.Core.Meshing;
using AcDream.Core.Terrain;
using AcDream.Core.World;
@ -25,16 +36,25 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
// One GPU bundle per unique GfxObj id. Each GfxObj can have multiple sub-meshes.
private readonly Dictionary<uint, List<SubMeshGpu>> _gpuByGfxObj = new();
// ── Instance grouping scratch buffer ─────────────────────────────────────
// Shared instance VBO — filled every frame with all instance model matrices.
private readonly uint _instanceVbo;
// Per-frame scratch: reused float buffer for instance matrix data.
// 16 floats per mat4. Grown on demand; never shrunk.
private float[] _instanceBuffer = new float[256 * 16]; // start at 256 instances
// ── Instance grouping scratch ─────────────────────────────────────────────
// Reused every frame to avoid per-frame allocation. Key = GfxObjId.
// Value = list of (model matrix, entity, meshRef) tuples for that GfxObj.
private readonly Dictionary<uint, List<InstanceEntry>> _groups = new();
// Value = InstanceGroup (list of InstanceEntry + buffer offset for this group).
private readonly Dictionary<uint, InstanceGroup> _groups = new();
/// <summary>
/// Stores the GL context, mesh shader and texture cache used by this
/// renderer, and generates the buffer object that serves as the shared
/// per-instance VBO.
/// </summary>
/// <param name="gl">GL API instance used for every GL call made by this renderer.</param>
/// <param name="shader">Shader program used when drawing.</param>
/// <param name="textures">Texture cache kept for texture resolution.</param>
public InstancedMeshRenderer(GL gl, Shader shader, TextureCache textures)
{
    (_gl, _shader, _textures) = (gl, shader, textures);

    // Only the buffer name is generated here; the instance matrices are
    // uploaded into it later with BufferData.
    _instanceVbo = gl.GenBuffer();
}
// ── Upload ────────────────────────────────────────────────────────────────
@ -55,18 +75,13 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
uint vao = _gl.GenVertexArray();
_gl.BindVertexArray(vao);
// ── Vertex buffer (positions, normals, UVs) ───────────────────────────
uint vbo = _gl.GenBuffer();
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, vbo);
fixed (void* p = sm.Vertices)
_gl.BufferData(BufferTargetARB.ArrayBuffer,
(nuint)(sm.Vertices.Length * sizeof(Vertex)), p, BufferUsageARB.StaticDraw);
uint ebo = _gl.GenBuffer();
_gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, ebo);
fixed (void* p = sm.Indices)
_gl.BufferData(BufferTargetARB.ElementArrayBuffer,
(nuint)(sm.Indices.Length * sizeof(uint)), p, BufferUsageARB.StaticDraw);
uint stride = (uint)sizeof(Vertex);
_gl.EnableVertexAttribArray(0);
_gl.VertexAttribPointer(0, 3, VertexAttribPointerType.Float, false, stride, (void*)0);
@ -74,8 +89,30 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
_gl.VertexAttribPointer(1, 3, VertexAttribPointerType.Float, false, stride, (void*)(3 * sizeof(float)));
_gl.EnableVertexAttribArray(2);
_gl.VertexAttribPointer(2, 2, VertexAttribPointerType.Float, false, stride, (void*)(6 * sizeof(float)));
_gl.EnableVertexAttribArray(3);
_gl.VertexAttribIPointer(3, 1, VertexAttribIType.UnsignedInt, stride, (void*)(8 * sizeof(float)));
// Note: location 3 (uint TerrainLayer) is NOT used by mesh_instanced.vert;
// that slot is reserved for per-instance mat4 row 0 from the instance VBO.
// ── Index buffer ──────────────────────────────────────────────────────
uint ebo = _gl.GenBuffer();
_gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, ebo);
fixed (void* p = sm.Indices)
_gl.BufferData(BufferTargetARB.ElementArrayBuffer,
(nuint)(sm.Indices.Length * sizeof(uint)), p, BufferUsageARB.StaticDraw);
// ── Per-instance model matrix (locations 3-6) ─────────────────────────
// Bind the shared instance VBO. The VAO captures this binding at each
// attribute location. At draw time we re-call VertexAttribPointer with
// the per-group byte offset (to address different groups in the VBO
// without DrawElementsInstancedBaseInstance).
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
// mat4 = 4 × vec4, stride = 64 bytes, divisor = 1 (advance once per instance)
for (uint row = 0; row < 4; row++)
{
uint loc = 3 + row;
_gl.EnableVertexAttribArray(loc);
_gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
_gl.VertexAttribDivisor(loc, 1);
}
_gl.BindVertexArray(0);
@ -98,26 +135,59 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
uint? neverCullLandblockId = null)
{
_shader.Use();
_shader.SetMatrix4("uView", camera.View);
_shader.SetMatrix4("uProjection", camera.Projection);
// Compute combined view-projection once. System.Numerics uses row-major
// convention; multiplying View * Projection gives the correct combined
// matrix that maps world → clip space when applied as M*v in the shader.
var vp = camera.View * camera.Projection;
_shader.SetMatrix4("uViewProjection", vp);
// Lighting uniforms — match the constants from mesh.frag so the visual
// output is identical to the non-instanced path.
var sunDir = Vector3.Normalize(new Vector3(0.5f, 0.4f, 0.6f));
_shader.SetVec3("uLightDirection", sunDir);
_shader.SetFloat("uAmbientIntensity", 0.25f);
_shader.SetFloat("uDiffuseIntensity", 0.75f);
// ── Collect and group instances ───────────────────────────────────────
// Two-pass collection: opaque+clipmap first, translucent second.
// We collect all landblock entries into the grouping dict, then draw
// each group contiguously. This is the structural change that makes
// true DrawElementsInstanced a one-commit follow-up.
CollectGroups(landblockEntries, frustum, neverCullLandblockId);
// ── Build and upload the instance buffer ──────────────────────────────
// Count total instances.
int totalInstances = 0;
foreach (var grp in _groups.Values)
totalInstances += grp.Count;
// Grow the scratch buffer if needed.
int needed = totalInstances * 16;
if (_instanceBuffer.Length < needed)
_instanceBuffer = new float[needed + 256 * 16]; // extra headroom
// Write all groups contiguously. Record each group's starting offset
// (in units of instances, not bytes) so we can address them at draw time.
int instanceOffset = 0;
foreach (var grp in _groups.Values)
{
grp.BufferOffset = instanceOffset;
foreach (ref readonly var inst in CollectionsMarshal.AsSpan(grp.Entries))
WriteMatrix(_instanceBuffer, instanceOffset++ * 16, inst.Model);
}
// Upload all instance data in a single DynamicDraw call.
if (totalInstances > 0)
{
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
fixed (void* p = _instanceBuffer)
_gl.BufferData(BufferTargetARB.ArrayBuffer,
(nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw);
}
// ── Pass 1: Opaque + ClipMap ──────────────────────────────────────────
// Depth write on (default). No blending. ClipMap surfaces use the
// alpha-discard path in the fragment shader (uTranslucencyKind == 1).
foreach (var (gfxObjId, instances) in _groups)
foreach (var (gfxObjId, grp) in _groups)
{
if (!_gpuByGfxObj.TryGetValue(gfxObjId, out var subMeshes))
continue;
// Check if this GfxObj has any opaque/clipmap sub-meshes at all.
bool hasOpaqueSubMesh = false;
foreach (var sub in subMeshes)
{
@ -130,50 +200,54 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
}
if (!hasOpaqueSubMesh) continue;
foreach (var inst in instances)
// For this group, instance data starts at grp.BufferOffset in the VBO.
// We need to tell the VAO to read from that offset.
uint byteOffset = (uint)(grp.BufferOffset * 64); // 64 bytes per mat4
foreach (var sub in subMeshes)
{
_shader.SetMatrix4("uModel", inst.Model);
if (sub.Translucency != TranslucencyKind.Opaque &&
sub.Translucency != TranslucencyKind.ClipMap)
continue;
foreach (var sub in subMeshes)
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
// Bind VAO + re-point instance attributes to the group's slice
// in the shared VBO. This updates the VAO's stored offset for
// locations 3-6 without touching the vertex or index bindings.
_gl.BindVertexArray(sub.Vao);
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
for (uint row = 0; row < 4; row++)
{
if (sub.Translucency != TranslucencyKind.Opaque &&
sub.Translucency != TranslucencyKind.ClipMap)
continue;
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
uint tex = ResolveTex(inst.Entity, inst.MeshRef, sub);
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, tex);
_gl.BindVertexArray(sub.Vao);
_gl.DrawElements(PrimitiveType.Triangles, (uint)sub.IndexCount, DrawElementsType.UnsignedInt, (void*)0);
_gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float,
false, 64, (void*)(byteOffset + row * 16));
}
// Resolve the texture from the first instance only. All instances in this
// group share the same GfxObj, so their texture/palette overrides differ
// only in the degenerate case of mixed-palette entities using the same
// GfxObj — rare enough to accept the approximation here.
var firstEntry = grp.Entries[0];
uint tex = ResolveTex(firstEntry.Entity, firstEntry.MeshRef, sub);
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, tex);
_gl.DrawElementsInstanced(PrimitiveType.Triangles,
(uint)sub.IndexCount,
DrawElementsType.UnsignedInt,
(void*)0,
(uint)grp.Count);
}
}
// ── Pass 2: Translucent (AlphaBlend, Additive, InvAlpha) ─────────────
// Depth test on so translucents composite correctly behind opaque geometry.
// Depth write OFF so translucents don't occlude each other or downstream
// opaque draws. Blend function is set per-draw based on TranslucencyKind.
//
// NOTE: translucent draws are NOT sorted by depth — overlapping translucent
// surfaces can composite in the wrong order. Portal-sized billboards don't
// overlap in practice so this is acceptable and avoids a larger refactor.
_gl.Enable(EnableCap.Blend);
_gl.DepthMask(false);
// Enable back-face culling for the translucent pass so closed-shell
// translucents (lifestone crystal, glow gems, any convex blended mesh)
// don't draw their back faces over their front faces in arbitrary
// iteration order. Matches WorldBuilder's per-batch CullMode handling in
// references/WorldBuilder/Chorizite.OpenGLSDLBackend/Lib/
// BaseObjectRenderManager.cs:361-365.
_gl.Enable(EnableCap.CullFace);
_gl.CullFace(TriangleFace.Back);
_gl.FrontFace(FrontFaceDirection.Ccw);
foreach (var (gfxObjId, instances) in _groups)
foreach (var (gfxObjId, grp) in _groups)
{
if (!_gpuByGfxObj.TryGetValue(gfxObjId, out var subMeshes))
continue;
@ -190,46 +264,54 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
}
if (!hasTranslucentSubMesh) continue;
foreach (var inst in instances)
uint byteOffset = (uint)(grp.BufferOffset * 64);
foreach (var sub in subMeshes)
{
_shader.SetMatrix4("uModel", inst.Model);
if (sub.Translucency == TranslucencyKind.Opaque ||
sub.Translucency == TranslucencyKind.ClipMap)
continue;
foreach (var sub in subMeshes)
switch (sub.Translucency)
{
if (sub.Translucency == TranslucencyKind.Opaque ||
sub.Translucency == TranslucencyKind.ClipMap)
continue;
switch (sub.Translucency)
{
case TranslucencyKind.Additive:
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
break;
case TranslucencyKind.InvAlpha:
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
break;
default: // AlphaBlend
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
break;
}
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
uint tex = ResolveTex(inst.Entity, inst.MeshRef, sub);
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, tex);
_gl.BindVertexArray(sub.Vao);
_gl.DrawElements(PrimitiveType.Triangles, (uint)sub.IndexCount, DrawElementsType.UnsignedInt, (void*)0);
case TranslucencyKind.Additive:
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
break;
case TranslucencyKind.InvAlpha:
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
break;
default: // AlphaBlend
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
break;
}
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
_gl.BindVertexArray(sub.Vao);
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
for (uint row = 0; row < 4; row++)
{
_gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float,
false, 64, (void*)(byteOffset + row * 16));
}
var firstEntry = grp.Entries[0];
uint tex = ResolveTex(firstEntry.Entity, firstEntry.MeshRef, sub);
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, tex);
_gl.DrawElementsInstanced(PrimitiveType.Triangles,
(uint)sub.IndexCount,
DrawElementsType.UnsignedInt,
(void*)0,
(uint)grp.Count);
}
}
// Restore default GL state for subsequent renderers (terrain etc.).
// Restore default GL state.
_gl.DepthMask(true);
_gl.Disable(EnableCap.Blend);
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
}
@ -237,22 +319,18 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
/// <summary>
/// Iterates all visible landblock entries and groups every (entity, meshRef)
/// pair by GfxObjId into <see cref="_groups"/>. The resulting dict drives
/// both render passes in <see cref="Draw"/>. Clears the dict before filling.
/// pair by GfxObjId. Clears previous frame's groups before filling.
/// </summary>
private void CollectGroups(
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
FrustumPlanes? frustum,
uint? neverCullLandblockId)
{
// Clear previous frame's groups but keep the per-group List<> objects
// so they can be reused (avoids re-allocating inner lists every frame).
foreach (var list in _groups.Values)
list.Clear();
foreach (var grp in _groups.Values)
grp.Entries.Clear();
foreach (var entry in landblockEntries)
{
// Per-landblock frustum cull. Never cull the player's landblock.
if (frustum is not null &&
entry.LandblockId != neverCullLandblockId &&
!FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax))
@ -276,22 +354,35 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
if (!_groups.TryGetValue(meshRef.GfxObjId, out var group))
{
group = new List<InstanceEntry>();
group = new InstanceGroup();
_groups[meshRef.GfxObjId] = group;
}
group.Add(new InstanceEntry(model, entity, meshRef));
group.Entries.Add(new InstanceEntry(model, entity, meshRef));
}
}
}
}
// ── Texture resolution ────────────────────────────────────────────────────
// ── Matrix write ──────────────────────────────────────────────────────────
/// <summary>
/// Resolves the GL texture id for a sub-mesh, honouring palette and
/// texture overrides carried on the entity and the mesh-ref.
/// Writes a System.Numerics Matrix4x4 into <paramref name="buf"/> starting
/// at <paramref name="offset"/> as 16 consecutive floats in row-major order
/// (the C# natural memory layout). The GLSL shader reads each 4-float row
/// as a column of the mat4 — identical to what UniformMatrix4(transpose=false)
/// produces for the uniform path.
/// </summary>
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
    // A moving cursor replaces the explicit "offset + k" indices; the 16
    // elements are still stored in M11..M44 order, one matrix row at a time.
    int cursor = offset;

    // Row 1
    buf[cursor++] = m.M11;
    buf[cursor++] = m.M12;
    buf[cursor++] = m.M13;
    buf[cursor++] = m.M14;

    // Row 2
    buf[cursor++] = m.M21;
    buf[cursor++] = m.M22;
    buf[cursor++] = m.M23;
    buf[cursor++] = m.M24;

    // Row 3
    buf[cursor++] = m.M31;
    buf[cursor++] = m.M32;
    buf[cursor++] = m.M33;
    buf[cursor++] = m.M34;

    // Row 4
    buf[cursor++] = m.M41;
    buf[cursor++] = m.M42;
    buf[cursor++] = m.M43;
    buf[cursor] = m.M44;
}
// ── Texture resolution ────────────────────────────────────────────────────
private uint ResolveTex(WorldEntity entity, MeshRef meshRef, SubMeshGpu sub)
{
uint overrideOrigTex = 0;
@ -327,6 +418,7 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
_gl.DeleteVertexArray(sub.Vao);
}
}
_gl.DeleteBuffer(_instanceVbo);
_gpuByGfxObj.Clear();
_groups.Clear();
}
@ -340,17 +432,21 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
public uint Ebo;
public int IndexCount;
public uint SurfaceId;
/// <summary>
/// Cached from GfxObjSubMesh.Translucency at upload time.
/// Avoids any per-draw lookup into external state.
/// </summary>
public TranslucencyKind Translucency;
}
/// <summary>
/// One entry in a per-GfxObj instance group. Carries the pre-computed
/// model matrix plus the entity/meshRef needed for texture resolution.
/// All instances of one GfxObj for this frame, plus their starting offset
/// in the shared instance VBO (in units of instances, not bytes).
/// </summary>
private sealed class InstanceGroup
{
    // Instances collected for this GfxObj. The list object itself is kept
    // between frames and only cleared, so it is not re-allocated each frame.
    public readonly List<InstanceEntry> Entries = new List<InstanceEntry>();

    // Index of this group's first instance within the shared instance buffer,
    // counted in instances (not bytes).
    public int BufferOffset;

    // Number of instances currently held by this group.
    public int Count
    {
        get { return Entries.Count; }
    }
}
private readonly struct InstanceEntry
{
public readonly Matrix4x4 Model;

View file

@ -58,5 +58,11 @@ public sealed class Shader : IDisposable
_gl.Uniform1(loc, value);
}
/// <summary>
/// Sets a <c>vec3</c> uniform on this shader program from a
/// <see cref="Vector3"/>, passing its X, Y and Z components.
/// </summary>
/// <param name="name">Uniform name to look up in the program.</param>
/// <param name="v">Value whose components are uploaded.</param>
public void SetVec3(string name, Vector3 v)
{
    // The location is looked up by name on every call, then the three
    // components are passed individually.
    _gl.Uniform3(_gl.GetUniformLocation(Program, name), v.X, v.Y, v.Z);
}
/// <summary>
/// Deletes the GL program object owned by this shader.
/// </summary>
public void Dispose()
{
    _gl.DeleteProgram(Program);
}
}

View file

@ -0,0 +1,31 @@
#version 430 core
in vec2 vTex;
in vec3 vWorldNormal;
in float vLightingFactor;
out vec4 fragColor;
// One 2D texture per draw call — same binding point as mesh.frag so the
// C# side can use the same TextureCache without a texture-array pipeline.
uniform sampler2D uDiffuse;
// Translucency kind — matches TranslucencyKind C# enum (same as mesh.frag):
// 0 = Opaque — depth write+test, no blend; shader never discards
// 1 = ClipMap — alpha-key discard at 0.5 (doors, windows, vegetation)
// 2 = AlphaBlend — GL blending handles compositing; do NOT discard
// 3 = Additive — GL additive blending; do NOT discard
// 4 = InvAlpha — GL inverted-alpha blending; do NOT discard
uniform int uTranslucencyKind;
void main() {
    // Sample the single 2D diffuse texture bound for this draw call.
    vec4 texel = texture(uDiffuse, vTex);

    // Alpha-key cutout applies to clip-map surfaces only (kind 1). The
    // blended kinds must keep their semi-transparent pixels so the blend
    // stage can composite them, so they never discard here.
    bool isClipMap = (uTranslucencyKind == 1);
    if (isClipMap && texel.a < 0.5) {
        discard;
    }

    // Scale the colour by the lighting factor computed in the vertex shader;
    // alpha passes through unchanged.
    fragColor = vec4(texel.rgb * vLightingFactor, texel.a);
}

View file

@ -0,0 +1,46 @@
#version 430 core
// Per-vertex attributes
layout(location = 0) in vec3 aPosition;
layout(location = 1) in vec3 aNormal;
layout(location = 2) in vec2 aTexCoord;
// Per-instance model matrix, split across four vec4 attribute slots.
// A mat4 consumes 4 consecutive attribute locations, so locations 3-6 are
// all occupied by this single logical matrix. The C# side must call
// VertexAttribPointer four times (one per row) and VertexAttribDivisor(loc, 1)
// on each of the four slots.
layout(location = 3) in vec4 aInstanceRow0;
layout(location = 4) in vec4 aInstanceRow1;
layout(location = 5) in vec4 aInstanceRow2;
layout(location = 6) in vec4 aInstanceRow3;
uniform mat4 uViewProjection;
uniform vec3 uLightDirection; // world-space sun direction (points toward sun)
uniform float uAmbientIntensity;
uniform float uDiffuseIntensity;
out vec2 vTex;
out vec3 vWorldNormal;
out float vLightingFactor;
void main() {
    // Texture coordinates pass straight through to the fragment stage.
    vTex = aTexCoord;

    // Rebuild the per-instance model matrix. mat4(a, b, c, d) takes its
    // arguments as columns, so each vec4 read from the instance buffer
    // becomes one column of the matrix.
    mat4 instanceModel = mat4(aInstanceRow0, aInstanceRow1, aInstanceRow2, aInstanceRow3);

    // Object space -> world space -> clip space.
    gl_Position = uViewProjection * (instanceModel * vec4(aPosition, 1.0));

    // World-space normal from the upper-left 3x3 of the model matrix. This is
    // sufficient for uniform scale; non-uniform scale would need the inverse
    // transpose, which is left as a future concern.
    vec3 worldNormal = normalize(mat3(instanceModel) * aNormal);
    vWorldNormal = worldNormal;

    // Lambert diffuse plus ambient, evaluated per vertex so the fragment
    // shader only has to multiply.
    float lambert = max(dot(worldNormal, uLightDirection), 0.0);
    vLightingFactor = uAmbientIntensity + uDiffuseIntensity * lambert;
}