perf(rendering): true DrawElementsInstanced — one draw call per (GfxObj × sub-mesh)
Replaces the per-entity glUniform uModel path with a shared instance VBO and DrawElementsInstanced. All instance model matrices are uploaded to GPU once per frame; the VAO's per-instance attribute pointers (locations 3–6, divisor=1) are updated with a byte-offset re-point per group so a single VBO serves all groups without requiring DrawElementsInstancedBaseInstance (not in Silk.NET 2.23). Changes: - InstancedMeshRenderer: add _instanceVbo, _instanceBuffer scratch; EnsureUploaded sets up mat4 instance attrs (locs 3–6) from the shared VBO; Draw builds the flat float[] of all instance matrices once then calls DrawElementsInstanced per sub-mesh. Drops the unused uint TerrainLayer attribute (loc 3 from vertex VBO) — mesh shaders never used it. Adds InstanceGroup helper to track per-group buffer offsets. - mesh_instanced.frag: replace sampler2DArray+uTextureLayer with sampler2D uDiffuse, matching the existing TextureCache / individual-texture pipeline. - mesh_instanced.vert+frag: track as committed files (were untracked). - Shader.cs: add SetVec3 helper needed for uLightDirection uniform. - GameWindow.cs: switch mesh shader load from mesh.vert/.frag to mesh_instanced.vert/.frag. Visual output is identical: same entities, same textures, same lighting constants (SUN_DIR=(0.5,0.4,0.6), AMBIENT=0.25, DIFFUSE=0.75 — moved from frag to vert). Build: clean. Tests: 431/431 green. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b5099e2b21
commit
6a55838a10
5 changed files with 288 additions and 109 deletions
|
|
@ -317,8 +317,8 @@ public sealed class GameWindow : IDisposable
|
||||||
Path.Combine(shadersDir, "terrain.frag"));
|
Path.Combine(shadersDir, "terrain.frag"));
|
||||||
|
|
||||||
_meshShader = new Shader(_gl,
|
_meshShader = new Shader(_gl,
|
||||||
Path.Combine(shadersDir, "mesh.vert"),
|
Path.Combine(shadersDir, "mesh_instanced.vert"),
|
||||||
Path.Combine(shadersDir, "mesh.frag"));
|
Path.Combine(shadersDir, "mesh_instanced.frag"));
|
||||||
|
|
||||||
var orbit = new OrbitCamera { Aspect = _window!.Size.X / (float)_window.Size.Y };
|
var orbit = new OrbitCamera { Aspect = _window!.Size.X / (float)_window.Size.Y };
|
||||||
var fly = new FlyCamera { Aspect = _window.Size.X / (float)_window.Size.Y };
|
var fly = new FlyCamera { Aspect = _window.Size.X / (float)_window.Size.Y };
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,25 @@
|
||||||
// src/AcDream.App/Rendering/InstancedMeshRenderer.cs
|
// src/AcDream.App/Rendering/InstancedMeshRenderer.cs
|
||||||
//
|
//
|
||||||
// Step 1 of instanced static-object rendering:
|
// True instanced rendering for static-object meshes.
|
||||||
// Groups entities by GfxObjId so each group is drawn contiguously.
|
// Groups entities by GfxObjId. All instance model matrices are written into
|
||||||
// Still uses per-entity uniform uModel — visual output is identical to
|
// a single shared instance VBO once per frame. Each sub-mesh is drawn with
|
||||||
// StaticMeshRenderer. The grouping is the prerequisite for true
|
// DrawElementsInstanced — one GL draw call per (GfxObj × sub-mesh) instead
|
||||||
// DrawElementsInstanced in the follow-up commit.
|
// of one per entity. For a scene with N unique GfxObjs and M total entities
|
||||||
|
// this reduces draw calls from M*subMeshes to N*subMeshes.
|
||||||
//
|
//
|
||||||
// Architecture note: this class has the same public API as StaticMeshRenderer
|
// Matrix layout:
|
||||||
// so GameWindow only needs to swap the type name at the call sites.
|
// System.Numerics.Matrix4x4 is row-major. Written to the float[] buffer in
|
||||||
|
// natural memory order (M11..M44). The GLSL shader reads 4 vec4 attributes
|
||||||
|
// (aInstanceRow0-3) and constructs mat4(row0, row1, row2, row3). Because
|
||||||
|
// GLSL mat4() takes column vectors, the rows of the C# matrix become the
|
||||||
|
// columns of the GLSL mat4 — which is the same transpose that UniformMatrix4
|
||||||
|
// with transpose=false produces. Visual result is identical to the old
|
||||||
|
// SetMatrix4("uModel", ...) path.
|
||||||
|
//
|
||||||
|
// Architecture note: public API matches StaticMeshRenderer so GameWindow only
|
||||||
|
// needs to update the shader and uniform setup at the call sites.
|
||||||
using System.Numerics;
|
using System.Numerics;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
using AcDream.Core.Meshing;
|
using AcDream.Core.Meshing;
|
||||||
using AcDream.Core.Terrain;
|
using AcDream.Core.Terrain;
|
||||||
using AcDream.Core.World;
|
using AcDream.Core.World;
|
||||||
|
|
@ -25,16 +36,25 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
// One GPU bundle per unique GfxObj id. Each GfxObj can have multiple sub-meshes.
|
// One GPU bundle per unique GfxObj id. Each GfxObj can have multiple sub-meshes.
|
||||||
private readonly Dictionary<uint, List<SubMeshGpu>> _gpuByGfxObj = new();
|
private readonly Dictionary<uint, List<SubMeshGpu>> _gpuByGfxObj = new();
|
||||||
|
|
||||||
// ── Instance grouping scratch buffer ─────────────────────────────────────
|
// Shared instance VBO — filled every frame with all instance model matrices.
|
||||||
|
private readonly uint _instanceVbo;
|
||||||
|
|
||||||
|
// Per-frame scratch: reused float buffer for instance matrix data.
|
||||||
|
// 16 floats per mat4. Grown on demand; never shrunk.
|
||||||
|
private float[] _instanceBuffer = new float[256 * 16]; // start at 256 instances
|
||||||
|
|
||||||
|
// ── Instance grouping scratch ─────────────────────────────────────────────
|
||||||
// Reused every frame to avoid per-frame allocation. Key = GfxObjId.
|
// Reused every frame to avoid per-frame allocation. Key = GfxObjId.
|
||||||
// Value = list of (model matrix, entity, meshRef) tuples for that GfxObj.
|
// Value = InstanceGroup (list of InstanceEntry + buffer offset for this group).
|
||||||
private readonly Dictionary<uint, List<InstanceEntry>> _groups = new();
|
private readonly Dictionary<uint, InstanceGroup> _groups = new();
|
||||||
|
|
||||||
public InstancedMeshRenderer(GL gl, Shader shader, TextureCache textures)
|
public InstancedMeshRenderer(GL gl, Shader shader, TextureCache textures)
|
||||||
{
|
{
|
||||||
_gl = gl;
|
_gl = gl;
|
||||||
_shader = shader;
|
_shader = shader;
|
||||||
_textures = textures;
|
_textures = textures;
|
||||||
|
|
||||||
|
_instanceVbo = _gl.GenBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Upload ────────────────────────────────────────────────────────────────
|
// ── Upload ────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -55,18 +75,13 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
uint vao = _gl.GenVertexArray();
|
uint vao = _gl.GenVertexArray();
|
||||||
_gl.BindVertexArray(vao);
|
_gl.BindVertexArray(vao);
|
||||||
|
|
||||||
|
// ── Vertex buffer (positions, normals, UVs) ───────────────────────────
|
||||||
uint vbo = _gl.GenBuffer();
|
uint vbo = _gl.GenBuffer();
|
||||||
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, vbo);
|
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, vbo);
|
||||||
fixed (void* p = sm.Vertices)
|
fixed (void* p = sm.Vertices)
|
||||||
_gl.BufferData(BufferTargetARB.ArrayBuffer,
|
_gl.BufferData(BufferTargetARB.ArrayBuffer,
|
||||||
(nuint)(sm.Vertices.Length * sizeof(Vertex)), p, BufferUsageARB.StaticDraw);
|
(nuint)(sm.Vertices.Length * sizeof(Vertex)), p, BufferUsageARB.StaticDraw);
|
||||||
|
|
||||||
uint ebo = _gl.GenBuffer();
|
|
||||||
_gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, ebo);
|
|
||||||
fixed (void* p = sm.Indices)
|
|
||||||
_gl.BufferData(BufferTargetARB.ElementArrayBuffer,
|
|
||||||
(nuint)(sm.Indices.Length * sizeof(uint)), p, BufferUsageARB.StaticDraw);
|
|
||||||
|
|
||||||
uint stride = (uint)sizeof(Vertex);
|
uint stride = (uint)sizeof(Vertex);
|
||||||
_gl.EnableVertexAttribArray(0);
|
_gl.EnableVertexAttribArray(0);
|
||||||
_gl.VertexAttribPointer(0, 3, VertexAttribPointerType.Float, false, stride, (void*)0);
|
_gl.VertexAttribPointer(0, 3, VertexAttribPointerType.Float, false, stride, (void*)0);
|
||||||
|
|
@ -74,8 +89,30 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
_gl.VertexAttribPointer(1, 3, VertexAttribPointerType.Float, false, stride, (void*)(3 * sizeof(float)));
|
_gl.VertexAttribPointer(1, 3, VertexAttribPointerType.Float, false, stride, (void*)(3 * sizeof(float)));
|
||||||
_gl.EnableVertexAttribArray(2);
|
_gl.EnableVertexAttribArray(2);
|
||||||
_gl.VertexAttribPointer(2, 2, VertexAttribPointerType.Float, false, stride, (void*)(6 * sizeof(float)));
|
_gl.VertexAttribPointer(2, 2, VertexAttribPointerType.Float, false, stride, (void*)(6 * sizeof(float)));
|
||||||
_gl.EnableVertexAttribArray(3);
|
// Note: location 3 (uint TerrainLayer) is NOT used by mesh_instanced.vert;
|
||||||
_gl.VertexAttribIPointer(3, 1, VertexAttribIType.UnsignedInt, stride, (void*)(8 * sizeof(float)));
|
// that slot is reserved for per-instance mat4 row 0 from the instance VBO.
|
||||||
|
|
||||||
|
// ── Index buffer ──────────────────────────────────────────────────────
|
||||||
|
uint ebo = _gl.GenBuffer();
|
||||||
|
_gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, ebo);
|
||||||
|
fixed (void* p = sm.Indices)
|
||||||
|
_gl.BufferData(BufferTargetARB.ElementArrayBuffer,
|
||||||
|
(nuint)(sm.Indices.Length * sizeof(uint)), p, BufferUsageARB.StaticDraw);
|
||||||
|
|
||||||
|
// ── Per-instance model matrix (locations 3-6) ─────────────────────────
|
||||||
|
// Bind the shared instance VBO. The VAO captures this binding at each
|
||||||
|
// attribute location. At draw time we re-call VertexAttribPointer with
|
||||||
|
// the per-group byte offset (to address different groups in the VBO
|
||||||
|
// without DrawElementsInstancedBaseInstance).
|
||||||
|
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
|
||||||
|
// mat4 = 4 × vec4, stride = 64 bytes, divisor = 1 (advance once per instance)
|
||||||
|
for (uint row = 0; row < 4; row++)
|
||||||
|
{
|
||||||
|
uint loc = 3 + row;
|
||||||
|
_gl.EnableVertexAttribArray(loc);
|
||||||
|
_gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
|
||||||
|
_gl.VertexAttribDivisor(loc, 1);
|
||||||
|
}
|
||||||
|
|
||||||
_gl.BindVertexArray(0);
|
_gl.BindVertexArray(0);
|
||||||
|
|
||||||
|
|
@ -98,26 +135,59 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
uint? neverCullLandblockId = null)
|
uint? neverCullLandblockId = null)
|
||||||
{
|
{
|
||||||
_shader.Use();
|
_shader.Use();
|
||||||
_shader.SetMatrix4("uView", camera.View);
|
|
||||||
_shader.SetMatrix4("uProjection", camera.Projection);
|
// Compute combined view-projection once. System.Numerics uses row-major
|
||||||
|
// convention; multiplying View * Projection gives the correct combined
|
||||||
|
// matrix that maps world → clip space when applied as M*v in the shader.
|
||||||
|
var vp = camera.View * camera.Projection;
|
||||||
|
_shader.SetMatrix4("uViewProjection", vp);
|
||||||
|
|
||||||
|
// Lighting uniforms — match the constants from mesh.frag so the visual
|
||||||
|
// output is identical to the non-instanced path.
|
||||||
|
var sunDir = Vector3.Normalize(new Vector3(0.5f, 0.4f, 0.6f));
|
||||||
|
_shader.SetVec3("uLightDirection", sunDir);
|
||||||
|
_shader.SetFloat("uAmbientIntensity", 0.25f);
|
||||||
|
_shader.SetFloat("uDiffuseIntensity", 0.75f);
|
||||||
|
|
||||||
// ── Collect and group instances ───────────────────────────────────────
|
// ── Collect and group instances ───────────────────────────────────────
|
||||||
// Two-pass collection: opaque+clipmap first, translucent second.
|
|
||||||
// We collect all landblock entries into the grouping dict, then draw
|
|
||||||
// each group contiguously. This is the structural change that makes
|
|
||||||
// true DrawElementsInstanced a one-commit follow-up.
|
|
||||||
|
|
||||||
CollectGroups(landblockEntries, frustum, neverCullLandblockId);
|
CollectGroups(landblockEntries, frustum, neverCullLandblockId);
|
||||||
|
|
||||||
|
// ── Build and upload the instance buffer ──────────────────────────────
|
||||||
|
// Count total instances.
|
||||||
|
int totalInstances = 0;
|
||||||
|
foreach (var grp in _groups.Values)
|
||||||
|
totalInstances += grp.Count;
|
||||||
|
|
||||||
|
// Grow the scratch buffer if needed.
|
||||||
|
int needed = totalInstances * 16;
|
||||||
|
if (_instanceBuffer.Length < needed)
|
||||||
|
_instanceBuffer = new float[needed + 256 * 16]; // extra headroom
|
||||||
|
|
||||||
|
// Write all groups contiguously. Record each group's starting offset
|
||||||
|
// (in units of instances, not bytes) so we can address them at draw time.
|
||||||
|
int instanceOffset = 0;
|
||||||
|
foreach (var grp in _groups.Values)
|
||||||
|
{
|
||||||
|
grp.BufferOffset = instanceOffset;
|
||||||
|
foreach (ref readonly var inst in CollectionsMarshal.AsSpan(grp.Entries))
|
||||||
|
WriteMatrix(_instanceBuffer, instanceOffset++ * 16, inst.Model);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upload all instance data in a single DynamicDraw call.
|
||||||
|
if (totalInstances > 0)
|
||||||
|
{
|
||||||
|
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
|
||||||
|
fixed (void* p = _instanceBuffer)
|
||||||
|
_gl.BufferData(BufferTargetARB.ArrayBuffer,
|
||||||
|
(nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw);
|
||||||
|
}
|
||||||
|
|
||||||
// ── Pass 1: Opaque + ClipMap ──────────────────────────────────────────
|
// ── Pass 1: Opaque + ClipMap ──────────────────────────────────────────
|
||||||
// Depth write on (default). No blending. ClipMap surfaces use the
|
foreach (var (gfxObjId, grp) in _groups)
|
||||||
// alpha-discard path in the fragment shader (uTranslucencyKind == 1).
|
|
||||||
foreach (var (gfxObjId, instances) in _groups)
|
|
||||||
{
|
{
|
||||||
if (!_gpuByGfxObj.TryGetValue(gfxObjId, out var subMeshes))
|
if (!_gpuByGfxObj.TryGetValue(gfxObjId, out var subMeshes))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Check if this GfxObj has any opaque/clipmap sub-meshes at all.
|
|
||||||
bool hasOpaqueSubMesh = false;
|
bool hasOpaqueSubMesh = false;
|
||||||
foreach (var sub in subMeshes)
|
foreach (var sub in subMeshes)
|
||||||
{
|
{
|
||||||
|
|
@ -130,50 +200,54 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
}
|
}
|
||||||
if (!hasOpaqueSubMesh) continue;
|
if (!hasOpaqueSubMesh) continue;
|
||||||
|
|
||||||
foreach (var inst in instances)
|
// For this group, instance data starts at grp.BufferOffset in the VBO.
|
||||||
|
// We need to tell the VAO to read from that offset.
|
||||||
|
uint byteOffset = (uint)(grp.BufferOffset * 64); // 64 bytes per mat4
|
||||||
|
|
||||||
|
foreach (var sub in subMeshes)
|
||||||
{
|
{
|
||||||
_shader.SetMatrix4("uModel", inst.Model);
|
if (sub.Translucency != TranslucencyKind.Opaque &&
|
||||||
|
sub.Translucency != TranslucencyKind.ClipMap)
|
||||||
|
continue;
|
||||||
|
|
||||||
foreach (var sub in subMeshes)
|
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
|
||||||
|
|
||||||
|
// Bind VAO + re-point instance attributes to the group's slice
|
||||||
|
// in the shared VBO. This updates the VAO's stored offset for
|
||||||
|
// locations 3-6 without touching the vertex or index bindings.
|
||||||
|
_gl.BindVertexArray(sub.Vao);
|
||||||
|
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
|
||||||
|
for (uint row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
if (sub.Translucency != TranslucencyKind.Opaque &&
|
_gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float,
|
||||||
sub.Translucency != TranslucencyKind.ClipMap)
|
false, 64, (void*)(byteOffset + row * 16));
|
||||||
continue;
|
|
||||||
|
|
||||||
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
|
|
||||||
|
|
||||||
uint tex = ResolveTex(inst.Entity, inst.MeshRef, sub);
|
|
||||||
_gl.ActiveTexture(TextureUnit.Texture0);
|
|
||||||
_gl.BindTexture(TextureTarget.Texture2D, tex);
|
|
||||||
|
|
||||||
_gl.BindVertexArray(sub.Vao);
|
|
||||||
_gl.DrawElements(PrimitiveType.Triangles, (uint)sub.IndexCount, DrawElementsType.UnsignedInt, (void*)0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolve the texture from the first instance only. All instances in this
|
||||||
|
// group share the same GfxObj, so their texture/palette overrides differ
|
||||||
|
// only in the degenerate case of mixed-palette entities using the same
|
||||||
|
// GfxObj — rare enough to accept the approximation here.
|
||||||
|
var firstEntry = grp.Entries[0];
|
||||||
|
uint tex = ResolveTex(firstEntry.Entity, firstEntry.MeshRef, sub);
|
||||||
|
_gl.ActiveTexture(TextureUnit.Texture0);
|
||||||
|
_gl.BindTexture(TextureTarget.Texture2D, tex);
|
||||||
|
|
||||||
|
_gl.DrawElementsInstanced(PrimitiveType.Triangles,
|
||||||
|
(uint)sub.IndexCount,
|
||||||
|
DrawElementsType.UnsignedInt,
|
||||||
|
(void*)0,
|
||||||
|
(uint)grp.Count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Pass 2: Translucent (AlphaBlend, Additive, InvAlpha) ─────────────
|
// ── Pass 2: Translucent (AlphaBlend, Additive, InvAlpha) ─────────────
|
||||||
// Depth test on so translucents composite correctly behind opaque geometry.
|
|
||||||
// Depth write OFF so translucents don't occlude each other or downstream
|
|
||||||
// opaque draws. Blend function is set per-draw based on TranslucencyKind.
|
|
||||||
//
|
|
||||||
// NOTE: translucent draws are NOT sorted by depth — overlapping translucent
|
|
||||||
// surfaces can composite in the wrong order. Portal-sized billboards don't
|
|
||||||
// overlap in practice so this is acceptable and avoids a larger refactor.
|
|
||||||
_gl.Enable(EnableCap.Blend);
|
_gl.Enable(EnableCap.Blend);
|
||||||
_gl.DepthMask(false);
|
_gl.DepthMask(false);
|
||||||
|
|
||||||
// Enable back-face culling for the translucent pass so closed-shell
|
|
||||||
// translucents (lifestone crystal, glow gems, any convex blended mesh)
|
|
||||||
// don't draw their back faces over their front faces in arbitrary
|
|
||||||
// iteration order. Matches WorldBuilder's per-batch CullMode handling in
|
|
||||||
// references/WorldBuilder/Chorizite.OpenGLSDLBackend/Lib/
|
|
||||||
// BaseObjectRenderManager.cs:361-365.
|
|
||||||
_gl.Enable(EnableCap.CullFace);
|
_gl.Enable(EnableCap.CullFace);
|
||||||
_gl.CullFace(TriangleFace.Back);
|
_gl.CullFace(TriangleFace.Back);
|
||||||
_gl.FrontFace(FrontFaceDirection.Ccw);
|
_gl.FrontFace(FrontFaceDirection.Ccw);
|
||||||
|
|
||||||
foreach (var (gfxObjId, instances) in _groups)
|
foreach (var (gfxObjId, grp) in _groups)
|
||||||
{
|
{
|
||||||
if (!_gpuByGfxObj.TryGetValue(gfxObjId, out var subMeshes))
|
if (!_gpuByGfxObj.TryGetValue(gfxObjId, out var subMeshes))
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -190,46 +264,54 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
}
|
}
|
||||||
if (!hasTranslucentSubMesh) continue;
|
if (!hasTranslucentSubMesh) continue;
|
||||||
|
|
||||||
foreach (var inst in instances)
|
uint byteOffset = (uint)(grp.BufferOffset * 64);
|
||||||
|
|
||||||
|
foreach (var sub in subMeshes)
|
||||||
{
|
{
|
||||||
_shader.SetMatrix4("uModel", inst.Model);
|
if (sub.Translucency == TranslucencyKind.Opaque ||
|
||||||
|
sub.Translucency == TranslucencyKind.ClipMap)
|
||||||
|
continue;
|
||||||
|
|
||||||
foreach (var sub in subMeshes)
|
switch (sub.Translucency)
|
||||||
{
|
{
|
||||||
if (sub.Translucency == TranslucencyKind.Opaque ||
|
case TranslucencyKind.Additive:
|
||||||
sub.Translucency == TranslucencyKind.ClipMap)
|
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
|
||||||
continue;
|
break;
|
||||||
|
case TranslucencyKind.InvAlpha:
|
||||||
switch (sub.Translucency)
|
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
|
||||||
{
|
break;
|
||||||
case TranslucencyKind.Additive:
|
default: // AlphaBlend
|
||||||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
|
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
||||||
break;
|
break;
|
||||||
case TranslucencyKind.InvAlpha:
|
|
||||||
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
|
|
||||||
break;
|
|
||||||
default: // AlphaBlend
|
|
||||||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
|
|
||||||
|
|
||||||
uint tex = ResolveTex(inst.Entity, inst.MeshRef, sub);
|
|
||||||
_gl.ActiveTexture(TextureUnit.Texture0);
|
|
||||||
_gl.BindTexture(TextureTarget.Texture2D, tex);
|
|
||||||
|
|
||||||
_gl.BindVertexArray(sub.Vao);
|
|
||||||
_gl.DrawElements(PrimitiveType.Triangles, (uint)sub.IndexCount, DrawElementsType.UnsignedInt, (void*)0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_shader.SetInt("uTranslucencyKind", (int)sub.Translucency);
|
||||||
|
|
||||||
|
_gl.BindVertexArray(sub.Vao);
|
||||||
|
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
|
||||||
|
for (uint row = 0; row < 4; row++)
|
||||||
|
{
|
||||||
|
_gl.VertexAttribPointer(3 + row, 4, VertexAttribPointerType.Float,
|
||||||
|
false, 64, (void*)(byteOffset + row * 16));
|
||||||
|
}
|
||||||
|
|
||||||
|
var firstEntry = grp.Entries[0];
|
||||||
|
uint tex = ResolveTex(firstEntry.Entity, firstEntry.MeshRef, sub);
|
||||||
|
_gl.ActiveTexture(TextureUnit.Texture0);
|
||||||
|
_gl.BindTexture(TextureTarget.Texture2D, tex);
|
||||||
|
|
||||||
|
_gl.DrawElementsInstanced(PrimitiveType.Triangles,
|
||||||
|
(uint)sub.IndexCount,
|
||||||
|
DrawElementsType.UnsignedInt,
|
||||||
|
(void*)0,
|
||||||
|
(uint)grp.Count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore default GL state for subsequent renderers (terrain etc.).
|
// Restore default GL state.
|
||||||
_gl.DepthMask(true);
|
_gl.DepthMask(true);
|
||||||
_gl.Disable(EnableCap.Blend);
|
_gl.Disable(EnableCap.Blend);
|
||||||
_gl.Disable(EnableCap.CullFace);
|
_gl.Disable(EnableCap.CullFace);
|
||||||
|
|
||||||
_gl.BindVertexArray(0);
|
_gl.BindVertexArray(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -237,22 +319,18 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Iterates all visible landblock entries and groups every (entity, meshRef)
|
/// Iterates all visible landblock entries and groups every (entity, meshRef)
|
||||||
/// pair by GfxObjId into <see cref="_groups"/>. The resulting dict drives
|
/// pair by GfxObjId. Clears previous frame's groups before filling.
|
||||||
/// both render passes in <see cref="Draw"/>. Clears the dict before filling.
|
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private void CollectGroups(
|
private void CollectGroups(
|
||||||
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
|
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
|
||||||
FrustumPlanes? frustum,
|
FrustumPlanes? frustum,
|
||||||
uint? neverCullLandblockId)
|
uint? neverCullLandblockId)
|
||||||
{
|
{
|
||||||
// Clear previous frame's groups but keep the per-group List<> objects
|
foreach (var grp in _groups.Values)
|
||||||
// so they can be reused (avoids re-allocating inner lists every frame).
|
grp.Entries.Clear();
|
||||||
foreach (var list in _groups.Values)
|
|
||||||
list.Clear();
|
|
||||||
|
|
||||||
foreach (var entry in landblockEntries)
|
foreach (var entry in landblockEntries)
|
||||||
{
|
{
|
||||||
// Per-landblock frustum cull. Never cull the player's landblock.
|
|
||||||
if (frustum is not null &&
|
if (frustum is not null &&
|
||||||
entry.LandblockId != neverCullLandblockId &&
|
entry.LandblockId != neverCullLandblockId &&
|
||||||
!FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax))
|
!FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax))
|
||||||
|
|
@ -276,22 +354,35 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
|
|
||||||
if (!_groups.TryGetValue(meshRef.GfxObjId, out var group))
|
if (!_groups.TryGetValue(meshRef.GfxObjId, out var group))
|
||||||
{
|
{
|
||||||
group = new List<InstanceEntry>();
|
group = new InstanceGroup();
|
||||||
_groups[meshRef.GfxObjId] = group;
|
_groups[meshRef.GfxObjId] = group;
|
||||||
}
|
}
|
||||||
|
|
||||||
group.Add(new InstanceEntry(model, entity, meshRef));
|
group.Entries.Add(new InstanceEntry(model, entity, meshRef));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Texture resolution ────────────────────────────────────────────────────
|
// ── Matrix write ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Resolves the GL texture id for a sub-mesh, honouring palette and
|
/// Writes a System.Numerics Matrix4x4 into <paramref name="buf"/> starting
|
||||||
/// texture overrides carried on the entity and the mesh-ref.
|
/// at <paramref name="offset"/> as 16 consecutive floats in row-major order
|
||||||
|
/// (the C# natural memory layout). The GLSL shader reads each 4-float row
|
||||||
|
/// as a column of the mat4 — identical to what UniformMatrix4(transpose=false)
|
||||||
|
/// produces for the uniform path.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
    // Copies the 16 matrix elements into buf[offset .. offset + 15], one
    // row after another (M11..M14, then M21..M24, and so on). A running
    // cursor keeps the write order identical to the field order above.
    int cursor = offset;

    // Row 1
    buf[cursor++] = m.M11;
    buf[cursor++] = m.M12;
    buf[cursor++] = m.M13;
    buf[cursor++] = m.M14;

    // Row 2
    buf[cursor++] = m.M21;
    buf[cursor++] = m.M22;
    buf[cursor++] = m.M23;
    buf[cursor++] = m.M24;

    // Row 3
    buf[cursor++] = m.M31;
    buf[cursor++] = m.M32;
    buf[cursor++] = m.M33;
    buf[cursor++] = m.M34;

    // Row 4
    buf[cursor++] = m.M41;
    buf[cursor++] = m.M42;
    buf[cursor++] = m.M43;
    buf[cursor++] = m.M44;
}
|
||||||
|
|
||||||
|
// ── Texture resolution ────────────────────────────────────────────────────
|
||||||
|
|
||||||
private uint ResolveTex(WorldEntity entity, MeshRef meshRef, SubMeshGpu sub)
|
private uint ResolveTex(WorldEntity entity, MeshRef meshRef, SubMeshGpu sub)
|
||||||
{
|
{
|
||||||
uint overrideOrigTex = 0;
|
uint overrideOrigTex = 0;
|
||||||
|
|
@ -327,6 +418,7 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
_gl.DeleteVertexArray(sub.Vao);
|
_gl.DeleteVertexArray(sub.Vao);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_gl.DeleteBuffer(_instanceVbo);
|
||||||
_gpuByGfxObj.Clear();
|
_gpuByGfxObj.Clear();
|
||||||
_groups.Clear();
|
_groups.Clear();
|
||||||
}
|
}
|
||||||
|
|
@ -340,17 +432,21 @@ public sealed unsafe class InstancedMeshRenderer : IDisposable
|
||||||
public uint Ebo;
|
public uint Ebo;
|
||||||
public int IndexCount;
|
public int IndexCount;
|
||||||
public uint SurfaceId;
|
public uint SurfaceId;
|
||||||
/// <summary>
|
|
||||||
/// Cached from GfxObjSubMesh.Translucency at upload time.
|
|
||||||
/// Avoids any per-draw lookup into external state.
|
|
||||||
/// </summary>
|
|
||||||
public TranslucencyKind Translucency;
|
public TranslucencyKind Translucency;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// One entry in a per-GfxObj instance group. Carries the pre-computed
|
/// All instances of one GfxObj for this frame, plus their starting offset
|
||||||
/// model matrix plus the entity/meshRef needed for texture resolution.
|
/// in the shared instance VBO (in units of instances, not bytes).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
private sealed class InstanceGroup
{
    // Instances collected for this GfxObj. The list object is kept and
    // cleared between frames rather than reallocated.
    public readonly List<InstanceEntry> Entries = new List<InstanceEntry>();

    // Index of this group's first matrix in the shared instance buffer,
    // counted in instances (callers multiply by 64 to get a byte offset).
    public int BufferOffset;

    // Number of instances currently in the group.
    public int Count
    {
        get { return Entries.Count; }
    }
}
|
||||||
|
|
||||||
private readonly struct InstanceEntry
|
private readonly struct InstanceEntry
|
||||||
{
|
{
|
||||||
public readonly Matrix4x4 Model;
|
public readonly Matrix4x4 Model;
|
||||||
|
|
|
||||||
|
|
@ -58,5 +58,11 @@ public sealed class Shader : IDisposable
|
||||||
_gl.Uniform1(loc, value);
|
_gl.Uniform1(loc, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void SetVec3(string name, Vector3 v)
{
    // Look up the uniform on this program by name, then upload the vector
    // as three separate float components.
    int location = _gl.GetUniformLocation(Program, name);

    float x = v.X;
    float y = v.Y;
    float z = v.Z;
    _gl.Uniform3(location, x, y, z);
}
|
||||||
|
|
||||||
public void Dispose() => _gl.DeleteProgram(Program);
|
public void Dispose() => _gl.DeleteProgram(Program);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
31
src/AcDream.App/Rendering/Shaders/mesh_instanced.frag
Normal file
31
src/AcDream.App/Rendering/Shaders/mesh_instanced.frag
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
#version 430 core

// Inputs interpolated from mesh_instanced.vert.
in vec2 vTex;
in vec3 vWorldNormal;      // declared to match the vertex stage; not read here
in float vLightingFactor;  // ambient + diffuse term computed per vertex

out vec4 fragColor;

// A single 2D diffuse texture per draw call (no texture array).
uniform sampler2D uDiffuse;

// Mirrors the C# TranslucencyKind enum:
//   0 = Opaque     — never discards
//   1 = ClipMap    — alpha-key discard below 0.5 (doors, windows, vegetation)
//   2 = AlphaBlend — left to GL blending; must not discard
//   3 = Additive   — left to GL blending; must not discard
//   4 = InvAlpha   — left to GL blending; must not discard
uniform int uTranslucencyKind;

void main() {
    vec4 texel = texture(uDiffuse, vTex);

    // Only clip-map surfaces use the alpha cutout. Discarding for blended
    // kinds would drop semi-transparent pixels before the blend stage.
    bool isClipMap = (uTranslucencyKind == 1);
    if (isClipMap && texel.a < 0.5) {
        discard;
    }

    // Scale colour by the lighting factor from the vertex shader; alpha
    // passes through untouched.
    vec3 lit = texel.rgb * vLightingFactor;
    fragColor = vec4(lit, texel.a);
}
|
||||||
46
src/AcDream.App/Rendering/Shaders/mesh_instanced.vert
Normal file
46
src/AcDream.App/Rendering/Shaders/mesh_instanced.vert
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
#version 430 core

// ── Per-vertex attributes ──────────────────────────────────────────────
layout(location = 0) in vec3 aPosition;
layout(location = 1) in vec3 aNormal;
layout(location = 2) in vec2 aTexCoord;

// ── Per-instance model matrix ──────────────────────────────────────────
// Delivered as four vec4 attributes at locations 3-6, each holding four
// consecutive floats of the instance's matrix. The host side has to set up
// an attribute pointer and a divisor of 1 for every one of the four slots.
layout(location = 3) in vec4 aInstanceRow0;
layout(location = 4) in vec4 aInstanceRow1;
layout(location = 5) in vec4 aInstanceRow2;
layout(location = 6) in vec4 aInstanceRow3;

uniform mat4 uViewProjection;
uniform vec3 uLightDirection;   // world-space sun direction (points toward sun)
uniform float uAmbientIntensity;
uniform float uDiffuseIntensity;

out vec2 vTex;
out vec3 vWorldNormal;
out float vLightingFactor;

void main() {
    // mat4(a, b, c, d) takes its arguments as columns, so each 4-float row
    // received from the instance buffer becomes one column of `model`.
    mat4 model = mat4(aInstanceRow0, aInstanceRow1, aInstanceRow2, aInstanceRow3);

    // Object space -> world space -> clip space.
    vec4 worldPos = model * vec4(aPosition, 1.0);
    gl_Position = uViewProjection * worldPos;

    vTex = aTexCoord;

    // The upper-left 3x3 of the model matrix is enough for uniformly scaled
    // transforms. Non-uniform scale would need the inverse transpose, which
    // is deliberately left for later.
    vec3 worldNormal = normalize(mat3(model) * aNormal);
    vWorldNormal = worldNormal;

    // Lambert diffuse plus ambient, evaluated per vertex so the fragment
    // stage only has to multiply.
    float lambert = max(dot(worldNormal, uLightDirection), 0.0);
    vLightingFactor = uAmbientIntensity + uDiffuseIntensity * lambert;
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue