diff --git a/src/AcDream.App/Rendering/TerrainModernRenderer.cs b/src/AcDream.App/Rendering/TerrainModernRenderer.cs
new file mode 100644
index 0000000..efa54ea
--- /dev/null
+++ b/src/AcDream.App/Rendering/TerrainModernRenderer.cs
@@ -0,0 +1,421 @@
+using System.Numerics;
+using AcDream.App.Rendering.Wb;
+using AcDream.Core.Terrain;
+using Silk.NET.OpenGL;
+
+namespace AcDream.App.Rendering;
+
+/// <summary>
+/// Phase N.5b modern terrain dispatcher. Single global VBO/EBO with a slot
+/// allocator (one slot per landblock, 384 verts × 40 bytes = 15,360 bytes
+/// per slot). Per-frame: build a DrawElementsIndirectCommand array from
+/// visible slots, upload, dispatch via glMultiDrawElementsIndirect. Atlas
+/// textures bound via bindless handles set per-frame as sampler uniforms.
+///
+/// Total ~6-8 GL calls per frame for terrain regardless of visible
+/// landblock count.
+/// </summary>
+public sealed unsafe class TerrainModernRenderer : IDisposable
+{
+    private const int VertsPerLandblock = LandblockMesh.VerticesPerLandblock; // 384
+    private const int IndicesPerLandblock = VertsPerLandblock;
+    private const int VertexSize = 40; // sizeof(TerrainVertex)
+    private const int IndexSize = sizeof(uint);
+    private const float LandblockSize = LandblockMesh.LandblockSize; // 192
+
+    // Bytes one slot occupies in the global VBO / EBO.
+    private const int SlotVboBytes = VertsPerLandblock * VertexSize;
+    private const int SlotEboBytes = IndicesPerLandblock * IndexSize;
+
+    private readonly GL _gl;
+    private readonly BindlessSupport _bindless;
+    private readonly Shader _shader;
+    private readonly TerrainAtlas _atlas;
+
+    private readonly TerrainSlotAllocator _alloc;
+
+    // Per-slot live data (index by slot integer; null entries are unused slots).
+    private SlotData?[] _slots;
+
+    // Reverse map: landblockId -> slot, for RemoveLandblock and replacement.
+    private readonly Dictionary<uint, int> _idToSlot = new();
+
+    // GPU buffers.
+    private uint _globalVao;
+    private uint _globalVbo;
+    private uint _globalEbo;
+    private uint _indirectBuffer;
+    private int _indirectCapacity;
+
+    // Cached sampler-uniform locations (matrix uniforms are set by name via Shader.SetMatrix4).
+    private readonly int _uTerrainLoc;
+    private readonly int _uAlphaLoc;
+
+    // Reusable per-frame buffers.
+    private readonly List<int> _visibleSlots = new();
+    private DrawElementsIndirectCommand[] _deicScratch = Array.Empty<DrawElementsIndirectCommand>();
+
+    // Reusable staging arrays for AddLandblock, so streaming landblocks in does
+    // not allocate two fresh arrays per call.
+    private readonly TerrainVertex[] _vertexScratch = new TerrainVertex[VertsPerLandblock];
+    private readonly uint[] _indexScratch = new uint[IndicesPerLandblock];
+
+    // Set by Dispose; guards against deleting or using GL names twice.
+    private bool _disposed;
+
+    // Diag.
+    public int LoadedSlots => _alloc.LoadedCount;
+    public int VisibleSlots => _visibleSlots.Count;
+    public int CapacitySlots => _alloc.Capacity;
+
+    /// <summary>
+    /// Creates the VAO, the global vertex/index buffers sized for
+    /// <paramref name="initialSlotCapacity"/> landblocks, and the indirect
+    /// command buffer, and caches the "uTerrain" / "uAlpha" uniform locations.
+    /// </summary>
+    /// <param name="gl">GL API used for every call this renderer makes.</param>
+    /// <param name="bindless">Sets the atlas sampler uniforms from bindless handles.</param>
+    /// <param name="shader">Terrain shader program.</param>
+    /// <param name="atlas">Provides the terrain and alpha atlas bindless handles.</param>
+    /// <param name="initialSlotCapacity">Number of landblock slots to pre-allocate.</param>
+    /// <exception cref="ArgumentNullException">A reference argument is null.</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="initialSlotCapacity"/> is negative.</exception>
+    public TerrainModernRenderer(
+        GL gl,
+        BindlessSupport bindless,
+        Shader shader,
+        TerrainAtlas atlas,
+        int initialSlotCapacity = 64)
+    {
+        ArgumentNullException.ThrowIfNull(gl);
+        ArgumentNullException.ThrowIfNull(bindless);
+        ArgumentNullException.ThrowIfNull(shader);
+        ArgumentNullException.ThrowIfNull(atlas);
+        if (initialSlotCapacity < 0)
+            throw new ArgumentOutOfRangeException(
+                nameof(initialSlotCapacity), initialSlotCapacity, "Slot capacity cannot be negative.");
+
+        _gl = gl;
+        _bindless = bindless;
+        _shader = shader;
+        _atlas = atlas;
+        _alloc = new TerrainSlotAllocator(initialSlotCapacity);
+        _slots = new SlotData?[initialSlotCapacity];
+
+        _uTerrainLoc = _gl.GetUniformLocation(_shader.Program, "uTerrain");
+        _uAlphaLoc = _gl.GetUniformLocation(_shader.Program, "uAlpha");
+
+        _globalVao = _gl.GenVertexArray();
+        _globalVbo = _gl.GenBuffer();
+        _globalEbo = _gl.GenBuffer();
+        AllocateGpuBuffers(initialSlotCapacity);
+        ConfigureVao();
+
+        _indirectBuffer = _gl.GenBuffer();
+    }
+
+    /// <summary>
+    /// Uploads one landblock's terrain mesh into a free slot of the global
+    /// buffers, replacing any mesh previously registered under the same id.
+    /// </summary>
+    /// <param name="landblockId">Key used later by <see cref="RemoveLandblock"/> and the never-cull override in <see cref="Draw"/>.</param>
+    /// <param name="meshData">Must hold exactly <c>VertsPerLandblock</c> vertices and at least <c>IndicesPerLandblock</c> indices.</param>
+    /// <param name="worldOrigin">Offset added to every vertex position before upload.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="meshData"/> is null.</exception>
+    /// <exception cref="ArgumentException">The mesh has the wrong vertex count or too few indices.</exception>
+    /// <exception cref="ObjectDisposedException">The renderer has been disposed.</exception>
+    public void AddLandblock(uint landblockId, LandblockMeshData meshData, Vector3 worldOrigin)
+    {
+        ArgumentNullException.ThrowIfNull(meshData);
+        if (_disposed)
+            throw new ObjectDisposedException(nameof(TerrainModernRenderer));
+        if (meshData.Vertices.Length != VertsPerLandblock)
+            throw new ArgumentException(
+                $"Expected {VertsPerLandblock} vertices, got {meshData.Vertices.Length}",
+                nameof(meshData));
+        // Checked up front: running off the end of Indices after the old slot was
+        // released and a new one allocated would leave the bookkeeping inconsistent.
+        if (meshData.Indices.Length < IndicesPerLandblock)
+            throw new ArgumentException(
+                $"Expected {IndicesPerLandblock} indices, got {meshData.Indices.Length}",
+                nameof(meshData));
+
+        // Replacement: release the old slot first (no-op when the id is unknown).
+        RemoveLandblock(landblockId);
+
+        int slot = _alloc.Allocate(out var needsGrow);
+        if (needsGrow)
+        {
+            int newCap = Math.Max(_alloc.Capacity * 2, slot + 1);
+            EnsureCapacity(newCap);
+        }
+
+        // Bake worldOrigin into vertex positions; capture min/max Z for AABB.
+        float zMin = float.MaxValue, zMax = float.MinValue;
+        for (int i = 0; i < VertsPerLandblock; i++)
+        {
+            var v = meshData.Vertices[i];
+            var worldPos = v.Position + worldOrigin;
+            _vertexScratch[i] = new TerrainVertex(worldPos, v.Normal, v.Data0, v.Data1, v.Data2, v.Data3);
+            if (worldPos.Z < zMin) zMin = worldPos.Z;
+            if (worldPos.Z > zMax) zMax = worldPos.Z;
+        }
+        if (zMin == float.MaxValue) { zMin = 0f; zMax = 0f; }
+
+        // Bake baseVertex into indices on the CPU side (driver-portable pattern).
+        uint baseVertex = (uint)(slot * VertsPerLandblock);
+        for (int i = 0; i < IndicesPerLandblock; i++)
+            _indexScratch[i] = meshData.Indices[i] + baseVertex;
+
+        // glBufferSubData into the slot's VBO + EBO regions. Offsets are computed
+        // in native-width arithmetic rather than 32-bit int.
+        nint vboByteOffset = (nint)slot * SlotVboBytes;
+        nint eboByteOffset = (nint)slot * SlotEboBytes;
+
+        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _globalVbo);
+        fixed (TerrainVertex* p = _vertexScratch)
+        {
+            _gl.BufferSubData(BufferTargetARB.ArrayBuffer, vboByteOffset, (nuint)SlotVboBytes, p);
+        }
+        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, 0);
+
+        // The index buffer is written through COPY_WRITE_BUFFER: the
+        // ELEMENT_ARRAY_BUFFER binding belongs to whichever VAO is bound, so
+        // binding and then unbinding it here could strip another renderer's VAO
+        // of its index buffer.
+        _gl.BindBuffer(BufferTargetARB.CopyWriteBuffer, _globalEbo);
+        fixed (uint* p = _indexScratch)
+        {
+            _gl.BufferSubData(BufferTargetARB.CopyWriteBuffer, eboByteOffset, (nuint)SlotEboBytes, p);
+        }
+        _gl.BindBuffer(BufferTargetARB.CopyWriteBuffer, 0);
+
+        _slots[slot] = new SlotData
+        {
+            LandblockId = landblockId,
+            WorldOrigin = worldOrigin,
+            FirstIndex = (uint)(slot * IndicesPerLandblock),
+            IndexCount = IndicesPerLandblock,
+            AabbMin = new Vector3(worldOrigin.X, worldOrigin.Y, zMin),
+            AabbMax = new Vector3(worldOrigin.X + LandblockSize, worldOrigin.Y + LandblockSize, zMax),
+        };
+        _idToSlot[landblockId] = slot;
+    }
+
+    /// <summary>
+    /// Releases the slot held by <paramref name="landblockId"/>; does nothing
+    /// when the id is not loaded. No GL calls are made.
+    /// </summary>
+    public void RemoveLandblock(uint landblockId)
+    {
+        if (!_idToSlot.Remove(landblockId, out int slot))
+            return;
+        _slots[slot] = null;
+        _alloc.Free(slot);
+        // No GPU clear: the per-frame DEIC array won't reference this slot.
+    }
+
+    /// <summary>
+    /// Draws every loaded landblock that passes the frustum test with a single
+    /// glMultiDrawElementsIndirect call. Does nothing after <see cref="Dispose"/>.
+    /// </summary>
+    /// <param name="camera">Supplies the view and projection matrices.</param>
+    /// <param name="frustum">When non-null, landblocks whose AABB is outside it are skipped.</param>
+    /// <param name="neverCullLandblockId">Landblock that is always drawn, even when outside <paramref name="frustum"/>.</param>
+    public void Draw(ICamera camera, FrustumPlanes? frustum = null, uint? neverCullLandblockId = null)
+    {
+        // Reset before the early-outs so VisibleSlots never reports a stale count
+        // from an earlier frame once everything has been unloaded.
+        _visibleSlots.Clear();
+        if (_disposed || _alloc.LoadedCount == 0) return;
+
+        // Build visible slot list with per-slot frustum cull.
+        for (int slot = 0; slot < _slots.Length; slot++)
+        {
+            var data = _slots[slot];
+            if (data is null) continue;
+            if (frustum is not null && data.LandblockId != neverCullLandblockId)
+            {
+                if (!FrustumCuller.IsAabbVisible(frustum.Value, data.AabbMin, data.AabbMax))
+                    continue;
+            }
+            _visibleSlots.Add(slot);
+        }
+        int visibleCount = _visibleSlots.Count;
+        if (visibleCount == 0) return;
+
+        // Build DEIC array.
+        if (_deicScratch.Length < visibleCount)
+            _deicScratch = new DrawElementsIndirectCommand[Math.Max(visibleCount, 64)];
+        for (int i = 0; i < visibleCount; i++)
+        {
+            var data = _slots[_visibleSlots[i]]!;
+            _deicScratch[i] = new DrawElementsIndirectCommand
+            {
+                Count = (uint)data.IndexCount,
+                InstanceCount = 1u,
+                FirstIndex = data.FirstIndex,
+                BaseVertex = 0, // baked into indices on upload
+                BaseInstance = 0,
+            };
+        }
+
+        // Grow indirect buffer if needed.
+        _gl.BindBuffer(GLEnum.DrawIndirectBuffer, _indirectBuffer);
+        if (visibleCount > _indirectCapacity)
+        {
+            _indirectCapacity = Math.Max(64, visibleCount * 2);
+            _gl.BufferData(GLEnum.DrawIndirectBuffer,
+                (nuint)(_indirectCapacity * sizeof(DrawElementsIndirectCommand)),
+                null, GLEnum.DynamicDraw);
+        }
+
+        // Upload DEIC array.
+        fixed (DrawElementsIndirectCommand* p = _deicScratch)
+        {
+            _gl.BufferSubData(GLEnum.DrawIndirectBuffer, 0,
+                (nuint)(visibleCount * sizeof(DrawElementsIndirectCommand)), p);
+        }
+
+        // Bind shader + uniforms + atlas handles.
+        _shader.Use();
+        _shader.SetMatrix4("uView", camera.View);
+        _shader.SetMatrix4("uProjection", camera.Projection);
+
+        var (terrainHandle, alphaHandle) = _atlas.GetBindlessHandles();
+        _bindless.SetSamplerHandleUniform(_shader.Program, _uTerrainLoc, terrainHandle);
+        _bindless.SetSamplerHandleUniform(_shader.Program, _uAlphaLoc, alphaHandle);
+
+        _gl.BindVertexArray(_globalVao);
+        _gl.MemoryBarrier(MemoryBarrierMask.CommandBarrierBit);
+        _gl.MultiDrawElementsIndirect(
+            PrimitiveType.Triangles, DrawElementsType.UnsignedInt,
+            (void*)0,
+            (uint)visibleCount,
+            (uint)sizeof(DrawElementsIndirectCommand));
+        _gl.BindVertexArray(0);
+        _gl.BindBuffer(GLEnum.DrawIndirectBuffer, 0);
+    }
+
+    /// <summary>
+    /// Deletes the VAO and the vertex, index and indirect buffers. Safe to call
+    /// more than once; later calls do nothing.
+    /// </summary>
+    public void Dispose()
+    {
+        // Guard: deleting the same names again could hit objects that GL has
+        // since handed out to someone else under a recycled name.
+        if (_disposed) return;
+        _disposed = true;
+
+        _gl.DeleteVertexArray(_globalVao);
+        _gl.DeleteBuffer(_globalVbo);
+        _gl.DeleteBuffer(_globalEbo);
+        _gl.DeleteBuffer(_indirectBuffer);
+        _globalVao = _globalVbo = _globalEbo = _indirectBuffer = 0;
+        _indirectCapacity = 0;
+    }
+
+    // ----------------------------------------------------------------
+    // Private helpers
+    // ----------------------------------------------------------------
+
+    // Allocates (uninitialised) storage for capacitySlots landblocks in the
+    // global VBO and EBO.
+    private void AllocateGpuBuffers(int capacitySlots)
+    {
+        nuint vboBytes = (nuint)capacitySlots * (nuint)SlotVboBytes;
+        nuint eboBytes = (nuint)capacitySlots * (nuint)SlotEboBytes;
+
+        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _globalVbo);
+        _gl.BufferData(BufferTargetARB.ArrayBuffer, vboBytes, null, BufferUsageARB.DynamicDraw);
+        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, 0);
+
+        // COPY_WRITE_BUFFER rather than ELEMENT_ARRAY_BUFFER so no VAO's index
+        // buffer binding is disturbed.
+        _gl.BindBuffer(BufferTargetARB.CopyWriteBuffer, _globalEbo);
+        _gl.BufferData(BufferTargetARB.CopyWriteBuffer, eboBytes, null, BufferUsageARB.DynamicDraw);
+        _gl.BindBuffer(BufferTargetARB.CopyWriteBuffer, 0);
+    }
+
+    // Binds the global VBO/EBO to the VAO and describes the 40-byte vertex:
+    // position (3 floats), normal (3 floats), then four 4-byte integer attributes.
+    private void ConfigureVao()
+    {
+        _gl.BindVertexArray(_globalVao);
+        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, _globalVbo);
+        _gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, _globalEbo);
+
+        uint stride = (uint)VertexSize;
+
+        // location 0: Position
+        _gl.EnableVertexAttribArray(0);
+        _gl.VertexAttribPointer(0, 3, VertexAttribPointerType.Float, false, stride, (void*)0);
+        // location 1: Normal
+        _gl.EnableVertexAttribArray(1);
+        _gl.VertexAttribPointer(1, 3, VertexAttribPointerType.Float, false, stride, (void*)(3 * sizeof(float)));
+        // locations 2-5: Data0..Data3 (uvec4 byte attributes)
+        nint dataOffset = 6 * sizeof(float);
+        for (uint i = 0; i < 4; i++)
+        {
+            uint location = 2 + i;
+            _gl.EnableVertexAttribArray(location);
+            _gl.VertexAttribIPointer(location, 4, VertexAttribIType.UnsignedByte, stride,
+                (void*)(dataOffset + (nint)(4 * i)));
+        }
+
+        // Unbind the VAO first so the EBO binding stays recorded in it.
+        _gl.BindVertexArray(0);
+        _gl.BindBuffer(BufferTargetARB.ArrayBuffer, 0);
+    }
+
+    // Grows the global buffers (and slot bookkeeping) to newCapacity slots,
+    // preserving the contents of every existing slot.
+    private void EnsureCapacity(int newCapacity)
+    {
+        int oldCapacity = _alloc.Capacity;
+        if (newCapacity <= oldCapacity) return;
+
+        _globalVbo = GrowBuffer(_globalVbo,
+            (nuint)oldCapacity * (nuint)SlotVboBytes, (nuint)newCapacity * (nuint)SlotVboBytes);
+        _globalEbo = GrowBuffer(_globalEbo,
+            (nuint)oldCapacity * (nuint)SlotEboBytes, (nuint)newCapacity * (nuint)SlotEboBytes);
+
+        // Recreate VAO with new buffer bindings.
+        _gl.DeleteVertexArray(_globalVao);
+        _globalVao = _gl.GenVertexArray();
+        ConfigureVao();
+
+        // Grow slot tracking array.
+        Array.Resize(ref _slots, newCapacity);
+        _alloc.GrowTo(newCapacity);
+    }
+
+    // Creates a buffer of newBytes, copies the first oldBytes of oldBuffer into
+    // it on the GPU, deletes oldBuffer and returns the new name. Only the
+    // COPY_READ/COPY_WRITE targets are touched, and both are unbound afterwards.
+    private uint GrowBuffer(uint oldBuffer, nuint oldBytes, nuint newBytes)
+    {
+        uint newBuffer = _gl.GenBuffer();
+        _gl.BindBuffer(BufferTargetARB.CopyWriteBuffer, newBuffer);
+        _gl.BufferData(BufferTargetARB.CopyWriteBuffer, newBytes, null, BufferUsageARB.DynamicDraw);
+        _gl.BindBuffer(BufferTargetARB.CopyReadBuffer, oldBuffer);
+        _gl.CopyBufferSubData(CopyBufferSubDataTarget.CopyReadBuffer, CopyBufferSubDataTarget.CopyWriteBuffer,
+            0, 0, oldBytes);
+        _gl.BindBuffer(BufferTargetARB.CopyReadBuffer, 0);
+        _gl.BindBuffer(BufferTargetARB.CopyWriteBuffer, 0);
+        _gl.DeleteBuffer(oldBuffer);
+        return newBuffer;
+    }
+
+    // Bookkeeping for one occupied slot.
+    private sealed class SlotData
+    {
+        public uint LandblockId;
+        public Vector3 WorldOrigin;
+        public uint FirstIndex; // first index of the slot's range in the global EBO
+        public int IndexCount;
+        public Vector3 AabbMin;
+        public Vector3 AabbMax;
+    }
+}
diff --git a/src/AcDream.App/Rendering/Wb/BindlessSupport.cs b/src/AcDream.App/Rendering/Wb/BindlessSupport.cs
index eeb4f9d..9abe4ee 100644
--- a/src/AcDream.App/Rendering/Wb/BindlessSupport.cs
+++ b/src/AcDream.App/Rendering/Wb/BindlessSupport.cs
@@ -45,6 +45,15 @@ public sealed class BindlessSupport
         _ext.MakeTextureHandleNonResident(handle);
     }
 
+    /// <summary>
+    /// Set a sampler-typed uniform from a 64-bit bindless handle. Uses
+    /// glProgramUniformHandleARB so it doesn't require the program to be bound.
+    /// </summary>
+    public void SetSamplerHandleUniform(uint program, int location, ulong handle)
+    {
+        _ext.ProgramUniformHandle(program, location, handle);
+    }
+
     /// <summary>Detect GL_ARB_shader_draw_parameters in addition to bindless.
     /// N.5's vertex shader uses gl_BaseInstanceARB and gl_DrawIDARB
     /// from this extension.