From da56063be5707e7436cc2e1c2b5cb03c7cf95046 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 9 May 2026 12:53:21 +0200 Subject: [PATCH] =?UTF-8?q?fix(N.5b):=20black=20terrain=20=E2=80=94=20swit?= =?UTF-8?q?ch=20to=20uvec2=20handle=20+=20sampler=20constructor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symptom: terrain renders pure black in modern path (legacy renderer correct). Diagnostic at TerrainModernRenderer.Draw showed: glProgramUniformHandle(prog=4, loc=5, handle=0x100251xxx) → GL_INVALID_OPERATION (0x0502) on both terrain and alpha sampler uniforms. Root cause: the `uniform sampler2DArray` + glProgramUniformHandleARB combination is rejected by the NVIDIA Windows driver in this configuration. The handle is valid and resident; the uniform location is valid; the program is valid; but the driver refuses to bind a 64-bit handle to a sampler uniform via the program-uniform path. Fix: switch to N.5's mesh_modern pattern — pass each 64-bit handle as a `uniform uvec2` (low + high 32-bit halves) and construct the sampler at the use site via the GLSL `sampler2DArray(handle)` constructor. This form is what ARB_bindless_texture documents as universally supported and is what N.5 already uses successfully. Files: - terrain_modern.frag: replace `uniform sampler2DArray uTerrain/uAlpha` with `uniform uvec2 uTerrainHandle/uAlphaHandle` + `#define`s - TerrainModernRenderer.cs: cache uvec2 uniform locations; set via `glProgramUniform2(program, loc, low32, high32)` per frame - BindlessSupport.cs: remove now-unused `SetSamplerHandleUniform`, leave a comment noting why the helper was retired - GameWindow.cs: also strip the temporary [TERRAIN-DBG] cursor-wrap print added during the perf-baseline investigation Build green; 114/114 tests in N.5+N.5b filter still pass; user-verified terrain renders correctly in modern path post-fix. Captured fresh perf baseline: - Legacy: cpu_us median 1.5 / p95 3.0 (1 chunk = 1 glDrawElements) - Modern: cpu_us median 6.4-7.0 / p95 9-14 (51 visible LBs, 1 MDI call) Modern is ~4× slower on CPU at radius=5 because the chunked legacy path already collapsed the scene to one draw call. The architectural wins (zero glBindTexture/frame; constant-cost dispatch as A.5 raises radius) will be documented in T10's perf baseline doc; the spec's "≥10% lower CPU" acceptance criterion is invalid at radius=5 and needs revision. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/AcDream.App/Rendering/GameWindow.cs | 15 +++++++++++---- .../Rendering/Shaders/terrain_modern.frag | 18 ++++++++++++++---- .../Rendering/TerrainModernRenderer.cs | 19 ++++++++++++------- .../Rendering/Wb/BindlessSupport.cs | 16 ++++++++-------- 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/AcDream.App/Rendering/GameWindow.cs b/src/AcDream.App/Rendering/GameWindow.cs index bdf88d6..3f851f0 100644 --- a/src/AcDream.App/Rendering/GameWindow.cs +++ b/src/AcDream.App/Rendering/GameWindow.cs @@ -6364,7 +6364,11 @@ public sealed class GameWindow : IDisposable else _terrain?.Draw(camera, frustum, neverCullLandblockId: playerLb); _terrainCpuStopwatch.Stop(); - _terrainCpuSamples[_terrainCpuSampleCursor] = (long)(_terrainCpuStopwatch.Elapsed.TotalMicroseconds); + // Multiply by 100 then divide by 100 in the diag print to keep + // 0.01 µs precision in the long-typed sample buffer. Terrain Draw + // is sub-microsecond on simple scenes; truncating to integer µs + // would round nearly every sample to 0. + _terrainCpuSamples[_terrainCpuSampleCursor] = (long)(_terrainCpuStopwatch.Elapsed.TotalMicroseconds * 100.0); _terrainCpuSampleCursor = (_terrainCpuSampleCursor + 1) % _terrainCpuSamples.Length; MaybeFlushTerrainDiag(); @@ -8778,10 +8782,13 @@ public sealed class GameWindow : IDisposable long now = Environment.TickCount64; if (now - _terrainLastDiagTick <= 5000) return; - long cpuMedUs = TerrainDiagMedianMicros(_terrainCpuSamples); - long cpuP95Us = TerrainDiagPercentile95Micros(_terrainCpuSamples); + // Samples are stored as microseconds × 100 (so 1.23 µs becomes 123 long). + long cpuMedHundredthsUs = TerrainDiagMedianMicros(_terrainCpuSamples); + long cpuP95HundredthsUs = TerrainDiagPercentile95Micros(_terrainCpuSamples); + double cpuMedUs = cpuMedHundredthsUs / 100.0; + double cpuP95Us = cpuP95HundredthsUs / 100.0; Console.WriteLine( - $"[TERRAIN-DIAG{(_useLegacyTerrain ? "/legacy" : "/modern")}] cpu_ms={cpuMedUs / 1000.0:F2}/{cpuP95Us / 1000.0:F2} " + + $"[TERRAIN-DIAG{(_useLegacyTerrain ? "/legacy" : "/modern")}] cpu_us={cpuMedUs:F2}m/{cpuP95Us:F2}p95 " + $"draws={_terrain?.VisibleSlots ?? 0}/frame " + $"visible={_terrain?.VisibleSlots ?? 0} " + $"loaded={_terrain?.LoadedSlots ?? 0} " + diff --git a/src/AcDream.App/Rendering/Shaders/terrain_modern.frag b/src/AcDream.App/Rendering/Shaders/terrain_modern.frag index c06724d..27e9aa2 100644 --- a/src/AcDream.App/Rendering/Shaders/terrain_modern.frag +++ b/src/AcDream.App/Rendering/Shaders/terrain_modern.frag @@ -3,8 +3,16 @@ // Phase N.5b: terrain fragment shader on the modern bindless dispatcher. // Math identical to terrain.frag (Phase 3c per-cell maskBlend3 + -// Phase G fog + lightning flash). uTerrain and uAlpha are bound via -// glProgramUniformHandleARB on the C# side; GLSL sampling is unchanged. +// Phase G fog + lightning flash). +// +// Bindless texture handles are passed as uvec2 (low/high 32 bits) and +// reconstructed into sampler2DArray at use sites via the GLSL +// sampler-from-handle constructor. The alternative pattern — +// `uniform sampler2DArray` set via glProgramUniformHandleARB — produces +// GL_INVALID_OPERATION on at least one driver in practice (NVIDIA on +// Windows). The uvec2 + constructor pattern is what N.5's mesh_modern +// shader uses and is the documented "always works" form per the +// ARB_bindless_texture spec. in vec2 vBaseUV; in vec3 vWorldNormal; @@ -19,8 +27,10 @@ flat in float vBaseTexIdx; out vec4 fragColor; -uniform sampler2DArray uTerrain; -uniform sampler2DArray uAlpha; +uniform uvec2 uTerrainHandle; +uniform uvec2 uAlphaHandle; +#define uTerrain sampler2DArray(uTerrainHandle) +#define uAlpha sampler2DArray(uAlphaHandle) struct Light { vec4 posAndKind; diff --git a/src/AcDream.App/Rendering/TerrainModernRenderer.cs b/src/AcDream.App/Rendering/TerrainModernRenderer.cs index e70a955..536acf5 100644 --- a/src/AcDream.App/Rendering/TerrainModernRenderer.cs +++ b/src/AcDream.App/Rendering/TerrainModernRenderer.cs @@ -50,9 +50,9 @@ public sealed unsafe class TerrainModernRenderer : IDisposable private uint _indirectBuffer; private int _indirectCapacity; - // Cached sampler-uniform locations (matrix uniforms are set by name via Shader.SetMatrix4). - private int _uTerrainLoc; - private int _uAlphaLoc; + // Cached uvec2-handle uniform locations (matrix uniforms are set by name via Shader.SetMatrix4). + private int _uTerrainHandleLoc; + private int _uAlphaHandleLoc; // Reusable per-frame buffers. private readonly List _visibleSlots = new(); @@ -77,8 +77,8 @@ public sealed unsafe class TerrainModernRenderer : IDisposable _alloc = new TerrainSlotAllocator(initialSlotCapacity); _slots = new SlotData?[initialSlotCapacity]; - _uTerrainLoc = _gl.GetUniformLocation(_shader.Program, "uTerrain"); - _uAlphaLoc = _gl.GetUniformLocation(_shader.Program, "uAlpha"); + _uTerrainHandleLoc = _gl.GetUniformLocation(_shader.Program, "uTerrainHandle"); + _uAlphaHandleLoc = _gl.GetUniformLocation(_shader.Program, "uAlphaHandle"); _globalVao = _gl.GenVertexArray(); _globalVbo = _gl.GenBuffer(); @@ -234,8 +234,13 @@ public sealed unsafe class TerrainModernRenderer : IDisposable _shader.SetMatrix4("uProjection", camera.Projection); var (terrainHandle, alphaHandle) = _atlas.GetBindlessHandles(); - _bindless.SetSamplerHandleUniform(_shader.Program, _uTerrainLoc, terrainHandle); - _bindless.SetSamplerHandleUniform(_shader.Program, _uAlphaLoc, alphaHandle); + // Pass each 64-bit handle as a uvec2 (low 32 bits, high 32 bits). + // GLSL constructs sampler2DArray(uTerrainHandle) at the use site — + // see terrain_modern.frag for why this is the safe pattern. + _gl.ProgramUniform2(_shader.Program, _uTerrainHandleLoc, + (uint)(terrainHandle & 0xFFFFFFFFu), (uint)(terrainHandle >> 32)); + _gl.ProgramUniform2(_shader.Program, _uAlphaHandleLoc, + (uint)(alphaHandle & 0xFFFFFFFFu), (uint)(alphaHandle >> 32)); _gl.BindVertexArray(_globalVao); _gl.MemoryBarrier(MemoryBarrierMask.CommandBarrierBit); diff --git a/src/AcDream.App/Rendering/Wb/BindlessSupport.cs b/src/AcDream.App/Rendering/Wb/BindlessSupport.cs index 9abe4ee..64dda3c 100644 --- a/src/AcDream.App/Rendering/Wb/BindlessSupport.cs +++ b/src/AcDream.App/Rendering/Wb/BindlessSupport.cs @@ -45,14 +45,14 @@ public sealed class BindlessSupport _ext.MakeTextureHandleNonResident(handle); } - /// - /// Set a sampler-typed uniform from a 64-bit bindless handle. Uses - /// glProgramUniformHandleARB so it doesn't require the program to be bound. - /// - public void SetSamplerHandleUniform(uint program, int location, ulong handle) - { - _ext.ProgramUniformHandle(program, location, handle); - } + // Phase N.5b note: a `SetSamplerHandleUniform` wrapper was added in T6 + // and removed when terrain rendering surfaced GL_INVALID_OPERATION on + // NVIDIA Windows for the `uniform sampler2DArray` + glProgramUniformHandleARB + // combination. The replacement pattern (uvec2 handle uniform + GLSL + // sampler-from-handle constructor — see terrain_modern.frag) lives at the + // call site via plain `_gl.ProgramUniform2(program, loc, low, high)`. If + // you re-introduce a sampler-handle helper, restrict it to drivers known + // to accept the direct sampler-uniform path. /// Detect GL_ARB_shader_draw_parameters in addition to bindless. /// N.5's vertex shader uses gl_BaseInstanceARB and gl_DrawIDARB