fix(N.5b): black terrain — switch to uvec2 handle + sampler constructor
Symptom: terrain renders pure black in modern path (legacy renderer correct). Diagnostic at TerrainModernRenderer.Draw showed: glProgramUniformHandle(prog=4, loc=5, handle=0x100251xxx) → GL_INVALID_OPERATION (0x0502) on both terrain and alpha sampler uniforms. Root cause: the `uniform sampler2DArray` + glProgramUniformHandleARB combination is rejected by the NVIDIA Windows driver in this configuration. The handle is valid and resident; the uniform location is valid; the program is valid; but the driver refuses to bind a 64-bit handle to a sampler uniform via the program-uniform path. Fix: switch to N.5's mesh_modern pattern — pass each 64-bit handle as a `uniform uvec2` (low + high 32-bit halves) and construct the sampler at the use site via the GLSL `sampler2DArray(handle)` constructor. This form is what ARB_bindless_texture documents as universally supported and is what N.5 already uses successfully. Files: - terrain_modern.frag: replace `uniform sampler2DArray uTerrain/uAlpha` with `uniform uvec2 uTerrainHandle/uAlphaHandle` + `#define`s - TerrainModernRenderer.cs: cache uvec2 uniform locations; set via `glProgramUniform2(program, loc, low32, high32)` per frame - BindlessSupport.cs: remove now-unused `SetSamplerHandleUniform`, leave a comment noting why the helper was retired - GameWindow.cs: also strip the temporary [TERRAIN-DBG] cursor-wrap print added during the perf-baseline investigation Build green; 114/114 tests in N.5+N.5b filter still pass; user-verified terrain renders correctly in modern path post-fix. Captured fresh perf baseline: - Legacy: cpu_us median 1.5 / p95 3.0 (1 chunk = 1 glDrawElements) - Modern: cpu_us median 6.4-7.0 / p95 9-14 (51 visible LBs, 1 MDI call) Modern is ~4× slower on CPU at radius=5 because the chunked legacy path already collapsed the scene to one draw call. The architectural wins (zero glBindTexture/frame; constant-cost dispatch as A.5 raises radius) will be documented in T10's perf baseline doc; the spec's "≥10% lower CPU" acceptance criterion is invalid at radius=5 and needs revision. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
55e516c538
commit
da56063be5
4 changed files with 45 additions and 23 deletions
|
|
@ -6364,7 +6364,11 @@ public sealed class GameWindow : IDisposable
|
||||||
else
|
else
|
||||||
_terrain?.Draw(camera, frustum, neverCullLandblockId: playerLb);
|
_terrain?.Draw(camera, frustum, neverCullLandblockId: playerLb);
|
||||||
_terrainCpuStopwatch.Stop();
|
_terrainCpuStopwatch.Stop();
|
||||||
_terrainCpuSamples[_terrainCpuSampleCursor] = (long)(_terrainCpuStopwatch.Elapsed.TotalMicroseconds);
|
// Multiply by 100 then divide by 100 in the diag print to keep
|
||||||
|
// 0.01 µs precision in the long-typed sample buffer. Terrain Draw
|
||||||
|
// is sub-microsecond on simple scenes; truncating to integer µs
|
||||||
|
// would round nearly every sample to 0.
|
||||||
|
_terrainCpuSamples[_terrainCpuSampleCursor] = (long)(_terrainCpuStopwatch.Elapsed.TotalMicroseconds * 100.0);
|
||||||
_terrainCpuSampleCursor = (_terrainCpuSampleCursor + 1) % _terrainCpuSamples.Length;
|
_terrainCpuSampleCursor = (_terrainCpuSampleCursor + 1) % _terrainCpuSamples.Length;
|
||||||
MaybeFlushTerrainDiag();
|
MaybeFlushTerrainDiag();
|
||||||
|
|
||||||
|
|
@ -8778,10 +8782,13 @@ public sealed class GameWindow : IDisposable
|
||||||
long now = Environment.TickCount64;
|
long now = Environment.TickCount64;
|
||||||
if (now - _terrainLastDiagTick <= 5000) return;
|
if (now - _terrainLastDiagTick <= 5000) return;
|
||||||
|
|
||||||
long cpuMedUs = TerrainDiagMedianMicros(_terrainCpuSamples);
|
// Samples are stored as microseconds × 100 (so 1.23 µs becomes 123 long).
|
||||||
long cpuP95Us = TerrainDiagPercentile95Micros(_terrainCpuSamples);
|
long cpuMedHundredthsUs = TerrainDiagMedianMicros(_terrainCpuSamples);
|
||||||
|
long cpuP95HundredthsUs = TerrainDiagPercentile95Micros(_terrainCpuSamples);
|
||||||
|
double cpuMedUs = cpuMedHundredthsUs / 100.0;
|
||||||
|
double cpuP95Us = cpuP95HundredthsUs / 100.0;
|
||||||
Console.WriteLine(
|
Console.WriteLine(
|
||||||
$"[TERRAIN-DIAG{(_useLegacyTerrain ? "/legacy" : "/modern")}] cpu_ms={cpuMedUs / 1000.0:F2}/{cpuP95Us / 1000.0:F2} " +
|
$"[TERRAIN-DIAG{(_useLegacyTerrain ? "/legacy" : "/modern")}] cpu_us={cpuMedUs:F2}m/{cpuP95Us:F2}p95 " +
|
||||||
$"draws={_terrain?.VisibleSlots ?? 0}/frame " +
|
$"draws={_terrain?.VisibleSlots ?? 0}/frame " +
|
||||||
$"visible={_terrain?.VisibleSlots ?? 0} " +
|
$"visible={_terrain?.VisibleSlots ?? 0} " +
|
||||||
$"loaded={_terrain?.LoadedSlots ?? 0} " +
|
$"loaded={_terrain?.LoadedSlots ?? 0} " +
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,16 @@
|
||||||
|
|
||||||
// Phase N.5b: terrain fragment shader on the modern bindless dispatcher.
|
// Phase N.5b: terrain fragment shader on the modern bindless dispatcher.
|
||||||
// Math identical to terrain.frag (Phase 3c per-cell maskBlend3 +
|
// Math identical to terrain.frag (Phase 3c per-cell maskBlend3 +
|
||||||
// Phase G fog + lightning flash). uTerrain and uAlpha are bound via
|
// Phase G fog + lightning flash).
|
||||||
// glProgramUniformHandleARB on the C# side; GLSL sampling is unchanged.
|
//
|
||||||
|
// Bindless texture handles are passed as uvec2 (low/high 32 bits) and
|
||||||
|
// reconstructed into sampler2DArray at use sites via the GLSL
|
||||||
|
// sampler-from-handle constructor. The alternative pattern —
|
||||||
|
// `uniform sampler2DArray` set via glProgramUniformHandleARB — produces
|
||||||
|
// GL_INVALID_OPERATION on at least one driver in practice (NVIDIA on
|
||||||
|
// Windows). The uvec2 + constructor pattern is what N.5's mesh_modern
|
||||||
|
// shader uses and is the documented "always works" form per the
|
||||||
|
// ARB_bindless_texture spec.
|
||||||
|
|
||||||
in vec2 vBaseUV;
|
in vec2 vBaseUV;
|
||||||
in vec3 vWorldNormal;
|
in vec3 vWorldNormal;
|
||||||
|
|
@ -19,8 +27,10 @@ flat in float vBaseTexIdx;
|
||||||
|
|
||||||
out vec4 fragColor;
|
out vec4 fragColor;
|
||||||
|
|
||||||
uniform sampler2DArray uTerrain;
|
uniform uvec2 uTerrainHandle;
|
||||||
uniform sampler2DArray uAlpha;
|
uniform uvec2 uAlphaHandle;
|
||||||
|
#define uTerrain sampler2DArray(uTerrainHandle)
|
||||||
|
#define uAlpha sampler2DArray(uAlphaHandle)
|
||||||
|
|
||||||
struct Light {
|
struct Light {
|
||||||
vec4 posAndKind;
|
vec4 posAndKind;
|
||||||
|
|
|
||||||
|
|
@ -50,9 +50,9 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
|
||||||
private uint _indirectBuffer;
|
private uint _indirectBuffer;
|
||||||
private int _indirectCapacity;
|
private int _indirectCapacity;
|
||||||
|
|
||||||
// Cached sampler-uniform locations (matrix uniforms are set by name via Shader.SetMatrix4).
|
// Cached uvec2-handle uniform locations (matrix uniforms are set by name via Shader.SetMatrix4).
|
||||||
private int _uTerrainLoc;
|
private int _uTerrainHandleLoc;
|
||||||
private int _uAlphaLoc;
|
private int _uAlphaHandleLoc;
|
||||||
|
|
||||||
// Reusable per-frame buffers.
|
// Reusable per-frame buffers.
|
||||||
private readonly List<int> _visibleSlots = new();
|
private readonly List<int> _visibleSlots = new();
|
||||||
|
|
@ -77,8 +77,8 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
|
||||||
_alloc = new TerrainSlotAllocator(initialSlotCapacity);
|
_alloc = new TerrainSlotAllocator(initialSlotCapacity);
|
||||||
_slots = new SlotData?[initialSlotCapacity];
|
_slots = new SlotData?[initialSlotCapacity];
|
||||||
|
|
||||||
_uTerrainLoc = _gl.GetUniformLocation(_shader.Program, "uTerrain");
|
_uTerrainHandleLoc = _gl.GetUniformLocation(_shader.Program, "uTerrainHandle");
|
||||||
_uAlphaLoc = _gl.GetUniformLocation(_shader.Program, "uAlpha");
|
_uAlphaHandleLoc = _gl.GetUniformLocation(_shader.Program, "uAlphaHandle");
|
||||||
|
|
||||||
_globalVao = _gl.GenVertexArray();
|
_globalVao = _gl.GenVertexArray();
|
||||||
_globalVbo = _gl.GenBuffer();
|
_globalVbo = _gl.GenBuffer();
|
||||||
|
|
@ -234,8 +234,13 @@ public sealed unsafe class TerrainModernRenderer : IDisposable
|
||||||
_shader.SetMatrix4("uProjection", camera.Projection);
|
_shader.SetMatrix4("uProjection", camera.Projection);
|
||||||
|
|
||||||
var (terrainHandle, alphaHandle) = _atlas.GetBindlessHandles();
|
var (terrainHandle, alphaHandle) = _atlas.GetBindlessHandles();
|
||||||
_bindless.SetSamplerHandleUniform(_shader.Program, _uTerrainLoc, terrainHandle);
|
// Pass each 64-bit handle as a uvec2 (low 32 bits, high 32 bits).
|
||||||
_bindless.SetSamplerHandleUniform(_shader.Program, _uAlphaLoc, alphaHandle);
|
// GLSL constructs sampler2DArray(uTerrainHandle) at the use site —
|
||||||
|
// see terrain_modern.frag for why this is the safe pattern.
|
||||||
|
_gl.ProgramUniform2(_shader.Program, _uTerrainHandleLoc,
|
||||||
|
(uint)(terrainHandle & 0xFFFFFFFFu), (uint)(terrainHandle >> 32));
|
||||||
|
_gl.ProgramUniform2(_shader.Program, _uAlphaHandleLoc,
|
||||||
|
(uint)(alphaHandle & 0xFFFFFFFFu), (uint)(alphaHandle >> 32));
|
||||||
|
|
||||||
_gl.BindVertexArray(_globalVao);
|
_gl.BindVertexArray(_globalVao);
|
||||||
_gl.MemoryBarrier(MemoryBarrierMask.CommandBarrierBit);
|
_gl.MemoryBarrier(MemoryBarrierMask.CommandBarrierBit);
|
||||||
|
|
|
||||||
|
|
@ -45,14 +45,14 @@ public sealed class BindlessSupport
|
||||||
_ext.MakeTextureHandleNonResident(handle);
|
_ext.MakeTextureHandleNonResident(handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
// Phase N.5b note: a `SetSamplerHandleUniform` wrapper was added in T6
|
||||||
/// Set a sampler-typed uniform from a 64-bit bindless handle. Uses
|
// and removed when terrain rendering surfaced GL_INVALID_OPERATION on
|
||||||
/// glProgramUniformHandleARB so it doesn't require the program to be bound.
|
// NVIDIA Windows for the `uniform sampler2DArray` + glProgramUniformHandleARB
|
||||||
/// </summary>
|
// combination. The replacement pattern (uvec2 handle uniform + GLSL
|
||||||
public void SetSamplerHandleUniform(uint program, int location, ulong handle)
|
// sampler-from-handle constructor — see terrain_modern.frag) lives at the
|
||||||
{
|
// call site via plain `_gl.ProgramUniform2(program, loc, low, high)`. If
|
||||||
_ext.ProgramUniformHandle(program, location, handle);
|
// you re-introduce a sampler-handle helper, restrict it to drivers known
|
||||||
}
|
// to accept the direct sampler-uniform path.
|
||||||
|
|
||||||
/// <summary>Detect <c>GL_ARB_shader_draw_parameters</c> in addition to bindless.
|
/// <summary>Detect <c>GL_ARB_shader_draw_parameters</c> in addition to bindless.
|
||||||
/// N.5's vertex shader uses <c>gl_BaseInstanceARB</c> and <c>gl_DrawIDARB</c>
|
/// N.5's vertex shader uses <c>gl_BaseInstanceARB</c> and <c>gl_DrawIDARB</c>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue