phase(N.5) Task 12: CPU stopwatch + GL_TIME_ELAPSED queries in [WB-DIAG]

Adds median + 95th-percentile CPU + GPU dispatch time to the existing
5-second [WB-DIAG] rollup. CPU via Stopwatch (always running, cheap;
only logged under ACDREAM_WB_DIAG=1). GPU via two GL_TIME_ELAPSED
queries (opaque + transparent) wrapping each glMultiDrawElementsIndirect,
polled non-blocking via QueryResultAvailable on the next frame.

Sample window is 256 frames per signal; median + p95 reported.
Numbers populate the SHIP commit's perf table at Task 19.

Silk.NET naming note: GL_TIME_ELAPSED queries use QueryTarget.TimeElapsed
(confirmed present in Silk.NET.OpenGL 2.23.0 DLL). The 64-bit result is
read via GetQueryObject(..., out ulong) which dispatches to
glGetQueryObjectui64v; the int overload (glGetQueryObjectiv) is used for
the ResultAvailable poll, matching WorldBuilder's VisibilityManager pattern.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-08 20:57:26 +02:00
parent cfe1ca3151
commit d114dca1e8

View file

@ -112,6 +112,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
private int _instancesIssued; private int _instancesIssued;
private long _lastLogTick; private long _lastLogTick;
// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
private readonly long[] _cpuSamples = new long[256]; // microseconds
private int _cpuSampleCursor;
private uint _gpuQueryOpaque;
private uint _gpuQueryTransparent;
private readonly long[] _gpuSamples = new long[256]; // microseconds
private int _gpuSampleCursor;
private bool _gpuQueriesInitialized;
public WbDrawDispatcher( public WbDrawDispatcher(
GL gl, GL gl,
Shader shader, Shader shader,
@ -158,6 +168,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal); bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
if (diag && !_gpuQueriesInitialized)
{
_gpuQueryOpaque = _gl.GenQuery();
_gpuQueryTransparent = _gl.GenQuery();
_gpuQueriesInitialized = true;
}
// Always run the CPU stopwatch — cheap; only logged under diag.
_cpuStopwatch.Restart();
// Camera world-space position for front-to-back sort (perf #2). The view // Camera world-space position for front-to-back sort (perf #2). The view
// matrix is the inverse of the camera's world transform, so the world // matrix is the inverse of the camera's world transform, so the world
// translation lives in the inverse's translation row. // translation lives in the inverse's translation row.
@ -267,6 +287,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
// Nothing visible — skip the GL pass entirely. // Nothing visible — skip the GL pass entirely.
if (anyVao == 0) if (anyVao == 0)
{ {
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag(); if (diag) MaybeFlushDiag();
return; return;
} }
@ -276,6 +297,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
if (totalInstances == 0) if (totalInstances == 0)
{ {
_cpuStopwatch.Stop();
if (diag) MaybeFlushDiag(); if (diag) MaybeFlushDiag();
return; return;
} }
@ -379,12 +401,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.DepthMask(true); _gl.DepthMask(true);
_shader.SetInt("uRenderPass", 0); _shader.SetInt("uRenderPass", 0);
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer); _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque);
_gl.MultiDrawElementsIndirect( _gl.MultiDrawElementsIndirect(
PrimitiveType.Triangles, PrimitiveType.Triangles,
DrawElementsType.UnsignedShort, DrawElementsType.UnsignedShort,
(void*)0, (void*)0,
(uint)_opaqueDrawCount, (uint)_opaqueDrawCount,
(uint)DrawCommandStride); (uint)DrawCommandStride);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
} }
// ── Phase 8: transparent pass ──────────────────────────────────────── // ── Phase 8: transparent pass ────────────────────────────────────────
@ -394,12 +418,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha); _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
_gl.DepthMask(false); _gl.DepthMask(false);
_shader.SetInt("uRenderPass", 1); _shader.SetInt("uRenderPass", 1);
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent);
_gl.MultiDrawElementsIndirect( _gl.MultiDrawElementsIndirect(
PrimitiveType.Triangles, PrimitiveType.Triangles,
DrawElementsType.UnsignedShort, DrawElementsType.UnsignedShort,
(void*)_transparentByteOffset, (void*)_transparentByteOffset,
(uint)_transparentDrawCount, (uint)_transparentDrawCount,
(uint)DrawCommandStride); (uint)DrawCommandStride);
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
_gl.DepthMask(true); _gl.DepthMask(true);
_gl.Disable(EnableCap.Blend); _gl.Disable(EnableCap.Blend);
} }
@ -407,9 +433,31 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.Disable(EnableCap.CullFace); _gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0); _gl.BindVertexArray(0);
_cpuStopwatch.Stop();
if (diag) if (diag)
{ {
_drawsIssued += _opaqueDrawCount + _transparentDrawCount; long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency;
_cpuSamples[_cpuSampleCursor] = cpuUs;
_cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length;
// Read GPU samples non-blocking; the result for the previous frame's
// queries should be ready by now. If not, drop the sample (don't stall
// the CPU waiting for the GPU).
if (_gpuQueriesInitialized)
{
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.ResultAvailable, out int avail);
if (avail != 0)
{
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.Result, out ulong opaqueNs);
_gl.GetQueryObject(_gpuQueryTransparent, QueryObjectParameterName.Result, out ulong transNs);
long gpuUs = (long)((opaqueNs + transNs) / 1000UL);
_gpuSamples[_gpuSampleCursor] = gpuUs;
_gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length;
}
}
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
_instancesIssued += totalInstances; _instancesIssued += totalInstances;
MaybeFlushDiag(); MaybeFlushDiag();
} }
@ -437,13 +485,41 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
long now = Environment.TickCount64; long now = Environment.TickCount64;
if (now - _lastLogTick > 5000) if (now - _lastLogTick > 5000)
{ {
long cpuMed = MedianMicros(_cpuSamples);
long cpuP95 = Percentile95Micros(_cpuSamples);
long gpuMed = MedianMicros(_gpuSamples);
long gpuP95 = Percentile95Micros(_gpuSamples);
Console.WriteLine( Console.WriteLine(
$"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}"); $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " +
$"cpu_us={cpuMed}m/{cpuP95}p95 gpu_us={gpuMed}m/{gpuP95}p95");
_entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0; _entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
_lastLogTick = now; _lastLogTick = now;
// Don't reset the sample buffers — they're a moving window of the
// last 256 frames; clearing per 5s flush would lose recent history.
} }
} }
private static long MedianMicros(long[] samples)
{
var copy = (long[])samples.Clone();
Array.Sort(copy);
int nz = 0;
foreach (var v in copy) if (v > 0) nz++;
if (nz == 0) return 0;
return copy[copy.Length - nz / 2];
}
private static long Percentile95Micros(long[] samples)
{
var copy = (long[])samples.Clone();
Array.Sort(copy);
int nz = 0;
foreach (var v in copy) if (v > 0) nz++;
if (nz == 0) return 0;
int idx = copy.Length - 1 - (int)(nz * 0.05);
return copy[idx];
}
private void ClassifyBatches( private void ClassifyBatches(
ObjectRenderData renderData, ObjectRenderData renderData,
ulong gfxObjId, ulong gfxObjId,
@ -538,6 +614,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
_gl.DeleteBuffer(_instanceSsbo); _gl.DeleteBuffer(_instanceSsbo);
_gl.DeleteBuffer(_batchSsbo); _gl.DeleteBuffer(_batchSsbo);
_gl.DeleteBuffer(_indirectBuffer); _gl.DeleteBuffer(_indirectBuffer);
if (_gpuQueriesInitialized)
{
_gl.DeleteQuery(_gpuQueryOpaque);
_gl.DeleteQuery(_gpuQueryTransparent);
}
} }
// ── Public types + helpers for BuildIndirectArrays (Task 9) ───────────── // ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────