phase(N.5) Task 12: CPU stopwatch + GL_TIME_ELAPSED queries in [WB-DIAG]
Adds median + 95th-percentile CPU + GPU dispatch time to the existing 5-second [WB-DIAG] rollup. CPU via Stopwatch (always running, cheap; only logged under ACDREAM_WB_DIAG=1). GPU via two GL_TIME_ELAPSED queries (opaque + transparent) wrapping each glMultiDrawElementsIndirect, polled non-blocking via QueryResultAvailable on the next frame. Sample window is 256 frames per signal; median + p95 reported. Numbers populate the SHIP commit's perf table at Task 19. Silk.NET naming note: GL_TIME_ELAPSED queries use QueryTarget.TimeElapsed (confirmed present in Silk.NET.OpenGL 2.23.0 DLL). The 64-bit result is read via GetQueryObject(..., out ulong) which dispatches to glGetQueryObjectui64v; the int overload (glGetQueryObjectiv) is used for the ResultAvailable poll, matching WorldBuilder's VisibilityManager pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cfe1ca3151
commit
d114dca1e8
1 changed files with 83 additions and 2 deletions
|
|
@ -112,6 +112,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
private int _instancesIssued;
|
||||
private long _lastLogTick;
|
||||
|
||||
// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
|
||||
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
|
||||
private readonly long[] _cpuSamples = new long[256]; // microseconds
|
||||
private int _cpuSampleCursor;
|
||||
private uint _gpuQueryOpaque;
|
||||
private uint _gpuQueryTransparent;
|
||||
private readonly long[] _gpuSamples = new long[256]; // microseconds
|
||||
private int _gpuSampleCursor;
|
||||
private bool _gpuQueriesInitialized;
|
||||
|
||||
public WbDrawDispatcher(
|
||||
GL gl,
|
||||
Shader shader,
|
||||
|
|
@ -158,6 +168,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
|
||||
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
|
||||
|
||||
if (diag && !_gpuQueriesInitialized)
|
||||
{
|
||||
_gpuQueryOpaque = _gl.GenQuery();
|
||||
_gpuQueryTransparent = _gl.GenQuery();
|
||||
_gpuQueriesInitialized = true;
|
||||
}
|
||||
|
||||
// Always run the CPU stopwatch — cheap; only logged under diag.
|
||||
_cpuStopwatch.Restart();
|
||||
|
||||
// Camera world-space position for front-to-back sort (perf #2). The view
|
||||
// matrix is the inverse of the camera's world transform, so the world
|
||||
// translation lives in the inverse's translation row.
|
||||
|
|
@ -267,6 +287,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
// Nothing visible — skip the GL pass entirely.
|
||||
if (anyVao == 0)
|
||||
{
|
||||
_cpuStopwatch.Stop();
|
||||
if (diag) MaybeFlushDiag();
|
||||
return;
|
||||
}
|
||||
|
|
@ -276,6 +297,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
|
||||
if (totalInstances == 0)
|
||||
{
|
||||
_cpuStopwatch.Stop();
|
||||
if (diag) MaybeFlushDiag();
|
||||
return;
|
||||
}
|
||||
|
|
@ -379,12 +401,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.DepthMask(true);
|
||||
_shader.SetInt("uRenderPass", 0);
|
||||
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
|
||||
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque);
|
||||
_gl.MultiDrawElementsIndirect(
|
||||
PrimitiveType.Triangles,
|
||||
DrawElementsType.UnsignedShort,
|
||||
(void*)0,
|
||||
(uint)_opaqueDrawCount,
|
||||
(uint)DrawCommandStride);
|
||||
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
|
||||
}
|
||||
|
||||
// ── Phase 8: transparent pass ────────────────────────────────────────
|
||||
|
|
@ -394,12 +418,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
||||
_gl.DepthMask(false);
|
||||
_shader.SetInt("uRenderPass", 1);
|
||||
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent);
|
||||
_gl.MultiDrawElementsIndirect(
|
||||
PrimitiveType.Triangles,
|
||||
DrawElementsType.UnsignedShort,
|
||||
(void*)_transparentByteOffset,
|
||||
(uint)_transparentDrawCount,
|
||||
(uint)DrawCommandStride);
|
||||
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
|
||||
_gl.DepthMask(true);
|
||||
_gl.Disable(EnableCap.Blend);
|
||||
}
|
||||
|
|
@ -407,9 +433,31 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.Disable(EnableCap.CullFace);
|
||||
_gl.BindVertexArray(0);
|
||||
|
||||
_cpuStopwatch.Stop();
|
||||
|
||||
if (diag)
|
||||
{
|
||||
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
|
||||
long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency;
|
||||
_cpuSamples[_cpuSampleCursor] = cpuUs;
|
||||
_cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length;
|
||||
|
||||
// Read GPU samples non-blocking; the result for the previous frame's
|
||||
// queries should be ready by now. If not, drop the sample (don't stall
|
||||
// the CPU waiting for the GPU).
|
||||
if (_gpuQueriesInitialized)
|
||||
{
|
||||
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.ResultAvailable, out int avail);
|
||||
if (avail != 0)
|
||||
{
|
||||
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.Result, out ulong opaqueNs);
|
||||
_gl.GetQueryObject(_gpuQueryTransparent, QueryObjectParameterName.Result, out ulong transNs);
|
||||
long gpuUs = (long)((opaqueNs + transNs) / 1000UL);
|
||||
_gpuSamples[_gpuSampleCursor] = gpuUs;
|
||||
_gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length;
|
||||
}
|
||||
}
|
||||
|
||||
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
|
||||
_instancesIssued += totalInstances;
|
||||
MaybeFlushDiag();
|
||||
}
|
||||
|
|
@ -437,13 +485,41 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
long now = Environment.TickCount64;
|
||||
if (now - _lastLogTick > 5000)
|
||||
{
|
||||
long cpuMed = MedianMicros(_cpuSamples);
|
||||
long cpuP95 = Percentile95Micros(_cpuSamples);
|
||||
long gpuMed = MedianMicros(_gpuSamples);
|
||||
long gpuP95 = Percentile95Micros(_gpuSamples);
|
||||
Console.WriteLine(
|
||||
$"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}");
|
||||
$"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " +
|
||||
$"cpu_us={cpuMed}m/{cpuP95}p95 gpu_us={gpuMed}m/{gpuP95}p95");
|
||||
_entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
|
||||
_lastLogTick = now;
|
||||
// Don't reset the sample buffers — they're a moving window of the
|
||||
// last 256 frames; clearing per 5s flush would lose recent history.
|
||||
}
|
||||
}
|
||||
|
||||
private static long MedianMicros(long[] samples)
|
||||
{
|
||||
var copy = (long[])samples.Clone();
|
||||
Array.Sort(copy);
|
||||
int nz = 0;
|
||||
foreach (var v in copy) if (v > 0) nz++;
|
||||
if (nz == 0) return 0;
|
||||
return copy[copy.Length - nz / 2];
|
||||
}
|
||||
|
||||
private static long Percentile95Micros(long[] samples)
|
||||
{
|
||||
var copy = (long[])samples.Clone();
|
||||
Array.Sort(copy);
|
||||
int nz = 0;
|
||||
foreach (var v in copy) if (v > 0) nz++;
|
||||
if (nz == 0) return 0;
|
||||
int idx = copy.Length - 1 - (int)(nz * 0.05);
|
||||
return copy[idx];
|
||||
}
|
||||
|
||||
private void ClassifyBatches(
|
||||
ObjectRenderData renderData,
|
||||
ulong gfxObjId,
|
||||
|
|
@ -538,6 +614,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
|||
_gl.DeleteBuffer(_instanceSsbo);
|
||||
_gl.DeleteBuffer(_batchSsbo);
|
||||
_gl.DeleteBuffer(_indirectBuffer);
|
||||
if (_gpuQueriesInitialized)
|
||||
{
|
||||
_gl.DeleteQuery(_gpuQueryOpaque);
|
||||
_gl.DeleteQuery(_gpuQueryTransparent);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue