phase(N.5) Task 12: CPU stopwatch + GL_TIME_ELAPSED queries in [WB-DIAG]
Adds median + 95th-percentile CPU + GPU dispatch time to the existing 5-second [WB-DIAG] rollup. CPU via Stopwatch (always running, cheap; only logged under ACDREAM_WB_DIAG=1). GPU via two GL_TIME_ELAPSED queries (opaque + transparent) wrapping each glMultiDrawElementsIndirect, polled non-blocking via QueryResultAvailable on the next frame. Sample window is 256 frames per signal; median + p95 reported. Numbers populate the SHIP commit's perf table at Task 19. Silk.NET naming note: GL_TIME_ELAPSED queries use QueryTarget.TimeElapsed (confirmed present in Silk.NET.OpenGL 2.23.0 DLL). The 64-bit result is read via GetQueryObject(..., out ulong) which dispatches to glGetQueryObjectui64v; the int overload (glGetQueryObjectiv) is used for the ResultAvailable poll, matching WorldBuilder's VisibilityManager pattern. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cfe1ca3151
commit
d114dca1e8
1 changed files with 83 additions and 2 deletions
|
|
@ -112,6 +112,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
private int _instancesIssued;
|
private int _instancesIssued;
|
||||||
private long _lastLogTick;
|
private long _lastLogTick;
|
||||||
|
|
||||||
|
// CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1.
|
||||||
|
private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new();
|
||||||
|
private readonly long[] _cpuSamples = new long[256]; // microseconds
|
||||||
|
private int _cpuSampleCursor;
|
||||||
|
private uint _gpuQueryOpaque;
|
||||||
|
private uint _gpuQueryTransparent;
|
||||||
|
private readonly long[] _gpuSamples = new long[256]; // microseconds
|
||||||
|
private int _gpuSampleCursor;
|
||||||
|
private bool _gpuQueriesInitialized;
|
||||||
|
|
||||||
public WbDrawDispatcher(
|
public WbDrawDispatcher(
|
||||||
GL gl,
|
GL gl,
|
||||||
Shader shader,
|
Shader shader,
|
||||||
|
|
@ -158,6 +168,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
|
|
||||||
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
|
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
|
||||||
|
|
||||||
|
if (diag && !_gpuQueriesInitialized)
|
||||||
|
{
|
||||||
|
_gpuQueryOpaque = _gl.GenQuery();
|
||||||
|
_gpuQueryTransparent = _gl.GenQuery();
|
||||||
|
_gpuQueriesInitialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always run the CPU stopwatch — cheap; only logged under diag.
|
||||||
|
_cpuStopwatch.Restart();
|
||||||
|
|
||||||
// Camera world-space position for front-to-back sort (perf #2). The view
|
// Camera world-space position for front-to-back sort (perf #2). The view
|
||||||
// matrix is the inverse of the camera's world transform, so the world
|
// matrix is the inverse of the camera's world transform, so the world
|
||||||
// translation lives in the inverse's translation row.
|
// translation lives in the inverse's translation row.
|
||||||
|
|
@ -267,6 +287,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
// Nothing visible — skip the GL pass entirely.
|
// Nothing visible — skip the GL pass entirely.
|
||||||
if (anyVao == 0)
|
if (anyVao == 0)
|
||||||
{
|
{
|
||||||
|
_cpuStopwatch.Stop();
|
||||||
if (diag) MaybeFlushDiag();
|
if (diag) MaybeFlushDiag();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -276,6 +297,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
|
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
|
||||||
if (totalInstances == 0)
|
if (totalInstances == 0)
|
||||||
{
|
{
|
||||||
|
_cpuStopwatch.Stop();
|
||||||
if (diag) MaybeFlushDiag();
|
if (diag) MaybeFlushDiag();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -379,12 +401,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
_gl.DepthMask(true);
|
_gl.DepthMask(true);
|
||||||
_shader.SetInt("uRenderPass", 0);
|
_shader.SetInt("uRenderPass", 0);
|
||||||
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
|
_gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer);
|
||||||
|
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque);
|
||||||
_gl.MultiDrawElementsIndirect(
|
_gl.MultiDrawElementsIndirect(
|
||||||
PrimitiveType.Triangles,
|
PrimitiveType.Triangles,
|
||||||
DrawElementsType.UnsignedShort,
|
DrawElementsType.UnsignedShort,
|
||||||
(void*)0,
|
(void*)0,
|
||||||
(uint)_opaqueDrawCount,
|
(uint)_opaqueDrawCount,
|
||||||
(uint)DrawCommandStride);
|
(uint)DrawCommandStride);
|
||||||
|
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Phase 8: transparent pass ────────────────────────────────────────
|
// ── Phase 8: transparent pass ────────────────────────────────────────
|
||||||
|
|
@ -394,12 +418,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
|
||||||
_gl.DepthMask(false);
|
_gl.DepthMask(false);
|
||||||
_shader.SetInt("uRenderPass", 1);
|
_shader.SetInt("uRenderPass", 1);
|
||||||
|
if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent);
|
||||||
_gl.MultiDrawElementsIndirect(
|
_gl.MultiDrawElementsIndirect(
|
||||||
PrimitiveType.Triangles,
|
PrimitiveType.Triangles,
|
||||||
DrawElementsType.UnsignedShort,
|
DrawElementsType.UnsignedShort,
|
||||||
(void*)_transparentByteOffset,
|
(void*)_transparentByteOffset,
|
||||||
(uint)_transparentDrawCount,
|
(uint)_transparentDrawCount,
|
||||||
(uint)DrawCommandStride);
|
(uint)DrawCommandStride);
|
||||||
|
if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed);
|
||||||
_gl.DepthMask(true);
|
_gl.DepthMask(true);
|
||||||
_gl.Disable(EnableCap.Blend);
|
_gl.Disable(EnableCap.Blend);
|
||||||
}
|
}
|
||||||
|
|
@ -407,9 +433,31 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
_gl.Disable(EnableCap.CullFace);
|
_gl.Disable(EnableCap.CullFace);
|
||||||
_gl.BindVertexArray(0);
|
_gl.BindVertexArray(0);
|
||||||
|
|
||||||
|
_cpuStopwatch.Stop();
|
||||||
|
|
||||||
if (diag)
|
if (diag)
|
||||||
{
|
{
|
||||||
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
|
long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency;
|
||||||
|
_cpuSamples[_cpuSampleCursor] = cpuUs;
|
||||||
|
_cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length;
|
||||||
|
|
||||||
|
// Read GPU samples non-blocking; the result for the previous frame's
|
||||||
|
// queries should be ready by now. If not, drop the sample (don't stall
|
||||||
|
// the CPU waiting for the GPU).
|
||||||
|
if (_gpuQueriesInitialized)
|
||||||
|
{
|
||||||
|
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.ResultAvailable, out int avail);
|
||||||
|
if (avail != 0)
|
||||||
|
{
|
||||||
|
_gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.Result, out ulong opaqueNs);
|
||||||
|
_gl.GetQueryObject(_gpuQueryTransparent, QueryObjectParameterName.Result, out ulong transNs);
|
||||||
|
long gpuUs = (long)((opaqueNs + transNs) / 1000UL);
|
||||||
|
_gpuSamples[_gpuSampleCursor] = gpuUs;
|
||||||
|
_gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_drawsIssued += _opaqueDrawCount + _transparentDrawCount;
|
||||||
_instancesIssued += totalInstances;
|
_instancesIssued += totalInstances;
|
||||||
MaybeFlushDiag();
|
MaybeFlushDiag();
|
||||||
}
|
}
|
||||||
|
|
@ -437,13 +485,41 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
long now = Environment.TickCount64;
|
long now = Environment.TickCount64;
|
||||||
if (now - _lastLogTick > 5000)
|
if (now - _lastLogTick > 5000)
|
||||||
{
|
{
|
||||||
|
long cpuMed = MedianMicros(_cpuSamples);
|
||||||
|
long cpuP95 = Percentile95Micros(_cpuSamples);
|
||||||
|
long gpuMed = MedianMicros(_gpuSamples);
|
||||||
|
long gpuP95 = Percentile95Micros(_gpuSamples);
|
||||||
Console.WriteLine(
|
Console.WriteLine(
|
||||||
$"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}");
|
$"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " +
|
||||||
|
$"cpu_us={cpuMed}m/{cpuP95}p95 gpu_us={gpuMed}m/{gpuP95}p95");
|
||||||
_entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
|
_entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
|
||||||
_lastLogTick = now;
|
_lastLogTick = now;
|
||||||
|
// Don't reset the sample buffers — they're a moving window of the
|
||||||
|
// last 256 frames; clearing per 5s flush would lose recent history.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static long MedianMicros(long[] samples)
|
||||||
|
{
|
||||||
|
var copy = (long[])samples.Clone();
|
||||||
|
Array.Sort(copy);
|
||||||
|
int nz = 0;
|
||||||
|
foreach (var v in copy) if (v > 0) nz++;
|
||||||
|
if (nz == 0) return 0;
|
||||||
|
return copy[copy.Length - nz / 2];
|
||||||
|
}
|
||||||
|
|
||||||
|
private static long Percentile95Micros(long[] samples)
|
||||||
|
{
|
||||||
|
var copy = (long[])samples.Clone();
|
||||||
|
Array.Sort(copy);
|
||||||
|
int nz = 0;
|
||||||
|
foreach (var v in copy) if (v > 0) nz++;
|
||||||
|
if (nz == 0) return 0;
|
||||||
|
int idx = copy.Length - 1 - (int)(nz * 0.05);
|
||||||
|
return copy[idx];
|
||||||
|
}
|
||||||
|
|
||||||
private void ClassifyBatches(
|
private void ClassifyBatches(
|
||||||
ObjectRenderData renderData,
|
ObjectRenderData renderData,
|
||||||
ulong gfxObjId,
|
ulong gfxObjId,
|
||||||
|
|
@ -538,6 +614,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable
|
||||||
_gl.DeleteBuffer(_instanceSsbo);
|
_gl.DeleteBuffer(_instanceSsbo);
|
||||||
_gl.DeleteBuffer(_batchSsbo);
|
_gl.DeleteBuffer(_batchSsbo);
|
||||||
_gl.DeleteBuffer(_indirectBuffer);
|
_gl.DeleteBuffer(_indirectBuffer);
|
||||||
|
if (_gpuQueriesInitialized)
|
||||||
|
{
|
||||||
|
_gl.DeleteQuery(_gpuQueryOpaque);
|
||||||
|
_gl.DeleteQuery(_gpuQueryTransparent);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
|
// ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue