From d114dca1e851de40dc2ab8adfd8998536d303eb9 Mon Sep 17 00:00:00 2001 From: Erik Date: Fri, 8 May 2026 20:57:26 +0200 Subject: [PATCH] phase(N.5) Task 12: CPU stopwatch + GL_TIME_ELAPSED queries in [WB-DIAG] Adds median + 95th-percentile CPU + GPU dispatch time to the existing 5-second [WB-DIAG] rollup. CPU via Stopwatch (always running, cheap; only logged under ACDREAM_WB_DIAG=1). GPU via two GL_TIME_ELAPSED queries (opaque + transparent) wrapping each glMultiDrawElementsIndirect, polled non-blocking via QueryResultAvailable on the next frame. Sample window is 256 frames per signal; median + p95 reported. Numbers populate the SHIP commit's perf table at Task 19. Silk.NET naming note: GL_TIME_ELAPSED queries use QueryTarget.TimeElapsed (confirmed present in Silk.NET.OpenGL 2.23.0 DLL). The 64-bit result is read via GetQueryObject(..., out ulong) which dispatches to glGetQueryObjectui64v; the int overload (glGetQueryObjectiv) is used for the ResultAvailable poll, matching WorldBuilder's VisibilityManager pattern. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Rendering/Wb/WbDrawDispatcher.cs | 85 ++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs index 05b9919..4dca392 100644 --- a/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs +++ b/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs @@ -112,6 +112,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable private int _instancesIssued; private long _lastLogTick; + // CPU + GPU timing for [WB-DIAG] under ACDREAM_WB_DIAG=1. + private readonly System.Diagnostics.Stopwatch _cpuStopwatch = new(); + private readonly long[] _cpuSamples = new long[256]; // microseconds + private int _cpuSampleCursor; + private uint _gpuQueryOpaque; + private uint _gpuQueryTransparent; + private readonly long[] _gpuSamples = new long[256]; // microseconds + private int _gpuSampleCursor; + private bool _gpuQueriesInitialized; + public WbDrawDispatcher( GL gl, Shader shader, @@ -158,6 +168,16 @@ public sealed unsafe class WbDrawDispatcher : IDisposable bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal); + if (diag && !_gpuQueriesInitialized) + { + _gpuQueryOpaque = _gl.GenQuery(); + _gpuQueryTransparent = _gl.GenQuery(); + _gpuQueriesInitialized = true; + } + + // Always run the CPU stopwatch — cheap; only logged under diag. + _cpuStopwatch.Restart(); + // Camera world-space position for front-to-back sort (perf #2). The view // matrix is the inverse of the camera's world transform, so the world // translation lives in the inverse's translation row. @@ -267,6 +287,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable // Nothing visible — skip the GL pass entirely. if (anyVao == 0) { + _cpuStopwatch.Stop(); if (diag) MaybeFlushDiag(); return; } @@ -276,6 +297,7 @@ public sealed unsafe class WbDrawDispatcher : IDisposable foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count; if (totalInstances == 0) { + _cpuStopwatch.Stop(); if (diag) MaybeFlushDiag(); return; } @@ -379,12 +401,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.DepthMask(true); _shader.SetInt("uRenderPass", 0); _gl.BindBuffer(BufferTargetARB.DrawIndirectBuffer, _indirectBuffer); + if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryOpaque); _gl.MultiDrawElementsIndirect( PrimitiveType.Triangles, DrawElementsType.UnsignedShort, (void*)0, (uint)_opaqueDrawCount, (uint)DrawCommandStride); + if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed); } // ── Phase 8: transparent pass ──────────────────────────────────────── @@ -394,12 +418,14 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha); _gl.DepthMask(false); _shader.SetInt("uRenderPass", 1); + if (diag && _gpuQueriesInitialized) _gl.BeginQuery(QueryTarget.TimeElapsed, _gpuQueryTransparent); _gl.MultiDrawElementsIndirect( PrimitiveType.Triangles, DrawElementsType.UnsignedShort, (void*)_transparentByteOffset, (uint)_transparentDrawCount, (uint)DrawCommandStride); + if (diag && _gpuQueriesInitialized) _gl.EndQuery(QueryTarget.TimeElapsed); _gl.DepthMask(true); _gl.Disable(EnableCap.Blend); } @@ -407,9 +433,31 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.Disable(EnableCap.CullFace); _gl.BindVertexArray(0); + _cpuStopwatch.Stop(); + if (diag) { - _drawsIssued += _opaqueDrawCount + _transparentDrawCount; + long cpuUs = _cpuStopwatch.ElapsedTicks * 1_000_000L / System.Diagnostics.Stopwatch.Frequency; + _cpuSamples[_cpuSampleCursor] = cpuUs; + _cpuSampleCursor = (_cpuSampleCursor + 1) % _cpuSamples.Length; + + // Read GPU samples non-blocking; the result for the previous frame's + // queries should be ready by now. If not, drop the sample (don't stall + // the CPU waiting for the GPU). + if (_gpuQueriesInitialized) + { + _gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.ResultAvailable, out int avail); + if (avail != 0) + { + _gl.GetQueryObject(_gpuQueryOpaque, QueryObjectParameterName.Result, out ulong opaqueNs); + _gl.GetQueryObject(_gpuQueryTransparent, QueryObjectParameterName.Result, out ulong transNs); + long gpuUs = (long)((opaqueNs + transNs) / 1000UL); + _gpuSamples[_gpuSampleCursor] = gpuUs; + _gpuSampleCursor = (_gpuSampleCursor + 1) % _gpuSamples.Length; + } + } + + _drawsIssued += _opaqueDrawCount + _transparentDrawCount; _instancesIssued += totalInstances; MaybeFlushDiag(); } @@ -437,13 +485,41 @@ public sealed unsafe class WbDrawDispatcher : IDisposable long now = Environment.TickCount64; if (now - _lastLogTick > 5000) { + long cpuMed = MedianMicros(_cpuSamples); + long cpuP95 = Percentile95Micros(_cpuSamples); + long gpuMed = MedianMicros(_gpuSamples); + long gpuP95 = Percentile95Micros(_gpuSamples); Console.WriteLine( - $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}"); + $"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count} " + + $"cpu_us={cpuMed}m/{cpuP95}p95 gpu_us={gpuMed}m/{gpuP95}p95"); _entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0; _lastLogTick = now; + // Don't reset the sample buffers — they're a moving window of the + // last 256 frames; clearing per 5s flush would lose recent history. } } + private static long MedianMicros(long[] samples) + { + var copy = (long[])samples.Clone(); + Array.Sort(copy); + int nz = 0; + foreach (var v in copy) if (v > 0) nz++; + if (nz == 0) return 0; + return copy[copy.Length - nz / 2]; + } + + private static long Percentile95Micros(long[] samples) + { + var copy = (long[])samples.Clone(); + Array.Sort(copy); + int nz = 0; + foreach (var v in copy) if (v > 0) nz++; + if (nz == 0) return 0; + int idx = copy.Length - 1 - (int)(nz * 0.05); + return copy[idx]; + } + private void ClassifyBatches( ObjectRenderData renderData, ulong gfxObjId, @@ -538,6 +614,11 @@ public sealed unsafe class WbDrawDispatcher : IDisposable _gl.DeleteBuffer(_instanceSsbo); _gl.DeleteBuffer(_batchSsbo); _gl.DeleteBuffer(_indirectBuffer); + if (_gpuQueriesInitialized) + { + _gl.DeleteQuery(_gpuQueryOpaque); + _gl.DeleteQuery(_gpuQueryTransparent); + } } // ── Public types + helpers for BuildIndirectArrays (Task 9) ─────────────