docs(perf): Phase N.6 slice 1 — radius=12 baseline + surface dump path

Capture authoritative CPU+GPU dispatch numbers at Holtburg with the
gpu_us diagnostic now working (commit 25cb147). Three radii (4/8/12)
x two motion modes (standstill/walking) + a surface-format histogram
from ACDREAM_DUMP_SURFACES=1.

Adds env-gated one-shot dump path (TextureCache.TickSurfaceHistogramDumpIfEnabled,
called from GameWindow.OnRender) that fires once after both (a) frame
600 of the session AND (b) the upload-metadata dict reaches 100 entries
-- the cache-size gate prevents the dump from firing during pre-world
GUI ticks where OnRender spins at high rates but no scenery has streamed.
Output writes to %LOCALAPPDATA%\acdream\n6-surfaces.txt with a try/catch
around the I/O so disk-full / permission errors don't crash mid-measurement.

Baseline document at docs/plans/2026-05-11-phase-n6-perf-baseline.md
documents:
- CPU dominates GPU by 30-50x at every radius (strongly CPU-bound)
- GPU wildly under-utilized (max gpu_us p95 ~600us vs 16,600us frame budget)
- CPU scales superlinearly with N1 (Tier 1 cache wins on inner loop but
  not outer LB walk)
- Surface atlas opportunity high (59% of textures in top-3 triples) but
  win is memory-only since GPU isn't bottlenecked

Recommendation: C.1.5 (PES emitter wiring) next, then a reduced-scope
N.6 slice 2 (drop atlas + persistent-mapped buffers -- not justified by
the GPU under-utilization observed).

Roadmap entry amended to split N.6 into slice 1 (shipped) and slice 2
(planned, reduced scope, deferred until after C.1.5).

Spec: docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
Plan: docs/superpowers/plans/2026-05-11-phase-n6-slice1.md (Task 4).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erik 2026-05-11 12:34:10 +02:00
parent 25cb147d97
commit 13abf96a5e
4 changed files with 318 additions and 16 deletions

View file

@ -6310,6 +6310,10 @@ public sealed class GameWindow : IDisposable
_gl!.Clear(ClearBufferMask.ColorBufferBit | ClearBufferMask.DepthBufferBit);
// Phase N.6 slice 1: one-shot surface-format histogram dump under
// ACDREAM_DUMP_SURFACES=1. Zero cost when off.
_textureCache?.TickSurfaceHistogramDumpIfEnabled();
// Phase N.4: drain WB pipeline queues (staged mesh data +
// GL thread queue). Must happen before any draw work so that
// resources uploaded this frame are available immediately.

View file

@ -4,6 +4,7 @@ using AcDream.Core.World;
using DatReaderWriter;
using DatReaderWriter.DBObjs;
using Silk.NET.OpenGL;
using System.Linq;
using SurfaceType = DatReaderWriter.Enums.SurfaceType;
namespace AcDream.App.Rendering;
@ -40,6 +41,20 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
private readonly Dictionary<(uint surfaceId, uint origTexOverride), (uint Name, ulong Handle)> _bindlessByOverridden = new();
private readonly Dictionary<(uint surfaceId, uint origTexOverride, ulong paletteHash), (uint Name, ulong Handle)> _bindlessByPalette = new();
// Phase N.6 slice 1 (2026-05-11): per-upload metadata for the
// ACDREAM_DUMP_SURFACES=1 histogram dump path. Populated at upload
// time so the dump method doesn't have to query GL state. Keyed by
// GL texture name (same key used in cache value tuples). Format
// label is "RGBA8_DECODED" for the post-decode upload (all uploads
// currently land as RGBA8 regardless of source format).
private readonly Dictionary<uint, (int Width, int Height, string Format)> _uploadMetadata = new();
// Frame counter for the one-shot ACDREAM_DUMP_SURFACES=1 trigger.
// Counts Tick calls toward the 600-frame gate. The dump fires once, on
// the first tick at or after frame 600 where _uploadMetadata also holds
// at least 100 entries, and never again for the session. See spec §5.
private int _dumpFrameCounter;
private bool _surfaceHistogramAlreadyDumped;
public TextureCache(GL gl, DatCollection dats, Wb.BindlessSupport? bindless = null)
{
_gl = gl;
@ -258,6 +273,114 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
return h;
}
// Cached result of the ACDREAM_DUMP_SURFACES=1 check. Null until the first
// tick; afterwards the environment is never queried again by this instance,
// so the per-frame cost when the dump is off is a single flag test.
private bool? _surfaceDumpEnabled;

/// <summary>
/// Phase N.6 slice 1: one-shot surface-format histogram dump for the
/// atlas-opportunity audit. Does nothing unless the environment variable
/// ACDREAM_DUMP_SURFACES is exactly "1" (ordinal comparison). The variable
/// is sampled once, on the first call, and the result is cached for the
/// lifetime of this instance.
/// When enabled, the dump fires once, on the first call where BOTH gates pass:
///   1. This method has been called at least 600 times (~10s at 60fps,
///      ~3s at 200fps), so startup boilerplate is behind us.
///   2. <c>_uploadMetadata.Count &gt;= 100</c> — at least 100 textures have
///      been uploaded, indicating streaming has actually pulled in world
///      content (not just sky/UI/font). A frame-only gate fired during the
///      login/handshake phase, where OnRender ticks at GUI rates but no
///      world has streamed in.
/// Output goes to %LOCALAPPDATA%\acdream\n6-surfaces.txt. After the dump
/// attempt (successful or not) every later call returns immediately.
/// See spec §5 in
/// docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
/// </summary>
public void TickSurfaceHistogramDumpIfEnabled()
{
    if (_surfaceHistogramAlreadyDumped) return;

    // Resolve the env gate once instead of hitting the process environment
    // (and allocating a string) on every rendered frame.
    _surfaceDumpEnabled ??= string.Equals(
        System.Environment.GetEnvironmentVariable("ACDREAM_DUMP_SURFACES"),
        "1",
        StringComparison.Ordinal);
    if (_surfaceDumpEnabled != true) return;

    // Saturate the counter at 600 so that a long session that never reaches
    // the cache-size gate cannot overflow the int. The dump still becomes
    // eligible on exactly the 600th enabled call, as before.
    if (_dumpFrameCounter < 600)
    {
        _dumpFrameCounter++;
        if (_dumpFrameCounter < 600) return;
    }

    if (_uploadMetadata.Count < 100) return;

    DumpSurfaceHistogram();

    // Set after the attempt regardless of outcome: DumpSurfaceHistogram
    // swallows I/O failures, and a failed dump must not retry every frame.
    _surfaceHistogramAlreadyDumped = true;
}
/// <summary>
/// Best-effort wrapper around <c>DumpSurfaceHistogramCore</c>. Any exception
/// raised while building or writing the report is caught here and reported
/// on stderr instead of propagating to the caller.
/// </summary>
private void DumpSurfaceHistogram()
{
    try
    {
        DumpSurfaceHistogramCore();
    }
    catch (Exception failure)
    {
        // This is a diagnostics-only path: a dump that cannot be written
        // (disk full, permission denied, antivirus lock, path too long)
        // must not take down OnRender, since that would ruin the very
        // measurement run the dump exists to support. Report the reason
        // and return normally so the caller can flag the dump as done
        // rather than retrying on every frame.
        string reason = failure.Message;
        Console.Error.WriteLine($"[N6-DUMP] Failed to write surface histogram: {reason}");
    }
}
/// <summary>
/// Builds the surface-format histogram and writes it to
/// %LOCALAPPDATA%\acdream\n6-surfaces.txt, creating the directory if needed.
/// The report lists one line per unique GL texture name found in the six
/// handle caches (surfaceId, width, height, format label, byte count),
/// followed by rollups: total texture count, total bytes, the top 10
/// (W,H) buckets, all format buckets, and the top 10 (W,H,format) triples.
/// Byte counts are computed as W*H*4 from the recorded upload metadata.
/// NOTE(review): uploads to Texture2DArray targets record only width and
/// height, so any additional array layers are presumably not reflected in
/// the byte totals — confirm against the upload paths.
/// Exceptions from directory creation or file writing propagate to the
/// caller.
/// </summary>
private void DumpSurfaceHistogramCore()
{
    var localAppData = System.Environment.GetFolderPath(System.Environment.SpecialFolder.LocalApplicationData);
    var outDir = System.IO.Path.Combine(localAppData, "acdream");
    System.IO.Directory.CreateDirectory(outDir);
    var outPath = System.IO.Path.Combine(outDir, "n6-surfaces.txt");

    // In a custom DateTime format string ':' means "the culture's time
    // separator" and the calendar follows the current culture, so the
    // timestamp is formatted with the invariant culture and quoted literals
    // to guarantee an ISO-8601 header on every machine.
    var generatedUtc = DateTime.UtcNow.ToString(
        "yyyy-MM-dd'T'HH:mm:ss'Z'",
        System.Globalization.CultureInfo.InvariantCulture);

    var sb = new System.Text.StringBuilder();
    sb.AppendLine($"# acdream surface-format histogram — generated {generatedUtc}");
    sb.AppendLine("# Per-entry: surfaceId(hex), width, height, format, byteCount");
    sb.AppendLine();

    // Walk every cached entry across the 6 caches, dedupe by GL name.
    var seen = new HashSet<uint>();
    long totalBytes = 0;
    int missingMetadata = 0;
    var bucketsByDim = new Dictionary<(int W, int H), int>();
    var bucketsByFormat = new Dictionary<string, int>();
    var bucketsByTriple = new Dictionary<(int W, int H, string F), int>();

    void Emit(uint surfaceId, uint name)
    {
        if (!seen.Add(name)) return;
        if (!_uploadMetadata.TryGetValue(name, out var meta))
        {
            // Still counted in seen.Count; tracked separately so the rollup
            // can explain why the per-entry list is shorter than the total.
            missingMetadata++;
            return;
        }

        // Widen before multiplying: W*H*4 overflows int for very large textures.
        long bytes = (long)meta.Width * meta.Height * 4;
        totalBytes += bytes;
        sb.AppendLine($"0x{surfaceId:X8}, {meta.Width}, {meta.Height}, {meta.Format}, {bytes}");

        var dimKey = (meta.Width, meta.Height);
        bucketsByDim[dimKey] = bucketsByDim.GetValueOrDefault(dimKey) + 1;
        bucketsByFormat[meta.Format] = bucketsByFormat.GetValueOrDefault(meta.Format) + 1;
        var tripleKey = (meta.Width, meta.Height, meta.Format);
        bucketsByTriple[tripleKey] = bucketsByTriple.GetValueOrDefault(tripleKey) + 1;
    }

    foreach (var kv in _handlesBySurfaceId) Emit(kv.Key, kv.Value);
    foreach (var kv in _handlesByOverridden) Emit(kv.Key.surfaceId, kv.Value);
    foreach (var kv in _handlesByPalette) Emit(kv.Key.surfaceId, kv.Value);
    foreach (var kv in _bindlessBySurfaceId) Emit(kv.Key, kv.Value.Name);
    foreach (var kv in _bindlessByOverridden) Emit(kv.Key.surfaceId, kv.Value.Name);
    foreach (var kv in _bindlessByPalette) Emit(kv.Key.surfaceId, kv.Value.Name);

    sb.AppendLine();
    sb.AppendLine("# Rollups");
    sb.AppendLine($"# Total unique GL textures: {seen.Count}");
    if (missingMetadata > 0)
        sb.AppendLine($"# GL textures skipped (no upload metadata): {missingMetadata}");
    sb.AppendLine($"# Total bytes (sum of W*H*4): {totalBytes}");
    sb.AppendLine("# Top 10 (W,H) dimension buckets:");
    foreach (var kv in bucketsByDim.OrderByDescending(kv => kv.Value).Take(10))
        sb.AppendLine($"# {kv.Key.W}x{kv.Key.H}: {kv.Value}");
    sb.AppendLine("# Format buckets:");
    foreach (var kv in bucketsByFormat.OrderByDescending(kv => kv.Value))
        sb.AppendLine($"# {kv.Key}: {kv.Value}");
    sb.AppendLine("# Top 10 (W,H,format) triples — atlas-opportunity input:");
    foreach (var kv in bucketsByTriple.OrderByDescending(kv => kv.Value).Take(10))
        sb.AppendLine($"# {kv.Key.W}x{kv.Key.H} {kv.Key.F}: {kv.Value}");

    System.IO.File.WriteAllText(outPath, sb.ToString());
    Console.WriteLine($"[N6-DUMP] Surface histogram written to {outPath} ({seen.Count} textures, {totalBytes} bytes)");
}
private DecodedTexture DecodeFromDats(uint surfaceId, uint? origTextureOverride, PaletteOverride? paletteOverride)
{
var surface = _dats.Get<Surface>(surfaceId);
@ -364,6 +487,7 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
_gl.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Repeat);
_gl.BindTexture(TextureTarget.Texture2D, 0);
_uploadMetadata[tex] = (decoded.Width, decoded.Height, "RGBA8_DECODED");
return tex;
}
@ -396,6 +520,7 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
_gl.TexParameter(TextureTarget.Texture2DArray, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Repeat);
_gl.BindTexture(TextureTarget.Texture2DArray, 0);
_uploadMetadata[tex] = (decoded.Width, decoded.Height, "RGBA8_DECODED");
return tex;
}