docs(perf): Phase N.6 slice 1 — radius=12 baseline + surface dump path
Capture authoritative CPU+GPU dispatch numbers at Holtburg with the
gpu_us diagnostic now working (commit 25cb147). Three radii (4/8/12)
x two motion modes (standstill/walking) + a surface-format histogram
from ACDREAM_DUMP_SURFACES=1.
Adds env-gated one-shot dump path (TextureCache.TickSurfaceHistogramDumpIfEnabled,
called from GameWindow.OnRender) that fires once after both (a) frame
600 of the session AND (b) the upload-metadata dict reaches 100 entries
-- the cache-size gate prevents the dump from firing during pre-world
GUI ticks where OnRender spins at high rates but no scenery has streamed.
Output writes to %LOCALAPPDATA%\acdream\n6-surfaces.txt with a try/catch
around the I/O so disk-full / permission errors don't crash mid-measurement.
Baseline document at docs/plans/2026-05-11-phase-n6-perf-baseline.md
documents:
- CPU dominates GPU by 30-50x at every radius (strongly CPU-bound)
- GPU wildly under-utilized (max gpu_us p95 ~600us vs 16,600us frame budget)
- CPU scales superlinearly with N1 (Tier 1 cache wins on inner loop but
not outer LB walk)
- Surface atlas opportunity high (59% of textures in top-3 triples) but
win is memory-only since GPU isn't bottlenecked
Recommendation: C.1.5 (PES emitter wiring) next, then a reduced-scope
N.6 slice 2 (drop atlas + persistent-mapped buffers -- not justified by
the GPU under-utilization observed).
Roadmap entry amended to split N.6 into slice 1 (shipped) and slice 2
(planned, reduced scope, deferred until after C.1.5).
Spec: docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
Plan: docs/superpowers/plans/2026-05-11-phase-n6-slice1.md (Task 4).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
25cb147d97
commit
13abf96a5e
4 changed files with 318 additions and 16 deletions
|
|
@ -6310,6 +6310,10 @@ public sealed class GameWindow : IDisposable
|
|||
|
||||
_gl!.Clear(ClearBufferMask.ColorBufferBit | ClearBufferMask.DepthBufferBit);
|
||||
|
||||
// Phase N.6 slice 1: one-shot surface-format histogram dump under
|
||||
// ACDREAM_DUMP_SURFACES=1. Zero cost when off.
|
||||
_textureCache?.TickSurfaceHistogramDumpIfEnabled();
|
||||
|
||||
// Phase N.4: drain WB pipeline queues (staged mesh data +
|
||||
// GL thread queue). Must happen before any draw work so that
|
||||
// resources uploaded this frame are available immediately.
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ using AcDream.Core.World;
|
|||
using DatReaderWriter;
|
||||
using DatReaderWriter.DBObjs;
|
||||
using Silk.NET.OpenGL;
|
||||
using System.Linq;
|
||||
using SurfaceType = DatReaderWriter.Enums.SurfaceType;
|
||||
|
||||
namespace AcDream.App.Rendering;
|
||||
|
|
@ -40,6 +41,20 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
|
|||
private readonly Dictionary<(uint surfaceId, uint origTexOverride), (uint Name, ulong Handle)> _bindlessByOverridden = new();
|
||||
private readonly Dictionary<(uint surfaceId, uint origTexOverride, ulong paletteHash), (uint Name, ulong Handle)> _bindlessByPalette = new();
|
||||
|
||||
// Phase N.6 slice 1 (2026-05-11): per-upload metadata for the
|
||||
// ACDREAM_DUMP_SURFACES=1 histogram dump path. Populated at upload
|
||||
// time so the dump method doesn't have to query GL state. Keyed by
|
||||
// GL texture name (same key used in cache value tuples). Format
|
||||
// label is "RGBA8_DECODED" for the post-decode upload (all uploads
|
||||
// currently land as RGBA8 regardless of source format).
|
||||
private readonly Dictionary<uint, (int Width, int Height, string Format)> _uploadMetadata = new();
|
||||
|
||||
// Frame counter for the one-shot ACDREAM_DUMP_SURFACES=1 trigger.
|
||||
// Increments per Tick call; the dump fires once, on the first tick at or past 600 where _uploadMetadata also holds >= 100 entries,
|
||||
// and never again for the session. See spec §5.
|
||||
private int _dumpFrameCounter;
|
||||
private bool _surfaceHistogramAlreadyDumped;
|
||||
|
||||
public TextureCache(GL gl, DatCollection dats, Wb.BindlessSupport? bindless = null)
|
||||
{
|
||||
_gl = gl;
|
||||
|
|
@ -258,6 +273,114 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
|
|||
return h;
|
||||
}
|
||||
|
||||
// Snapshot of the ACDREAM_DUMP_SURFACES=1 opt-in, taken once when the type
// is initialised. Reading it here keeps the per-frame tick below down to a
// single bool test when the feature is off, instead of an environment
// lookup on every render tick.
private static readonly bool s_surfaceDumpRequested = string.Equals(
    System.Environment.GetEnvironmentVariable("ACDREAM_DUMP_SURFACES"), "1", StringComparison.Ordinal);

/// <summary>
/// Phase N.6 slice 1: one-shot surface-format histogram dump for the
/// atlas-opportunity audit. Only active when the process was started with
/// ACDREAM_DUMP_SURFACES=1; otherwise every call returns immediately.
/// <para>
/// While active, each call advances <c>_dumpFrameCounter</c>, and the dump
/// fires exactly once, on the first call where BOTH gates pass:
/// </para>
/// <list type="number">
/// <item><description>
/// <c>_dumpFrameCounter &gt;= 600</c>: at least 600 ticks have elapsed
/// (~10s at 60fps, ~3s at 200fps), so startup boilerplate is behind us.
/// </description></item>
/// <item><description>
/// <c>_uploadMetadata.Count &gt;= 100</c>: at least 100 textures have been
/// uploaded, indicating that world content has streamed in (not just
/// sky/UI/font). A frame-only gate fired during the login/handshake phase,
/// where OnRender ticks at GUI rates but no world has streamed in.
/// </description></item>
/// </list>
/// <para>
/// The report is written by <c>DumpSurfaceHistogram</c>. See spec §5 in
/// docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
/// </para>
/// </summary>
public void TickSurfaceHistogramDumpIfEnabled()
{
    if (!s_surfaceDumpRequested || _surfaceHistogramAlreadyDumped) return;

    _dumpFrameCounter++;
    if (_dumpFrameCounter < 600) return;
    if (_uploadMetadata.Count < 100) return;

    // Latch before dumping so the one-shot guarantee never depends on the
    // dump call returning normally.
    _surfaceHistogramAlreadyDumped = true;
    DumpSurfaceHistogram();
}
|
||||
|
||||
/// <summary>
/// Best-effort wrapper around <c>DumpSurfaceHistogramCore</c>: any exception
/// raised while building or writing the report is reported on stderr and
/// then dropped, so this method never throws into the render loop.
/// </summary>
private void DumpSurfaceHistogram()
{
    try
    {
        DumpSurfaceHistogramCore();
        return;
    }
    catch (Exception failure)
    {
        // This is a diagnostic-only path. A dump file that cannot be
        // written (disk full, permission denied, antivirus lock, path too
        // long) must not take OnRender down with it; that would ruin the
        // very measurement pass the dump exists to support. Report the
        // reason and return normally; the caller then marks the dump as
        // done, so the failure is not retried every frame.
        var reason = failure.Message;
        Console.Error.WriteLine($"[N6-DUMP] Failed to write surface histogram: {reason}");
    }
}
|
||||
|
||||
/// <summary>
/// Builds the surface histogram report and writes it to
/// <c>acdream/n6-surfaces.txt</c> under the LocalApplicationData special
/// folder, creating the directory if needed and overwriting any previous
/// report.
/// <para>
/// The report has one row per unique GL texture name found across the six
/// handle caches (surfaceId in hex, width, height, format label, byte
/// count), followed by rollups: total texture count, total bytes, the top
/// 10 (W,H) buckets, all format buckets, and the top 10 (W,H,format)
/// triples. Sizes come from <c>_uploadMetadata</c>; no GL state is queried.
/// </para>
/// <para>
/// I/O exceptions are not handled here; the caller is expected to guard
/// this call.
/// </para>
/// </summary>
private void DumpSurfaceHistogramCore()
{
    var localAppData = System.Environment.GetFolderPath(System.Environment.SpecialFolder.LocalApplicationData);
    var outDir = System.IO.Path.Combine(localAppData, "acdream");
    System.IO.Directory.CreateDirectory(outDir);
    var outPath = System.IO.Path.Combine(outDir, "n6-surfaces.txt");

    // Format with the invariant culture: in a custom format string ':' is
    // the culture's time separator and the year follows the culture's
    // calendar, so the current culture could yield a non-ISO timestamp.
    var generatedAt = DateTime.UtcNow.ToString(
        "yyyy-MM-ddTHH:mm:ssZ", System.Globalization.CultureInfo.InvariantCulture);

    var sb = new System.Text.StringBuilder();
    sb.AppendLine($"# acdream surface-format histogram — generated {generatedAt}");
    sb.AppendLine("# Per-entry: surfaceId(hex), width, height, format, byteCount");
    sb.AppendLine();

    // Walk every cached entry across the 6 caches, dedupe by GL name.
    var seen = new HashSet<uint>();
    long totalBytes = 0;
    int withoutMetadata = 0;
    var bucketsByDim = new Dictionary<(int W, int H), int>();
    var bucketsByFormat = new Dictionary<string, int>();
    var bucketsByTriple = new Dictionary<(int W, int H, string F), int>();

    void Emit(uint surfaceId, uint name)
    {
        // The same GL texture can be reachable from several caches; only
        // the first sighting contributes a row.
        if (!seen.Add(name)) return;
        if (!_uploadMetadata.TryGetValue(name, out var meta))
        {
            // Counted in the unique total but has no size information, so
            // it contributes neither a row nor bytes. Tally it so that the
            // rollup can say so instead of silently under-reporting.
            withoutMetadata++;
            return;
        }

        // Widen before multiplying so very large dimensions cannot wrap
        // a 32-bit product.
        long bytes = (long)meta.Width * meta.Height * 4;
        totalBytes += bytes;
        sb.AppendLine($"0x{surfaceId:X8}, {meta.Width}, {meta.Height}, {meta.Format}, {bytes}");

        var dimKey = (meta.Width, meta.Height);
        bucketsByDim[dimKey] = bucketsByDim.GetValueOrDefault(dimKey) + 1;
        bucketsByFormat[meta.Format] = bucketsByFormat.GetValueOrDefault(meta.Format) + 1;
        var tripleKey = (meta.Width, meta.Height, meta.Format);
        bucketsByTriple[tripleKey] = bucketsByTriple.GetValueOrDefault(tripleKey) + 1;
    }

    foreach (var kv in _handlesBySurfaceId) Emit(kv.Key, kv.Value);
    foreach (var kv in _handlesByOverridden) Emit(kv.Key.surfaceId, kv.Value);
    foreach (var kv in _handlesByPalette) Emit(kv.Key.surfaceId, kv.Value);
    foreach (var kv in _bindlessBySurfaceId) Emit(kv.Key, kv.Value.Name);
    foreach (var kv in _bindlessByOverridden) Emit(kv.Key.surfaceId, kv.Value.Name);
    foreach (var kv in _bindlessByPalette) Emit(kv.Key.surfaceId, kv.Value.Name);

    sb.AppendLine();
    sb.AppendLine("# Rollups");
    sb.AppendLine($"# Total unique GL textures: {seen.Count}");
    sb.AppendLine($"# Total bytes (sum of W*H*4): {totalBytes}");
    if (withoutMetadata > 0)
        sb.AppendLine($"# Textures without upload metadata (excluded from rows, bytes and buckets): {withoutMetadata}");

    sb.AppendLine("# Top 10 (W,H) dimension buckets:");
    foreach (var kv in bucketsByDim.OrderByDescending(kv => kv.Value).Take(10))
        sb.AppendLine($"#   {kv.Key.W}x{kv.Key.H}: {kv.Value}");

    sb.AppendLine("# Format buckets:");
    foreach (var kv in bucketsByFormat.OrderByDescending(kv => kv.Value))
        sb.AppendLine($"#   {kv.Key}: {kv.Value}");

    sb.AppendLine("# Top 10 (W,H,format) triples — atlas-opportunity input:");
    foreach (var kv in bucketsByTriple.OrderByDescending(kv => kv.Value).Take(10))
        sb.AppendLine($"#   {kv.Key.W}x{kv.Key.H} {kv.Key.F}: {kv.Value}");

    System.IO.File.WriteAllText(outPath, sb.ToString());
    Console.WriteLine($"[N6-DUMP] Surface histogram written to {outPath} ({seen.Count} textures, {totalBytes} bytes)");
}
|
||||
|
||||
private DecodedTexture DecodeFromDats(uint surfaceId, uint? origTextureOverride, PaletteOverride? paletteOverride)
|
||||
{
|
||||
var surface = _dats.Get<Surface>(surfaceId);
|
||||
|
|
@ -364,6 +487,7 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
|
|||
_gl.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Repeat);
|
||||
|
||||
_gl.BindTexture(TextureTarget.Texture2D, 0);
|
||||
_uploadMetadata[tex] = (decoded.Width, decoded.Height, "RGBA8_DECODED");
|
||||
return tex;
|
||||
}
|
||||
|
||||
|
|
@ -396,6 +520,7 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
|
|||
_gl.TexParameter(TextureTarget.Texture2DArray, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Repeat);
|
||||
|
||||
_gl.BindTexture(TextureTarget.Texture2DArray, 0);
|
||||
_uploadMetadata[tex] = (decoded.Width, decoded.Height, "RGBA8_DECODED");
|
||||
return tex;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue