From 13abf96a5ece97016fac23f12af67794b6690255 Mon Sep 17 00:00:00 2001 From: Erik Date: Mon, 11 May 2026 12:34:10 +0200 Subject: [PATCH] =?UTF-8?q?docs(perf):=20Phase=20N.6=20slice=201=20?= =?UTF-8?q?=E2=80=94=20radius=3D12=20baseline=20+=20surface=20dump=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture authoritative CPU+GPU dispatch numbers at Holtburg with the gpu_us diagnostic now working (commit 25cb147). Three radii (4/8/12) x two motion modes (standstill/walking) + a surface-format histogram from ACDREAM_DUMP_SURFACES=1. Adds env-gated one-shot dump path (TextureCache.TickSurfaceHistogramDumpIfEnabled, called from GameWindow.OnRender) that fires once after both (a) frame 600 of the session AND (b) the upload-metadata dict reaches 100 entries -- the cache-size gate prevents the dump from firing during pre-world GUI ticks where OnRender spins at high rates but no scenery has streamed. Output writes to %LOCALAPPDATA%\acdream\n6-surfaces.txt with a try/catch around the I/O so disk-full / permission errors don't crash mid-measurement. Baseline document at docs/plans/2026-05-11-phase-n6-perf-baseline.md documents: - CPU dominates GPU by 30-50x at every radius (strongly CPU-bound) - GPU wildly under-utilized (max gpu_us p95 ~600us vs 16,600us frame budget) - CPU scales superlinearly with N1 (Tier 1 cache wins on inner loop but not outer LB walk) - Surface atlas opportunity high (59% of textures in top-3 triples) but win is memory-only since GPU isn't bottlenecked Recommendation: C.1.5 (PES emitter wiring) next, then a reduced-scope N.6 slice 2 (drop atlas + persistent-mapped buffers -- not justified by the GPU under-utilization observed). Roadmap entry amended to split N.6 into slice 1 (shipped) and slice 2 (planned, reduced scope, deferred until after C.1.5). Spec: docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md. Plan: docs/superpowers/plans/2026-05-11-phase-n6-slice1.md (Task 4). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/plans/2026-04-11-roadmap.md | 36 ++-- .../2026-05-11-phase-n6-perf-baseline.md | 169 ++++++++++++++++++ src/AcDream.App/Rendering/GameWindow.cs | 4 + src/AcDream.App/Rendering/TextureCache.cs | 125 +++++++++++++ 4 files changed, 318 insertions(+), 16 deletions(-) create mode 100644 docs/plans/2026-05-11-phase-n6-perf-baseline.md diff --git a/docs/plans/2026-04-11-roadmap.md b/docs/plans/2026-04-11-roadmap.md index 2478aa4..7f535f3 100644 --- a/docs/plans/2026-04-11-roadmap.md +++ b/docs/plans/2026-04-11-roadmap.md @@ -687,22 +687,26 @@ for our deletions/additions; merge upstream `master` periodically. manifest at higher radius. Spec acceptance criterion #5 was wrong; amended via `docs/plans/2026-05-09-phase-n5b-perf-baseline.md`. Plan archived at `docs/superpowers/plans/2026-05-09-phase-n5b-terrain-modern.md`. -- **N.6 — Perf polish.** **Planned (post-A.5 polish takes priority).** - Builds on N.5 + N.5b. Legacy renderer retirement was pulled forward - into N.5 ship amendment — `InstancedMeshRenderer`, `StaticMeshRenderer`, - `WbFoundationFlag` are gone — and the terrain legacy renderer - (`TerrainChunkRenderer` + `TerrainRenderer` + `terrain.vert/.frag`) - retired in N.5b. N.6 scope: WB atlas adoption for memory savings - on shared content, persistent-mapped buffers if `glBufferData` shows - up in profiling (the modern terrain path's per-frame DEIC `BufferSubData` - is a candidate), GPU-side culling via compute pre-pass (eliminates - the per-frame slot walk + DEIC build entirely), GL_TIME_ELAPSED query - double-buffering (deferred from N.5 — diagnostic shows `gpu_us=0/0` - under `ACDREAM_WB_DIAG=1`), direct higher-radius perf comparison (A.5 - has now landed — modern's architectural wins are measurable), retire the - legacy `Texture2D`/`sampler2D` path in `TextureCache` (currently kept - for Sky + Debug + particle paths now that Terrain has migrated). - Plan + spec written when work begins. 
**Estimate: 1-2 weeks.** +- **N.6 slice 1 — GPU timing fix + radius=12 perf baseline.** **SHIPPED 2026-05-11.** + Fixed the gpu_us double-buffering bug in `WbDrawDispatcher` (ring-of-3 + query slots, read-before-overwrite, vendor-neutral across AMD/NVIDIA/Intel + desktop GL). Added env-gated surface-format histogram dump in `TextureCache` + for atlas-opportunity audit. Captured authoritative baseline at Holtburg + radii 4 / 8 / 12 (standstill + walking) with the now-working `gpu_us` + diagnostic. Plan + spec at `docs/superpowers/{specs,plans}/2026-05-11-phase-n6-slice1-*.md`. + Baseline numbers + next-phase recommendation at + [docs/plans/2026-05-11-phase-n6-perf-baseline.md](2026-05-11-phase-n6-perf-baseline.md). +- **N.6 slice 2 — Perf polish cleanup.** **Planned — deferred until after C.1.5 + (PES emitter wiring) per the baseline doc's recommendation.** Builds on + slice 1's measurement. Scope: retire the legacy `Texture2D`/`sampler2D` path + in `TextureCache` (currently kept for Sky + Debug + particle paths now that + Terrain has migrated); delete orphan `mesh.frag` (verify zero callers post-N.5 + amendment); decide bindless-everywhere vs legacy-island for the remaining + `sampler2D` consumers. **Dropped from slice 2 scope per baseline data**: + WB atlas adoption and persistent-mapped buffers — both target GPU/sampler + throughput but the baseline shows GPU is wildly under-utilized (max gpu_us + p95 ~600 µs vs 16,600 µs frame budget). Slice 2 reduces to a ~1-day cleanup. + Plan + spec written when work begins. **Estimate: ~1 day once C.1.5 lands.** - **N.7 — EnvCells / dungeons.** Replace EnvCell rendering with WB's `EnvCellRenderManager` + `PortalRenderManager` on top of N.4's foundation. 
**Estimate: 1-2 weeks** (was 2-3 — naturally smaller now diff --git a/docs/plans/2026-05-11-phase-n6-perf-baseline.md b/docs/plans/2026-05-11-phase-n6-perf-baseline.md new file mode 100644 index 0000000..75f6a8e --- /dev/null +++ b/docs/plans/2026-05-11-phase-n6-perf-baseline.md @@ -0,0 +1,169 @@ +# Phase N.6 slice 1 — perf baseline at Holtburg + +**Created:** 2026-05-11. +**Spec:** [docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md](../superpowers/specs/2026-05-11-phase-n6-slice1-design.md) +**Measured against commit:** `25cb147` (Task 1 final — gpu_us fix + diag-gate symmetry follow-up) +**Purpose:** Capture authoritative CPU+GPU dispatch numbers so the next-phase decision (slice 2 vs C.1.5 vs Tier 2) rests on real data. + +--- + +## §1. Setup + +- **Hardware:** Radeon RX 9070 XT +- **Resolution:** 1440p (2560×1440) +- **Quality preset:** High (default) +- **Connection:** live ACE at `127.0.0.1:9000` +- **Character:** `+Acdream` at Holtburg +- **Sky / time:** clear midday (F7 → Noon, F10 → Clear) +- **Build:** Debug +- **Date measured:** 2026-05-11 +- **Environment overrides:** `ACDREAM_WB_DIAG=1`, `ACDREAM_STREAM_RADIUS=<N>` (N = 4, 8, or 12 per run) + +Note: `ACDREAM_STREAM_RADIUS=N` forces N₁=N (all (2N+1)² landblocks within radius N at near-tier full detail — e.g. 625 at N=12). +This is NOT the production A.5 default (N₁=4 / N₂=12), which was characterized in +CLAUDE.md as comfortable 200–400 FPS at the default preset. These measurements +characterize the scaling curve — what happens as near-tier radius grows — not current +production behavior. FPS was not captured directly (no window-title screenshot per run); +it can be derived from `(1e6 / total_frame_time_us)` but the dispatcher's `cpu_us` is +only part of the frame (terrain, sky, particles, UI, GL submission overhead, and +swap-buffer wait are not included). + +## §2. Dispatch CPU / GPU numbers + +Each cell records the median of the last 3 `[WB-DIAG]` lines from a ~30s stable window. 
+`entSeen / entDrawn / groups / drawsIssued` are also from those lines (values per 5s bucket). +FPS column omitted — not captured per the note above. + +| Radius | Motion | cpu_us median | cpu_us p95 | gpu_us median | gpu_us p95 | entSeen (per 5s) | entDrawn (per 5s) | groups | drawsIssued (per 5s) | +|--------|------------|---------------|------------|---------------|------------|------------------|-------------------|--------|----------------------| +| 4 | standstill | 3,208 | 3,313 | 93 | 95 | 16.9M | 15.5M | 1,216 | 1.65M | +| 4 | walking | 2,967 | 3,112 | 95 | 120 | 13.9M | 13.9M | 1,850 | 1.45M | +| 8 | standstill | 6,732 | 7,199 | 126 | 130 | 19.8M | 19.8M | 333 | 218K | +| 8 | walking | 6,572 | 6,927 | 96 | 113 | 18.1M | 18.0M | 534 | 245K | +| 12 | standstill | 12,853 | 13,525 | 344 | 507 | 19.6M | 19.6M | 541 | 184K | +| 12 | walking | 16,320 | 17,241 | 553 | 603 | 17.8M | 17.8M | 898 | 200K | + +**Notable:** `meshMissing` counts at r4 standstill (~1.45M per 5s) drop to near-zero while +walking. This suggests the static-entity slow path's mesh-load lifecycle has some delay +before populating for newly-streamed content. Not fatal — doesn't affect rendered output — +but worth a follow-up issue in `docs/ISSUES.md` if it persists in normal play. + +## §3. Surface-format histogram + +From `ACDREAM_DUMP_SURFACES=1` at radius=12, ~30s after enter-world. +Output written to `%LOCALAPPDATA%\acdream\n6-surfaces.txt`. 
+ +- **Total unique GL textures:** 760 +- **Total bytes (sum of W×H×4):** 96,387,584 (~96.4 MB) + +**Top 10 (W, H) dimension buckets:** + +| Dimensions | Count | Share | +|------------|-------|-------| +| 128×128 | 236 | 31% | +| 64×64 | 111 | 15% | +| 256×256 | 102 | 13% | +| 128×256 | 71 | 9% | +| 64×128 | 69 | 9% | +| 256×128 | 48 | 6% | +| 128×64 | 39 | 5% | +| 512×512 | 30 | 4% | +| 8×8 | 18 | 2% | +| 32×32 | 14 | 2% | + +**Format distribution:** + +| Format | Count | Share | +|---------------|-------|-------| +| RGBA8_DECODED | 760 | 100% | + +All uploads land as RGBA8 regardless of source format (INDEX16, P8, DXT, BGRA, etc. +all decode through `TextureHelpers` before upload). The source-format diversity is real +but invisible to GL after the decode step. + +**Top 10 (W, H, format) triples — atlas-opportunity input:** + +Same as the dimension buckets above since there is only one format. The top-3 triples +(128×128, 64×64, 256×256) cover 449 of 760 surfaces = **59%**. + +**Atlas-opportunity score: 59%** of surfaces fall into the top-3 (W, H, format) triples. +The spec §6 threshold for "atlas work is justified for memory savings" is >30%; this +measurement is well above it. However, see §4 for why atlas is not the right next step +despite the high score. + +## §4. Conclusion + next-phase recommendation + +### What the data shows + +**The entity dispatcher is strongly CPU-bound.** At every radius, CPU dominates GPU by +roughly 30–70× (median cpu_us ÷ median gpu_us: lowest 29.5× at radius=12 walking, highest 68× at radius=8 walking). At radius=12 standstill: 12.9 ms CPU vs 0.34 ms GPU. At radius=12 walking the +ratio is 16.3 ms CPU vs 0.55 ms GPU. There is no GPU bottleneck. + +**GPU is wildly under-utilized.** The highest gpu_us p95 observed is 603 µs at radius=12 +walking — against a 16,600 µs frame budget at 60 FPS. The GPU is working at roughly +3.6% of its 60fps capacity for entity rendering alone. Even accounting for terrain, sky, +particles, UI, and swap-buffer overhead, there is substantial headroom. 
The "GPU +comfortable" threshold (gpu_us p95 < 8,000 µs) is not even close to being challenged. + +**CPU scales superlinearly with N₁ (near-tier radius).** As N₁ grows from 4 → 8 → 12, +median cpu_us grows from 3.2 ms → 6.7 ms → 12.9 ms — roughly 1.0× → 2.1× → 4.0× the +r4 baseline. The Tier 1 entity-classification cache (`EntityClassificationCache`, shipped +as #53) wins on the inner loop (per-entity classification avoided on cache hits) but the +outer per-LB walk still scales with N₁. This is exactly what the Tier 2 plan (persistent +groups) at `docs/plans/2026-05-10-perf-tiers-2-3-roadmap.md` addresses by eliminating +the per-frame LB scan entirely. + +**Radius=12 is not the production scenario.** `ACDREAM_STREAM_RADIUS=12` forces N₁=12 +(625 near LBs at full detail). The production A.5 default preset is N₁=4 / N₂=12 (81 +full-detail near + 544 terrain-only far), which CLAUDE.md already characterizes as +comfortable 200–400 FPS at the default preset. The numbers above characterize the scaling +curve for headroom analysis, not the experience a typical player sees. + +**Atlas opportunity is high (59%) but the win is memory-only.** With 96 MB of textures +and 59% in the top-3 dimension buckets, atlas consolidation would reduce sampler-switch +count (currently near-zero already, since bindless textures are made resident once) and +shrink the texture memory footprint by roughly 40–50% through packing. But GPU is not +bottlenecked on sampler switches or memory bandwidth — the 0.6 ms gpu_us p95 at radius=12 +walking demonstrates this directly. Atlas adoption would cost 1–2 weeks of implementation +risk for a memory saving the process doesn't currently need at 96 MB. 
+ +### Recommendation + +**Primary: do C.1.5 next (PES emitter wiring — portals, chimneys, fireplaces).** Four +reasons: (a) the production dispatcher is already comfortable at the default N₁=4 preset +per the CLAUDE.md notes; (b) the two slice-2 items that were "conditional on baseline" +data (atlas adoption and persistent-mapped buffers) are not justified — GPU is not +bottlenecked; (c) C.1.5 fills a visible content gap that has been open since C.1 shipped +and is in the roadmap queue ahead of N.6 slice 2; (d) C.1.5 stabilizes the particle path +before any future shader migration work in slice 2 touches `particle.frag`. Starting +point for C.1.5 scoping: `docs/plans/2026-04-27-phase-c1-pes-particles.md` lines 285–295. + +**Secondary (after C.1.5 lands): N.6 slice 2 with reduced scope.** The baseline data +justifies dropping atlas adoption and persistent-mapped buffers from slice 2 entirely. +What remains is a ~1-day cleanup: retire orphan `mesh.frag` (verify zero callers post-N.5 +amendment), collapse dead `_handlesByOverridden` / `_handlesByPalette` legacy caches once +their callers are confirmed gone, migrate `particle.frag` to bindless sampling after C.1.5 +stabilizes the path. Slice 2 is a cleanup sprint, not a performance phase. + +**Tertiary option (if perf escalation becomes pressing): Tier 2 first.** The scaling +curve (3.2 → 6.7 → 12.9 ms as N₁ grows 4 → 8 → 12) confirms the per-LB walk is the +bottleneck — exactly what Tier 2's persistent-group structure at +`docs/plans/2026-05-10-perf-tiers-2-3-roadmap.md` addresses. Not urgent at the current +default N₁=4; worth revisiting if a future quality preset wants N₁=8 as default or if the +200–400 FPS range at N₁=4 shrinks after more content is streamed. + +**Decision rule for revisiting:** if future measurement at the default preset shows +cpu_us median > 5,000 µs or gpu_us p95 > 8,000 µs, re-open the escalation question. +Otherwise, hold the C.1.5 → reduced-slice-2 sequence. + +## §5. 
Raw logs + +Scratch logs from this measurement run (not committed; can be deleted once the doc is +reviewed): + +- `baseline-r4-stand.log`, `baseline-r4-walk.log` +- `baseline-r8-stand.log`, `baseline-r8-walk.log` +- `baseline-r12-stand.log`, `baseline-r12-walk.log` +- `baseline-surfaces.log` (launch log for `ACDREAM_DUMP_SURFACES=1` run) +- `baseline-surfaces.txt` (copy of `%LOCALAPPDATA%\acdream\n6-surfaces.txt`) +- `task1-verify.log` (Task 1 manual verification log) diff --git a/src/AcDream.App/Rendering/GameWindow.cs b/src/AcDream.App/Rendering/GameWindow.cs index c3bba03..b81d484 100644 --- a/src/AcDream.App/Rendering/GameWindow.cs +++ b/src/AcDream.App/Rendering/GameWindow.cs @@ -6310,6 +6310,10 @@ public sealed class GameWindow : IDisposable _gl!.Clear(ClearBufferMask.ColorBufferBit | ClearBufferMask.DepthBufferBit); + // Phase N.6 slice 1: one-shot surface-format histogram dump under + // ACDREAM_DUMP_SURFACES=1. Zero cost when off. + _textureCache?.TickSurfaceHistogramDumpIfEnabled(); + // Phase N.4: drain WB pipeline queues (staged mesh data + // GL thread queue). Must happen before any draw work so that // resources uploaded this frame are available immediately. 
diff --git a/src/AcDream.App/Rendering/TextureCache.cs b/src/AcDream.App/Rendering/TextureCache.cs
index 78eef29..5aea075 100644
--- a/src/AcDream.App/Rendering/TextureCache.cs
+++ b/src/AcDream.App/Rendering/TextureCache.cs
@@ -4,6 +4,7 @@ using AcDream.Core.World;
 using DatReaderWriter;
 using DatReaderWriter.DBObjs;
 using Silk.NET.OpenGL;
+using System.Linq;
 using SurfaceType = DatReaderWriter.Enums.SurfaceType;
 
 namespace AcDream.App.Rendering;
@@ -40,6 +41,20 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
     private readonly Dictionary<(uint surfaceId, uint origTexOverride), (uint Name, ulong Handle)> _bindlessByOverridden = new();
     private readonly Dictionary<(uint surfaceId, uint origTexOverride, ulong paletteHash), (uint Name, ulong Handle)> _bindlessByPalette = new();
 
+    // Phase N.6 slice 1 (2026-05-11): per-upload metadata for the
+    // ACDREAM_DUMP_SURFACES=1 histogram dump path. Populated at upload
+    // time so the dump method doesn't have to query GL state. Keyed by
+    // GL texture name (same key used in cache value tuples). Format
+    // label is "RGBA8_DECODED" for the post-decode upload (all uploads
+    // currently land as RGBA8 regardless of source format).
+    private readonly Dictionary<uint, (int Width, int Height, string Format)> _uploadMetadata = new();
+
+    // One-shot trigger state for ACDREAM_DUMP_SURFACES=1. The counter increments per
+    // armed Tick call; the dump fires once when it reaches 600 AND _uploadMetadata
+    // holds >= 100 entries. The bool latches after the dump or when the env var is off.
+    private int _dumpFrameCounter;
+    private bool _surfaceHistogramAlreadyDumped;
+
     public TextureCache(GL gl, DatCollection dats, Wb.BindlessSupport? bindless = null)
     {
         _gl = gl;
@@ -258,6 +273,114 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
         return h;
     }
 
+    /// <summary>
+    /// Phase N.6 slice 1: one-shot surface-format histogram dump for the
+    /// atlas-opportunity audit. Activated by ACDREAM_DUMP_SURFACES=1; fires
+    /// once after BOTH gates pass:
+    ///   1. <c>_dumpFrameCounter &gt;= 600</c> — at least 600 OnRender ticks
+    ///      have elapsed (catches the "we're already past startup boilerplate"
+    ///      bound; ~10s at 60fps, ~3s at 200fps).
+    ///   2. <c>_uploadMetadata.Count &gt;= 100</c> — the cache contains at
+    ///      least 100 uploaded textures, indicating streaming has actually
+    ///      pulled in world content (not just sky/UI/font). The original
+    ///      frame-only gate fired during the login/handshake phase where
+    ///      OnRender ticks at GUI rates but no world has streamed in.
+    /// Output goes to %LOCALAPPDATA%\acdream\n6-surfaces.txt. When off, the
+    /// first tick latches the one-shot flag so later ticks cost one bool test. See spec §5 in
+    /// docs/superpowers/specs/2026-05-11-phase-n6-slice1-design.md.
+    /// </summary>
+    public void TickSurfaceHistogramDumpIfEnabled()
+    {
+        if (_surfaceHistogramAlreadyDumped) return;
+        if (!string.Equals(System.Environment.GetEnvironmentVariable("ACDREAM_DUMP_SURFACES"), "1", StringComparison.Ordinal)) { _surfaceHistogramAlreadyDumped = true; return; }
+        _dumpFrameCounter++;
+        if (_dumpFrameCounter < 600) return;
+        if (_uploadMetadata.Count < 100) return;
+
+        DumpSurfaceHistogram();
+        _surfaceHistogramAlreadyDumped = true;
+    }
+
+    private void DumpSurfaceHistogram()
+    {
+        try
+        {
+            DumpSurfaceHistogramCore();
+        }
+        catch (Exception ex)
+        {
+            // Diagnostic-only path. If the dump file can't be written
+            // (disk full, permission denied, antivirus lock, path too
+            // long) we must NOT crash OnRender — that would invalidate
+            // the very measurement pass this diagnostic is meant to
+            // support. Log to stderr and let the caller mark the dump
+            // as "already done" so it doesn't retry every frame.
+            Console.Error.WriteLine($"[N6-DUMP] Failed to write surface histogram: {ex.Message}");
+        }
+    }
+
+    private void DumpSurfaceHistogramCore()
+    {
+        var localAppData = System.Environment.GetFolderPath(System.Environment.SpecialFolder.LocalApplicationData);
+        var outDir = System.IO.Path.Combine(localAppData, "acdream");
+        System.IO.Directory.CreateDirectory(outDir);
+        var outPath = System.IO.Path.Combine(outDir, "n6-surfaces.txt");
+
+        var sb = new System.Text.StringBuilder();
+        sb.AppendLine($"# acdream surface-format histogram — generated {DateTime.UtcNow:yyyy-MM-ddTHH:mm:ssZ}");
+        sb.AppendLine("# Per-entry: surfaceId(hex), width, height, format, byteCount");
+        sb.AppendLine();
+
+        // Walk every cached entry across the 6 caches, dedupe by GL name.
+        var seen = new HashSet<uint>();
+        long totalBytes = 0;
+        var bucketsByDim = new Dictionary<(int W, int H), int>();
+        var bucketsByFormat = new Dictionary<string, int>();
+        var bucketsByTriple = new Dictionary<(int W, int H, string F), int>();
+
+        void Emit(uint surfaceId, uint name)
+        {
+            if (!_uploadMetadata.TryGetValue(name, out var meta)) return;
+            if (!seen.Add(name)) return;
+            long bytes = (long)meta.Width * meta.Height * 4;
+            totalBytes += bytes;
+            sb.AppendLine($"0x{surfaceId:X8}, {meta.Width}, {meta.Height}, {meta.Format}, {bytes}");
+
+            var dimKey = (meta.Width, meta.Height);
+            bucketsByDim[dimKey] = bucketsByDim.GetValueOrDefault(dimKey) + 1;
+            bucketsByFormat[meta.Format] = bucketsByFormat.GetValueOrDefault(meta.Format) + 1;
+            var tripleKey = (meta.Width, meta.Height, meta.Format);
+            bucketsByTriple[tripleKey] = bucketsByTriple.GetValueOrDefault(tripleKey) + 1;
+        }
+
+        foreach (var kv in _handlesBySurfaceId) Emit(kv.Key, kv.Value);
+        foreach (var kv in _handlesByOverridden) Emit(kv.Key.surfaceId, kv.Value);
+        foreach (var kv in _handlesByPalette) Emit(kv.Key.surfaceId, kv.Value);
+        foreach (var kv in _bindlessBySurfaceId) Emit(kv.Key, kv.Value.Name);
+        foreach (var kv in _bindlessByOverridden) Emit(kv.Key.surfaceId, kv.Value.Name);
+        foreach (var kv in _bindlessByPalette) Emit(kv.Key.surfaceId, kv.Value.Name);
+
+        sb.AppendLine();
+        sb.AppendLine("# Rollups");
+        sb.AppendLine($"# Total unique GL textures: {seen.Count}");
+        sb.AppendLine($"# Total bytes (sum of W*H*4): {totalBytes}");
+
+        sb.AppendLine("# Top 10 (W,H) dimension buckets:");
+        foreach (var kv in bucketsByDim.OrderByDescending(kv => kv.Value).Take(10))
+            sb.AppendLine($"# {kv.Key.W}x{kv.Key.H}: {kv.Value}");
+
+        sb.AppendLine("# Format buckets:");
+        foreach (var kv in bucketsByFormat.OrderByDescending(kv => kv.Value))
+            sb.AppendLine($"# {kv.Key}: {kv.Value}");
+
+        sb.AppendLine("# Top 10 (W,H,format) triples — atlas-opportunity input:");
+        foreach (var kv in bucketsByTriple.OrderByDescending(kv => kv.Value).Take(10))
+            sb.AppendLine($"# {kv.Key.W}x{kv.Key.H} {kv.Key.F}: {kv.Value}");
+
+        System.IO.File.WriteAllText(outPath, sb.ToString());
+        Console.WriteLine($"[N6-DUMP] Surface histogram written to {outPath} ({seen.Count} textures, {totalBytes} bytes)");
+    }
+
     private DecodedTexture DecodeFromDats(uint surfaceId, uint? origTextureOverride, PaletteOverride? paletteOverride)
     {
         var surface = _dats.Get(surfaceId);
@@ -364,6 +487,7 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
         _gl.TexParameter(TextureTarget.Texture2D, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Repeat);
         _gl.BindTexture(TextureTarget.Texture2D, 0);
 
+        _uploadMetadata[tex] = (decoded.Width, decoded.Height, "RGBA8_DECODED");
         return tex;
     }
 
@@ -396,6 +520,7 @@ public sealed unsafe class TextureCache : Wb.ITextureCachePerInstance, IDisposab
         _gl.TexParameter(TextureTarget.Texture2DArray, TextureParameterName.TextureWrapT, (int)TextureWrapMode.Repeat);
         _gl.BindTexture(TextureTarget.Texture2DArray, 0);
 
+        _uploadMetadata[tex] = (decoded.Width, decoded.Height, "RGBA8_DECODED");
         return tex;
     }