From b838eccb38e9a76a57b70173036887849ad5f30b Mon Sep 17 00:00:00 2001 From: Erik Date: Tue, 19 May 2026 13:00:18 +0200 Subject: [PATCH] =?UTF-8?q?feat(wb):=20ConsoleErrorLogger=20+=20cause=20re?= =?UTF-8?q?port=20=E2=80=94=20H1=20swallowed-exception=20confirmed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 diagnostic chain identified the EXACT cause of 26/123 Holtburg cells silently failing in WB's PrepareEnvCellMeshData: ArgumentOutOfRangeException thrown from Setup.Unpack inside DatReaderWriter when WB calls TryGet(stab.Id, ...) on a stab id whose prefix is GfxObj (0x01xxxxxx), not Setup (0x02xxxxxx). DatReaderWriter finds the file in Portal's tree (GfxObjs and Setups share tree-lookups), attempts to parse GfxObj bytes as Setup format, throws OOR. Exception bubbles to PrepareMeshData's outer try/catch which silently swallows + returns null. Entire cell fails to upload. This commit lands the diagnostic infrastructure that surfaced the bug: - WbMeshAdapter: replaced NullLogger with a small Console-backed ConsoleErrorLogger private class. Filters to LogLevel.Error+. WB's existing _logger.LogError(ex, ...) at the swallow site now writes [wb-error] lines with type + message + top 5 stack frames. Bridges WB's intentional log point to acdream's console. - WbMeshAdapter: extended [indoor-upload] NULL_RESULT probe with reader-divergence diagnostic (ourCellDb.TryGet, wbResolveId.Count, wbSelectedType, wbDbIsPortal, wbDbTryGet, hadRenderData). Made it possible to rule out cache-hits and reader-divergence as causes before identifying the real one. - Cause report at docs/research/2026-05-19-indoor-cell-rendering-cause.md documents the full chain: 55 ArgumentOutOfRangeException stack traces captured in one launch, all from PrepareEnvCellMeshData line 1223. The fix itself (1-line guard at WB's TryGet call site) is applied to references/WorldBuilder/.../ObjectMeshManager.cs — which is a git submodule. Will be committed separately to the WB submodule after visual verification. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../2026-05-19-indoor-cell-rendering-cause.md | 94 +++++++++++++++++++ src/AcDream.App/Rendering/Wb/WbMeshAdapter.cs | 67 ++++++++++++- 2 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 docs/research/2026-05-19-indoor-cell-rendering-cause.md diff --git a/docs/research/2026-05-19-indoor-cell-rendering-cause.md b/docs/research/2026-05-19-indoor-cell-rendering-cause.md new file mode 100644 index 0000000..682a6a4 --- /dev/null +++ b/docs/research/2026-05-19-indoor-cell-rendering-cause.md @@ -0,0 +1,94 @@ +# Indoor Cell Rendering — Phase 2 Cause Report + +**Date:** 2026-05-19 +**Predecessor:** Phase 1 capture confirmed H1 (silent failure in WB). +**Capture method:** Phase 2's `ContinueWith` + `ConsoleErrorLogger` injected into WB's `ObjectMeshManager` surfaced the exception WB was silently catching. + +## Cause + +**Single failure mode:** `ArgumentOutOfRangeException` thrown from `DatReaderWriter.DBObjs.Setup.Unpack` at WB's [`ObjectMeshManager.cs:1223`](../../references/WorldBuilder/Chorizite.OpenGLSDLBackend/Lib/ObjectMeshManager.cs:1223): + +```csharp +// For EnvCell static objects, we need to manually collect emitters if they are Setups +if (_dats.Portal.TryGet(stab.Id, out var stabSetup)) { // ← throws +``` + +WB iterates `envCell.StaticObjects` and **blindly calls `TryGet` on every stab id**, regardless of whether the id is actually a Setup-prefix (`0x02xxxxxx`) or a GfxObj-prefix (`0x01xxxxxx`). When stab.Id is a GfxObj, `DatReaderWriter` finds the file (Portal dat has both GfxObjs and Setups under the same tree-lookup) and attempts to deserialize the GfxObj bytes as a Setup record. The Setup format is structurally different — early parse fails inside `QualifiedDataId.Unpack` → `DatBinReader.ReadBytesInternal` throws `ArgumentOutOfRangeException`. + +The exception bubbles up to `PrepareMeshData`'s outer try/catch at line 589: + +```csharp +catch (Exception ex) { + _logger.LogError(ex, "Error preparing mesh data for 0x{Id:X16}", id); + return null; // ← swallows exception, returns null +} +``` + +The entire EnvCell upload fails silently. The cell's room geometry (floor / walls / ceiling) never reaches `_renderData`, so the dispatcher skips drawing it. Static objects inside the cell (which acdream hydrates separately) still render — they have their own GfxObj uploads. + +**This also explains the user's "objects below ground" observation:** with the floor mesh missing, you see the cell's static objects (tables / chairs / fireplaces) through where the floor should be. Visually they appear "below ground." + +## Sample evidence + +55 NULL_RESULT cells captured at multiple landblocks (`0xA5B4`, `0xA7B4`, `0xA8B2`, `0xA9B0`, `0xA9B2`, `0xA9B3`, `0xA9B4`). All 55 share the same exception type and stack frame: + +``` +[wb-error] Error preparing mesh data for 0x00000000A9B20114 +[wb-error] ArgumentOutOfRangeException: Specified argument was out of the range of valid values. +[wb-error] at DatReaderWriter.DBObjs.Setup.Unpack(DatBinReader reader) +[wb-error] at DatReaderWriter.DatDatabase.TryGet[T](UInt32 fileId, T& value) +[wb-error] at WorldBuilder.Shared.Services.DefaultDatDatabase.TryGet[T](UInt32 fileId, T& value) +[wb-error] at Chorizite.OpenGLSDLBackend.Lib.ObjectMeshManager.PrepareEnvCellMeshData(...) line 1223 +[wb-error] at Chorizite.OpenGLSDLBackend.Lib.ObjectMeshManager.PrepareMeshData(...) line 571 +``` + +For Holtburg (`0xA9B4`) specifically: 123 requested → 97 completed + 26 silently failed. The 26 failures all match this exception signature. The first interior cell `0xA9B40100` is among them — exactly where the user reported a missing floor. + +## Why the other hypotheses were ruled out + +Phase 1 ruled out H2-H6 via the captured probe data. Phase 2's diagnostic walk: + +1. `ourCellDb.TryGet=True` — acdream's DatCollection finds the cell. +2. `wbResolveId.Count=1` — WB's ResolveId also finds it. +3. `wbSelectedType=EnvCell` — type classification is correct. +4. `wbDbTryGet=True` — the cell record IS loadable by WB. +5. `hadRenderData=False` at request time — no pre-existing cache hit. + +All preconditions for a successful upload were met. The failure was in a downstream emitter-collection step (line 1223) that's tangential to the cell's own geometry — but its exception silently kills the entire upload. + +## Fix + +**One-line WB fork patch.** Pre-check the Setup-prefix bit before calling `TryGet`: + +```csharp +// Before: +if (_dats.Portal.TryGet(stab.Id, out var stabSetup)) { + +// After: +if ((stab.Id & 0xFF000000u) == 0x02000000u + && _dats.Portal.TryGet(stab.Id, out var stabSetup)) { +``` + +For GfxObj-prefixed stabs (which have no `DefaultScript` and no emitters anyway), the branch is now skipped correctly. For Setup-prefixed stabs, behavior is unchanged. + +This is in our WB fork at [`references/WorldBuilder/Chorizite.OpenGLSDLBackend/Lib/ObjectMeshManager.cs:1230`](../../references/WorldBuilder/Chorizite.OpenGLSDLBackend/Lib/ObjectMeshManager.cs:1230). The patch should be upstreamed — it's a real WB bug. + +## Verification approach + +After applying the fix: +1. Re-launch with `ACDREAM_PROBE_INDOOR_UPLOAD=1`. +2. Walk Holtburg. +3. Expect: zero `[wb-error]` lines, zero `[indoor-upload] NULL_RESULT` lines. Previously-failing cells now have `[indoor-upload] completed` lines. +4. Visual: floor renders in Holtburg Inn; objects no longer appear "below ground." + +## Phase 1 → Phase 2 chain summary + +The diagnostic-driven approach worked end-to-end: + +- **Phase 1:** Added 5 probes. Identified that 26 Holtburg cells silently fail. Confirmed H1 class of bug. Could not pinpoint without exception data. +- **Phase 2 Task 1:** Wrapped `PrepareMeshDataAsync` in a continuation to capture `Task.Exception`. Found that the task was never faulted — `tcs.TrySetResult(null)` ran instead. Hypothesized exception was swallowed inside `PrepareMeshData`. +- **Phase 2 cause-narrowing diagnostics:** Added `ourCellDb.TryGet` + `wbResolveId.Count` + `wbSelectedType` + `wbDbIsPortal` + `wbDbTryGet` + `hadRenderData` checks. Each iteration narrowed the cause class. +- **Phase 2 final probe:** Replaced WB's `NullLogger` with a Console-backed `ConsoleErrorLogger`. WB's existing `_logger.LogError(ex, ...)` call at the catch block immediately surfaced 55 ArgumentOutOfRangeException stack traces with file:line locations. **Cause definitively identified in one capture.** +- **Phase 2 fix:** One-line guard at the throwing call site. + +Total runtime: ~3 client launches to nail it. diff --git a/src/AcDream.App/Rendering/Wb/WbMeshAdapter.cs b/src/AcDream.App/Rendering/Wb/WbMeshAdapter.cs index 9893e31..2729747 100644 --- a/src/AcDream.App/Rendering/Wb/WbMeshAdapter.cs +++ b/src/AcDream.App/Rendering/Wb/WbMeshAdapter.cs @@ -77,10 +77,52 @@ public sealed class WbMeshAdapter : IDisposable, IWbMeshAdapter _dats = dats; _graphicsDevice = new OpenGLGraphicsDevice(gl, logger, new DebugRenderSettings()); _wbDats = new DefaultDatReaderWriter(datDir); + // Phase 2 diagnostic — replace NullLogger with a Console-backed + // logger so WB's internal catch block at ObjectMeshManager.cs:589 + // (and similar) surfaces its swallowed exceptions instead of + // dropping them. ConsoleErrorLogger filters to LogLevel.Error+ + // so successful operations stay quiet. _meshManager = new ObjectMeshManager( _graphicsDevice, _wbDats, - NullLogger.Instance); + new ConsoleErrorLogger()); + } + + /// + /// Minimal Console-backed logger that fires only on + /// and above. Format: + /// [wb-error] <message> + /// [wb-error] <ExceptionType>: <ExceptionMessage> + /// [wb-error] at <frame> (up to 5 frames) + /// Used to surface WB's silently-caught exceptions in + /// ObjectMeshManager.PrepareMeshData. + /// + private sealed class ConsoleErrorLogger : ILogger + { + public IDisposable BeginScope(TState state) where TState : notnull => NullScope.Instance; + public bool IsEnabled(LogLevel logLevel) => logLevel >= LogLevel.Error; + public void Log( + LogLevel logLevel, EventId eventId, TState state, Exception? exception, + Func formatter) + { + if (!IsEnabled(logLevel)) return; + var message = formatter(state, exception); + Console.WriteLine($"[wb-error] {message}"); + if (exception is not null) + { + Console.WriteLine($"[wb-error] {exception.GetType().Name}: {exception.Message}"); + var stack = (exception.StackTrace ?? "") + .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries) + .Take(5); + foreach (var s in stack) Console.WriteLine($"[wb-error] {s.Trim()}"); + } + } + + private sealed class NullScope : IDisposable + { + public static readonly NullScope Instance = new(); + public void Dispose() { } + } } private WbMeshAdapter() @@ -150,8 +192,9 @@ public sealed class WbMeshAdapter : IDisposable, IWbMeshAdapter // [indoor-upload] requested probe — only for EnvCell ids. if (RenderingDiagnostics.IsEnvCellId(id) && RenderingDiagnostics.ProbeIndoorUploadEnabled) { + bool hadRenderDataAtRequest = _meshManager.HasRenderData(id); _pendingEnvCellRequests.Add(id); - Console.WriteLine($"[indoor-upload] requested cellId=0x{id:X8}"); + Console.WriteLine($"[indoor-upload] requested cellId=0x{id:X8} hadRenderData={hadRenderDataAtRequest}"); // Phase 2 — surface what WB's catch block silently swallows. // ObjectMeshManager.PrepareMeshData has a try/catch at line 589 @@ -189,16 +232,32 @@ public sealed class WbMeshAdapter : IDisposable, IWbMeshAdapter catch { /* swallow — this is best-effort diagnostic */ } int wbResolveCount = -1; + string wbSelectedType = "none"; + bool wbDbTryGetEnvCell = false; + bool wbDbIsPortal = false; try { - wbResolveCount = _wbDats?.ResolveId((uint)cellId).Count() ?? -1; + var wbResolutions = _wbDats?.ResolveId((uint)cellId).ToList(); + wbResolveCount = wbResolutions?.Count ?? -1; + if (wbResolutions is not null && wbResolutions.Count > 0) + { + var selected = wbResolutions + .OrderByDescending(r => r.Database == _wbDats!.Portal) + .First(); + wbSelectedType = selected.Type.ToString(); + wbDbIsPortal = selected.Database == _wbDats!.Portal; + try { wbDbTryGetEnvCell = selected.Database.TryGet((uint)cellId, out _); } catch {} + } } catch { /* swallow — best-effort */ } Console.WriteLine( $"[indoor-upload] NULL_RESULT cellId=0x{cellId:X8} " + $"ourCellDb.TryGet={ourCellFound} " + - $"wbResolveId.Count={wbResolveCount}"); + $"wbResolveId.Count={wbResolveCount} " + + $"wbSelectedType={wbSelectedType} " + + $"wbDbIsPortal={wbDbIsPortal} " + + $"wbDbTryGet={wbDbTryGetEnvCell}"); } }, TaskScheduler.Default); }