From c991fb23ce16fac7cf0ee1828bf8673306aeb459 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 11 Apr 2026 22:49:37 +0200 Subject: [PATCH] =?UTF-8?q?fix(app):=20Phase=20A.1=20=E2=80=94=20serialize?= =?UTF-8?q?=20DatCollection=20access=20behind=20=5FdatLock?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical hotfix for the first live run of streaming. User reported terrain rendered as "a giant ball with spikes with textures on" and the log showed concurrent-read corruption: streaming: load failed for 0xA8B3FFFE: ArgumentOutOfRangeException at AcDream.Core.World.LandblockLoader.Load ... line 18 Line 18 is `dats.Get(landblockId)`. Root cause: my spec claimed DatCollection is thread-safe for reads. It isn't. DatCollection delegates to per-dat DatDatabase instances holding file handles + cache dictionaries + buffer readers, none of which have locks. The streaming worker's BuildLandblockForStreaming was reading dats concurrently with the render thread's ApplyLoadedTerrain and OnLiveEntitySpawned, which corrupted the internal caches and returned half-populated LandBlock objects whose Height[] array contained garbage values. Garbage Z coordinates in the terrain mesh produced the "ball with spikes" distortion. Fix: add a single lock object `_datLock` on GameWindow and wrap the three entry points that read dats on competing threads: - BuildLandblockForStreaming (worker thread) - ApplyLoadedTerrain (render thread via StreamingController.Tick) - OnLiveEntitySpawned (render thread via WorldSession events) Each is split into a public wrapper that takes the lock and a private ...Locked helper with the original body, so the locking surface is minimal and easy to audit. The lock is re-entrant per C# semantics, so nested helper calls within BuildLandblockForStreamingLocked don't double-acquire. Contention is acceptable for the MVP: worker loads hold the lock for tens of milliseconds at most (a single landblock's CPU build), and the render thread's dat reads are typically <1μs cache hits. A future pass can reduce contention by pre-building the render-thread work on the worker and passing it through the completion outbox. Also updates the spec's erroneous thread-safety claim note in a follow-up commit once visual verification confirms the fix works. 212 tests still green. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/AcDream.App/Rendering/GameWindow.cs | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/AcDream.App/Rendering/GameWindow.cs b/src/AcDream.App/Rendering/GameWindow.cs index 74e2305..8636f38 100644 --- a/src/AcDream.App/Rendering/GameWindow.cs +++ b/src/AcDream.App/Rendering/GameWindow.cs @@ -34,6 +34,15 @@ public sealed class GameWindow : IDisposable private int _streamingRadius = 2; // default 5×5 private uint? _lastLivePlayerLandblockId; + // Phase A.1 hotfix: DatCollection is NOT thread-safe. The streaming worker + // thread and the render thread both read dats (BuildLandblockForStreaming + // on the worker; ApplyLoadedTerrain + live-spawn handlers on the render + // thread). Concurrent reads corrupt internal caches and produce + // half-populated LandBlock.Height[] arrays, which caused terrain to render + // as "a giant ball with spikes" before this lock was added. All _dats.Get + // calls that can race with the worker thread MUST acquire this lock. + private readonly object _datLock = new(); + // Terrain build context shared across all streamed landblocks. Stored as // fields so ApplyLoadedTerrain (render-thread callback) can call // LandblockMesh.Build without re-deriving these each time. @@ -336,6 +345,18 @@ public sealed class GameWindow : IDisposable /// on the main thread (Tick runs in the Silk.NET Update callback). /// private void OnLiveEntitySpawned(AcDream.Core.Net.WorldSession.EntitySpawn spawn) + { + // Phase A.1 hotfix: live CreateObject handler reads dats extensively + // (Setup, GfxObj, Surface, SurfaceTexture) to hydrate the spawned + // entity. All of it must run under the dat lock so it doesn't race + // with BuildLandblockForStreaming on the worker thread. + lock (_datLock) + { + OnLiveEntitySpawnedLocked(spawn); + } + } + + private void OnLiveEntitySpawnedLocked(AcDream.Core.Net.WorldSession.EntitySpawn spawn) { _liveSpawnReceived++; @@ -822,6 +843,23 @@ public sealed class GameWindow : IDisposable { if (_dats is null) return null; + // Phase A.1 hotfix: hold the dat lock for the entire load. The worker + // thread mustn't read dats concurrently with the render thread's + // ApplyLoadedTerrain / live-spawn handlers. Hold time is bounded by + // the size of a single landblock's CPU-side build (tens of ms worst + // case), which blocks the render thread for at most that duration. + // This is the minimum correct behavior; a future pass can reduce + // contention by pre-building render-thread work on the worker. + lock (_datLock) + { + return BuildLandblockForStreamingLocked(landblockId); + } + } + + private AcDream.Core.World.LoadedLandblock? BuildLandblockForStreamingLocked(uint landblockId) + { + if (_dats is null) return null; + var baseLoaded = AcDream.Core.World.LandblockLoader.Load(_dats, landblockId); if (baseLoaded is null) return null; @@ -1111,6 +1149,22 @@ public sealed class GameWindow : IDisposable /// Must only be called from the render thread. /// private void ApplyLoadedTerrain(AcDream.Core.World.LoadedLandblock lb) + { + if (_terrain is null || _dats is null || _blendCtx is null + || _heightTable is null || _surfaceCache is null) return; + + // Phase A.1 hotfix: render-thread path also takes the dat lock so it + // doesn't race with BuildLandblockForStreaming on the worker thread. + // Hold the lock across the entire apply because we read dats below + // (GfxObj sub-mesh builds) and mutate the shared _surfaceCache from + // LandblockMesh.Build. + lock (_datLock) + { + ApplyLoadedTerrainLocked(lb); + } + } + + private void ApplyLoadedTerrainLocked(AcDream.Core.World.LoadedLandblock lb) { if (_terrain is null || _dats is null || _blendCtx is null || _heightTable is null || _surfaceCache is null) return;