fix(app): Phase A.1 — serialize DatCollection access behind _datLock
Critical hotfix for the first live run of streaming. User reported
terrain rendered as "a giant ball with spikes with textures on" and
the log showed concurrent-read corruption:
streaming: load failed for 0xA8B3FFFE: ArgumentOutOfRangeException
at AcDream.Core.World.LandblockLoader.Load ... line 18
Line 18 is `dats.Get<LandBlock>(landblockId)`. Root cause: my spec
claimed DatCollection is thread-safe for reads. It isn't. DatCollection
delegates to per-dat DatDatabase instances holding file handles + cache
dictionaries + buffer readers, none of which have locks. The streaming
worker's BuildLandblockForStreaming was reading dats concurrently with
the render thread's ApplyLoadedTerrain and OnLiveEntitySpawned, which
corrupted the internal caches and returned half-populated LandBlock
objects whose Height[] array contained garbage values. Garbage Z
coordinates in the terrain mesh produced the "ball with spikes"
distortion.
Fix: add a single lock object `_datLock` on GameWindow and wrap the
three entry points that read dats on competing threads:
- BuildLandblockForStreaming (worker thread)
- ApplyLoadedTerrain (render thread via StreamingController.Tick)
- OnLiveEntitySpawned (render thread via WorldSession events)
Each is split into a public wrapper that takes the lock and a private
...Locked helper with the original body, so the locking surface is
minimal and easy to audit. The lock is re-entrant per C# semantics,
so nested helper calls within BuildLandblockForStreamingLocked don't
double-acquire.
Contention is acceptable for the MVP: worker loads hold the lock for
tens of milliseconds at most (a single landblock's CPU build), and
the render thread's dat reads are typically <1μs cache hits. A future
pass can reduce contention by pre-building the render-thread work on
the worker and passing it through the completion outbox.
Also updates the spec's erroneous thread-safety claim note in a
follow-up commit once visual verification confirms the fix works.
212 tests still green.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
fca299780c
commit
c991fb23ce
1 changed files with 54 additions and 0 deletions
|
|
@ -34,6 +34,15 @@ public sealed class GameWindow : IDisposable
|
||||||
private int _streamingRadius = 2; // default 5×5
|
private int _streamingRadius = 2; // default 5×5
|
||||||
private uint? _lastLivePlayerLandblockId;
|
private uint? _lastLivePlayerLandblockId;
|
||||||
|
|
||||||
|
// Phase A.1 hotfix: DatCollection is NOT thread-safe. The streaming worker
|
||||||
|
// thread and the render thread both read dats (BuildLandblockForStreaming
|
||||||
|
// on the worker; ApplyLoadedTerrain + live-spawn handlers on the render
|
||||||
|
// thread). Concurrent reads corrupt internal caches and produce
|
||||||
|
// half-populated LandBlock.Height[] arrays, which caused terrain to render
|
||||||
|
// as "a giant ball with spikes" before this lock was added. All _dats.Get
|
||||||
|
// calls that can race with the worker thread MUST acquire this lock.
|
||||||
|
private readonly object _datLock = new();
|
||||||
|
|
||||||
// Terrain build context shared across all streamed landblocks. Stored as
|
// Terrain build context shared across all streamed landblocks. Stored as
|
||||||
// fields so ApplyLoadedTerrain (render-thread callback) can call
|
// fields so ApplyLoadedTerrain (render-thread callback) can call
|
||||||
// LandblockMesh.Build without re-deriving these each time.
|
// LandblockMesh.Build without re-deriving these each time.
|
||||||
|
|
@ -336,6 +345,18 @@ public sealed class GameWindow : IDisposable
|
||||||
/// on the main thread (Tick runs in the Silk.NET Update callback).
|
/// on the main thread (Tick runs in the Silk.NET Update callback).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private void OnLiveEntitySpawned(AcDream.Core.Net.WorldSession.EntitySpawn spawn)
|
private void OnLiveEntitySpawned(AcDream.Core.Net.WorldSession.EntitySpawn spawn)
|
||||||
|
{
|
||||||
|
// Phase A.1 hotfix: live CreateObject handler reads dats extensively
|
||||||
|
// (Setup, GfxObj, Surface, SurfaceTexture) to hydrate the spawned
|
||||||
|
// entity. All of it must run under the dat lock so it doesn't race
|
||||||
|
// with BuildLandblockForStreaming on the worker thread.
|
||||||
|
lock (_datLock)
|
||||||
|
{
|
||||||
|
OnLiveEntitySpawnedLocked(spawn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void OnLiveEntitySpawnedLocked(AcDream.Core.Net.WorldSession.EntitySpawn spawn)
|
||||||
{
|
{
|
||||||
_liveSpawnReceived++;
|
_liveSpawnReceived++;
|
||||||
|
|
||||||
|
|
@ -822,6 +843,23 @@ public sealed class GameWindow : IDisposable
|
||||||
{
|
{
|
||||||
if (_dats is null) return null;
|
if (_dats is null) return null;
|
||||||
|
|
||||||
|
// Phase A.1 hotfix: hold the dat lock for the entire load. The worker
|
||||||
|
// thread mustn't read dats concurrently with the render thread's
|
||||||
|
// ApplyLoadedTerrain / live-spawn handlers. Hold time is bounded by
|
||||||
|
// the size of a single landblock's CPU-side build (tens of ms worst
|
||||||
|
// case), which blocks the render thread for at most that duration.
|
||||||
|
// This is the minimum correct behavior; a future pass can reduce
|
||||||
|
// contention by pre-building render-thread work on the worker.
|
||||||
|
lock (_datLock)
|
||||||
|
{
|
||||||
|
return BuildLandblockForStreamingLocked(landblockId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AcDream.Core.World.LoadedLandblock? BuildLandblockForStreamingLocked(uint landblockId)
|
||||||
|
{
|
||||||
|
if (_dats is null) return null;
|
||||||
|
|
||||||
var baseLoaded = AcDream.Core.World.LandblockLoader.Load(_dats, landblockId);
|
var baseLoaded = AcDream.Core.World.LandblockLoader.Load(_dats, landblockId);
|
||||||
if (baseLoaded is null) return null;
|
if (baseLoaded is null) return null;
|
||||||
|
|
||||||
|
|
@ -1111,6 +1149,22 @@ public sealed class GameWindow : IDisposable
|
||||||
/// Must only be called from the render thread.
|
/// Must only be called from the render thread.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private void ApplyLoadedTerrain(AcDream.Core.World.LoadedLandblock lb)
|
private void ApplyLoadedTerrain(AcDream.Core.World.LoadedLandblock lb)
|
||||||
|
{
|
||||||
|
if (_terrain is null || _dats is null || _blendCtx is null
|
||||||
|
|| _heightTable is null || _surfaceCache is null) return;
|
||||||
|
|
||||||
|
// Phase A.1 hotfix: render-thread path also takes the dat lock so it
|
||||||
|
// doesn't race with BuildLandblockForStreaming on the worker thread.
|
||||||
|
// Hold the lock across the entire apply because we read dats below
|
||||||
|
// (GfxObj sub-mesh builds) and mutate the shared _surfaceCache from
|
||||||
|
// LandblockMesh.Build.
|
||||||
|
lock (_datLock)
|
||||||
|
{
|
||||||
|
ApplyLoadedTerrainLocked(lb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ApplyLoadedTerrainLocked(AcDream.Core.World.LoadedLandblock lb)
|
||||||
{
|
{
|
||||||
if (_terrain is null || _dats is null || _blendCtx is null
|
if (_terrain is null || _dats is null || _blendCtx is null
|
||||||
|| _heightTable is null || _surfaceCache is null) return;
|
|| _heightTable is null || _surfaceCache is null) return;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue