fix(render): quiesce dat readers before teardown — kill the shutdown AccessViolation

ObjectMeshManager.Dispose never stopped its Task.Run(ProcessQueueAsync) decode
workers, and LandblockStreamer.Dispose abandoned its worker after a 2s join.
GameWindow.OnClosing then disposed the DatCollection, which unmaps the dats'
memory-mapped views (MemoryMappedBlockAllocator.DestroyMappedFile nulls
_viewPtr) — a worker still inside ReadBlock dereferences the dead view pointer:
an uncatchable AccessViolationException with ReadBlock on the stack, firing on
close/relaunch during decode storms. This is the recorded crash signature from
the 2026-06-09 white-walls session.

- ObjectMeshManager.Dispose: set IsDisposed under the queue lock, cancel+drain
  pending requests, then wait (<=10s) for _activeWorkers==0; loud LogError if
  workers outlive the wait. ProcessQueueAsync re-checks IsDisposed per dequeue;
  Prepare*Async entries + enqueue blocks early-out when disposed.
- LandblockStreamer.Dispose: join 2s -> 15s with a loud [streamer] line on
  timeout (cancellation honored between jobs; one landblock load bounds it).
- Also includes the [tex-skip] tripwire lines on ObjectMeshManager's five
  silent dat-miss exits (GfxObj + CellStruct texture chains) — part of the
  white-walls attribution net (#105), zero output when healthy.

Verified: 3x close-mid-decode-storm smoke (in-world at ~8s, WM_CLOSE at ~11s),
clean exits, no crash signatures, no quiesce timeouts. Full suite: 294+218+420
green; Core 1338 green + 4 pre-existing physics failures (reproduced at bare
HEAD, unrelated). Investigation:
docs/research/2026-06-09-dat-reader-thread-safety-investigation.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Erik 2026-06-09 21:27:22 +02:00
parent d0bd28543b
commit 8fadf770fe
2 changed files with 73 additions and 7 deletions

View file

@ -382,7 +382,7 @@ namespace AcDream.App.Rendering.Wb {
/// Phase 1 (Background Thread): Prepare CPU-side mesh data for deduplicated EnvCell geometry. /// Phase 1 (Background Thread): Prepare CPU-side mesh data for deduplicated EnvCell geometry.
/// </summary> /// </summary>
public Task<ObjectMeshData?> PrepareEnvCellGeomMeshDataAsync(ulong geomId, uint environmentId, ushort cellStructure, List<ushort> surfaces, CancellationToken ct = default) { public Task<ObjectMeshData?> PrepareEnvCellGeomMeshDataAsync(ulong geomId, uint environmentId, ushort cellStructure, List<ushort> surfaces, CancellationToken ct = default) {
if (HasRenderData(geomId)) return Task.FromResult<ObjectMeshData?>(null); if (IsDisposed || HasRenderData(geomId)) return Task.FromResult<ObjectMeshData?>(null);
// Check CPU cache first // Check CPU cache first
lock (_cpuMeshCache) { lock (_cpuMeshCache) {
@ -403,6 +403,11 @@ namespace AcDream.App.Rendering.Wb {
_preparationTasks[geomId] = task; _preparationTasks[geomId] = task;
lock (_pendingRequests) { lock (_pendingRequests) {
if (IsDisposed) {
tcs.TrySetCanceled();
_preparationTasks.TryRemove(geomId, out _);
return task;
}
// Special handling for EnvCell geometry - we need to store the cell data for the worker // Special handling for EnvCell geometry - we need to store the cell data for the worker
_pendingEnvCellRequests[geomId] = new EnvCellGeomRequest { _pendingEnvCellRequests[geomId] = new EnvCellGeomRequest {
EnvironmentId = environmentId, EnvironmentId = environmentId,
@ -420,7 +425,7 @@ namespace AcDream.App.Rendering.Wb {
} }
public Task<ObjectMeshData?> PrepareMeshDataAsync(ulong id, bool isSetup, CancellationToken ct = default) { public Task<ObjectMeshData?> PrepareMeshDataAsync(ulong id, bool isSetup, CancellationToken ct = default) {
if (HasRenderData(id)) return Task.FromResult<ObjectMeshData?>(null); if (IsDisposed || HasRenderData(id)) return Task.FromResult<ObjectMeshData?>(null);
// Check CPU cache first // Check CPU cache first
lock (_cpuMeshCache) { lock (_cpuMeshCache) {
@ -452,6 +457,11 @@ namespace AcDream.App.Rendering.Wb {
_preparationTasks[id] = task; _preparationTasks[id] = task;
lock (_pendingRequests) { lock (_pendingRequests) {
if (IsDisposed) {
tcs.TrySetCanceled();
_preparationTasks.TryRemove(id, out _);
return task;
}
_pendingRequests.Add((id, isSetup, tcs, ct)); _pendingRequests.Add((id, isSetup, tcs, ct));
if (_activeWorkers < MaxParallelLoads) { if (_activeWorkers < MaxParallelLoads) {
_activeWorkers++; _activeWorkers++;
@ -471,7 +481,9 @@ namespace AcDream.App.Rendering.Wb {
CancellationToken ct; CancellationToken ct;
lock (_pendingRequests) { lock (_pendingRequests) {
if (_pendingRequests.Count == 0) { // IsDisposed re-check: lets Dispose() drain the queue and
// observe _activeWorkers reach 0 before the dats unmap.
if (IsDisposed || _pendingRequests.Count == 0) {
return; return;
} }
@ -972,7 +984,11 @@ namespace AcDream.App.Rendering.Wb {
if (surfaceIdx < 0 || surfaceIdx >= gfxObj.Surfaces.Count) return; if (surfaceIdx < 0 || surfaceIdx >= gfxObj.Surfaces.Count) return;
var surfaceId = gfxObj.Surfaces[surfaceIdx]; var surfaceId = gfxObj.Surfaces[surfaceIdx];
if (!_dats.Portal.TryGet<Surface>(surfaceId, out var surface)) return; if (!_dats.Portal.TryGet<Surface>(surfaceId, out var surface)) {
// TEMP diagnostic (dat-race investigation 2026-06-09, strip with fix)
Console.WriteLine($"[tex-skip] gfxobj Surface 0x{surfaceId:X8} miss -> poly batch dropped (obj 0x{gfxObj.Id:X8})");
return;
}
int texWidth, texHeight; int texWidth, texHeight;
byte[] textureData; byte[] textureData;
@ -1133,6 +1149,8 @@ namespace AcDream.App.Rendering.Wb {
sourceFormat == DatReaderWriter.Enums.PixelFormat.PFID_DXT5))); sourceFormat == DatReaderWriter.Enums.PixelFormat.PFID_DXT5)));
} }
else { else {
// TEMP diagnostic (dat-race investigation 2026-06-09, strip with fix)
Console.WriteLine($"[tex-skip] gfxobj SurfaceTexture 0x{surface.OrigTextureId:X8} miss -> poly batch dropped (surface 0x{surfaceId:X8})");
return; return;
} }
@ -1320,7 +1338,11 @@ namespace AcDream.App.Rendering.Wb {
return; return;
} }
if (!_dats.Portal.TryGet<Surface>(surfaceId, out var surface)) return; if (!_dats.Portal.TryGet<Surface>(surfaceId, out var surface)) {
// TEMP diagnostic (dat-race investigation 2026-06-09, strip with fix)
Console.WriteLine($"[tex-skip] cellstruct Surface 0x{surfaceId:X8} miss -> WALL poly batch dropped (cellstruct 0x{cellStruct:X4})");
return;
}
int texWidth, texHeight; int texWidth, texHeight;
byte[] textureData; byte[] textureData;
@ -1345,6 +1367,8 @@ namespace AcDream.App.Rendering.Wb {
var renderSurfaceId = surfaceTexture.Textures.First(); var renderSurfaceId = surfaceTexture.Textures.First();
if (!_dats.Portal.TryGet<RenderSurface>(renderSurfaceId, out var renderSurface)) { if (!_dats.Portal.TryGet<RenderSurface>(renderSurfaceId, out var renderSurface)) {
if (!_dats.HighRes.TryGet<RenderSurface>(renderSurfaceId, out var hrRenderSurface)) { if (!_dats.HighRes.TryGet<RenderSurface>(renderSurfaceId, out var hrRenderSurface)) {
// TEMP diagnostic (dat-race investigation 2026-06-09, strip with fix)
Console.WriteLine($"[tex-skip] cellstruct RenderSurface 0x{renderSurfaceId:X8} miss (portal+highres) -> WALL poly batch dropped");
return; return;
} }
renderSurface = hrRenderSurface; renderSurface = hrRenderSurface;
@ -1454,6 +1478,8 @@ namespace AcDream.App.Rendering.Wb {
} }
} }
else { else {
// TEMP diagnostic (dat-race investigation 2026-06-09, strip with fix)
Console.WriteLine($"[tex-skip] cellstruct SurfaceTexture 0x{surface.OrigTextureId:X8} miss -> WALL poly batch dropped (surface 0x{surfaceId:X8})");
return; return;
} }
@ -1960,7 +1986,39 @@ namespace AcDream.App.Rendering.Wb {
public void Dispose() { public void Dispose() {
if (IsDisposed) return; if (IsDisposed) return;
IsDisposed = true;
// Quiesce the background decode workers BEFORE returning: the owner
// disposes the DatCollection right after this adapter chain, which
// unmaps the dats' memory-mapped views. A worker still inside
// MemoryMappedBlockAllocator.ReadBlock at that point dereferences the
// dead view pointer — an uncatchable, process-fatal AccessViolation
// (dat-race investigation 2026-06-09). Setting IsDisposed under the
// queue lock publishes it to workers, which re-check it before every
// dequeue; draining the queue means each worker exits after at most
// its current (millisecond-scale) item.
lock (_pendingRequests) {
IsDisposed = true;
foreach (var (id, _, tcs, _) in _pendingRequests) {
tcs.TrySetCanceled();
_preparationTasks.TryRemove(id, out _);
}
_pendingRequests.Clear();
_pendingEnvCellRequests.Clear();
}
var deadline = System.Environment.TickCount64 + 10_000;
while (System.Environment.TickCount64 < deadline) {
lock (_pendingRequests) {
if (_activeWorkers == 0) break;
}
Thread.Sleep(5);
}
lock (_pendingRequests) {
if (_activeWorkers > 0)
_logger.LogError(
"Dispose: {Count} mesh-decode workers still active after 10s — dat teardown may race in-flight reads",
_activeWorkers);
}
_graphicsDevice.QueueGLAction(gl => { _graphicsDevice.QueueGLAction(gl => {
foreach (var data in _renderData.Values) { foreach (var data in _renderData.Values) {
if (!_useModernRendering) { if (!_useModernRendering) {

View file

@ -329,7 +329,15 @@ public sealed class LandblockStreamer : IDisposable
if (System.Threading.Interlocked.Exchange(ref _disposed, 1) != 0) return; if (System.Threading.Interlocked.Exchange(ref _disposed, 1) != 0) return;
_cancel.Cancel(); _cancel.Cancel();
_inbox.Writer.TryComplete(); _inbox.Writer.TryComplete();
_worker?.Join(TimeSpan.FromSeconds(2)); // Generous join: the owner disposes the DatCollection after this, which
// unmaps the dats' memory-mapped views — an abandoned worker mid-dat-read
// would take the process down with an AccessViolation in
// MemoryMappedBlockAllocator.ReadBlock (dat-race investigation 2026-06-09).
// Cancellation is honored between jobs, so the wait is bounded by one
// landblock load; 15s only ever elapses if the worker is genuinely hung.
if (_worker is not null && !_worker.Join(TimeSpan.FromSeconds(15)))
Console.Error.WriteLine(
"[streamer] worker did not stop within 15s — dat teardown may race an in-flight load");
_cancel.Dispose(); _cancel.Dispose();
} }
} }