acdream/tests/AcDream.Core.Tests/Conformance/DatConcurrencyStressTests.cs
Erik b3920d83f6 test(conformance): dat-reader concurrency hammer — concurrent READS exonerated
Settles the long-standing lore that DatCollection is not thread-safe for reads,
for Chorizite.DatReaderWriter 2.1.7: replays acdream's real four-population
access pattern (render / streamer / decode-pool / raw) against the live dats —
golden FNV-1a fingerprints taken single-threaded, then 8 threads x 25 shuffled
passes over ~2900 files spanning the cell heightmap/LBI/EnvCell set and the
portal texture chain (Environment -> Surface -> SurfaceTexture -> RenderSurface
incl. highres probes). Two layers: raw TryGetFileBytes (BTree + ReadBlock, no
caching) and typed TryGet with FileCachingStrategy.Never (full production
unpack path: ArrayPool + DatBinReader + ObjectFactory).

Result: ~1.1M concurrent reads, ZERO anomalies — byte-identical to golden.
Matches the line-level audit of the release/2.1.7 source (ReadBlock keeps all
cursor state in locals over a stable read-only mmap view; locked LRU BTree
node cache; ConcurrentDictionary file cache; fresh DatBinReader per call).
The real crash bug was dispose-during-read at teardown (fixed in 8fadf77).
Keep this as the regression guard for any future dat-reader version bump.

Skips cleanly when the dats are absent (CI), matching suite convention.
Full evidence: docs/research/2026-06-09-dat-reader-thread-safety-investigation.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-09 21:28:32 +02:00

283 lines
12 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using DatReaderWriter;
using DatReaderWriter.DBObjs;
using DatReaderWriter.Options;
using Xunit;
namespace AcDream.Core.Tests.Conformance;
/// <summary>
/// Dat-reader concurrency stress apparatus (dat-race investigation 2026-06-09).
///
/// acdream reads ONE shared DatCollection from four thread populations (render
/// thread, streamer worker, mesh-decode pool, audio) — see the GameWindow._datLock
/// vs DatDatabaseWrapper._lock split. Intermittent in-game symptoms (white cottage
/// walls = silently dropped texture batches; AccessViolation crash reports in
/// MemoryMappedBlockAllocator.ReadBlock) were attributed to the library not being
/// thread-safe. A line-level audit of Chorizite.DatReaderWriter 2.1.7 found the
/// READ path memory-safe for read-only dats (ReadBlock keeps all cursor state in
/// locals over a stable mmap view; the BTree LRU node cache locks internally;
/// caches are ConcurrentDictionary). This test settles the question empirically:
///
/// Phase 1 (raw): TryGetFileBytes — exercises DatBTreeReaderWriter.TryGetFile +
/// MemoryMappedBlockAllocator.ReadBlock + Decompress with a fresh output array
/// per call (no caching at any layer), so every call re-walks the real disk path.
///
/// Phase 2 (typed): TryGet&lt;T&gt; on a FileCachingStrategy.Never collection —
/// adds the ArrayPool rent/return, DatBinReader, ObjectFactory and Unpack layers
/// (the full production read path ObjectMeshManager uses).
///
/// Golden fingerprints are taken single-threaded, then the same id set is hammered
/// from many threads in shuffled order. ANY flip of the success flag or fingerprint
/// under concurrency reproduces the in-game corruption class deterministically.
/// If this test is stably green over millions of reads, concurrent same-instance
/// READS are exonerated and the in-game symptoms must come from lifecycle bugs
/// (e.g. dispose-during-read at teardown) or layers above the dat reader.
/// </summary>
[Trait("Category", "Conformance")]
public class DatConcurrencyStressTests
{
/// <summary>Number of dedicated threads HammerAndCollect starts and releases together.</summary>
private const int HammerThreads = 8;
/// <summary>Number of full, freshly shuffled passes each hammer thread makes over the id set.</summary>
private const int LoopsPerThread = 25;
/// <summary>One file to read: the database it is read from plus its file id.</summary>
private sealed record FileRef(DatFileSource Source, uint Id);
/// <summary>Selects which database of the DatCollection a FileRef is read from (resolved by Db).</summary>
private enum DatFileSource { Cell, Portal, HighRes }
/// <summary>
/// Outcome of one raw read as produced by ReadRaw: success flag, payload length and
/// FNV hash of the payload (false/0/0 when the read fails). Record equality is what
/// the raw test uses to compare concurrent reads against the single-threaded pass.
/// </summary>
private sealed record Golden(bool Ok, int Length, ulong Fnv);
/// <summary>
/// Raw layer: fingerprints every file of the id set single-threaded through
/// ReadRaw, then lets the hammer threads re-read the whole set concurrently and
/// fails if any concurrent result differs from its single-threaded baseline.
/// Returns without asserting anything when no dat directory can be resolved.
/// </summary>
[Fact]
public void ConcurrentRawReads_MatchSingleThreadedGolden()
{
    if (ConformanceDats.ResolveDatDir() is not { } datDir)
        return; // dats absent (CI) — skip, matching suite convention

    using var dats = new DatCollection(datDir, DatAccessType.Read);
    var idSet = BuildIdSet(dats);
    Assert.True(idSet.Count > 500, $"id set unexpectedly small ({idSet.Count}) — fixture assumptions broke");

    // Baseline: one uncontended raw read per file, taken before any thread starts.
    var expectedByRef = new Dictionary<FileRef, Golden>(idSet.Count);
    foreach (var fileRef in idSet)
        expectedByRef[fileRef] = ReadRaw(dats, fileRef);

    // Probe run by every hammer thread for every file: null means "matches baseline",
    // anything else is a human-readable description of the mismatch.
    string? Probe(FileRef fileRef)
    {
        Golden expected = expectedByRef[fileRef];
        Golden actual = ReadRaw(dats, fileRef);
        if (expected == actual)
            return null;
        return $"{fileRef.Source} 0x{fileRef.Id:X8}: golden=({expected.Ok},{expected.Length},{expected.Fnv:X16}) got=({actual.Ok},{actual.Length},{actual.Fnv:X16})";
    }

    // Every thread re-reads the FULL set in its own shuffled order.
    var anomalies = HammerAndCollect(idSet, Probe);

    Assert.True(anomalies.IsEmpty,
        $"{anomalies.Count} concurrent raw-read anomalies. First: {string.Join(" | ", anomalies.Take(10))}");
}
/// <summary>
/// Typed layer: fingerprints every file of the id set single-threaded through
/// ReadTypedFingerprint on a collection opened with FileCachingStrategy.Never,
/// then lets the hammer threads repeat the same typed reads concurrently and fails
/// if any fingerprint differs from its single-threaded baseline.
/// Returns without asserting anything when no dat directory can be resolved.
/// </summary>
[Fact]
public void ConcurrentTypedReads_MatchSingleThreadedGolden()
{
    var datDir = ConformanceDats.ResolveDatDir();
    if (datDir is null) return; // dats absent (CI) — skip
    // FileCachingStrategy.Never: every TryGet<T> re-reads + re-unpacks from disk,
    // matching the worst-case production path and keeping the hammer honest
    // (OnDemand would serve all post-first reads from the ConcurrentDictionary).
    using var dats = new DatCollection(new DatCollectionOptions
    {
        DatDirectory = datDir,
        AccessType = DatAccessType.Read,
        FileCachingStrategy = FileCachingStrategy.Never,
    });
    var refs = BuildIdSet(dats);
    // Same sanity floor as the raw test: without it an empty or tiny id set would
    // make the hammer a no-op and this test would pass without exercising anything.
    Assert.True(refs.Count > 500, $"id set unexpectedly small ({refs.Count}) — fixture assumptions broke");
    // Golden pass: single-threaded typed reads.
    var golden = new Dictionary<FileRef, ulong>(refs.Count);
    foreach (var r in refs)
        golden[r] = ReadTypedFingerprint(dats, r);
    // Hammer: every thread repeats the typed read for the full set; a non-null
    // return value describes a fingerprint that no longer matches the golden pass.
    var anomalies = HammerAndCollect(refs, r =>
    {
        var got = ReadTypedFingerprint(dats, r);
        return golden[r] == got
            ? null
            : $"{r.Source} 0x{r.Id:X8}: golden=0x{golden[r]:X16} got=0x{got:X16}";
    });
    Assert.True(anomalies.IsEmpty,
        $"{anomalies.Count} concurrent typed-read anomalies. First: {string.Join(" | ", anomalies.Take(10))}");
}
// ---- hammer scaffolding -------------------------------------------------
/// <summary>
/// Runs <paramref name="probe"/> against every entry of <paramref name="refs"/> from
/// HammerThreads dedicated threads, LoopsPerThread passes per thread, reshuffling the
/// visit order before each pass with a per-thread deterministic seed.
/// </summary>
/// <param name="refs">The files to probe; each thread works on its own copy of the list.</param>
/// <param name="probe">
/// Called concurrently from all hammer threads. Returns null when the read matched,
/// otherwise a description of the anomaly. An exception thrown by the probe is recorded
/// as an anomaly for that file instead of escaping the thread.
/// </param>
/// <returns>
/// The collected anomaly descriptions (empty when every probe matched). Threads stop
/// probing once more than 50 anomalies have been gathered.
/// </returns>
/// <remarks>
/// Fails the calling test with "hammer thread did not finish in time" if any thread
/// does not finish within 4 minutes; before failing, the remaining threads are told to
/// stop and given a short grace period so they are not still reading when the caller
/// disposes the dat collection.
/// </remarks>
private static ConcurrentBag<string> HammerAndCollect(
    IReadOnlyList<FileRef> refs, Func<FileRef, string?> probe)
{
    var anomalies = new ConcurrentBag<string>();
    var threads = new List<Thread>(HammerThreads);
    using var start = new ManualResetEventSlim(false);
    using var stop = new CancellationTokenSource();
    for (int t = 0; t < HammerThreads; t++)
    {
        int seed = 7919 * (t + 1); // deterministic per-thread shuffle
        var thread = new Thread(() =>
        {
            var order = refs.ToArray();
            var rng = new Random(seed);
            start.Wait();
            for (int loop = 0; loop < LoopsPerThread; loop++)
            {
                // Fisher-Yates so threads disagree about visit order — maximizes
                // simultaneous different-file + same-file overlap.
                for (int i = order.Length - 1; i > 0; i--)
                {
                    int j = rng.Next(i + 1);
                    (order[i], order[j]) = (order[j], order[i]);
                }
                foreach (var r in order)
                {
                    if (stop.IsCancellationRequested) return; // join timed out — wind down
                    if (anomalies.Count > 50) return; // enough evidence
                    try
                    {
                        var a = probe(r);
                        if (a is not null) anomalies.Add(a);
                    }
                    catch (Exception ex)
                    {
                        // An exception escaping a raw Thread would terminate the test
                        // host; a reader that throws under contention is itself an
                        // anomaly, so report it like any other mismatch.
                        anomalies.Add($"{r.Source} 0x{r.Id:X8}: probe threw {ex.GetType().Name}: {ex.Message}");
                    }
                }
            }
        })
        { IsBackground = true, Name = $"dat-hammer-{t}" };
        thread.Start();
        threads.Add(thread);
    }
    start.Set(); // release all threads at once
    bool finishedInTime = true;
    foreach (var th in threads)
    {
        if (finishedInTime && !th.Join(TimeSpan.FromMinutes(4)))
        {
            // First timeout: ask every worker to stop so none of them is still
            // reading once the assertion below unwinds into the caller's dispose.
            finishedInTime = false;
            stop.Cancel();
        }
        if (!finishedInTime)
            th.Join(TimeSpan.FromSeconds(30)); // grace period; a truly stuck thread is abandoned
    }
    Assert.True(finishedInTime, "hammer thread did not finish in time");
    return anomalies;
}
/// <summary>
/// Assembles the list of files both tests hammer, in three groups appended in this
/// order: up to 2500 cell-dat ids enumerated from the 0xA8000000–0xABFFFFFF range,
/// the portal-dat ids reached by walking Environment → Surface → SurfaceTexture →
/// RenderSurface from up to 400 of those cells, and the same RenderSurface ids again
/// as highres-dat probes.
/// </summary>
private static List<FileRef> BuildIdSet(DatCollection dats)
{
    // Sets de-duplicate ids that several cells share.
    var portalIds = new HashSet<uint>();
    var highResIds = new HashSet<uint>();

    // Take ids straight from the cell dat's file table around the Holtburg region —
    // heightmaps (xxFFFF), LandBlockInfos (xxFFFE) and EnvCells (xx01xx+) — rather
    // than guessing per-landblock counts (rural blocks have NumCells=0).
    var cellIds = dats.Cell.Tree.GetFilesInRange(0xA8000000u, 0xABFFFFFFu)
        .Select(entry => entry.Id)
        .Take(2500)
        .ToList();

    // Follows one surface down to its first render surface, recording every portal
    // id on the way. Stops early at any link that is missing, zero, or already seen.
    void FollowSurfaceChain(uint surfaceId)
    {
        if (!portalIds.Add(surfaceId))
            return;
        if (!dats.Portal.TryGet<Surface>(surfaceId, out var surface))
            return;

        uint surfaceTextureId = (uint)surface.OrigTextureId;
        if (surfaceTextureId == 0 || !portalIds.Add(surfaceTextureId))
            return;

        if (!dats.Portal.TryGet<SurfaceTexture>(surfaceTextureId, out var surfaceTexture)
            || surfaceTexture.Textures.Count == 0)
            return;

        uint renderSurfaceId = (uint)surfaceTexture.Textures[0];
        portalIds.Add(renderSurfaceId);
        highResIds.Add(renderSurfaceId); // texture path probes highres too
    }

    // Sample the first 400 ids whose low word marks them as EnvCells — the texture
    // chain hanging off these is exactly where the white-walls symptom lives.
    var sampledEnvCellIds = cellIds
        .Where(id => (id & 0xFFFFu) is >= 0x0100 and < 0xFF00)
        .Take(400);
    foreach (var envCellId in sampledEnvCellIds)
    {
        if (!dats.Cell.TryGet<EnvCell>(envCellId, out var envCell))
            continue;

        portalIds.Add(0x0D000000u | envCell.EnvironmentId);
        foreach (var rawSurface in envCell.Surfaces)
            FollowSurfaceChain(0x08000000u | rawSurface);
    }

    return cellIds.Select(id => new FileRef(DatFileSource.Cell, id))
        .Concat(portalIds.Select(id => new FileRef(DatFileSource.Portal, id)))
        .Concat(highResIds.Select(id => new FileRef(DatFileSource.HighRes, id)))
        .ToList();
}
/// <summary>
/// Reads the raw bytes of <paramref name="r"/> from its database and summarizes
/// them as a Golden: (true, length, FNV hash) on success, (false, 0, 0) when
/// TryGetFileBytes reports failure.
/// </summary>
private static Golden ReadRaw(DatCollection dats, FileRef r)
{
    return Db(dats, r.Source).TryGetFileBytes(r.Id, out byte[] payload)
        ? new Golden(true, payload.Length, Fnv(payload))
        : new Golden(false, 0, 0);
}
/// <summary>
/// Performs a typed TryGet for <paramref name="r"/> and condenses a few fields of
/// the unpacked object into a 64-bit fingerprint via Mix. The object type is chosen
/// from the id: for cell refs by the low 16 bits (0xFFFF LandBlock, 0xFFFE
/// LandBlockInfo, anything else EnvCell); for all other refs by the top byte
/// (0x0D Environment, 0x08 Surface, 0x05 SurfaceTexture, 0x06 RenderSurface).
/// A failed TryGet yields 0 so a success-flag flip always changes the result; an
/// unrecognized top byte yields the constant 0xFEED.
/// </summary>
/// <remarks>
/// NOTE(review): the folded fields are mostly counts and lengths, so this fingerprint
/// would not change if payload contents differed while sizes stayed the same.
/// </remarks>
private static ulong ReadTypedFingerprint(DatCollection dats, FileRef r)
{
    var db = Db(dats, r.Source);

    if (r.Source != DatFileSource.Cell)
    {
        // Portal and highres refs: the top byte of the id names the object type.
        switch (r.Id >> 24)
        {
            case 0x0D:
                if (!db.TryGet<DatReaderWriter.DBObjs.Environment>(r.Id, out var environment))
                    return 0;
                return Mix(4, (ulong)environment.Cells.Count);

            case 0x08:
                if (!db.TryGet<Surface>(r.Id, out var surface))
                    return 0;
                return Mix(5, (ulong)surface.Type << 32 | (uint)surface.OrigTextureId);

            case 0x05:
                if (!db.TryGet<SurfaceTexture>(r.Id, out var surfaceTexture))
                    return 0;
                return Mix(6, (ulong)surfaceTexture.Textures.Count << 32
                    | (surfaceTexture.Textures.Count > 0 ? (uint)surfaceTexture.Textures[0] : 0u));

            case 0x06:
                if (!db.TryGet<RenderSurface>(r.Id, out var renderSurface))
                    return 0;
                return Mix(7, (ulong)renderSurface.Width << 48 | (ulong)renderSurface.Height << 32
                    | (uint)renderSurface.SourceData.Length);

            default:
                return 0xFEEDu; // unexpected namespace — constant so it can't flap
        }
    }

    // Cell refs: the low word of the id names the object type.
    uint lowWord = r.Id & 0xFFFFu;
    if (lowWord == 0xFFFFu)
    {
        if (!db.TryGet<LandBlock>(r.Id, out var landBlock))
            return 0;
        return Mix(1, (ulong)landBlock.Height.Length);
    }

    if (lowWord == 0xFFFEu)
    {
        if (!db.TryGet<LandBlockInfo>(r.Id, out var landBlockInfo))
            return 0;
        return Mix(2, landBlockInfo.NumCells);
    }

    if (!db.TryGet<EnvCell>(r.Id, out var envCell))
        return 0;
    return Mix(3, (ulong)envCell.CellPortals.Count << 32
        | (uint)envCell.Surfaces.Count << 16
        | envCell.EnvironmentId);
}
/// <summary>
/// Maps a DatFileSource to the matching database of <paramref name="dats"/>:
/// Cell and Portal map to their namesakes, every other value to HighRes.
/// </summary>
private static DatDatabase Db(DatCollection dats, DatFileSource source)
{
    switch (source)
    {
        case DatFileSource.Cell:
            return dats.Cell;
        case DatFileSource.Portal:
            return dats.Portal;
        default:
            return dats.HighRes;
    }
}
/// <summary>
/// Computes the 64-bit FNV-1a hash of <paramref name="bytes"/>: starting from the
/// offset basis, each byte is XORed in and the result multiplied by the FNV prime.
/// </summary>
/// <param name="bytes">The payload to hash; an empty array yields the offset basis.</param>
/// <returns>The 64-bit hash value.</returns>
private static ulong Fnv(byte[] bytes)
{
    const ulong OffsetBasis = 14695981039346656037UL;
    const ulong Prime = 1099511628211UL;

    ulong hash = OffsetBasis;
    // The multiply is meant to wrap modulo 2^64. Saying so explicitly keeps the hash
    // working even if the project is compiled with overflow checking enabled, where
    // the bare multiplication would throw OverflowException instead.
    unchecked
    {
        foreach (byte b in bytes)
            hash = (hash ^ b) * Prime;
    }
    return hash;
}
/// <summary>
/// Combines a type tag and a field value into one fingerprint: the tag is shifted
/// into the top byte, then XORed with the value and a fixed salt constant.
/// </summary>
private static ulong Mix(ulong tag, ulong value)
{
    const ulong Salt = 0xA5A5_5A5A_0000_0000UL;
    ulong tagBits = tag << 56;
    return tagBits ^ value ^ Salt;
}
}