Settles the long-standing lore that DatCollection is not thread-safe for reads,
for Chorizite.DatReaderWriter 2.1.7: replays acdream's real four-population
access pattern (render / streamer / decode-pool / raw) against the live dats —
golden FNV-1a fingerprints taken single-threaded, then 8 threads x 25 shuffled
passes over ~2900 files spanning the cell heightmap/LBI/EnvCell set and the
portal texture chain (Environment -> Surface -> SurfaceTexture -> RenderSurface
incl. highres probes). Two layers: raw TryGetFileBytes (BTree + ReadBlock, no
caching) and typed TryGet with FileCachingStrategy.Never (full production
unpack path: ArrayPool + DatBinReader + ObjectFactory).
Result: ~1.1M concurrent reads, ZERO anomalies — byte-identical to golden.
Matches the line-level audit of the release/2.1.7 source (ReadBlock keeps all
cursor state in locals over a stable read-only mmap view; locked LRU BTree
node cache; ConcurrentDictionary file cache; fresh DatBinReader per call).
The real crash bug was dispose-during-read at teardown (fixed in 8fadf77).
Keep this as the regression guard for any future dat-reader version bump.
Skips cleanly when the dats are absent (CI), matching suite convention.
Full evidence: docs/research/2026-06-09-dat-reader-thread-safety-investigation.md
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
283 lines
12 KiB
C#
283 lines
12 KiB
C#
using System;
|
||
using System.Collections.Concurrent;
|
||
using System.Collections.Generic;
|
||
using System.Linq;
|
||
using System.Threading;
|
||
using DatReaderWriter;
|
||
using DatReaderWriter.DBObjs;
|
||
using DatReaderWriter.Options;
|
||
using Xunit;
|
||
|
||
namespace AcDream.Core.Tests.Conformance;
|
||
|
||
/// <summary>
/// Dat-reader concurrency stress apparatus (dat-race investigation 2026-06-09).
///
/// acdream reads ONE shared DatCollection from four thread populations (render
/// thread, streamer worker, mesh-decode pool, audio) — see the GameWindow._datLock
/// vs DatDatabaseWrapper._lock split. Intermittent in-game symptoms (white cottage
/// walls = silently dropped texture batches; AccessViolation crash reports in
/// MemoryMappedBlockAllocator.ReadBlock) were attributed to the library not being
/// thread-safe. A line-level audit of Chorizite.DatReaderWriter 2.1.7 found the
/// READ path memory-safe for read-only dats (ReadBlock keeps all cursor state in
/// locals over a stable mmap view; the BTree LRU node cache locks internally;
/// caches are ConcurrentDictionary). This test settles the question empirically:
///
/// Phase 1 (raw): TryGetFileBytes — exercises DatBTreeReaderWriter.TryGetFile +
/// MemoryMappedBlockAllocator.ReadBlock + Decompress with a fresh output array
/// per call (no caching at any layer), so every call re-walks the real disk path.
///
/// Phase 2 (typed): TryGet&lt;T&gt; on a FileCachingStrategy.Never collection —
/// adds the ArrayPool rent/return, DatBinReader, ObjectFactory and Unpack layers
/// (the full production read path ObjectMeshManager uses).
///
/// Golden fingerprints are taken single-threaded, then the same id set is hammered
/// from many threads in shuffled order. ANY flip of the success flag or fingerprint
/// under concurrency — or any exception thrown by a read — is reported as an anomaly.
/// If this test is stably green over millions of reads, concurrent same-instance
/// READS are exonerated and the in-game symptoms must come from lifecycle bugs
/// (e.g. dispose-during-read at teardown) or layers above the dat reader.
/// </summary>
[Trait("Category", "Conformance")]
public class DatConcurrencyStressTests
{
    /// <summary>Number of concurrent hammer threads.</summary>
    private const int HammerThreads = 8;

    /// <summary>Full shuffled passes over the id set performed by each hammer thread.</summary>
    private const int LoopsPerThread = 25;

    /// <summary>Hammer threads stop early once more than this many anomalies were collected.</summary>
    private const int MaxAnomalies = 50;

    /// <summary>The id set must contain more than this many entries for the fixture to be trusted.</summary>
    private const int MinExpectedRefs = 500;

    /// <summary>How long the test waits on each hammer thread before declaring a timeout.</summary>
    private static readonly TimeSpan HammerJoinTimeout = TimeSpan.FromMinutes(4);

    /// <summary>After a timeout, how long each thread gets to observe the stop flag and exit.</summary>
    private static readonly TimeSpan StopGracePeriod = TimeSpan.FromSeconds(30);

    /// <summary>One file to read: which dat it lives in and its file id.</summary>
    private sealed record FileRef(DatFileSource Source, uint Id);

    /// <summary>Selects which database of the collection a <see cref="FileRef"/> targets.</summary>
    private enum DatFileSource { Cell, Portal, HighRes }

    /// <summary>Raw-read fingerprint: success flag, byte length and FNV-1a hash of the bytes.</summary>
    private sealed record Golden(bool Ok, int Length, ulong Fnv);

    /// <summary>
    /// Phase 1: concurrent raw <c>TryGetFileBytes</c> reads must reproduce the
    /// single-threaded golden fingerprint for every file.
    /// </summary>
    [Fact]
    public void ConcurrentRawReads_MatchSingleThreadedGolden()
    {
        var datDir = ConformanceDats.ResolveDatDir();
        if (datDir is null) return; // dats absent (CI) — skip, matching suite convention

        using var dats = new DatCollection(datDir, DatAccessType.Read);
        var refs = BuildIdSet(dats);
        AssertIdSetPlausible(refs);

        // Golden pass: single-threaded raw reads.
        var golden = new Dictionary<FileRef, Golden>(refs.Count);
        foreach (var r in refs)
            golden[r] = ReadRaw(dats, r);

        // Hammer: every thread re-reads the FULL set in its own shuffled order.
        // The dictionary is only read from here on, so concurrent lookups are safe.
        var anomalies = HammerAndCollect(refs, r =>
        {
            var want = golden[r];
            var got = ReadRaw(dats, r);
            return want == got
                ? null
                : $"{r.Source} 0x{r.Id:X8}: golden=({want.Ok},{want.Length},{want.Fnv:X16}) got=({got.Ok},{got.Length},{got.Fnv:X16})";
        });

        Assert.True(anomalies.IsEmpty,
            $"{anomalies.Count} concurrent raw-read anomalies. First: {string.Join(" | ", anomalies.Take(10))}");
    }

    /// <summary>
    /// Phase 2: concurrent typed <c>TryGet</c> reads (no file caching) must reproduce
    /// the single-threaded golden fingerprint for every file.
    /// </summary>
    [Fact]
    public void ConcurrentTypedReads_MatchSingleThreadedGolden()
    {
        var datDir = ConformanceDats.ResolveDatDir();
        if (datDir is null) return; // dats absent (CI) — skip

        // FileCachingStrategy.Never: every TryGet<T> re-reads + re-unpacks from disk,
        // matching the worst-case production path and keeping the hammer honest
        // (OnDemand would serve all post-first reads from the ConcurrentDictionary).
        using var dats = new DatCollection(new DatCollectionOptions
        {
            DatDirectory = datDir,
            AccessType = DatAccessType.Read,
            FileCachingStrategy = FileCachingStrategy.Never,
        });
        var refs = BuildIdSet(dats);

        // Same guard as the raw test: an empty/tiny id set would pass vacuously.
        AssertIdSetPlausible(refs);

        var golden = new Dictionary<FileRef, ulong>(refs.Count);
        foreach (var r in refs)
            golden[r] = ReadTypedFingerprint(dats, r);

        var anomalies = HammerAndCollect(refs, r =>
        {
            var want = golden[r];
            var got = ReadTypedFingerprint(dats, r);
            return want == got
                ? null
                : $"{r.Source} 0x{r.Id:X8}: golden=0x{want:X16} got=0x{got:X16}";
        });

        Assert.True(anomalies.IsEmpty,
            $"{anomalies.Count} concurrent typed-read anomalies. First: {string.Join(" | ", anomalies.Take(10))}");
    }

    /// <summary>
    /// Fails the test when the id set is too small to be a meaningful workload,
    /// which indicates the assumptions <see cref="BuildIdSet"/> makes about the dats broke.
    /// </summary>
    private static void AssertIdSetPlausible(IReadOnlyCollection<FileRef> refs) =>
        Assert.True(refs.Count > MinExpectedRefs,
            $"id set unexpectedly small ({refs.Count}) — fixture assumptions broke");

    // ---- hammer scaffolding -------------------------------------------------

    /// <summary>
    /// Runs <paramref name="probe"/> over every entry of <paramref name="refs"/> from
    /// <see cref="HammerThreads"/> threads, <see cref="LoopsPerThread"/> shuffled passes
    /// each, and collects every non-null result as an anomaly.
    /// </summary>
    /// <param name="refs">Files to probe; each thread works on its own copy.</param>
    /// <param name="probe">
    /// Returns <c>null</c> when the read matched, otherwise a description of the mismatch.
    /// An exception thrown by the probe is recorded as an anomaly rather than left
    /// unhandled on the worker thread (which would terminate the test process).
    /// </param>
    /// <returns>The anomalies collected across all threads (empty when clean).</returns>
    private static ConcurrentBag<string> HammerAndCollect(
        IReadOnlyList<FileRef> refs, Func<FileRef, string?> probe)
    {
        var anomalies = new ConcurrentBag<string>();
        var threads = new List<Thread>(HammerThreads);
        bool stopRequested = false; // set on timeout so stragglers exit before the caller disposes the dats
        using var start = new ManualResetEventSlim(false);

        for (int t = 0; t < HammerThreads; t++)
        {
            int seed = 7919 * (t + 1); // deterministic per-thread shuffle
            var thread = new Thread(() =>
            {
                var order = refs.ToArray();
                var rng = new Random(seed);
                start.Wait();
                for (int loop = 0; loop < LoopsPerThread; loop++)
                {
                    // Fisher–Yates so threads disagree about visit order — maximizes
                    // simultaneous different-file + same-file overlap.
                    for (int i = order.Length - 1; i > 0; i--)
                    {
                        int j = rng.Next(i + 1);
                        (order[i], order[j]) = (order[j], order[i]);
                    }

                    foreach (var r in order)
                    {
                        if (Volatile.Read(ref stopRequested)) return;   // test is tearing down
                        if (anomalies.Count > MaxAnomalies) return;      // enough evidence

                        string? anomaly;
                        try
                        {
                            anomaly = probe(r);
                        }
                        catch (Exception ex)
                        {
                            // A read that throws under concurrency is itself the evidence
                            // this test exists to capture; report it with the offending id.
                            // (Fatal corrupted-state faults such as AccessViolation cannot
                            // be caught and will still take the process down.)
                            anomaly = $"{r.Source} 0x{r.Id:X8}: probe threw {ex.GetType().Name}: {ex.Message}";
                        }

                        if (anomaly is not null) anomalies.Add(anomaly);
                    }
                }
            })
            { IsBackground = true, Name = $"dat-hammer-{t}" };
            thread.Start();
            threads.Add(thread);
        }

        start.Set(); // release all threads at once

        bool timedOut = false;
        foreach (var th in threads)
        {
            if (th.Join(HammerJoinTimeout)) continue;
            timedOut = true;
            break;
        }

        if (timedOut)
        {
            // Failing right away would let the caller dispose the DatCollection while
            // hammer threads are still mid-read. Ask them to stop and give them a
            // bounded chance to get out first.
            Volatile.Write(ref stopRequested, true);
            foreach (var th in threads)
                th.Join(StopGracePeriod);
        }

        Assert.True(!timedOut, "hammer thread did not finish in time");
        return anomalies;
    }

    /// <summary>
    /// Mirrors the real client's id mix: Holtburg + neighbor landblocks (cell dat
    /// heightmaps, LandBlockInfos, EnvCells) plus the portal-dat chain those cells
    /// reference (Environments, Surfaces, SurfaceTextures, RenderSurfaces) and the
    /// highres-dat RenderSurface probes the texture path makes.
    /// </summary>
    /// <param name="dats">Open collection used both to enumerate ids and to walk the chain.</param>
    /// <returns>Cell refs first, then portal refs, then highres refs.</returns>
    private static List<FileRef> BuildIdSet(DatCollection dats)
    {
        var refs = new List<FileRef>();
        var portalIds = new HashSet<uint>();
        var highResIds = new HashSet<uint>();

        // Enumerate the cell dat's real file table around the Holtburg region —
        // heightmaps (xxFFFF), LandBlockInfos (xxFFFE) and EnvCells (xx01xx+) —
        // instead of guessing per-landblock counts (rural blocks have NumCells=0).
        var cellIds = dats.Cell.Tree.GetFilesInRange(0xA8000000u, 0xABFFFFFFu)
            .Select(f => f.Id)
            .Take(2500)
            .ToList();
        refs.AddRange(cellIds.Select(id => new FileRef(DatFileSource.Cell, id)));

        // Walk the portal-dat texture chain (Environment → Surface → SurfaceTexture
        // → RenderSurface) for a sample of those EnvCells — the exact chain the
        // white-walls symptom lives on.
        int chained = 0;
        foreach (var envCellId in cellIds.Where(id => (id & 0xFFFFu) is >= 0x0100 and < 0xFF00))
        {
            // Counts attempted EnvCells (including ones that fail to load), capped at 400.
            if (chained++ >= 400) break;
            if (!dats.Cell.TryGet<EnvCell>(envCellId, out var envCell))
                continue;

            portalIds.Add(0x0D000000u | envCell.EnvironmentId);
            foreach (var rawSurface in envCell.Surfaces)
            {
                uint surfaceId = 0x08000000u | rawSurface;
                if (!portalIds.Add(surfaceId))
                    continue; // already walked this surface's chain
                if (!dats.Portal.TryGet<Surface>(surfaceId, out var surface)
                    || (uint)surface.OrigTextureId == 0)
                    continue;

                uint surfaceTextureId = (uint)surface.OrigTextureId;
                if (!portalIds.Add(surfaceTextureId))
                    continue;
                if (dats.Portal.TryGet<SurfaceTexture>(surfaceTextureId, out var st)
                    && st.Textures.Count > 0)
                {
                    uint renderSurfaceId = (uint)st.Textures[0];
                    portalIds.Add(renderSurfaceId);
                    highResIds.Add(renderSurfaceId); // texture path probes highres too
                }
            }
        }

        refs.AddRange(portalIds.Select(id => new FileRef(DatFileSource.Portal, id)));
        refs.AddRange(highResIds.Select(id => new FileRef(DatFileSource.HighRes, id)));
        return refs;
    }

    /// <summary>
    /// Reads the raw bytes of <paramref name="r"/> and fingerprints them. A failed
    /// lookup yields <c>(false, 0, 0)</c> so success-flag flips are visible.
    /// </summary>
    private static Golden ReadRaw(DatCollection dats, FileRef r)
    {
        var db = Db(dats, r.Source);
        if (!db.TryGetFileBytes(r.Id, out byte[] bytes))
            return new Golden(false, 0, 0);
        return new Golden(true, bytes.Length, Fnv(bytes));
    }

    /// <summary>
    /// Typed read through the full production unpack path; the fingerprint folds
    /// the stable identity-bearing fields each consumer relies on. ok=false maps
    /// to 0 so success-flag flips always show.
    /// </summary>
    /// <remarks>
    /// Cell-dat ids are dispatched on their low 16 bits (0xFFFF = LandBlock,
    /// 0xFFFE = LandBlockInfo, anything else = EnvCell); all other ids are
    /// dispatched on their top byte.
    /// </remarks>
    private static ulong ReadTypedFingerprint(DatCollection dats, FileRef r)
    {
        var db = Db(dats, r.Source);
        if (r.Source == DatFileSource.Cell)
        {
            if ((r.Id & 0xFFFFu) == 0xFFFFu)
                return db.TryGet<LandBlock>(r.Id, out var lbk)
                    ? Mix(1, (ulong)lbk.Height.Length) : 0;
            if ((r.Id & 0xFFFFu) == 0xFFFEu)
                return db.TryGet<LandBlockInfo>(r.Id, out var lbi)
                    ? Mix(2, lbi.NumCells) : 0;
            return db.TryGet<EnvCell>(r.Id, out var cell)
                ? Mix(3, ((ulong)cell.CellPortals.Count << 32)
                    | ((uint)cell.Surfaces.Count << 16)
                    | cell.EnvironmentId) : 0;
        }

        return (r.Id >> 24) switch
        {
            0x0D => db.TryGet<DatReaderWriter.DBObjs.Environment>(r.Id, out var env)
                ? Mix(4, (ulong)env.Cells.Count) : 0,
            0x08 => db.TryGet<Surface>(r.Id, out var s)
                ? Mix(5, ((ulong)s.Type << 32) | (uint)s.OrigTextureId) : 0,
            0x05 => db.TryGet<SurfaceTexture>(r.Id, out var st)
                ? Mix(6, ((ulong)st.Textures.Count << 32)
                    | (st.Textures.Count > 0 ? (uint)st.Textures[0] : 0u)) : 0,
            0x06 => db.TryGet<RenderSurface>(r.Id, out var rs)
                ? Mix(7, ((ulong)rs.Width << 48) | ((ulong)rs.Height << 32)
                    | (uint)rs.SourceData.Length) : 0,
            _ => 0xFEEDu, // unexpected namespace — constant so it can't flap
        };
    }

    /// <summary>Maps a <see cref="DatFileSource"/> to the matching database of the collection.</summary>
    private static DatDatabase Db(DatCollection dats, DatFileSource source) => source switch
    {
        DatFileSource.Cell => dats.Cell,
        DatFileSource.Portal => dats.Portal,
        _ => dats.HighRes,
    };

    /// <summary>64-bit FNV-1a hash of <paramref name="bytes"/> (wrapping multiplication).</summary>
    private static ulong Fnv(byte[] bytes)
    {
        ulong h = 14695981039346656037UL; // FNV-1a 64-bit offset basis
        foreach (var b in bytes)
        {
            h ^= b;
            h *= 1099511628211UL;          // FNV 64-bit prime
        }
        return h;
    }

    /// <summary>
    /// Combines a per-type tag (placed in the top byte) with the packed field value
    /// and a fixed salt.
    /// </summary>
    private static ulong Mix(ulong tag, ulong value) =>
        (tag << 56) ^ value ^ 0xA5A5_5A5A_0000_0000UL;
}
|