From b3920d83f69f2be5b4dc43c0579724b50bd14042 Mon Sep 17 00:00:00 2001 From: Erik Date: Tue, 9 Jun 2026 21:28:32 +0200 Subject: [PATCH] =?UTF-8?q?test(conformance):=20dat-reader=20concurrency?= =?UTF-8?q?=20hammer=20=E2=80=94=20concurrent=20READS=20exonerated?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Settles the long-standing lore that DatCollection is not thread-safe for reads, for Chorizite.DatReaderWriter 2.1.7: replays acdream's real four-population access pattern (render / streamer / decode-pool / raw) against the live dats — golden FNV-1a fingerprints taken single-threaded, then 8 threads x 25 shuffled passes over ~2900 files spanning the cell heightmap/LBI/EnvCell set and the portal texture chain (Environment -> Surface -> SurfaceTexture -> RenderSurface incl. highres probes). Two layers: raw TryGetFileBytes (BTree + ReadBlock, no caching) and typed TryGet with FileCachingStrategy.Never (full production unpack path: ArrayPool + DatBinReader + ObjectFactory). Result: ~1.1M concurrent reads, ZERO anomalies — byte-identical to golden. Matches the line-level audit of the release/2.1.7 source (ReadBlock keeps all cursor state in locals over a stable read-only mmap view; locked LRU BTree node cache; ConcurrentDictionary file cache; fresh DatBinReader per call). The real crash bug was dispose-during-read at teardown (fixed in 8fadf77). Keep this as the regression guard for any future dat-reader version bump. Skips cleanly when the dats are absent (CI), matching suite convention. 
Full evidence: docs/research/2026-06-09-dat-reader-thread-safety-investigation.md

Co-Authored-By: Claude Fable 5
---
 .../Conformance/DatConcurrencyStressTests.cs  | 326 ++++++++++++++++++
 1 file changed, 326 insertions(+)
 create mode 100644 tests/AcDream.Core.Tests/Conformance/DatConcurrencyStressTests.cs

diff --git a/tests/AcDream.Core.Tests/Conformance/DatConcurrencyStressTests.cs b/tests/AcDream.Core.Tests/Conformance/DatConcurrencyStressTests.cs
new file mode 100644
index 00000000..6ab79f9e
--- /dev/null
+++ b/tests/AcDream.Core.Tests/Conformance/DatConcurrencyStressTests.cs
@@ -0,0 +1,326 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using DatReaderWriter;
+using DatReaderWriter.DBObjs;
+using DatReaderWriter.Options;
+using Xunit;
+
+namespace AcDream.Core.Tests.Conformance;
+
+/// <summary>
+/// Dat-reader concurrency stress apparatus (dat-race investigation 2026-06-09).
+///
+/// acdream reads ONE shared DatCollection from four thread populations (render
+/// thread, streamer worker, mesh-decode pool, audio) — see the GameWindow._datLock
+/// vs DatDatabaseWrapper._lock split. Intermittent in-game symptoms (white cottage
+/// walls = silently dropped texture batches; AccessViolation crash reports in
+/// MemoryMappedBlockAllocator.ReadBlock) were attributed to the library not being
+/// thread-safe. A line-level audit of Chorizite.DatReaderWriter 2.1.7 found the
+/// READ path memory-safe for read-only dats (ReadBlock keeps all cursor state in
+/// locals over a stable mmap view; the BTree LRU node cache locks internally;
+/// caches are ConcurrentDictionary). This test settles the question empirically:
+///
+/// Phase 1 (raw): TryGetFileBytes — exercises DatBTreeReaderWriter.TryGetFile +
+/// MemoryMappedBlockAllocator.ReadBlock + Decompress with a fresh output array
+/// per call (no caching at any layer), so every call re-walks the real disk path.
+///
+/// Phase 2 (typed): TryGet&lt;T&gt; on a FileCachingStrategy.Never collection —
+/// adds the ArrayPool rent/return, DatBinReader, ObjectFactory and Unpack layers
+/// (the full production read path ObjectMeshManager uses).
+///
+/// Golden fingerprints are taken single-threaded, then the same id set is hammered
+/// from many threads in shuffled order. ANY flip of the success flag or fingerprint
+/// under concurrency reproduces the in-game corruption class deterministically.
+/// If this test is stably green over millions of reads, concurrent same-instance
+/// READS are exonerated and the in-game symptoms must come from lifecycle bugs
+/// (e.g. dispose-during-read at teardown) or layers above the dat reader.
+/// </summary>
+[Trait("Category", "Conformance")]
+public class DatConcurrencyStressTests
+{
+    /// <summary>Number of hammer threads released at the same instant.</summary>
+    private const int HammerThreads = 8;
+
+    /// <summary>Full shuffled passes each hammer thread makes over the id set.</summary>
+    private const int LoopsPerThread = 25;
+
+    /// <summary>One dat file: the database it is read from plus its file id.</summary>
+    private sealed record FileRef(DatFileSource Source, uint Id);
+
+    private enum DatFileSource { Cell, Portal, HighRes }
+
+    /// <summary>Raw-read fingerprint: success flag, byte count and FNV-1a hash.</summary>
+    private sealed record Golden(bool Ok, int Length, ulong Fnv);
+
+    /// <summary>
+    /// Phase 1: concurrent raw byte reads must equal the single-threaded golden
+    /// fingerprint for every file in the id set.
+    /// </summary>
+    [Fact]
+    public void ConcurrentRawReads_MatchSingleThreadedGolden()
+    {
+        var datDir = ConformanceDats.ResolveDatDir();
+        if (datDir is null) return; // dats absent (CI) — skip, matching suite convention
+
+        using var dats = new DatCollection(datDir, DatAccessType.Read);
+        var refs = BuildIdSet(dats);
+        Assert.True(refs.Count > 500, $"id set unexpectedly small ({refs.Count}) — fixture assumptions broke");
+
+        // Golden pass: single-threaded raw reads.
+        var golden = new Dictionary<FileRef, Golden>(refs.Count);
+        foreach (var r in refs)
+            golden[r] = ReadRaw(dats, r);
+
+        // Hammer: every thread re-reads the FULL set in its own shuffled order.
+        var anomalies = HammerAndCollect(refs, r =>
+        {
+            var got = ReadRaw(dats, r);
+            return golden[r] == got
+                ? null
+                : $"{r.Source} 0x{r.Id:X8}: golden=({golden[r].Ok},{golden[r].Length},{golden[r].Fnv:X16}) got=({got.Ok},{got.Length},{got.Fnv:X16})";
+        });
+
+        Assert.True(anomalies.IsEmpty,
+            $"{anomalies.Count} concurrent raw-read anomalies. First: {string.Join(" | ", anomalies.Take(10))}");
+    }
+
+    /// <summary>
+    /// Phase 2: concurrent typed reads through an uncached collection must equal
+    /// the single-threaded golden fingerprint for every file in the id set.
+    /// </summary>
+    [Fact]
+    public void ConcurrentTypedReads_MatchSingleThreadedGolden()
+    {
+        var datDir = ConformanceDats.ResolveDatDir();
+        if (datDir is null) return; // dats absent (CI) — skip
+
+        // FileCachingStrategy.Never: every TryGet re-reads + re-unpacks from disk,
+        // matching the worst-case production path and keeping the hammer honest
+        // (OnDemand would serve all post-first reads from the ConcurrentDictionary).
+        using var dats = new DatCollection(new DatCollectionOptions
+        {
+            DatDirectory = datDir,
+            AccessType = DatAccessType.Read,
+            FileCachingStrategy = FileCachingStrategy.Never,
+        });
+        var refs = BuildIdSet(dats);
+        // Same fixture guard as the raw test: a near-empty id set would pass vacuously.
+        Assert.True(refs.Count > 500, $"id set unexpectedly small ({refs.Count}) — fixture assumptions broke");
+
+        var golden = new Dictionary<FileRef, ulong>(refs.Count);
+        foreach (var r in refs)
+            golden[r] = ReadTypedFingerprint(dats, r);
+
+        var anomalies = HammerAndCollect(refs, r =>
+        {
+            var got = ReadTypedFingerprint(dats, r);
+            return golden[r] == got
+                ? null
+                : $"{r.Source} 0x{r.Id:X8}: golden=0x{golden[r]:X16} got=0x{got:X16}";
+        });
+
+        Assert.True(anomalies.IsEmpty,
+            $"{anomalies.Count} concurrent typed-read anomalies. First: {string.Join(" | ", anomalies.Take(10))}");
+    }
+
+    // ---- hammer scaffolding -------------------------------------------------
+
+    /// <summary>
+    /// Runs <paramref name="probe"/> over every ref from <see cref="HammerThreads"/>
+    /// threads, <see cref="LoopsPerThread"/> shuffled passes each, and returns the
+    /// non-null anomaly strings the probe produced.
+    /// </summary>
+    /// <param name="refs">Id set every thread re-reads in full on each pass.</param>
+    /// <param name="probe">Returns null when the read matched, else a description.</param>
+    /// <returns>Collected anomalies; a thread stops early once more than 50 exist.</returns>
+    private static ConcurrentBag<string> HammerAndCollect(
+        IReadOnlyList<FileRef> refs, Func<FileRef, string?> probe)
+    {
+        var anomalies = new ConcurrentBag<string>();
+        var threads = new List<Thread>();
+        using var start = new ManualResetEventSlim(false);
+
+        for (int t = 0; t < HammerThreads; t++)
+        {
+            int seed = 7919 * (t + 1); // deterministic per-thread shuffle
+            var thread = new Thread(() =>
+            {
+                var order = refs.ToArray();
+                var rng = new Random(seed);
+                start.Wait();
+                for (int loop = 0; loop < LoopsPerThread; loop++)
+                {
+                    // Fisher–Yates so threads disagree about visit order — maximizes
+                    // simultaneous different-file + same-file overlap.
+                    for (int i = order.Length - 1; i > 0; i--)
+                    {
+                        int j = rng.Next(i + 1);
+                        (order[i], order[j]) = (order[j], order[i]);
+                    }
+                    foreach (var r in order)
+                    {
+                        if (anomalies.Count > 50) return; // enough evidence
+                        string? a;
+                        try
+                        {
+                            a = probe(r);
+                        }
+                        catch (Exception ex)
+                        {
+                            // An exception escaping a dedicated thread is unhandled and
+                            // takes the test host down with it; a read that throws only
+                            // under concurrency is an anomaly, so report it as one.
+                            a = $"{r.Source} 0x{r.Id:X8}: threw {ex.GetType().Name}: {ex.Message}";
+                        }
+                        if (a is not null) anomalies.Add(a);
+                    }
+                }
+            })
+            { IsBackground = true, Name = $"dat-hammer-{t}" };
+            thread.Start();
+            threads.Add(thread);
+        }
+
+        start.Set(); // release all threads at once
+        foreach (var th in threads)
+            Assert.True(th.Join(TimeSpan.FromMinutes(4)), "hammer thread did not finish in time");
+        return anomalies;
+    }
+
+    /// <summary>
+    /// Mirrors the real client's id mix: Holtburg + neighbor landblocks (cell dat
+    /// heightmaps, LandBlockInfos, EnvCells) plus the portal-dat chain those cells
+    /// reference (Environments, Surfaces, SurfaceTextures, RenderSurfaces) and the
+    /// highres-dat RenderSurface probes the texture path makes.
+    /// </summary>
+    private static List<FileRef> BuildIdSet(DatCollection dats)
+    {
+        var refs = new List<FileRef>();
+        var portalIds = new HashSet<uint>();
+        var highResIds = new HashSet<uint>();
+
+        // Enumerate the cell dat's real file table around the Holtburg region —
+        // heightmaps (xxFFFF), LandBlockInfos (xxFFFE) and EnvCells (xx01xx+) —
+        // instead of guessing per-landblock counts (rural blocks have NumCells=0).
+        var cellIds = dats.Cell.Tree.GetFilesInRange(0xA8000000u, 0xABFFFFFFu)
+            .Select(f => f.Id)
+            .Take(2500)
+            .ToList();
+        refs.AddRange(cellIds.Select(id => new FileRef(DatFileSource.Cell, id)));
+
+        // Walk the portal-dat texture chain (Environment → Surface → SurfaceTexture
+        // → RenderSurface) for a sample of those EnvCells — the exact chain the
+        // white-walls symptom lives on.
+        int chained = 0;
+        foreach (var envCellId in cellIds.Where(id => (id & 0xFFFFu) is >= 0x0100 and < 0xFF00))
+        {
+            if (chained++ >= 400) break;
+            if (!dats.Cell.TryGet<EnvCell>(envCellId, out var envCell))
+                continue;
+
+            portalIds.Add(0x0D000000u | envCell.EnvironmentId);
+            foreach (var rawSurface in envCell.Surfaces)
+            {
+                uint surfaceId = 0x08000000u | rawSurface;
+                if (!portalIds.Add(surfaceId))
+                    continue;
+                if (!dats.Portal.TryGet<Surface>(surfaceId, out var surface)
+                    || (uint)surface.OrigTextureId == 0)
+                    continue;
+                uint surfaceTextureId = (uint)surface.OrigTextureId;
+                if (!portalIds.Add(surfaceTextureId))
+                    continue;
+                if (dats.Portal.TryGet<SurfaceTexture>(surfaceTextureId, out var st)
+                    && st.Textures.Count > 0)
+                {
+                    uint renderSurfaceId = (uint)st.Textures[0];
+                    portalIds.Add(renderSurfaceId);
+                    highResIds.Add(renderSurfaceId); // texture path probes highres too
+                }
+            }
+        }
+
+        refs.AddRange(portalIds.Select(id => new FileRef(DatFileSource.Portal, id)));
+        refs.AddRange(highResIds.Select(id => new FileRef(DatFileSource.HighRes, id)));
+        return refs;
+    }
+
+    /// <summary>
+    /// Raw read of one file; a failed lookup maps to an all-zero fingerprint so a
+    /// success-flag flip is visible in the comparison.
+    /// </summary>
+    private static Golden ReadRaw(DatCollection dats, FileRef r)
+    {
+        var db = Db(dats, r.Source);
+        if (!db.TryGetFileBytes(r.Id, out byte[] bytes))
+            return new Golden(false, 0, 0);
+        return new Golden(true, bytes.Length, Fnv(bytes));
+    }
+
+    /// <summary>
+    /// Typed read through the full production unpack path; the fingerprint folds
+    /// the stable identity-bearing fields each consumer relies on. ok=false maps
+    /// to 0 so success-flag flips always show.
+    /// </summary>
+    private static ulong ReadTypedFingerprint(DatCollection dats, FileRef r)
+    {
+        var db = Db(dats, r.Source);
+        if (r.Source == DatFileSource.Cell)
+        {
+            if ((r.Id & 0xFFFFu) == 0xFFFFu)
+                return db.TryGet<LandBlock>(r.Id, out var lbk)
+                    ? Mix(1, (ulong)lbk.Height.Length) : 0;
+            if ((r.Id & 0xFFFFu) == 0xFFFEu)
+                return db.TryGet<LandBlockInfo>(r.Id, out var lbi)
+                    ? Mix(2, lbi.NumCells) : 0;
+            return db.TryGet<EnvCell>(r.Id, out var cell)
+                ? Mix(3, (ulong)cell.CellPortals.Count << 32
+                    | (uint)cell.Surfaces.Count << 16
+                    | cell.EnvironmentId) : 0;
+        }
+
+        return (r.Id >> 24) switch
+        {
+            // Fully qualified: with both System and DatReaderWriter.DBObjs imported,
+            // a bare Environment would clash with System.Environment.
+            0x0D => db.TryGet<DatReaderWriter.DBObjs.Environment>(r.Id, out var env)
+                ? Mix(4, (ulong)env.Cells.Count) : 0,
+            0x08 => db.TryGet<Surface>(r.Id, out var s)
+                ? Mix(5, (ulong)s.Type << 32 | (uint)s.OrigTextureId) : 0,
+            0x05 => db.TryGet<SurfaceTexture>(r.Id, out var st)
+                ? Mix(6, (ulong)st.Textures.Count << 32
+                    | (st.Textures.Count > 0 ? (uint)st.Textures[0] : 0u)) : 0,
+            0x06 => db.TryGet<RenderSurface>(r.Id, out var rs)
+                ? Mix(7, (ulong)rs.Width << 48 | (ulong)rs.Height << 32
+                    | (uint)rs.SourceData.Length) : 0,
+            _ => 0xFEEDu, // unexpected namespace — constant so it can't flap
+        };
+    }
+
+    /// <summary>Selects the database of the collection that a ref's source names.</summary>
+    private static DatDatabase Db(DatCollection dats, DatFileSource source) => source switch
+    {
+        DatFileSource.Cell => dats.Cell,
+        DatFileSource.Portal => dats.Portal,
+        _ => dats.HighRes,
+    };
+
+    /// <summary>64-bit FNV-1a hash of <paramref name="bytes"/>.</summary>
+    private static ulong Fnv(byte[] bytes)
+    {
+        ulong h = 14695981039346656037UL;
+        foreach (var b in bytes)
+        {
+            h ^= b;
+            h *= 1099511628211UL;
+        }
+        return h;
+    }
+
+    /// <summary>Combines a per-type tag (shifted into the top byte) with a value and a fixed mask.</summary>
+    private static ulong Mix(ulong tag, ulong value) =>
+        (tag << 56) ^ value ^ 0xA5A5_5A5A_0000_0000UL;
+}