acdream/src/AcDream.App/Rendering/Wb/WbMeshAdapter.cs
Erik 8682a8db70 close #125: bounded upload retry kills the sticky-drop debt (failed GL uploads were never re-staged)
The GL root cause was fixed in fcade06 (the gpu_us query-ring stale
errors). This closes the remaining design debt: a genuinely-failed
UploadMeshData was dropped permanently.

Exact mechanism (traced this session): UploadMeshData's catch returns
null, the staged item is already consumed, and _renderData stays empty -
but the prepared data lingers in _cpuMeshCache, so the #128 EnsureLoaded
re-arm hits PrepareMeshDataAsync's CPU-cache short-circuit
(ObjectMeshManager.cs:448-453) which returns the cached data WITHOUT
re-staging it for upload. The mesh stays invisible until CPU-cache
eviction - session-sticky under low cache pressure (the in-tower
scenario).

Fix: the per-frame Tick drain (WbMeshAdapter) now re-stages a failed
upload for the NEXT frame via ObjectMeshManager.UploadOrRequeue, bounded
by MaxUploadRetries (3). The attempt counter lives on the ObjectMeshData
object so it resets to 0 naturally on re-prepare. Re-stages are
collected and re-enqueued AFTER the drain loop, never inside it, so a
deterministic failure cannot spin the queue within a single frame; past
the cap it gives up with a loud [up-retry] ... giving up line - a
genuine GL defect now surfaces instead of the old silent permanent drop
or an unbounded retry storm. Retail loads content synchronously and has
no such failure mode; this converges the async pipeline toward that
guarantee.

The uncaught GenerateMipmaps path (open-question c) is INTENTIONALLY
left to surface errors - a blanket catch there would mask future real
defects (no-workarounds rule), and its trigger (fcade06) is retired.

No visual gate (robustness). Build green; App.Tests 264 + WbMeshAdapter
tests green. No GL-context test seam exists for the upload path, so the
bounded retry is verified by construction + the regression suite.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-13 10:27:26 +02:00

337 lines
15 KiB
C#

using System;
using System.Collections.Generic;
using AcDream.Core.Meshing;
using AcDream.Core.Rendering;
using DatReaderWriter;
using DatReaderWriter.DBObjs;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Silk.NET.OpenGL;
namespace AcDream.App.Rendering.Wb;
/// <summary>
/// Single seam between acdream and WB's render pipeline. Owns the
/// <c>ObjectMeshManager</c> instance and exposes a stable acdream-shaped API
/// so the rest of the renderer doesn't need to know about WB's types directly.
///
/// <para>
/// As of Phase O-T7, all DAT I/O routes through <see cref="DatCollectionAdapter"/>
/// (backed by our shared <see cref="DatCollection"/>) — the separate
/// <c>DefaultDatReaderWriter</c> file-handle set has been removed.
/// </para>
/// </summary>
public sealed class WbMeshAdapter : IDisposable, IWbMeshAdapter
{
// WB graphics device built by the public constructor; stays null on an
// uninitialized instance. Tick drains its GL queue and Dispose releases it.
private readonly OpenGLGraphicsDevice? _graphicsDevice;
// The WB mesh manager this adapter owns and forwards to; stays null on an
// uninitialized instance.
private readonly ObjectMeshManager? _meshManager;
// DAT source read by PopulateMetadata; stays null on an uninitialized instance.
private readonly DatCollection? _dats;
// Per-surface metadata side-table, exposed through MetadataTable and filled
// by PopulateMetadata.
private readonly AcSurfaceMetadataTable _metadataTable = new();
// Ids that IncrementRefCount has already seen once; used to run
// PopulateMetadata only on the first registration of an id.
private readonly HashSet<ulong> _metadataPopulated = new();
/// <summary>
/// True when this instance was created via <see cref="CreateUninitialized"/>;
/// all public methods no-op when uninitialized.
/// </summary>
private readonly bool _isUninitialized;
// Set once by Dispose; guards against double-dispose and against per-frame
// work after teardown.
private bool _disposed;
/// <summary>
/// Constructs the full WB pipeline: OpenGLGraphicsDevice → DatCollectionAdapter
/// → ObjectMeshManager.
/// </summary>
/// <param name="gl">Active Silk.NET GL context. Must be bound to the current
/// thread (construction runs GL queries; call from OnLoad).</param>
/// <param name="dats">acdream's DatCollection, used to populate the surface
/// metadata side-table via <c>GfxObjMesh.Build</c>. Shares file handles with
/// the rest of the client; read-only access from the render thread.</param>
/// <param name="logger">Logger handed to the <c>OpenGLGraphicsDevice</c>.
/// <c>ObjectMeshManager</c> does not receive it; it gets a console-backed,
/// error-only logger instead.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public WbMeshAdapter(GL gl, DatCollection dats, ILogger<WbMeshAdapter> logger)
{
    ArgumentNullException.ThrowIfNull(gl);
    ArgumentNullException.ThrowIfNull(dats);
    ArgumentNullException.ThrowIfNull(logger);
    _dats = dats;

    var device = new OpenGLGraphicsDevice(gl, logger, new DebugRenderSettings());
    try
    {
        device.ParticleBatcher = new ParticleBatcher(device);
        // ConsoleErrorLogger surfaces WB's silently-caught exceptions
        // (ObjectMeshManager.PrepareMeshData try/catch at line ~589).
        _meshManager = new ObjectMeshManager(
            device,
            new DatCollectionAdapter(dats),
            new ConsoleErrorLogger<ObjectMeshManager>());
    }
    catch
    {
        // A throwing constructor never hands the instance to the caller, so
        // nobody could call Dispose on it: release the device here instead of
        // leaking it, then let the original exception propagate.
        device.Dispose();
        throw;
    }
    _graphicsDevice = device;
}
/// <summary>
/// Minimal Console-backed logger that fires only on
/// <see cref="LogLevel.Error"/> and above. Format:
/// <code>[wb-error] &lt;message&gt;
/// [wb-error] &lt;ExceptionType&gt;: &lt;ExceptionMessage&gt;
/// [wb-error] at &lt;frame&gt; (up to 5 frames)</code>
/// Used to surface WB's silently-caught exceptions in
/// <c>ObjectMeshManager.PrepareMeshData</c>.
/// </summary>
private sealed class ConsoleErrorLogger<T> : ILogger<T>
{
    // Upper bound on the stack-trace lines echoed per logged exception.
    private const int MaxStackFrames = 5;

    /// <summary>Scopes are not tracked; every caller gets the shared no-op scope.</summary>
    public IDisposable BeginScope<TState>(TState state) where TState : notnull
    {
        return NullScope.Instance;
    }

    /// <summary>Only <see cref="LogLevel.Error"/> and above are written.</summary>
    public bool IsEnabled(LogLevel logLevel)
    {
        return logLevel >= LogLevel.Error;
    }

    /// <summary>
    /// Writes the formatted message to the console and, when an exception is
    /// attached, its type, message and the leading stack frames. Each line is
    /// prefixed with <c>[wb-error]</c>.
    /// </summary>
    public void Log<TState>(
        LogLevel logLevel, EventId eventId, TState state, Exception? exception,
        Func<TState, Exception?, string> formatter)
    {
        if (!IsEnabled(logLevel))
        {
            return;
        }

        string text = formatter(state, exception);
        Console.WriteLine($"[wb-error] {text}");

        if (exception is null)
        {
            return;
        }

        Console.WriteLine($"[wb-error] {exception.GetType().Name}: {exception.Message}");

        // Split on either newline flavour and drop the empty pieces, then
        // print at most MaxStackFrames of the remaining lines.
        string trace = exception.StackTrace ?? "";
        string[] frames = trace.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        int shown = Math.Min(frames.Length, MaxStackFrames);
        for (int i = 0; i < shown; i++)
        {
            Console.WriteLine($"[wb-error] {frames[i].Trim()}");
        }
    }

    // Disposable that does nothing; returned from BeginScope.
    private sealed class NullScope : IDisposable
    {
        public static readonly NullScope Instance = new NullScope();

        public void Dispose()
        {
        }
    }
}
// Uninitialized constructor — only for tests / flag-off cases where the
// caller wants a Dispose-safe no-op instance. No graphics device, mesh
// manager or DatCollection is created on this path.
private WbMeshAdapter() => _isUninitialized = true;
/// <summary>
/// Test/init helper: builds an adapter with no underlying mesh manager.
/// The instance is safe to dispose and its public methods are all no-ops.
/// </summary>
/// <returns>A new uninitialized <see cref="WbMeshAdapter"/>.</returns>
public static WbMeshAdapter CreateUninitialized()
{
    return new WbMeshAdapter();
}
/// <summary>
/// The surface metadata side-table populated on each first
/// <see cref="IncrementRefCount"/>. Queried by the draw dispatcher
/// to determine translucency, luminosity, and fog behavior per batch.
/// </summary>
public AcSurfaceMetadataTable MetadataTable
{
    // Always the same table instance for the lifetime of the adapter.
    get { return _metadataTable; }
}
/// <summary>
/// Phase A8 (2026-05-28): exposes the underlying <see cref="ObjectMeshManager"/>
/// so <c>EnvCellRenderer</c> can share the same global mesh buffer (VAO/VBO/IBO).
/// Returns null when the adapter is uninitialized.
/// </summary>
public ObjectMeshManager? MeshManager
{
    // Null on an uninitialized adapter, where no mesh manager is created.
    get { return _meshManager; }
}
/// <summary>
/// Returns the WB render data for <paramref name="id"/>, or null if not
/// yet uploaded, or if this adapter is uninitialized or already disposed.
/// Increments WB's internal usage counter — use <see cref="TryGetRenderData"/>
/// for render-loop lookups that should not affect lifecycle.
/// </summary>
/// <param name="id">Mesh id to look up.</param>
/// <returns>The render data, or null when none is available.</returns>
public ObjectRenderData? GetRenderData(ulong id)
{
    // Same convention as Tick(): once disposed, never call into the
    // already-disposed mesh manager.
    if (_isUninitialized || _disposed || _meshManager is null) return null;
    return _meshManager.GetRenderData(id);
}
/// <summary>
/// Returns the WB render data for <paramref name="id"/> without
/// modifying reference counts. Returns null if the mesh is not yet
/// uploaded, or if this adapter is uninitialized or already disposed.
/// Safe for render-loop lookups.
/// </summary>
/// <param name="id">Mesh id to look up.</param>
/// <returns>The render data, or null when none is available.</returns>
public ObjectRenderData? TryGetRenderData(ulong id)
{
    // Same convention as Tick(): once disposed, never call into the
    // already-disposed mesh manager.
    if (_isUninitialized || _disposed || _meshManager is null) return null;
    return _meshManager.TryGetRenderData(id);
}
/// <inheritdoc/>
public void IncrementRefCount(ulong id)
{
    // No-op when uninitialized. Also a no-op after Dispose (same convention as
    // Tick()): registering against a disposed mesh manager would start a
    // mesh preparation on a manager that has already been torn down.
    if (_isUninitialized || _disposed || _meshManager is null) return;

    _meshManager.IncrementRefCount(id);

    // HashSet.Add returns true only the first time an id is seen, so the
    // metadata side-table is populated once per id.
    bool firstEver = _metadataPopulated.Add(id);
    if (firstEver)
        PopulateMetadata(id);

    // WB's IncrementRefCount alone only bumps a usage counter; it does
    // NOT trigger mesh loading. We must explicitly call PrepareMeshDataAsync
    // so the background workers actually decode the GfxObj. The result
    // auto-enqueues into _stagedMeshData (ObjectMeshManager line 510),
    // which Tick() drains onto the GPU. Until that completes,
    // TryGetRenderData(id) returns null and the dispatcher silently
    // skips the entity — standard streaming flicker.
    //
    // #128 (2026-06-11): Prepare must RE-ARM whenever the id has no render
    // data — NOT only on the first-ever registration. The old
    // first-ever-only gate (`if (_metadataPopulated.Add(id))`) permanently
    // lost any id whose initial decode was cancelled before completing
    // (landblock unload → CancelStagedUploads during login/teleport
    // churn) or whose upload was later LRU-evicted: every subsequent
    // registration skipped Prepare, so the mesh stayed invisible for the
    // session with zero log output — the dispatcher's slow path just
    // counted meshMissing forever (issue #55's 1.45M/5s mountain was this
    // bug's heartbeat). User-visible: the AAB3 tower staircase rendering
    // partially or not at all depending on the session's landblock
    // load/unload interleaving (#119/#128 "broken stairs"). Safe to call
    // unconditionally when data is absent: PrepareMeshDataAsync early-outs
    // on existing render data, returns the in-flight task when already
    // pending, and dedups via _preparationTasks.
    //
    // isSetup: false — acdream's MeshRefs already carry expanded
    // per-part GfxObj ids (0x01XXXXXX). WB's Setup-expansion path is
    // unused.
    if (firstEver || _meshManager.TryGetRenderData(id) is null)
        _meshManager.PrepareMeshDataAsync(id, isSetup: false);
}
/// <inheritdoc/>
public void DecrementRefCount(ulong id)
{
    // No-op when uninitialized. Also a no-op after Dispose (same convention as
    // Tick()): releases that arrive during teardown must not call into the
    // already-disposed mesh manager.
    if (_isUninitialized || _disposed || _meshManager is null) return;
    _meshManager.DecrementRefCount(id);
}
/// <summary>
/// #128 self-heal (2026-06-11): re-request a mesh load at the POINT OF
/// USE. Registration-time re-arming was insufficient — a preparation
/// cancelled by landblock churn AFTER the last registration event
/// (running across blocks loads/unloads them repeatedly) left the mesh
/// permanently unloadable with no later event to re-fire it. The draw
/// dispatcher touches every missing-but-referenced mesh every frame (the
/// meshMissing slow path) — that is the one place a retry can never be
/// missed. Cheap and idempotent: PrepareMeshDataAsync early-outs on
/// existing render data and returns the in-flight task when pending.
/// Retail-equivalence: retail loads content synchronously — geometry is
/// never permanently absent; this converges our async pipeline to the
/// same guarantee.
/// </summary>
public void EnsureLoaded(ulong id)
{
    // No-op when uninitialized. Also a no-op after Dispose (same convention as
    // Tick()): this is called from the draw path every frame for missing
    // meshes, and must not start a new preparation on a disposed manager.
    if (_isUninitialized || _disposed || _meshManager is null) return;

    // isSetup: false — ids passed here are per-part GfxObj ids, matching the
    // call made from IncrementRefCount.
    _meshManager.PrepareMeshDataAsync(id, isSetup: false);
}
/// <summary>
/// Per-frame drain of the WB pipeline's main-thread work queues. MUST be
/// called once per frame from the render thread. Without this, the staged
/// mesh data queue grows unbounded (memory leak) and queued GL actions
/// never execute.
///
/// <para>
/// Order matters: <c>ProcessGLQueue</c> runs first to apply any pending GL
/// state changes (e.g., texture uploads queued by background workers
/// during mesh prep). Then we drain staged mesh data, calling
/// <c>UploadMeshData</c> on each item to materialize the actual GL VAO /
/// VBO / IBO resources. After Tick, <c>GetRenderData</c> for any id
/// previously passed to <c>IncrementRefCount</c> may return non-null.
/// </para>
///
/// <para>
/// No-op when the adapter is uninitialized (e.g., flag is off and the
/// adapter was constructed via <c>CreateUninitialized</c>).
/// </para>
/// </summary>
public void Tick()
{
    // Nothing to drain without a live pipeline.
    if (_isUninitialized || _disposed) return;

    ObjectMeshManager meshes = _meshManager!;

    // Queued GL actions go first so pending GL state changes are applied
    // before any staged mesh is uploaded.
    _graphicsDevice!.ProcessGLQueue();

    // #125: drain the staged uploads. An upload that FAILED (UploadMeshData
    // returned null from its catch) is re-staged for a LATER frame rather
    // than dropped. Failed items are only collected here and re-enqueued once
    // the loop has finished — re-enqueuing inside the while would let a
    // deterministic failure spin the queue within a single frame.
    // UploadOrRequeue bounds the retries (MaxUploadRetries) so a genuine
    // defect surfaces loudly instead of the old silent sticky drop.
    List<ObjectMeshData>? retryNextFrame = null;
    while (meshes.StagedMeshData.TryDequeue(out var staged))
    {
        bool needsRetry = meshes.UploadOrRequeue(staged);
        if (!needsRetry)
        {
            continue;
        }

        retryNextFrame ??= new List<ObjectMeshData>();
        retryNextFrame.Add(staged);
    }

    if (retryNextFrame is not null)
    {
        foreach (var pending in retryNextFrame)
        {
            meshes.StagedMeshData.Enqueue(pending);
        }
    }

    // The pending-texture snapshot is only taken when the #105 probe is on.
    bool probeEnabled = AcDream.Core.Rendering.RenderingDiagnostics.ProbeTexFlushEnabled;
    var statsBeforeFlush = probeEnabled
        ? meshes.GetPendingTextureUpdateStats()
        : default;

    // #105 root cause (2026-06-10): TextureAtlasManager.AddTexture only STAGES
    // texture content (PBO write + ManagedGLTextureArray._pendingUpdates). The
    // actual TexSubImage3D copies and mipmap regeneration happen in
    // ProcessDirtyUpdates, which WB drives ONCE PER FRAME from its render loop
    // (WB GameScene.cs:975 `_meshManager?.GenerateMipmaps()`, just before the
    // opaque pass). That call site lived in the GameScene file which the
    // N.4/O-T4 extraction replaced with GameWindow, so the driver was silently
    // dropped: staged updates only ever reached the GPU as a side effect of
    // PBO growth, and every layer staged after an array's LAST growth kept
    // undefined TexStorage3D content behind a valid resident bindless handle —
    // the intermittent white indoor walls (#105). Pre-fix evidence: 126
    // updates stuck across 34/34 arrays at standstill (texflush-prefix.log).
    // Tick() runs before all draw passes (GameWindow OnRender), so this is the
    // exact WB-equivalent position.
    meshes.GenerateMipmaps();

    if (probeEnabled)
    {
        EmitTexFlushProbe(statsBeforeFlush);
    }
}
// #105 apparatus state — see RenderingDiagnostics.ProbeTexFlushEnabled.
// Both fields are read and written only by EmitTexFlushProbe.
// "before" pending-update count recorded when the last [tex-flush] line was
// written. Starts at -1; presumably real counts are never negative, so the
// first probe call always registers as a change — confirm against
// GetPendingTextureUpdateStats.
private int _lastTexFlushBefore = -1;
// Number of probe calls that saw pending updates remaining after the flush
// since the last [tex-flush] line; reset to 0 whenever a line is written.
private int _texFlushHeartbeat;
/// <summary>
/// #105 apparatus: one <c>[tex-flush]</c> line on change of the staged-texture
/// pending picture (plus a ~10 s heartbeat while anything is stuck). A healthy
/// frame ends with <c>after=0</c>; <c>before==after&gt;0</c> persisting at
/// standstill is the white-walls mechanism live (staged uploads never applied).
/// </summary>
private void EmitTexFlushProbe((int PendingUpdates, int ArraysWithPending, int TotalArrays) before)
{
    // Probe calls with work still pending before a heartbeat line is forced.
    const int HeartbeatTicks = 600;

    var after = _meshManager!.GetPendingTextureUpdateStats();

    // "Moved": the pre-flush count differs from the one last reported.
    bool beforeMoved = _lastTexFlushBefore != before.PendingUpdates;
    // "Flushed": this frame's flush changed the pending count.
    bool flushedSomething = before.PendingUpdates != after.PendingUpdates;

    // The heartbeat counter only advances while updates remain pending after
    // the flush; it is left untouched otherwise.
    bool heartbeatDue = false;
    if (after.PendingUpdates > 0)
    {
        _texFlushHeartbeat++;
        heartbeatDue = _texFlushHeartbeat >= HeartbeatTicks;
    }

    if (beforeMoved || flushedSomething || heartbeatDue)
    {
        _texFlushHeartbeat = 0;
        _lastTexFlushBefore = before.PendingUpdates;

        string line =
            $"[tex-flush] before={before.PendingUpdates} after={after.PendingUpdates}" +
            $" arrays={after.ArraysWithPending}/{after.TotalArrays}" +
            $" (arraysBefore={before.ArraysWithPending})";
        Console.WriteLine(line);
    }
}
// Fills the surface metadata side-table for one mesh id: looks the GfxObj up
// in the portal DAT, builds its sub-meshes, and records one AcSurfaceMetadata
// entry per sub-mesh index. Does nothing when there is no DatCollection or
// the lookup fails.
private void PopulateMetadata(ulong id)
{
    if (_dats is null) return;

    // NOTE(review): the id is narrowed to 32 bits for the portal lookup —
    // presumably GfxObj ids always fit in a uint; confirm against callers.
    uint gfxObjId = (uint)id;
    if (!_dats.Portal.TryGet<GfxObj>(gfxObjId, out var gfxObj)) return;

    var parts = GfxObjMesh.Build(gfxObj, _dats);
    for (int slot = 0; slot < parts.Count; slot++)
    {
        var part = parts[slot];
        var surface = new AcSurfaceMetadata(
            part.Translucency, part.Luminosity, part.Diffuse,
            part.SurfOpacity, part.NeedsUvRepeat, part.DisableFog);

        // The table is keyed by the full 64-bit id plus the sub-mesh index.
        _metadataTable.Add(id, slot, surface);
    }
}
/// <inheritdoc/>
public void Dispose()
{
    // Idempotent: only the first call tears anything down.
    if (_disposed) return;
    _disposed = true;

    try
    {
        // Mesh manager first, then the device — same order as before.
        _meshManager?.Dispose();
    }
    finally
    {
        // Release the graphics device even if the mesh manager's teardown
        // throws; otherwise the device would be leaked, because _disposed is
        // already set and a second Dispose call returns immediately.
        _graphicsDevice?.Dispose();
    }
}
}