acdream/src/AcDream.App/Rendering/Wb/WbDrawDispatcher.cs
Erik 573526dae5 phase(N.4): WbDrawDispatcher perf pass — sort, cull, hash memoization
Four small wins on top of the grouped-instanced refactor.

1. Drop unused animState lookup. Was a side-effect-free
   _entitySpawnAdapter.GetState call per per-instance entity, made
   redundant by the Issue #47 fix that trusts MeshRefs.

2. Front-to-back sort opaque groups. Squared distance from camera to
   each group's first-instance translation; ascending sort. Lets the
   GPU's depth test reject fragments behind closer geometry — real
   win on dense scenes (Holtburg courtyard, Foundry interior).

3. Per-entity AABB frustum cull. 5m-radius AABB check per entity
   before walking parts. Skips work for distant entities even when
   their landblock is partially visible. Animated entities (other
   characters, NPCs, monsters) bypass — they always need per-frame
   work for animation regardless. Conservative radius covers typical
   entity bounds; large outliers stay landblock-culled.

4. Memoize palette hash per entity. TextureCache.HashPaletteOverride
   is now internal; new GetOrUploadWithPaletteOverride overload takes
   a precomputed hash. The dispatcher computes it ONCE per entity and
   reuses across every (part, batch) lookup, avoiding the per-batch
   FNV-1a fold over SubPalettes. Trees / scenery without palette
   overrides skip entirely (palHash stays 0).

Visual output unchanged; FPS up further, especially in dense scenes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-08 17:51:03 +02:00

521 lines
21 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Numerics;
using AcDream.Core.Meshing;
using AcDream.Core.Terrain;
using AcDream.Core.World;
using Chorizite.OpenGLSDLBackend.Lib;
using Silk.NET.OpenGL;
namespace AcDream.App.Rendering.Wb;
/// <summary>
/// Draws entities using WB's <see cref="ObjectRenderData"/> (a single global
/// VAO/VBO/IBO under modern rendering) with acdream's <see cref="TextureCache"/>
/// for texture resolution and <see cref="AcSurfaceMetadataTable"/> for
/// translucency classification.
///
/// <para>
/// <b>Atlas-tier</b> entities (<c>ServerGuid == 0</c>): mesh data comes from WB's
/// <see cref="ObjectMeshManager"/> via <see cref="WbMeshAdapter.TryGetRenderData"/>.
/// Textures resolve through <see cref="TextureCache.GetOrUpload"/> using the batch's
/// <c>SurfaceId</c>.
/// </para>
///
/// <para>
/// <b>Per-instance-tier</b> entities (<c>ServerGuid != 0</c>): mesh data also from
/// WB, but textures resolve through <see cref="TextureCache"/> with palette and
/// surface overrides applied. <see cref="AnimatedEntityState"/> is currently
/// unused at draw time — GameWindow's spawn path already bakes AnimPartChanges +
/// GfxObjDegradeResolver (Issue #47 close-detail mesh) into <c>MeshRefs</c>.
/// </para>
///
/// <para>
/// <b>GL strategy:</b> GROUPED instanced drawing. All visible (entity, batch)
/// pairs are bucketed by <see cref="GroupKey"/>; within a group a single
/// <c>glDrawElementsInstancedBaseVertexBaseInstance</c> renders all instances.
/// All matrices for the frame land in one shared instance VBO via a single
/// <c>BufferData</c> upload. This drops draw calls from O(entities×batches)
/// to O(unique GfxObj×batch×texture) — typically two orders of magnitude fewer.
/// </para>
///
/// <para>
/// <b>Shader:</b> reuses <c>mesh_instanced</c> (vert locations 0-2 = Position/
/// Normal/UV from WB's <c>VertexPositionNormalTexture</c>; locations 3-6 = instance
/// matrix from our VBO). WB's 32-byte vertex stride is compatible.
/// </para>
///
/// <para>
/// <b>Modern rendering assumption:</b> WB's <c>_useModernRendering</c> path (GL
/// 4.3 + bindless) puts every mesh in a single shared VAO/VBO/IBO and uses
/// <c>FirstIndex</c> + <c>BaseVertex</c> per batch. The dispatcher honors those
/// offsets via <c>DrawElementsInstancedBaseVertex(BaseInstance)</c>. The legacy
/// per-mesh-VAO path also works since FirstIndex/BaseVertex are zero there.
/// </para>
/// </summary>
public sealed unsafe class WbDrawDispatcher : IDisposable
{
private readonly GL _gl;
private readonly Shader _shader;
private readonly TextureCache _textures;
private readonly WbMeshAdapter _meshAdapter;
private readonly EntitySpawnAdapter _entitySpawnAdapter;
private readonly uint _instanceVbo;
private readonly HashSet<uint> _patchedVaos = new();
// Per-frame scratch — reused across frames to avoid per-frame allocation.
private readonly Dictionary<GroupKey, InstanceGroup> _groups = new();
private readonly List<InstanceGroup> _opaqueDraws = new();
private readonly List<InstanceGroup> _translucentDraws = new();
private float[] _instanceBuffer = new float[256 * 16]; // grow on demand, never shrink
// Per-entity-cull AABB radius. Conservative — covers most entities; large
// outliers (long banners, tall columns) are still landblock-culled.
private const float PerEntityCullRadius = 5.0f;
private bool _disposed;
// Diagnostic counters logged once per ~5s under ACDREAM_WB_DIAG=1.
private int _entitiesSeen;
private int _entitiesDrawn;
private int _meshesMissing;
private int _drawsIssued;
private int _instancesIssued;
private long _lastLogTick;
public WbDrawDispatcher(
GL gl,
Shader shader,
TextureCache textures,
WbMeshAdapter meshAdapter,
EntitySpawnAdapter entitySpawnAdapter)
{
ArgumentNullException.ThrowIfNull(gl);
ArgumentNullException.ThrowIfNull(shader);
ArgumentNullException.ThrowIfNull(textures);
ArgumentNullException.ThrowIfNull(meshAdapter);
ArgumentNullException.ThrowIfNull(entitySpawnAdapter);
_gl = gl;
_shader = shader;
_textures = textures;
_meshAdapter = meshAdapter;
_entitySpawnAdapter = entitySpawnAdapter;
_instanceVbo = _gl.GenBuffer();
}
public static Matrix4x4 ComposePartWorldMatrix(
Matrix4x4 entityWorld,
Matrix4x4 animOverride,
Matrix4x4 restPose)
=> restPose * animOverride * entityWorld;
public void Draw(
ICamera camera,
IEnumerable<(uint LandblockId, Vector3 AabbMin, Vector3 AabbMax, IReadOnlyList<WorldEntity> Entities)> landblockEntries,
FrustumPlanes? frustum = null,
uint? neverCullLandblockId = null,
HashSet<uint>? visibleCellIds = null,
HashSet<uint>? animatedEntityIds = null)
{
_shader.Use();
var vp = camera.View * camera.Projection;
_shader.SetMatrix4("uViewProjection", vp);
bool diag = string.Equals(Environment.GetEnvironmentVariable("ACDREAM_WB_DIAG"), "1", StringComparison.Ordinal);
// Camera world-space position for front-to-back sort (perf #2). The view
// matrix is the inverse of the camera's world transform, so the world
// translation lives in the inverse's translation row.
Vector3 camPos = Vector3.Zero;
if (Matrix4x4.Invert(camera.View, out var invView))
camPos = invView.Translation;
// ── Phase 1: clear groups, walk entities, build groups ──────────────
foreach (var grp in _groups.Values) grp.Matrices.Clear();
var metaTable = _meshAdapter.MetadataTable;
uint anyVao = 0;
foreach (var entry in landblockEntries)
{
bool landblockVisible = frustum is null
|| entry.LandblockId == neverCullLandblockId
|| FrustumCuller.IsAabbVisible(frustum.Value, entry.AabbMin, entry.AabbMax);
if (!landblockVisible && (animatedEntityIds is null || animatedEntityIds.Count == 0))
continue;
foreach (var entity in entry.Entities)
{
if (entity.MeshRefs.Count == 0) continue;
bool isAnimated = animatedEntityIds?.Contains(entity.Id) == true;
if (!landblockVisible && !isAnimated) continue;
if (entity.ParentCellId.HasValue && visibleCellIds is not null
&& !visibleCellIds.Contains(entity.ParentCellId.Value))
continue;
// Per-entity AABB frustum cull (perf #3). Skips work for distant
// entities even when their landblock is visible. Animated
// entities bypass — they're tracked at landblock level + need
// per-frame work for animation regardless. Conservative 5m
// radius covers typical entity bounds.
if (frustum is not null && !isAnimated && entry.LandblockId != neverCullLandblockId)
{
var p = entity.Position;
var aMin = new Vector3(p.X - PerEntityCullRadius, p.Y - PerEntityCullRadius, p.Z - PerEntityCullRadius);
var aMax = new Vector3(p.X + PerEntityCullRadius, p.Y + PerEntityCullRadius, p.Z + PerEntityCullRadius);
if (!FrustumCuller.IsAabbVisible(frustum.Value, aMin, aMax))
continue;
}
if (diag) _entitiesSeen++;
var entityWorld =
Matrix4x4.CreateFromQuaternion(entity.Rotation) *
Matrix4x4.CreateTranslation(entity.Position);
// Compute palette-override hash ONCE per entity (perf #4).
// Reused across every (part, batch) lookup so the FNV-1a fold
// over SubPalettes runs once instead of N times. Zero when the
// entity has no palette override (trees, scenery).
ulong palHash = 0;
if (entity.PaletteOverride is not null)
palHash = TextureCache.HashPaletteOverride(entity.PaletteOverride);
bool drewAny = false;
for (int partIdx = 0; partIdx < entity.MeshRefs.Count; partIdx++)
{
// Note: GameWindow's spawn path already applies
// AnimPartChanges + GfxObjDegradeResolver (Issue #47 fix —
// close-detail mesh swap for humanoids) to MeshRefs. We
// trust MeshRefs as the source of truth here. AnimatedEntityState's
// overrides become relevant only for hot-swap (0xF625
// ObjDescEvent) which today rebuilds MeshRefs anyway.
var meshRef = entity.MeshRefs[partIdx];
ulong gfxObjId = meshRef.GfxObjId;
var renderData = _meshAdapter.TryGetRenderData(gfxObjId);
if (renderData is null)
{
if (diag) _meshesMissing++;
continue;
}
drewAny = true;
if (anyVao == 0) anyVao = renderData.VAO;
if (renderData.IsSetup && renderData.SetupParts.Count > 0)
{
foreach (var (partGfxObjId, partTransform) in renderData.SetupParts)
{
var partData = _meshAdapter.TryGetRenderData(partGfxObjId);
if (partData is null) continue;
var model = ComposePartWorldMatrix(
entityWorld, meshRef.PartTransform, partTransform);
ClassifyBatches(partData, partGfxObjId, model, entity, meshRef, palHash, metaTable);
}
}
else
{
var model = meshRef.PartTransform * entityWorld;
ClassifyBatches(renderData, gfxObjId, model, entity, meshRef, palHash, metaTable);
}
}
if (diag && drewAny) _entitiesDrawn++;
}
}
// Nothing visible — skip the GL pass entirely.
if (anyVao == 0)
{
if (diag) MaybeFlushDiag();
return;
}
// ── Phase 2: lay matrices out contiguously, assign per-group offsets,
// split into opaque/translucent + compute sort keys ─────────
int totalInstances = 0;
foreach (var grp in _groups.Values) totalInstances += grp.Matrices.Count;
if (totalInstances == 0)
{
if (diag) MaybeFlushDiag();
return;
}
int needed = totalInstances * 16;
if (_instanceBuffer.Length < needed)
_instanceBuffer = new float[needed + 256 * 16]; // headroom
_opaqueDraws.Clear();
_translucentDraws.Clear();
int cursor = 0;
foreach (var grp in _groups.Values)
{
if (grp.Matrices.Count == 0) continue;
grp.FirstInstance = cursor;
grp.InstanceCount = grp.Matrices.Count;
// Use the first instance's translation as the group's representative
// position for front-to-back sort (perf #2). Cheap heuristic; works
// well when instances of one group are spatially coherent
// (typical for trees in one landblock area, NPCs at one spawn).
var firstM = grp.Matrices[0];
var grpPos = new Vector3(firstM.M41, firstM.M42, firstM.M43);
grp.SortDistance = Vector3.DistanceSquared(camPos, grpPos);
for (int i = 0; i < grp.Matrices.Count; i++)
{
WriteMatrix(_instanceBuffer, cursor * 16, grp.Matrices[i]);
cursor++;
}
if (grp.Translucency == TranslucencyKind.Opaque || grp.Translucency == TranslucencyKind.ClipMap)
_opaqueDraws.Add(grp);
else
_translucentDraws.Add(grp);
}
// Front-to-back sort for opaque pass: nearer groups draw first so the
// depth test rejects fragments hidden behind them, reducing fragment
// shader cost from overdraw on dense scenes (Holtburg courtyard,
// Foundry interior).
_opaqueDraws.Sort(static (a, b) => a.SortDistance.CompareTo(b.SortDistance));
// ── Phase 3: one upload of all matrices ─────────────────────────────
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
fixed (float* p = _instanceBuffer)
_gl.BufferData(BufferTargetARB.ArrayBuffer,
(nuint)(totalInstances * 16 * sizeof(float)), p, BufferUsageARB.DynamicDraw);
// ── Phase 4: bind VAO once (modern rendering shares one global VAO) ──
EnsureInstanceAttribs(anyVao);
_gl.BindVertexArray(anyVao);
// ── Phase 5: opaque + ClipMap pass (front-to-back sorted) ───────────
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
_gl.Disable(EnableCap.CullFace);
foreach (var grp in _opaqueDraws)
{
_shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
DrawGroup(grp);
}
// ── Phase 6: translucent pass ───────────────────────────────────────
_gl.Enable(EnableCap.Blend);
_gl.DepthMask(false);
if (string.Equals(Environment.GetEnvironmentVariable("ACDREAM_NO_CULL"), "1", StringComparison.Ordinal))
{
_gl.Disable(EnableCap.CullFace);
}
else
{
_gl.Enable(EnableCap.CullFace);
_gl.CullFace(TriangleFace.Back);
_gl.FrontFace(FrontFaceDirection.Ccw);
}
foreach (var grp in _translucentDraws)
{
switch (grp.Translucency)
{
case TranslucencyKind.Additive:
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.One);
break;
case TranslucencyKind.InvAlpha:
_gl.BlendFunc(BlendingFactor.OneMinusSrcAlpha, BlendingFactor.SrcAlpha);
break;
default:
_gl.BlendFunc(BlendingFactor.SrcAlpha, BlendingFactor.OneMinusSrcAlpha);
break;
}
_shader.SetInt("uTranslucencyKind", (int)grp.Translucency);
DrawGroup(grp);
}
_gl.DepthMask(true);
_gl.Disable(EnableCap.Blend);
_gl.Disable(EnableCap.CullFace);
_gl.BindVertexArray(0);
if (diag)
{
_drawsIssued += _opaqueDraws.Count + _translucentDraws.Count;
_instancesIssued += totalInstances;
MaybeFlushDiag();
}
}
private void DrawGroup(InstanceGroup grp)
{
_gl.ActiveTexture(TextureUnit.Texture0);
_gl.BindTexture(TextureTarget.Texture2D, grp.TextureHandle);
_gl.BindBuffer(BufferTargetARB.ElementArrayBuffer, grp.Ibo);
// BaseInstance offsets the per-instance attribute fetches into our
// shared instance VBO so each group reads its own slice. Requires
// GL_ARB_base_instance (GL 4.2+); WB requires 4.3 so this is available.
_gl.DrawElementsInstancedBaseVertexBaseInstance(
PrimitiveType.Triangles,
(uint)grp.IndexCount,
DrawElementsType.UnsignedShort,
(void*)(grp.FirstIndex * sizeof(ushort)),
(uint)grp.InstanceCount,
grp.BaseVertex,
(uint)grp.FirstInstance);
}
private void MaybeFlushDiag()
{
long now = Environment.TickCount64;
if (now - _lastLogTick > 5000)
{
Console.WriteLine(
$"[WB-DIAG] entSeen={_entitiesSeen} entDrawn={_entitiesDrawn} meshMissing={_meshesMissing} drawsIssued={_drawsIssued} instances={_instancesIssued} groups={_groups.Count}");
_entitiesSeen = _entitiesDrawn = _meshesMissing = _drawsIssued = _instancesIssued = 0;
_lastLogTick = now;
}
}
private void ClassifyBatches(
ObjectRenderData renderData,
ulong gfxObjId,
Matrix4x4 model,
WorldEntity entity,
MeshRef meshRef,
ulong palHash,
AcSurfaceMetadataTable metaTable)
{
for (int batchIdx = 0; batchIdx < renderData.Batches.Count; batchIdx++)
{
var batch = renderData.Batches[batchIdx];
TranslucencyKind translucency;
if (metaTable.TryLookup(gfxObjId, batchIdx, out var meta))
{
translucency = meta.Translucency;
}
else
{
translucency = batch.IsAdditive ? TranslucencyKind.Additive
: batch.IsTransparent ? TranslucencyKind.AlphaBlend
: TranslucencyKind.Opaque;
}
uint texHandle = ResolveTexture(entity, meshRef, batch, palHash);
if (texHandle == 0) continue;
var key = new GroupKey(
batch.IBO, batch.FirstIndex, (int)batch.BaseVertex,
batch.IndexCount, texHandle, translucency);
if (!_groups.TryGetValue(key, out var grp))
{
grp = new InstanceGroup
{
Ibo = batch.IBO,
FirstIndex = batch.FirstIndex,
BaseVertex = (int)batch.BaseVertex,
IndexCount = batch.IndexCount,
TextureHandle = texHandle,
Translucency = translucency,
};
_groups[key] = grp;
}
grp.Matrices.Add(model);
}
}
private uint ResolveTexture(WorldEntity entity, MeshRef meshRef, ObjectRenderBatch batch, ulong palHash)
{
// WB stores the surface id on batch.Key.SurfaceId (TextureKey struct);
// batch.SurfaceId is unset (zero) for batches built by ObjectMeshManager.
uint surfaceId = batch.Key.SurfaceId;
if (surfaceId == 0 || surfaceId == 0xFFFFFFFF) return 0;
uint overrideOrigTex = 0;
bool hasOrigTexOverride = meshRef.SurfaceOverrides is not null
&& meshRef.SurfaceOverrides.TryGetValue(surfaceId, out overrideOrigTex);
uint? origTexOverride = hasOrigTexOverride ? overrideOrigTex : (uint?)null;
if (entity.PaletteOverride is not null)
{
// perf #4: pass the entity-precomputed palette hash so TextureCache
// can skip its internal HashPaletteOverride for repeat lookups
// within the same character.
return _textures.GetOrUploadWithPaletteOverride(
surfaceId, origTexOverride, entity.PaletteOverride, palHash);
}
else if (hasOrigTexOverride)
{
return _textures.GetOrUploadWithOrigTextureOverride(surfaceId, overrideOrigTex);
}
else
{
return _textures.GetOrUpload(surfaceId);
}
}
private void EnsureInstanceAttribs(uint vao)
{
if (!_patchedVaos.Add(vao)) return;
_gl.BindVertexArray(vao);
_gl.BindBuffer(BufferTargetARB.ArrayBuffer, _instanceVbo);
for (uint row = 0; row < 4; row++)
{
uint loc = 3 + row;
_gl.EnableVertexAttribArray(loc);
_gl.VertexAttribPointer(loc, 4, VertexAttribPointerType.Float, false, 64, (void*)(row * 16));
_gl.VertexAttribDivisor(loc, 1);
}
}
private static void WriteMatrix(float[] buf, int offset, in Matrix4x4 m)
{
buf[offset + 0] = m.M11; buf[offset + 1] = m.M12; buf[offset + 2] = m.M13; buf[offset + 3] = m.M14;
buf[offset + 4] = m.M21; buf[offset + 5] = m.M22; buf[offset + 6] = m.M23; buf[offset + 7] = m.M24;
buf[offset + 8] = m.M31; buf[offset + 9] = m.M32; buf[offset + 10] = m.M33; buf[offset + 11] = m.M34;
buf[offset + 12] = m.M41; buf[offset + 13] = m.M42; buf[offset + 14] = m.M43; buf[offset + 15] = m.M44;
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
_gl.DeleteBuffer(_instanceVbo);
}
private readonly record struct GroupKey(
uint Ibo,
uint FirstIndex,
int BaseVertex,
int IndexCount,
uint TextureHandle,
TranslucencyKind Translucency);
private sealed class InstanceGroup
{
public uint Ibo;
public uint FirstIndex;
public int BaseVertex;
public int IndexCount;
public uint TextureHandle;
public TranslucencyKind Translucency;
public int FirstInstance; // offset into the shared instance VBO (in instances, not bytes)
public int InstanceCount;
public float SortDistance; // squared distance from camera to first instance, for opaque sort
public readonly List<Matrix4x4> Matrices = new();
}
}