acdream/src/AcDream.App/Rendering/Shaders/mesh_modern.vert
Erik 0980bea48d fix(render): A7 Fix D D-3/D-4 — two-path lighting (objects plain-Lambert+sun, EnvCell wrap+no-sun) (#140)
mesh_modern unified all meshes into one calc_point_light path: it applied the
bake's half-Lambert wrap to objects (lighting character backs from a torch behind
them) and added the sun to EnvCell building shells (warm facade wash). Retail
splits these: objects = hardware plain Lambert max(0,N.L) + sun; EnvCell walls =
baked wrap, dynamics only, NO sun (minimize_envcell_lighting). Add a per-draw
uLightingMode (WbDrawDispatcher=0 object, EnvCellRenderer=1 envcell) selecting the
angular term (wrap vs plain Lambert) and gating the sun. Per-light cap + D-1 clamp
unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-18 21:38:30 +02:00

256 lines
12 KiB
GLSL
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#version 430 core
#extension GL_ARB_shader_draw_parameters : require
layout(location = 0) in vec3 aPosition;
layout(location = 1) in vec3 aNormal;
layout(location = 2) in vec2 aTexCoord;
struct InstanceData {
mat4 transform;
// Reserved for Phase B.4 follow-up (selection-blink retail-faithful
// highlight): vec4 highlightColor; — extend stride here, increase the
// _instanceSsbo upload size in WbDrawDispatcher, add a flat varying out,
// and consume in mesh_modern.frag.
};
struct BatchData {
uvec2 textureHandle; // bindless handle for sampler2DArray
uint textureLayer; // layer index (always 0 for per-instance composites)
uint flags; // reserved — N.5 dispatcher owns all blend state
// (glBlendFunc per pass). If a future phase wants
// shader-side per-batch additive flag (Decision 2
// fallback), encode it here as bit 0.
};
layout(std430, binding = 0) readonly buffer InstanceBuffer {
InstanceData Instances[];
};
// binding=1 here is the SSBO namespace — distinct from the UBO namespace.
// SceneLighting UBO also uses binding=1 in the fragment shader; GL keeps
// GL_SHADER_STORAGE_BUFFER and GL_UNIFORM_BUFFER binding tables separate.
// Task 10 dispatcher binds:
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, instanceSsbo)
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, batchSsbo)
// Existing SceneLightingUboBinding handles the UBO side.
layout(std430, binding = 1) readonly buffer BatchBuffer {
BatchData Batches[];
};
// === Phase U.3: per-cell screen-space clip gate (gl_ClipDistance) =============
// Two SSBOs add the clip mechanism without disturbing binding=0/1 above.
//
// binding=2 — SHARED per-frame clip regions, one CellClip per "slot". Uploaded
// ONCE per frame by ClipFrame.UploadShared (shared across WbDrawDispatcher +
// EnvCellRenderer). Slot 0 is RESERVED = no-clip (count 0 ⇒ every plane passes).
//
// binding=3 — PER-RENDERER per-instance slot index, parallel to the binding=0
// instance buffer and indexed by the IDENTICAL per-instance index
// (gl_BaseInstanceARB + gl_InstanceID). instanceClipSlot[i] selects which
// CellClip region instance i is clipped against. Default all-zeros in U.3 ⇒
// every instance maps to slot 0 ⇒ no clipping ⇒ identical render to pre-U.3.
//
// CellClip std430 layout (144 bytes/slot): a uint count + 3 pad uints (16 bytes)
// then vec4 planes[8] (8 × 16 = 128 bytes). vec4 array stride is 16 under std430.
// ClipFrame on the CPU side lays out the bytes to match exactly (verified by
// ClipFrameLayoutTests). A clip-space vertex is INSIDE iff dot(plane, gl_Position)
// >= 0 for every active plane (see ClipPlaneSet for the plane convention).
struct CellClip {
uint count;
uint _p0;
uint _p1;
uint _p2;
vec4 planes[8];
};
layout(std430, binding = 2) readonly buffer ClipRegionBuf {
CellClip clipRegions[];
};
layout(std430, binding = 3) readonly buffer ClipSlotBuf {
uint instanceClipSlot[];
};
// === Fix B (A7 #3): per-OBJECT light selection — minimize_object_lighting =====
// retail picks up-to-8 point/spot lights PER OBJECT by the object's own position
// (minimize_object_lighting 0x0054d480), so a torch always lights the wall it
// sits on, camera-INDEPENDENTLY. The previous single global nearest-8-to-CAMERA
// UBO set (LightManager.Tick) made a wall brighten as the camera approached
// (its torches swapping into the global top-8). Two SSBOs replace that for
// point/spot lights (the SUN + ambient still come from the SceneLighting UBO):
//
// binding=4 — GLOBAL point/spot light array, uploaded once per frame from
// LightManager.PointSnapshot. The index of a light here is stable for the frame.
// binding=5 — per-instance light SET: MaxLightsPerObject(8) int indices per
// instance INTO gLights[] (-1 = unused slot), parallel to the binding=0
// instance buffer and indexed by the SAME instanceIndex. WbDrawDispatcher fills
// it once per entity (the set is constant across the entity's parts/tuples).
struct GlobalLight {
vec4 posAndKind;
vec4 dirAndRange;
vec4 colorAndIntensity;
vec4 coneAngleEtc;
};
layout(std430, binding = 4) readonly buffer GlobalLightBuf {
GlobalLight gLights[];
};
layout(std430, binding = 5) readonly buffer InstanceLightSetBuf {
int instanceLightIdx[]; // 8 per instance; -1 = unused
};
// Core profile: redeclare gl_PerVertex so writing gl_ClipDistance[] is legal
// alongside gl_Position. The array is sized 8 to match the CellClip plane budget
// and the GL guarantee (GL_MAX_CLIP_DISTANCES >= 8). The host enables
// GL_CLIP_DISTANCE0..7 once at startup; unused planes are set to +1.0 below so
// they pass everything (no clipping) when the slot's count < 8.
out gl_PerVertex {
vec4 gl_Position;
float gl_ClipDistance[8];
};
uniform mat4 uViewProjection;
// Phase Post-A.5 (ISSUE #52, 2026-05-10): per-pass offset into Batches[].
// gl_DrawIDARB resets to 0 at the start of each glMultiDrawElementsIndirect
// call, so the transparent pass — which begins later in the indirect buffer
// — was fetching Batches[0..transparentCount) instead of its actual section
// at Batches[opaqueCount..end). The lifestone crystal (a transparent draw)
// ended up reading the FIRST OPAQUE batch's TextureHandle every frame. As
// the camera moved and the opaque front-to-back sort reordered which group
// landed at BatchData[0], the lifestone's apparent texture flickered to
// whatever was first — frequently the player character's body parts.
//
// WbDrawDispatcher.Draw sets this to 0 before the opaque MDI call and to
// _opaqueDrawCount before the transparent MDI call, matching WorldBuilder's
// uDrawIDOffset pattern in BaseObjectRenderManager.cs line 845.
uniform int uDrawIDOffset;
uniform int uLightingMode; // A7 Fix D: 0 = OBJECT (plain Lambert + sun), 1 = ENVCELL (half-Lambert wrap, no sun)
// SceneLighting UBO — binding=1 in the UBO namespace (GL keeps the SSBO and UBO
// binding tables separate, so this coexists with the binding=1 BatchBuffer SSBO
// above). IDENTICAL std140 layout to mesh_modern.frag.
//
// A7 (2026-06-15): lighting moved from the FRAGMENT shader to HERE (per-VERTEX) so
// torch/point lights Gouraud-interpolate across each triangle the way retail's
// fixed-function T&L does (D3D DrawEnvCell vertex bake + minimize_object_lighting for
// objects). A per-PIXEL evaluation made a tight bright "spotlight" pool on flat walls;
// per-vertex spreads it into a soft, broad gradient with no hard edge.
struct Light {
vec4 posAndKind;
vec4 dirAndRange;
vec4 colorAndIntensity;
vec4 coneAngleEtc;
};
layout(std140, binding = 1) uniform SceneLighting {
Light uLights[8];
vec4 uCellAmbient;
vec4 uFogParams;
vec4 uFogColor;
vec4 uCameraAndTime;
};
// Faithful calc_point_light (0x0059c8b0) contribution from ONE point/spot light —
// the wrap + norm shape, factored out so the per-object SSBO loop shares it. D =
// light vertex, used UN-normalised (length = dist); N is the unit vertex normal.
// Returns the RGB to ADD, already per-channel capped to the light's own colour.
vec3 pointContribution(vec3 N, vec3 worldPos, GlobalLight L) {
int kind = int(L.posAndKind.w);
vec3 toL = L.posAndKind.xyz - worldPos; // D (un-normalised)
float distsq = dot(toL, toL);
float d = sqrt(distsq);
float range = L.dirAndRange.w; // falloff_eff = Falloff × 1.3
if (d >= range || range <= 1e-4) return vec3(0.0);
// A7 Fix D D-3: angular term by lighting path. ENVCELL bake (mode 1) keeps the
// half-Lambert wrap (lights surfaces angled away, retail calc_point_light); OBJECT
// mode (0) uses plain Lambert max(0,N·L) so a torch BEHIND a character contributes
// nothing (retail's hardware path). toL is un-normalised (length d).
float angular = (uLightingMode == 1)
? (1.0 / 1.5) * (dot(N, toL) + 0.5 * d) // half-Lambert wrap (EnvCell bake)
: max(0.0, dot(N, toL)); // plain Lambert (object/hardware)
if (angular <= 0.0) return vec3(0.0);
// NORM branch (distance-cube): >1 m → distsq·d ≈ inverse-square soft far halo;
// <1 m → just d (dodge the near singularity). "Punchy near, soft far."
float norm = (distsq > 1.0) ? (distsq * d) : d;
float intensity = L.colorAndIntensity.w;
float scale = (1.0 - d / range) * intensity * (angular / norm);
if (kind == 2) {
// Spotlight: hard-edged cos-cone gate layered on the point ramp.
vec3 Ldir = toL / max(d, 1e-4);
float cos_edge = cos(L.coneAngleEtc.x * 0.5);
float cos_l = dot(-Ldir, L.dirAndRange.xyz);
if (cos_l <= cos_edge) scale = 0.0;
}
// Per-channel no-blowout cap to the light's OWN colour (un-intensity-scaled):
// a single light can't push a channel past its colour. Summed lit clamped in frag.
vec3 baseCol = L.colorAndIntensity.xyz;
return min(scale * baseCol, baseCol);
}
vec3 accumulateLights(vec3 N, vec3 worldPos, int instanceIndex) {
vec3 lit = uCellAmbient.xyz;
// SUN / directional — OBJECT path only (mode 0). retail's EnvCell path
// (minimize_envcell_lighting) enables only dynamic lights, NEVER the sun, so
// EnvCell walls (mode 1) get no directional sun wash (A7 Fix D D-4).
if (uLightingMode == 0) {
int activeLights = int(uCellAmbient.w);
for (int i = 0; i < 8; ++i) {
if (i >= activeLights) break;
if (int(uLights[i].posAndKind.w) != 0) continue; // directional only
vec3 Ldir = -uLights[i].dirAndRange.xyz;
float ndl = max(0.0, dot(N, Ldir));
lit += uLights[i].colorAndIntensity.xyz * uLights[i].colorAndIntensity.w * ndl;
}
}
// POINT / SPOT torches: their OWN accumulator (A7 Fix D, D-1). Retail's
// SetStaticLightingVertexColors sums the static point lights from BLACK and
// clamps the SUM to [0,1] before anything else (a baked emissive term), so a
// few warm intensity-100 torches can't push the whole pixel to white the way
// folding them into ambient+sun did. Mirrors LightBake.ComputeVertexColor
// (LightBakeConformanceTests). Per-light cap inside pointContribution is unchanged.
vec3 pointAcc = vec3(0.0);
int base = instanceIndex * 8;
for (int k = 0; k < 8; ++k) {
int gi = instanceLightIdx[base + k];
if (gi < 0) continue;
pointAcc += pointContribution(N, worldPos, gLights[gi]);
}
lit += min(pointAcc, vec3(1.0)); // clamp the torch sum on its own (retail baked emissive)
return lit; // frag still does the final min(lit, 1.0)
}
out vec3 vNormal;
out vec2 vTexCoord;
out vec3 vWorldPos;
out vec3 vLit; // A7: per-vertex Gouraud lighting (ambient + capped lights)
out flat uvec2 vTextureHandle;
out flat uint vTextureLayer;
void main() {
int instanceIndex = gl_BaseInstanceARB + gl_InstanceID;
mat4 model = Instances[instanceIndex].transform;
vec4 worldPos = model * vec4(aPosition, 1.0);
gl_Position = uViewProjection * worldPos;
// Phase U.3: per-instance clip gate. instanceClipSlot is indexed by the
// SAME instanceIndex used for the binding=0 transform above, so the slot
// travels with the instance through the MDI BaseInstance offsets. Slot 0
// (the U.3 default) has count 0 ⇒ the second loop sets all 8 distances to
// +1.0 ⇒ nothing is clipped.
uint _slot = instanceClipSlot[instanceIndex];
CellClip _c = clipRegions[_slot];
for (uint i = 0u; i < _c.count; ++i)
gl_ClipDistance[i] = dot(_c.planes[i], gl_Position);
for (uint i = _c.count; i < 8u; ++i)
gl_ClipDistance[i] = 1.0;
vWorldPos = worldPos.xyz;
vNormal = normalize(mat3(model) * aNormal);
vLit = accumulateLights(vNormal, vWorldPos, instanceIndex); // A7: per-vertex Gouraud (per-object lights)
vTexCoord = aTexCoord;
BatchData b = Batches[uDrawIDOffset + gl_DrawIDARB];
vTextureHandle = b.textureHandle;
vTextureLayer = b.textureLayer;
}