mesh_modern unified all meshes into one calc_point_light path: it applied the bake's half-Lambert wrap to objects (lighting character backs from a torch behind them) and added the sun to EnvCell building shells (warm facade wash). Retail splits these: objects = hardware plain Lambert max(0,N.L) + sun; EnvCell walls = baked wrap, dynamics only, NO sun (minimize_envcell_lighting). Add a per-draw uLightingMode (WbDrawDispatcher=0 object, EnvCellRenderer=1 envcell) selecting the angular term (wrap vs plain Lambert) and gating the sun. Per-light cap + D-1 clamp unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
256 lines
12 KiB
GLSL
256 lines
12 KiB
GLSL
#version 430 core
|
||
#extension GL_ARB_shader_draw_parameters : require
|
||
|
||
// Per-vertex attributes (attribute locations 0..2).
layout(location = 0) in vec3 aPosition;  // model-space position; main() multiplies it by the instance transform
layout(location = 1) in vec3 aNormal;    // model-space normal; main() applies mat3(model) and re-normalises
layout(location = 2) in vec2 aTexCoord;  // forwarded unchanged as vTexCoord
|
||
|
||
// One element of the binding=0 instance SSBO (Instances[]).
struct InstanceData {
    mat4 transform;  // model matrix; main() uses it for worldPos and the normal
    // Reserved for Phase B.4 follow-up (selection-blink retail-faithful
    // highlight): vec4 highlightColor; — extend stride here, increase the
    // _instanceSsbo upload size in WbDrawDispatcher, add a flat varying out,
    // and consume in mesh_modern.frag.
};
|
||
|
||
// One element of the binding=1 batch SSBO (Batches[]); main() fetches the entry
// at Batches[uDrawIDOffset + gl_DrawIDARB] and forwards handle + layer.
struct BatchData {
    uvec2 textureHandle; // bindless handle for sampler2DArray
    uint  textureLayer;  // layer index (always 0 for per-instance composites)
    uint  flags;         // reserved — N.5 dispatcher owns all blend state
                         // (glBlendFunc per pass). If a future phase wants
                         // shader-side per-batch additive flag (Decision 2
                         // fallback), encode it here as bit 0.
                         // Not read anywhere in this vertex stage.
};
|
||
|
||
// Instance SSBO (SSBO binding 0). main() indexes it with
// gl_BaseInstanceARB + gl_InstanceID.
layout(std430, binding = 0) readonly buffer InstanceBuffer {
    InstanceData Instances[];
};
|
||
|
||
// binding=1 here is the SSBO namespace — distinct from the UBO namespace.
// SceneLighting UBO also uses binding=1 in the fragment shader; GL keeps
// GL_SHADER_STORAGE_BUFFER and GL_UNIFORM_BUFFER binding tables separate.
// Task 10 dispatcher binds:
//   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, instanceSsbo)
//   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, batchSsbo)
// Existing SceneLightingUboBinding handles the UBO side.
layout(std430, binding = 1) readonly buffer BatchBuffer {
    BatchData Batches[];
};
|
||
|
||
// === Phase U.3: per-cell screen-space clip gate (gl_ClipDistance) =============
// Two SSBOs add the clip mechanism without disturbing binding=0/1 above.
//
// binding=2 — SHARED per-frame clip regions, one CellClip per "slot". Uploaded
//   ONCE per frame by ClipFrame.UploadShared (shared across WbDrawDispatcher +
//   EnvCellRenderer). Slot 0 is RESERVED = no-clip (count 0 ⇒ every plane passes).
//
// binding=3 — PER-RENDERER per-instance slot index, parallel to the binding=0
//   instance buffer and indexed by the IDENTICAL per-instance index
//   (gl_BaseInstanceARB + gl_InstanceID). instanceClipSlot[i] selects which
//   CellClip region instance i is clipped against. Default all-zeros in U.3 ⇒
//   every instance maps to slot 0 ⇒ no clipping ⇒ identical render to pre-U.3.
//
// CellClip std430 layout (144 bytes/slot): a uint count + 3 pad uints (16 bytes)
// then vec4 planes[8] (8 × 16 = 128 bytes). vec4 array stride is 16 under std430.
// ClipFrame on the CPU side lays out the bytes to match exactly (verified by
// ClipFrameLayoutTests). A clip-space vertex is INSIDE iff dot(plane, gl_Position)
// >= 0 for every active plane (see ClipPlaneSet for the plane convention).
struct CellClip {
    uint count;      // number of active planes; main() evaluates planes[0..count)
    uint _p0;        // padding: _p0.._p2 fill the first 16 bytes ahead of planes[]
    uint _p1;
    uint _p2;
    vec4 planes[8];  // each plane is dotted with the clip-space gl_Position
};

// Shared clip regions, indexed by slot.
layout(std430, binding = 2) readonly buffer ClipRegionBuf {
    CellClip clipRegions[];
};

// Per-instance slot index into clipRegions[].
layout(std430, binding = 3) readonly buffer ClipSlotBuf {
    uint instanceClipSlot[];
};
|
||
|
||
// === Fix B (A7 #3): per-OBJECT light selection — minimize_object_lighting =====
// retail picks up-to-8 point/spot lights PER OBJECT by the object's own position
// (minimize_object_lighting 0x0054d480), so a torch always lights the wall it
// sits on, camera-INDEPENDENTLY. The previous single global nearest-8-to-CAMERA
// UBO set (LightManager.Tick) made a wall brighten as the camera approached
// (its torches swapping into the global top-8). Two SSBOs replace that for
// point/spot lights (the SUN + ambient still come from the SceneLighting UBO):
//
// binding=4 — GLOBAL point/spot light array, uploaded once per frame from
//   LightManager.PointSnapshot. The index of a light here is stable for the frame.
// binding=5 — per-instance light SET: MaxLightsPerObject(8) int indices per
//   instance INTO gLights[] (-1 = unused slot), parallel to the binding=0
//   instance buffer and indexed by the SAME instanceIndex. WbDrawDispatcher fills
//   it once per entity (the set is constant across the entity's parts/tuples).
struct GlobalLight {
    vec4 posAndKind;        // xyz = world position; w = kind (pointContribution treats 2 as spot)
    vec4 dirAndRange;       // xyz = spot direction; w = range (contribution is zero at d >= range)
    vec4 colorAndIntensity; // rgb = colour (also the per-channel cap); w = intensity
    vec4 coneAngleEtc;      // x = full cone angle (halved before cos()); yzw not read in this stage
};

layout(std430, binding = 4) readonly buffer GlobalLightBuf {
    GlobalLight gLights[];
};

layout(std430, binding = 5) readonly buffer InstanceLightSetBuf {
    int instanceLightIdx[]; // 8 per instance; -1 = unused
};
|
||
|
||
// Core profile: redeclare gl_PerVertex so writing gl_ClipDistance[] is legal
// alongside gl_Position. The array is sized 8 to match the CellClip plane budget
// and the GL guarantee (GL_MAX_CLIP_DISTANCES >= 8). The host enables
// GL_CLIP_DISTANCE0..7 once at startup; unused planes are set to +1.0 below so
// they pass everything (no clipping) when the slot's count < 8.
out gl_PerVertex {
    vec4  gl_Position;
    float gl_ClipDistance[8];
};
|
||
|
||
// World → clip transform applied to the instance-transformed vertex in main().
uniform mat4 uViewProjection;

// Phase Post-A.5 (ISSUE #52, 2026-05-10): per-pass offset into Batches[].
// gl_DrawIDARB resets to 0 at the start of each glMultiDrawElementsIndirect
// call, so the transparent pass — which begins later in the indirect buffer
// — was fetching Batches[0..transparentCount) instead of its actual section
// at Batches[opaqueCount..end). The lifestone crystal (a transparent draw)
// ended up reading the FIRST OPAQUE batch's TextureHandle every frame. As
// the camera moved and the opaque front-to-back sort reordered which group
// landed at BatchData[0], the lifestone's apparent texture flickered to
// whatever was first — frequently the player character's body parts.
//
// WbDrawDispatcher.Draw sets this to 0 before the opaque MDI call and to
// _opaqueDrawCount before the transparent MDI call, matching WorldBuilder's
// uDrawIDOffset pattern in BaseObjectRenderManager.cs line 845.
uniform int uDrawIDOffset;

// A7 Fix D: 0 = OBJECT (plain Lambert + sun), 1 = ENVCELL (half-Lambert wrap, no sun).
uniform int uLightingMode;
|
||
|
||
// SceneLighting UBO — binding=1 in the UBO namespace (GL keeps the SSBO and UBO
// binding tables separate, so this coexists with the binding=1 BatchBuffer SSBO
// above). IDENTICAL std140 layout to mesh_modern.frag.
//
// A7 (2026-06-15): lighting moved from the FRAGMENT shader to HERE (per-VERTEX) so
// torch/point lights Gouraud-interpolate across each triangle the way retail's
// fixed-function T&L does (D3D DrawEnvCell vertex bake + minimize_object_lighting for
// objects). A per-PIXEL evaluation made a tight bright "spotlight" pool on flat walls;
// per-vertex spreads it into a soft, broad gradient with no hard edge.
struct Light {
    vec4 posAndKind;        // w = kind; this stage only consumes kind 0 (directional) from the UBO
    vec4 dirAndRange;       // xyz = light direction (negated to get the to-light vector)
    vec4 colorAndIntensity; // rgb = colour, w = intensity multiplier
    vec4 coneAngleEtc;      // not read from the UBO in this stage
};

layout(std140, binding = 1) uniform SceneLighting {
    Light uLights[8];
    vec4  uCellAmbient;    // rgb = ambient base; w = active light count (read as int)
    vec4  uFogParams;      // not read in this vertex stage
    vec4  uFogColor;       // not read in this vertex stage
    vec4  uCameraAndTime;  // not read in this vertex stage
};
|
||
|
||
// Faithful calc_point_light (0x0059c8b0) contribution from ONE point/spot light —
// the wrap + norm shape, factored out so the per-object SSBO loop shares it. D =
// light − vertex, used UN-normalised (length = dist); N is the unit vertex normal.
//
//   N        unit vertex normal (same space as worldPos)
//   worldPos vertex position
//   L        light record (from gLights[])
//
// Returns the RGB to ADD, already per-channel capped to the light's own colour.
// Returns vec3(0) when the vertex is at/after the light's range, the range is
// degenerate (<= 1e-4), or the angular term is non-positive.
vec3 pointContribution(vec3 N, vec3 worldPos, GlobalLight L) {
    int   kind   = int(L.posAndKind.w);
    vec3  toL    = L.posAndKind.xyz - worldPos;   // D (un-normalised)
    float distsq = dot(toL, toL);
    float d      = sqrt(distsq);
    float range  = L.dirAndRange.w;               // falloff_eff = Falloff × 1.3
    if (d >= range || range <= 1e-4) {
        return vec3(0.0);
    }

    // A7 Fix D D-3: angular term by lighting path. ENVCELL bake (mode 1) keeps the
    // half-Lambert wrap (lights surfaces angled away, retail calc_point_light); OBJECT
    // mode (0) uses plain Lambert max(0,N·L) so a torch BEHIND a character contributes
    // nothing (retail's hardware path). toL is un-normalised (length d), so both
    // forms carry a factor of d that the norm term below divides back out.
    float angular = (uLightingMode == 1)
        ? (1.0 / 1.5) * (dot(N, toL) + 0.5 * d)   // half-Lambert wrap (EnvCell bake)
        : max(0.0, dot(N, toL));                  // plain Lambert (object/hardware)
    if (angular <= 0.0) {
        // Also covers d == 0 (both forms evaluate to 0), so norm below is never 0.
        return vec3(0.0);
    }

    // NORM branch (distance-cube): >1 m → distsq·d ≈ inverse-square soft far halo;
    // <1 m → just d (dodge the near singularity). "Punchy near, soft far."
    float norm      = (distsq > 1.0) ? (distsq * d) : d;
    float intensity = L.colorAndIntensity.w;
    float scale     = (1.0 - d / range) * intensity * (angular / norm);

    if (kind == 2) {
        // Spotlight: hard-edged cos-cone gate layered on the point ramp.
        // coneAngleEtc.x is the full cone angle; the edge is at half of it.
        vec3  Ldir     = toL / max(d, 1e-4);
        float cos_edge = cos(L.coneAngleEtc.x * 0.5);
        float cos_l    = dot(-Ldir, L.dirAndRange.xyz);
        if (cos_l <= cos_edge) {
            scale = 0.0;
        }
    }

    // Per-channel no-blowout cap to the light's OWN colour (un-intensity-scaled):
    // a single light can't push a channel past its colour. Summed lit clamped in frag.
    vec3 baseCol = L.colorAndIntensity.xyz;
    return min(scale * baseCol, baseCol);
}
|
||
|
||
// Per-vertex lighting sum: cell ambient + (OBJECT mode only) directional lights
// from the SceneLighting UBO + this instance's point/spot light set.
//
//   N             unit vertex normal
//   worldPos      vertex position
//   instanceIndex index of this instance; its light set occupies
//                 instanceLightIdx[instanceIndex*8 .. instanceIndex*8 + 7]
//
// Returns the un-clamped total (ambient + sun + torch sum, where only the torch
// sum is clamped to 1); the fragment stage still applies the final min(lit, 1.0).
vec3 accumulateLights(vec3 N, vec3 worldPos, int instanceIndex) {
    vec3 lit = uCellAmbient.xyz;

    // SUN / directional — OBJECT path only (mode 0). retail's EnvCell path
    // (minimize_envcell_lighting) enables only dynamic lights, NEVER the sun, so
    // EnvCell walls (mode 1) get no directional sun wash (A7 Fix D D-4).
    if (uLightingMode == 0) {
        int activeLights = int(uCellAmbient.w);
        for (int i = 0; i < 8; ++i) {
            if (i >= activeLights) break;
            if (int(uLights[i].posAndKind.w) != 0) continue; // directional only
            vec3  Ldir = -uLights[i].dirAndRange.xyz;
            float ndl  = max(0.0, dot(N, Ldir));
            lit += uLights[i].colorAndIntensity.xyz * uLights[i].colorAndIntensity.w * ndl;
        }
    }

    // POINT / SPOT torches: their OWN accumulator (A7 Fix D, D-1). Retail's
    // SetStaticLightingVertexColors sums the static point lights from BLACK and
    // clamps the SUM to [0,1] before anything else (a baked emissive term), so a
    // few warm intensity-100 torches can't push the whole pixel to white the way
    // folding them into ambient+sun did. Mirrors LightBake.ComputeVertexColor
    // (LightBakeConformanceTests). Per-light cap inside pointContribution is unchanged.
    vec3 pointAcc   = vec3(0.0);
    int  base       = instanceIndex * 8;
    int  lightCount = gLights.length();   // runtime size of the binding=4 array
    for (int k = 0; k < 8; ++k) {
        int gi = instanceLightIdx[base + k];
        // -1 marks an unused slot. Also skip indices at/after the end of gLights[]:
        // an out-of-range SSBO read is undefined without robust buffer access.
        if (gi < 0 || gi >= lightCount) continue;
        pointAcc += pointContribution(N, worldPos, gLights[gi]);
    }
    lit += min(pointAcc, vec3(1.0)); // clamp the torch sum on its own (retail baked emissive)

    return lit; // frag still does the final min(lit, 1.0)
}
|
||
|
||
// Stage outputs to the fragment shader.
out vec3 vNormal;              // normalised mat3(model) * aNormal
out vec2 vTexCoord;            // aTexCoord, unchanged
out vec3 vWorldPos;            // model * aPosition (xyz)
out vec3 vLit;                 // A7: per-vertex Gouraud lighting (ambient + capped lights)
out flat uvec2 vTextureHandle; // from Batches[uDrawIDOffset + gl_DrawIDARB]
out flat uint vTextureLayer;   // from the same batch record
|
||
|
||
// Vertex entry point: transforms the vertex by its instance matrix, writes the
// per-instance clip distances, evaluates per-vertex lighting, and forwards the
// per-draw texture handle/layer.
void main() {
    // Same index addresses Instances[], instanceClipSlot[] and the light set.
    int  instanceIndex = gl_BaseInstanceARB + gl_InstanceID;
    mat4 model = Instances[instanceIndex].transform;

    vec4 worldPos = model * vec4(aPosition, 1.0);
    gl_Position = uViewProjection * worldPos;

    // Phase U.3: per-instance clip gate. instanceClipSlot is indexed by the
    // SAME instanceIndex used for the binding=0 transform above, so the slot
    // travels with the instance through the MDI BaseInstance offsets. Slot 0
    // (the U.3 default) has count 0 ⇒ every distance below is set to +1.0 ⇒
    // nothing is clipped.
    uint _slot = instanceClipSlot[instanceIndex];
    // Clamp the plane count: gl_ClipDistance[] and CellClip.planes[] both hold
    // exactly 8 entries, so a count > 8 would index past the end of both arrays.
    uint planeCount = min(clipRegions[_slot].count, 8u);
    for (uint i = 0u; i < 8u; ++i) {
        // Active planes get the signed clip-space distance; the rest pass everything.
        gl_ClipDistance[i] = (i < planeCount)
            ? dot(clipRegions[_slot].planes[i], gl_Position)
            : 1.0;
    }

    vWorldPos = worldPos.xyz;
    // NOTE(review): mat3(model) is only an exact normal transform when the model
    // matrix carries no non-uniform scale — confirm that holds for all instances.
    vNormal   = normalize(mat3(model) * aNormal);
    vLit      = accumulateLights(vNormal, vWorldPos, instanceIndex); // A7: per-vertex Gouraud (per-object lights)
    vTexCoord = aTexCoord;

    // gl_DrawIDARB restarts at 0 for each multi-draw call; uDrawIDOffset rebases
    // it into this pass's section of Batches[].
    BatchData b = Batches[uDrawIDOffset + gl_DrawIDARB];
    vTextureHandle = b.textureHandle;
    vTextureLayer  = b.textureLayer;
}
|