#version 430 core
#extension GL_ARB_shader_draw_parameters : require

// ---- Vertex attributes -------------------------------------------------------
layout(location = 0) in vec3 aPosition;
layout(location = 1) in vec3 aNormal;
layout(location = 2) in vec2 aTexCoord;

// ---- Per-instance / per-batch records ----------------------------------------

// One record per drawn instance; currently just the model matrix.
// Reserved for the Phase B.4 follow-up (retail-faithful selection-blink
// highlight): to add `vec4 highlightColor;`, extend the stride here, increase
// the _instanceSsbo upload size in WbDrawDispatcher, add a flat varying out,
// and consume it in mesh_modern.frag.
struct InstanceData {
    mat4 transform;
};

// One record per indirect draw (looked up via uDrawIDOffset + gl_DrawIDARB).
struct BatchData {
    uvec2 textureHandle; // bindless handle for a sampler2DArray
    uint  textureLayer;  // layer index (always 0 for per-instance composites)
    uint  flags;         // reserved: the N.5 dispatcher owns all blend state
                         // (glBlendFunc per pass). If a future phase wants a
                         // shader-side per-batch additive flag (Decision 2
                         // fallback), encode it here as bit 0.
};

layout(binding = 0, std430) readonly buffer InstanceBuffer {
    InstanceData Instances[];
};

// binding = 1 below lives in the SSBO namespace, which is distinct from the UBO
// namespace: the SceneLighting UBO also uses binding = 1, and GL keeps the
// GL_SHADER_STORAGE_BUFFER and GL_UNIFORM_BUFFER binding tables separate.
// The Task 10 dispatcher binds:
//   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, instanceSsbo)
//   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, batchSsbo)
// The existing SceneLightingUboBinding handles the UBO side.
layout(binding = 1, std430) readonly buffer BatchBuffer {
    BatchData Batches[];
};

// === Phase U.3: per-cell screen-space clip gate (gl_ClipDistance) =============
// Two more SSBOs provide the clip mechanism without disturbing bindings 0/1.
//
// binding = 2: SHARED per-frame clip regions, one CellClip per "slot". Uploaded
//   ONCE per frame by ClipFrame.UploadShared (shared across WbDrawDispatcher and
//   EnvCellRenderer). Slot 0 is RESERVED = no-clip (count 0 => every plane
//   passes).
//
// binding = 3: PER-RENDERER per-instance slot index, parallel to the binding = 0
//   instance buffer and indexed by the IDENTICAL per-instance index
//   (gl_BaseInstanceARB + gl_InstanceID). instanceClipSlot[i] selects which
//   CellClip region instance i is clipped against. The U.3 default is all
//   zeros => every instance maps to slot 0 => no clipping => identical render
//   to pre-U.3.
//
// CellClip std430 layout (144 bytes per slot): a uint count plus 3 pad uints
// (16 bytes), then vec4 planes[8] (8 x 16 = 128 bytes); the vec4 array stride is
// 16 under std430. ClipFrame on the CPU side lays out the bytes to match exactly
// (verified by ClipFrameLayoutTests). A clip-space vertex is INSIDE iff
// dot(plane, gl_Position) >= 0 for every active plane (see ClipPlaneSet for the
// plane convention).
struct CellClip {
    uint count;           // number of active planes in planes[]
    uint _p0, _p1, _p2;   // padding up to the 16-byte boundary
    vec4 planes[8];
};

layout(binding = 2, std430) readonly buffer ClipRegionBuf {
    CellClip clipRegions[];
};

layout(binding = 3, std430) readonly buffer ClipSlotBuf {
    uint instanceClipSlot[];
};

// === Fix B (A7 #3): per-OBJECT light selection (minimize_object_lighting) =====
// Retail picks up to 8 point/spot lights PER OBJECT by the object's own position
// (minimize_object_lighting 0x0054d480), so a torch always lights the wall it
// sits on, independently of the camera. The previous single global
// nearest-8-to-CAMERA UBO set (LightManager.Tick) made a wall brighten as the
// camera approached, because its torches swapped into the global top 8. Two
// SSBOs replace that for point/spot lights (the SUN and ambient still come from
// the SceneLighting UBO):
//
// binding = 4: GLOBAL point/spot light array, uploaded once per frame from
//   LightManager.PointSnapshot. A light's index here is stable for the frame.
// binding = 5: per-instance light SET: MaxLightsPerObject (8) int indices per
//   instance INTO gLights[] (-1 = unused slot), parallel to the binding = 0
//   instance buffer and indexed by the SAME instanceIndex. WbDrawDispatcher
//   fills it once per entity (the set is constant across the entity's
//   parts/tuples).
struct GlobalLight {
    vec4 posAndKind;
    vec4 dirAndRange;
    vec4 colorAndIntensity;
    vec4 coneAngleEtc;
};

layout(binding = 4, std430) readonly buffer GlobalLightBuf {
    GlobalLight gLights[];
};

layout(binding = 5, std430) readonly buffer InstanceLightSetBuf {
    int instanceLightIdx[]; // 8 per instance; -1 = unused
};

// Core profile: gl_PerVertex is redeclared so that writing gl_ClipDistance[] is
// legal alongside gl_Position. The array is sized 8 to match the CellClip plane
// budget and the GL guarantee (GL_MAX_CLIP_DISTANCES >= 8). The host enables
// GL_CLIP_DISTANCE0..7 once at startup; main() sets unused planes to +1.0 so
// they pass everything (no clipping) when the slot's count is below 8.
out gl_PerVertex {
    vec4  gl_Position;
    float gl_ClipDistance[8];
};

uniform mat4 uViewProjection;

// Phase Post-A.5 (ISSUE #52, 2026-05-10): per-pass offset into Batches[].
// gl_DrawIDARB resets to 0 at the start of each glMultiDrawElementsIndirect
// call, so the transparent pass (which begins later in the indirect buffer) was
// fetching Batches[0..transparentCount) instead of its actual section at
// Batches[opaqueCount..end). The lifestone crystal (a transparent draw) ended up
// reading the FIRST OPAQUE batch's TextureHandle every frame. As the camera
// moved and the opaque front-to-back sort reordered which group landed at
// BatchData[0], the lifestone's apparent texture flickered to whatever was
// first, frequently the player character's body parts.
//
// WbDrawDispatcher.Draw sets this to 0 before the opaque MDI call and to
// _opaqueDrawCount before the transparent MDI call, matching WorldBuilder's
// uDrawIDOffset pattern in BaseObjectRenderManager.cs line 845.
uniform int uDrawIDOffset;
uniform int uLightingMode; // A7 Fix D: 0 = OBJECT (plain Lambert + sun), 1 = ENVCELL (half-Lambert wrap, no sun)

// SceneLighting UBO: binding = 1 in the UBO namespace (GL keeps the SSBO and UBO
// binding tables separate, so this coexists with the binding = 1 BatchBuffer
// SSBO). IDENTICAL std140 layout to mesh_modern.frag.
//
// A7 (2026-06-15): lighting moved from the FRAGMENT shader to HERE (per-VERTEX)
// so torch/point lights Gouraud-interpolate across each triangle the way
// retail's fixed-function T&L does (D3D DrawEnvCell vertex bake +
// minimize_object_lighting for objects). A per-PIXEL evaluation made a tight
// bright "spotlight" pool on flat walls; per-vertex spreads it into a soft,
// broad gradient with no hard edge.
struct Light {
    vec4 posAndKind;
    vec4 dirAndRange;
    vec4 colorAndIntensity;
    vec4 coneAngleEtc;
};

layout(std140, binding = 1) uniform SceneLighting {
    Light uLights[8];
    vec4  uCellAmbient;     // xyz = ambient colour, w = active light count (read below)
    vec4  uFogParams;
    vec4  uFogColor;
    vec4  uCameraAndTime;
};

// Faithful calc_point_light (0x0059c8b0) contribution from ONE point/spot light:
// the wrap + norm shape, factored out so the per-object SSBO loop can share it.
//
//   N        unit vertex normal (world space)
//   worldPos vertex position (world space)
//   L        the light record; posAndKind.w == 2 selects the spotlight gate
//
// D = light - vertex is used UN-normalised (its length is the distance).
// Returns the RGB to ADD, already capped per channel to the light's own colour.
vec3 pointContribution(vec3 N, vec3 worldPos, GlobalLight L) {
    int   kind   = int(L.posAndKind.w);
    vec3  toL    = L.posAndKind.xyz - worldPos; // D (un-normalised)
    float distsq = dot(toL, toL);
    float d      = sqrt(distsq);
    float range  = L.dirAndRange.w;             // falloff_eff = Falloff x 1.3

    // Out of range, or a degenerate range: no contribution.
    if (d >= range || range <= 1e-4) return vec3(0.0);

    // A7 Fix D D-3: angular term by lighting path. The ENVCELL bake (mode 1)
    // keeps the half-Lambert wrap (lights surfaces angled away, retail
    // calc_point_light); OBJECT mode (0) uses plain Lambert max(0, N.L) so a
    // torch BEHIND a character contributes nothing (retail's hardware path).
    // toL is un-normalised (length d).
    float angular = (uLightingMode == 1)
        ? (1.0 / 1.5) * (dot(N, toL) + 0.5 * d) // half-Lambert wrap (EnvCell bake)
        : max(0.0, dot(N, toL));                // plain Lambert (object/hardware)
    // This early-out also covers d == 0 (angular is 0 there), so the division
    // by norm below never sees a zero denominator.
    if (angular <= 0.0) return vec3(0.0);

    // NORM branch (distance-cube): beyond 1 m use distsq * d, which gives an
    // approximately inverse-square soft far halo; within 1 m use just d to
    // dodge the near singularity. "Punchy near, soft far."
    float norm      = (distsq > 1.0) ? (distsq * d) : d;
    float intensity = L.colorAndIntensity.w;
    float scale     = (1.0 - d / range) * intensity * (angular / norm);

    if (kind == 2) {
        // Spotlight: hard-edged cos-cone gate layered on the point ramp.
        vec3  Ldir     = toL / max(d, 1e-4);
        float cos_edge = cos(L.coneAngleEtc.x * 0.5);
        float cos_l    = dot(-Ldir, L.dirAndRange.xyz);
        if (cos_l <= cos_edge) scale = 0.0;
    }

    // Per-channel no-blowout cap to the light's OWN colour (not scaled by
    // intensity): a single light cannot push a channel past its colour. The
    // summed lighting is clamped in the fragment shader.
    vec3 baseCol = L.colorAndIntensity.xyz;
    return min(scale * baseCol, baseCol);
}

// Sums ambient, the directional sun (OBJECT mode only) and this instance's
// per-object point/spot light set for one vertex.
//
//   N             unit vertex normal (world space)
//   worldPos      vertex position (world space)
//   instanceIndex index shared with Instances[]; selects the 8-entry slice of
//                 instanceLightIdx[] belonging to this instance
//
// Returns the un-clamped lit colour; the fragment shader applies the final
// min(lit, 1.0).
vec3 accumulateLights(vec3 N, vec3 worldPos, int instanceIndex) {
    const int LIGHTS_PER_OBJECT = 8; // slots per instance in instanceLightIdx[]

    vec3 lit = uCellAmbient.xyz;

    // SUN / directional: OBJECT path only (mode 0). Retail's EnvCell path
    // (minimize_envcell_lighting) enables only dynamic lights, NEVER the sun,
    // so EnvCell walls (mode 1) get no directional sun wash (A7 Fix D D-4).
    if (uLightingMode == 0) {
        int activeLights = int(uCellAmbient.w);
        for (int i = 0; i < 8; ++i) {
            if (i >= activeLights) break;
            if (int(uLights[i].posAndKind.w) != 0) continue; // directional only
            vec3  Ldir = -uLights[i].dirAndRange.xyz;
            float ndl  = max(0.0, dot(N, Ldir));
            lit += uLights[i].colorAndIntensity.xyz * uLights[i].colorAndIntensity.w * ndl;
        }
    }

    // POINT / SPOT torches get their OWN accumulator (A7 Fix D, D-1). Retail's
    // SetStaticLightingVertexColors sums the static point lights from BLACK and
    // clamps the SUM to [0,1] before anything else (a baked emissive term), so
    // a few warm intensity-100 torches cannot push the whole pixel to white the
    // way folding them into ambient + sun did. Mirrors
    // LightBake.ComputeVertexColor (LightBakeConformanceTests). The per-light
    // cap inside pointContribution is unchanged.
    vec3 pointAcc   = vec3(0.0);
    int  base       = instanceIndex * LIGHTS_PER_OBJECT;
    int  lightCount = gLights.length(); // runtime size of the bound light SSBO
    for (int k = 0; k < LIGHTS_PER_OBJECT; ++k) {
        int gi = instanceLightIdx[base + k];
        // -1 marks an unused slot. Indices at or past the end of gLights[] are
        // skipped as well, so a stale index can never read outside the buffer.
        if (gi < 0 || gi >= lightCount) continue;
        pointAcc += pointContribution(N, worldPos, gLights[gi]);
    }
    lit += min(pointAcc, vec3(1.0)); // clamp the torch sum on its own (retail baked emissive)

    return lit; // frag still does the final min(lit, 1.0)
}

out vec3 vNormal;
out vec2 vTexCoord;
out vec3 vWorldPos;
out vec3 vLit; // A7: per-vertex Gouraud lighting (ambient + capped lights)
out flat uvec2 vTextureHandle;
out flat uint  vTextureLayer;

// Vertex entry point: transforms the vertex, writes the per-instance clip
// distances, evaluates per-vertex lighting and forwards the batch's texture
// handle/layer to the fragment stage.
void main() {
    const uint MAX_CLIP_PLANES = 8u; // must match gl_ClipDistance[8] and CellClip.planes[8]

    int  instanceIndex = gl_BaseInstanceARB + gl_InstanceID;
    mat4 model         = Instances[instanceIndex].transform;
    vec4 worldPos      = model * vec4(aPosition, 1.0);
    gl_Position        = uViewProjection * worldPos;

    // Phase U.3: per-instance clip gate. instanceClipSlot is indexed by the
    // SAME instanceIndex used for the binding = 0 transform above, so the slot
    // travels with the instance through the MDI BaseInstance offsets. Slot 0
    // (the U.3 default) has count 0, so all 8 distances become +1.0 and nothing
    // is clipped.
    //
    // The count is clamped to the 8-plane budget: a larger value would index
    // past both planes[8] and gl_ClipDistance[8], which is undefined. The
    // region is read in place instead of copying the whole struct.
    uint slot       = instanceClipSlot[instanceIndex];
    uint planeCount = min(clipRegions[slot].count, MAX_CLIP_PLANES);
    for (uint i = 0u; i < planeCount; ++i) {
        gl_ClipDistance[i] = dot(clipRegions[slot].planes[i], gl_Position);
    }
    for (uint i = planeCount; i < MAX_CLIP_PLANES; ++i) {
        gl_ClipDistance[i] = 1.0; // inactive plane: always passes
    }

    vWorldPos = worldPos.xyz;
    vNormal   = normalize(mat3(model) * aNormal);
    vLit      = accumulateLights(vNormal, vWorldPos, instanceIndex); // A7: per-vertex Gouraud (per-object lights)
    vTexCoord = aTexCoord;

    // gl_DrawIDARB restarts at 0 for every multi-draw call; uDrawIDOffset
    // re-bases it onto this pass's section of Batches[].
    BatchData b    = Batches[uDrawIDOffset + gl_DrawIDARB];
    vTextureHandle = b.textureHandle;
    vTextureLayer  = b.textureLayer;
}