Five bugs identified and patched in retail Asheron's Call client:

- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
118 lines
4.5 KiB
Bash
118 lines
4.5 KiB
Bash
#!/usr/bin/env bash
|
|
# Combined fleet monitor:
|
|
# - HB every 30 min
|
|
# - Snapshot every 1 h (appends to artifacts/snapshots/main.tsv)
|
|
# - Every 60 s: scan for acclient PIDs in-world (title has "Coldeve-"),
|
|
#     apply v3b -> v5 -> v11 -> v12 -> v14 in cascade. Skip Jerry (control).
|
|
#
|
|
# Each patcher is idempotent — re-runs are no-ops when bytes already in
|
|
# patched state. To keep event log clean, only emits AUTO-* events for
|
|
# PIDs we haven't already seen as "done" for a given patch.
|
|
# Treat expansion of an unset variable as an error.
set -u

# Python interpreter used for the snapshot tool and every patcher.
PY="C:/Users/acbot/AppData/Local/Programs/Python/Python312/python.exe"

# All tools/ and artifacts/ paths below are relative to this directory, so
# refuse to start if it cannot be entered rather than looping on failures.
cd /c/Users/acbot/leakhunt || {
  echo "${0##*/}: cannot cd to /c/Users/acbot/leakhunt" >&2
  exit 1
}

# Epoch seconds of the last heartbeat / snapshot; 0 makes both fire on the
# first loop iteration.
last_hb=0
last_snap=0

# Per-PID, per-patch tracking sets (sentinel files in /tmp)
SEEN_DIR="/tmp/fleet_mon_seen"
mkdir -p "$SEEN_DIR"
|
|
|
|
# mark_seen PID PATCH
# Record that PATCH has been dealt with for PID by touching the sentinel
# file "<PID>-<PATCH>" inside $SEEN_DIR.
mark_seen() {
  local pid=$1 patch=$2
  touch "${SEEN_DIR}/${pid}-${patch}"
}
|
|
# is_seen PID PATCH
# Succeed (status 0) when the sentinel file "<PID>-<PATCH>" exists as a
# regular file inside $SEEN_DIR; fail otherwise.
is_seen() {
  local pid=$1 patch=$2
  test -f "${SEEN_DIR}/${pid}-${patch}"
}
|
|
|
|
# count_rows FILE
# Print FILE's line count, or 0 when FILE cannot be read. The brace group
# keeps the shell's own failed-redirect message inside the 2>/dev/null scope.
count_rows() {
  { wc -l < "$1"; } 2>/dev/null || echo 0
}

# prune_seen LISTING
# LISTING is the "pid|title" text of the current acclient processes. Remove
# every sentinel in $SEEN_DIR whose PID is not in LISTING, so that a new
# client that is later handed a recycled PID is not mistaken for one that was
# already patched. Does nothing when LISTING contains no PIDs (no clients
# running, or the process query failed), so an empty result never wipes state.
prune_seen() {
  local listing=$1 live=" " pid rest sentinel
  while IFS='|' read -r pid rest; do
    [ -n "$pid" ] && live+="$pid "
  done <<< "$listing"
  [ "$live" = " " ] && return 0
  for sentinel in "$SEEN_DIR"/*-*; do
    [ -e "$sentinel" ] || continue   # unmatched glob stays literal
    pid=${sentinel##*/}
    pid=${pid%%-*}
    case "$live" in
      *" ${pid} "*) ;;
      *) rm -f -- "$sentinel" ;;
    esac
  done
}

# Main monitor loop: heartbeat every 30 min, snapshot every hour, and one
# auto-patch pass over the in-world clients on every iteration (60 s apart).
while true; do
  now=$(date +%s)

  # ===== heartbeat: every 30 min =====
  if (( now - last_hb >= 1800 )); then
    last_hb=$now
    # One "pid=<working set>MB" token per acclient process, space-joined.
    hb=$(powershell.exe -NoProfile -Command \
      "Get-Process acclient -EA SilentlyContinue | ForEach-Object { \"\$(\$_.Id)=\$([int](\$_.WorkingSet64/1MB))MB\" } | Sort-Object" \
      2>/dev/null | tr -d '\r' | tr '\n' ' ')
    alive=$(echo "$hb" | tr ' ' '\n' | grep -c '=')
    echo "HB $(date -u +%Y-%m-%dT%H:%M:%S) $hb ALIVE=${alive}"
  fi

  # ===== snapshot: every 1 h =====
  if (( now - last_snap >= 3600 )); then
    last_snap=$now
    snap_log="artifacts/snapshots/last_snap.log"
    snap_tsv="artifacts/snapshots/main.tsv"
    rows_before=$(count_rows "$snap_tsv")
    snap_exit=0
    {
      echo "=== run $(date) ==="
      echo "pwd=$(pwd)"
      echo "PY=$PY"
      echo "py-version=$("$PY" --version 2>&1)"
      echo "argv0-test=$("$PY" -c "import sys; print(sys.argv)" 2>&1)"
      echo "--- invoking snapshot ---"
      # Capture the snapshot tool's status here: after the group, $? would
      # be the status of the trailing echo, not of the tool.
      "$PY" tools/snapshot_compare.py "$snap_tsv" || snap_exit=$?
      echo "--- exit=$snap_exit ---"
    } > "$snap_log" 2>&1 || snap_exit=$?   # group fails only if the log cannot be opened
    rows_after=$(count_rows "$snap_tsv")
    rows_added=$((rows_after - rows_before))
    # Success is judged by rows appended to the TSV, not by the exit status.
    if (( rows_added > 0 )); then
      echo "SNAPSHOT @$(date +%H:%M) appended ${rows_added} rows"
    else
      echo "SNAPSHOT-FAIL @$(date +%H:%M) exit=$snap_exit rows_added=$rows_added (log: $snap_log)"
    fi
  fi

  # ===== auto-patch cascade: every loop (60 s) =====
  # One "pid|window title" line per acclient process.
  pid_titles=$(powershell.exe -NoProfile -Command \
    "Get-Process acclient -EA SilentlyContinue | ForEach-Object { \"\$(\$_.Id)|\$(\$_.MainWindowTitle)\" }" \
    2>/dev/null | tr -d '\r')

  # Forget sentinels that belong to PIDs which are no longer running.
  prune_seen "$pid_titles"

  while IFS='|' read -r pid title; do
    [ -z "$pid" ] && continue
    # Only patch in-world clients (skip splash screen)
    [[ "$title" == *Coldeve-* ]] || continue
    # Skip Jerry (control); the match is case-insensitive.
    [[ "${title,,}" == *jerry* ]] && continue

    # Apply in cascade order — one patch per cycle so any AV from a
    # bad patch only takes down one phase.
    for patch in v3b v5 v11 v12 v14; do
      is_seen "$pid" "$patch" && continue

      patch_args=("$pid")
      case "$patch" in
        v3b) script="tools/patch_palette_v3b.py" ;;
        v5)  script="tools/patch_purge_v5_test.py" ;;
        v11) script="tools/patch_v11_test.py" ;;
        v12) script="tools/patch_v12_test.py" ;;
        v14) script="tools/patch_v14_cenvcell_clipplane.py"; patch_args+=(--apply) ;;
      esac

      # stdin comes from /dev/null so the patcher cannot consume the
      # remaining "pid|title" lines that feed the enclosing read loop.
      result=$("$PY" "$script" "${patch_args[@]}" 2>&1 < /dev/null)
      last_line=$(tail -n 1 <<< "$result")

      # Idempotent skip detection: nothing was done, so record it silently
      # and move on to the next patch in the same cycle.
      if grep -qE 'already patched|already has a CALL' <<< "$result"; then
        mark_seen "$pid" "$patch"
        continue
      fi

      # DLL-form detection: if v5 says slots already point elsewhere (not
      # the no-op stub), the DLL applied this; treat as success.
      if [ "$patch" = "v5" ] && grep -q 'UNEXPECTED.*not the no-op stub' <<< "$result"; then
        mark_seen "$pid" "$patch"
        echo "AUTO-V5-DLL-APPLIED PID=$pid title=\"$title\" $(date +%H:%M:%S)"
        continue
      fi

      # Success and failure are both marked seen; marking failures keeps the
      # loop from retrying (and logging) the same failure every 60 s.
      mark_seen "$pid" "$patch"
      if grep -qE 'OK|reverted; now|patched; now' <<< "$result"; then
        echo "AUTO-${patch^^} PID=$pid title=\"$title\" $(date +%H:%M:%S)"
      else
        echo "AUTO-${patch^^}-FAIL PID=$pid title=\"$title\" tail=\"$last_line\""
      fi

      # only do ONE patch action per PID per cycle (cascade staggered)
      break
    done
  done <<< "$pid_titles"

  sleep 60
done
|