Initial commit — leak-hunt project complete

Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
acbot 2026-05-23 21:05:17 +02:00
commit 57b5e43d0e
199 changed files with 1648333 additions and 0 deletions

167
tools/runtime_patch.py Normal file
View file

@ -0,0 +1,167 @@
"""
runtime_patch.py <pid> [--dry-run] [--revert]
Live-patches a running acclient.exe to fix the RenderSurface freelist leak.
Method:
The leak is at vtable slot +0x2c (ReleaseSubObjects), currently pointing at
a no-op stub `DBObj::ReleaseSubObjects`. The same vtable already has the
correct teardown at slot +0x3c (RenderSurface::Destroy). We rewrite the
+0x2c slot to point at the value currently in +0x3c, making future
invocations of `obj->vtable->ReleaseSubObjects(obj)` actually call Destroy.
Vtable addresses (EoR, derived from dump diagnostic):
0x007caa08 RenderSurface vtable A (likely base RenderSurface)
0x007ca0d8 RenderSurface vtable B (likely RenderSurfaceD3D or similar)
Both will be patched.
Safety:
* Backs up the +0x2c slot value before patching, so --revert can undo.
* VirtualProtectEx to make the page writable, then restores original prot.
* Validates by reading vtable slots +0x00, +0x14, +0x18, +0x2c and +0x3c to
confirm each holds a function-pointer-looking value within acclient.exe's
address range (0x00400000 - 0x00a00000).
* Backup file: artifacts/soak/runtime_patch_backup_<pid>.json
"""
import argparse
import ctypes
import ctypes.wintypes as wt
import json
import os
import sys
# Win32 process access-right flags; patch_process ORs all four together when
# it calls OpenProcess on the target pid.
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_VM_OPERATION = 0x0008
PROCESS_QUERY_INFORMATION = 0x0400
# Win32 page-protection flags for VirtualProtectEx. write_uint32 uses
# PAGE_READWRITE; PAGE_EXECUTE_READWRITE is not referenced in the visible
# part of this file.
PAGE_EXECUTE_READWRITE = 0x40
PAGE_READWRITE = 0x04
# These EoR vtable addresses came from the refcount diagnostic on real dumps.
# Both saw mode m_numLinks=1 on leaked-buffer-owning RenderSurfaces.
# Each entry is (label, vtable address): the label appears in log output and
# in the backup file, the address is an absolute address inside the target
# process.
VTABLES_TO_PATCH = [
("RenderSurface vtable A", 0x007caa08),
("RenderSurface vtable B", 0x007ca0d8),
]
# Byte offsets of the two slots inside each vtable. The forward patch copies
# the pointer stored at DESTROY_SLOT_OFFSET into the slot at RSO_SLOT_OFFSET.
RSO_SLOT_OFFSET = 0x2c # ReleaseSubObjects slot
DESTROY_SLOT_OFFSET = 0x3c # Destroy slot (correct teardown)
# Load a private kernel32 handle with use_last_error=True. ctypes only
# snapshots the thread's Win32 last-error value after a foreign call when the
# library was loaded with this flag, and ctypes.get_last_error() -- which the
# error messages in this file rely on -- returns that snapshot. With the plain
# ctypes.windll loader the snapshot is never updated by these calls, so the
# reported error code would not reflect the actual failure.
# A private WinDLL instance also keeps the argtypes/restype set below from
# leaking into other users of the shared ctypes.windll.kernel32 object.
kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)

# HANDLE OpenProcess(DWORD dwDesiredAccess, BOOL bInheritHandle, DWORD dwProcessId)
OpenProcess = kernel32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

# BOOL CloseHandle(HANDLE hObject)
CloseHandle = kernel32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

# BOOL ReadProcessMemory(HANDLE, LPCVOID base, LPVOID buffer, SIZE_T size,
#                        SIZE_T *bytesRead)
ReadProcessMemory = kernel32.ReadProcessMemory
ReadProcessMemory.argtypes = [
    wt.HANDLE,
    wt.LPCVOID,
    wt.LPVOID,
    ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
ReadProcessMemory.restype = wt.BOOL

# BOOL WriteProcessMemory(HANDLE, LPVOID base, LPCVOID buffer, SIZE_T size,
#                         SIZE_T *bytesWritten)
WriteProcessMemory = kernel32.WriteProcessMemory
WriteProcessMemory.argtypes = [
    wt.HANDLE,
    wt.LPVOID,
    wt.LPCVOID,
    ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
WriteProcessMemory.restype = wt.BOOL

# BOOL VirtualProtectEx(HANDLE, LPVOID address, SIZE_T size, DWORD newProtect,
#                       PDWORD oldProtect)
VirtualProtectEx = kernel32.VirtualProtectEx
VirtualProtectEx.argtypes = [
    wt.HANDLE,
    wt.LPVOID,
    ctypes.c_size_t,
    wt.DWORD,
    ctypes.POINTER(wt.DWORD),
]
VirtualProtectEx.restype = wt.BOOL
def read_uint32(h, addr):
    """Read one unsigned 32-bit value from another process.

    Args:
        h: process handle passed straight to ReadProcessMemory.
        addr: integer address in the target process to read from.

    Returns:
        The 4 bytes at *addr* interpreted as a ctypes c_uint32, as a Python int.

    Raises:
        OSError: if ReadProcessMemory reports failure or transfers a number
            of bytes other than 4.
    """
    value = ctypes.c_uint32(0)
    transferred = ctypes.c_size_t(0)
    succeeded = ReadProcessMemory(
        h, addr, ctypes.byref(value), 4, ctypes.byref(transferred)
    )
    if succeeded and transferred.value == 4:
        return value.value
    raise OSError(f"ReadProcessMemory 0x{addr:x} failed (err={ctypes.get_last_error()})")
def write_uint32(h, addr, value):
    """Write one unsigned 32-bit value into another process.

    The 4 bytes at *addr* are switched to PAGE_READWRITE with
    VirtualProtectEx, written with WriteProcessMemory, and then switched back
    to whatever protection VirtualProtectEx reported as the previous one.

    Args:
        h: process handle passed straight to the Win32 calls.
        addr: integer address in the target process to write to.
        value: integer stored as a ctypes c_uint32.

    Raises:
        OSError: if the page cannot be made writable, or if
            WriteProcessMemory fails or transfers a number of bytes other
            than 4.

    A failure to restore the original protection is reported as a warning on
    stderr rather than raised: the write itself has already been decided at
    that point, and raising from ``finally`` would mask a write error.
    """
    buf = ctypes.c_uint32(value)
    n = ctypes.c_size_t(0)
    old = wt.DWORD(0)
    if not VirtualProtectEx(h, addr, 4, PAGE_READWRITE, ctypes.byref(old)):
        raise OSError(f"VirtualProtectEx 0x{addr:x} RW failed (err={ctypes.get_last_error()})")
    try:
        if not WriteProcessMemory(h, addr, ctypes.byref(buf), 4, ctypes.byref(n)) or n.value != 4:
            raise OSError(f"WriteProcessMemory 0x{addr:x} failed (err={ctypes.get_last_error()})")
    finally:
        # Use a separate out-parameter so the saved protection value in `old`
        # stays intact for the warning message below.
        scratch = wt.DWORD(0)
        if not VirtualProtectEx(h, addr, 4, old.value, ctypes.byref(scratch)):
            print(
                f"warning: could not restore page protection 0x{old.value:x} "
                f"at 0x{addr:x} (err={ctypes.get_last_error()}); "
                f"page may remain writable",
                file=sys.stderr,
            )
def sanity_check_vtable(h, vtbl):
    """Check that a candidate vtable holds plausible code pointers.

    Slots +0x00, +0x14, +0x18, +0x2c and +0x3c of the table at *vtbl* are read
    from the process behind handle *h*; each must fall in the half-open range
    0x00400000 - 0x00a00000 (roughly acclient.exe's range on EoR).

    Returns:
        (True, "ok") when every probed slot is in range, otherwise
        (False, reason) describing the first slot that is not.

    Raises:
        OSError: propagated from read_uint32 if a slot cannot be read.
    """
    code_lo, code_hi = 0x00400000, 0x00a00000
    for slot in (0x00, 0x14, 0x18, 0x2c, 0x3c):
        target = read_uint32(h, vtbl + slot)
        if target < code_lo or target >= code_hi:
            return False, f"slot +0x{slot:02x} = 0x{target:08x} not in code range"
    return True, "ok"
# Directory holding the per-pid backup files unless the caller overrides it.
DEFAULT_BACKUP_DIR = "C:\\Users\\acbot\\leakhunt\\artifacts\\soak"


def _load_backup_slots(backup_file):
    """Return the slot entries stored in *backup_file*, or [] if it is absent."""
    try:
        with open(backup_file) as f:
            return json.load(f)["slots"]
    except FileNotFoundError:
        return []


def _save_backup(backup_file, pid, slots):
    """Write *slots* for *pid* to *backup_file*, creating its directory if needed."""
    directory = os.path.dirname(backup_file)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(backup_file, "w") as f:
        json.dump({"pid": pid, "slots": slots}, f, indent=2)


def _revert_slots(h, backup_file, dry_run):
    """Restore every backed-up +0x2c slot that still holds its patched value."""
    if not os.path.exists(backup_file):
        print(f"no backup at {backup_file}", file=sys.stderr)
        sys.exit(3)
    with open(backup_file) as f:
        backup = json.load(f)
    for entry in backup["slots"]:
        slot_addr = entry["vtable"] + RSO_SLOT_OFFSET
        current = read_uint32(h, slot_addr)
        # Only touch the slot if it still contains exactly what the forward
        # patch wrote. Anything else means it was already reverted, never
        # patched, or the pid now belongs to a different process image.
        if current != entry["post"]:
            print(f"skipping +0x2c on {entry['name']} 0x{entry['vtable']:08x}: "
                  f"slot holds 0x{current:08x}, not the patched value 0x{entry['post']:08x}")
            continue
        if dry_run:
            print(f"DRY-RUN: would revert +0x2c on {entry['name']} 0x{entry['vtable']:08x}: "
                  f"set 0x{entry['post']:08x} back to 0x{entry['pre']:08x}")
            continue
        print(f"reverting +0x2c on {entry['name']} 0x{entry['vtable']:08x}: "
              f"set 0x{entry['post']:08x} back to 0x{entry['pre']:08x}")
        write_uint32(h, slot_addr, entry["pre"])
    if not dry_run:
        print("revert complete")


def _apply_patch(h, pid, backup_file, dry_run):
    """Point each vtable's +0x2c slot at its +0x3c target, backing up first."""
    # Start from any entries an earlier run recorded, keyed by vtable address,
    # so a partial earlier run does not lose its original pointers when this
    # run rewrites the backup file.
    recorded = {}
    if not dry_run:
        recorded = {e["vtable"]: e for e in _load_backup_slots(backup_file)}
    for name, vtbl in VTABLES_TO_PATCH:
        ok, why = sanity_check_vtable(h, vtbl)
        print(f"{name} 0x{vtbl:08x}: sanity={why}")
        if not ok:
            print(" SKIP — vtable does not look valid")
            continue
        destroy_addr = read_uint32(h, vtbl + DESTROY_SLOT_OFFSET)
        current_rso = read_uint32(h, vtbl + RSO_SLOT_OFFSET)
        print(f" +0x2c (RSO) current: 0x{current_rso:08x}")
        print(f" +0x3c (Destroy) current: 0x{destroy_addr:08x}")
        if current_rso == destroy_addr:
            # Re-recording this slot would store pre == post and destroy the
            # only copy of the original pointer.
            print(" already patched: +0x2c equals +0x3c; existing backup entry kept")
            continue
        if dry_run:
            print(f" DRY-RUN: would write 0x{destroy_addr:08x} into +0x2c")
            continue
        # Persist the original value BEFORE touching the target so a failure
        # during or after the write can still be reverted.
        recorded[vtbl] = dict(name=name, vtable=vtbl, pre=current_rso, post=destroy_addr)
        _save_backup(backup_file, pid, list(recorded.values()))
        print(f"backup saved to {backup_file}")
        write_uint32(h, vtbl + RSO_SLOT_OFFSET, destroy_addr)
        # Verify
        verify = read_uint32(h, vtbl + RSO_SLOT_OFFSET)
        if verify != destroy_addr:
            print(f" VERIFY FAILED: read back 0x{verify:08x} != 0x{destroy_addr:08x}")
            continue
        print(f" PATCHED: +0x2c now points at 0x{destroy_addr:08x}")


def patch_process(pid, dry_run=False, revert=False, backup_dir=None):
    """Patch (or revert) the ReleaseSubObjects vtable slots of a live process.

    Forward mode walks VTABLES_TO_PATCH; for every table that passes
    sanity_check_vtable it copies the pointer in the +0x3c slot into the
    +0x2c slot. The original +0x2c value is written to the backup file before
    the slot is modified, and slots that already equal +0x3c are left alone so
    a repeated run cannot overwrite a good backup.

    Revert mode reads the backup file and writes each saved original value
    back, but only into slots that still hold the patched value.

    Args:
        pid: process id of the target.
        dry_run: when true, only print what would be done; nothing is written
            to the target process or to the backup file (applies to both
            forward and revert mode).
        revert: when true, undo a previous patch using the backup file.
        backup_dir: directory for ``runtime_patch_backup_<pid>.json``;
            defaults to DEFAULT_BACKUP_DIR.

    Raises:
        SystemExit: code 2 if OpenProcess fails, code 3 if revert is
            requested and no backup file exists.
        OSError: propagated from read_uint32 / write_uint32 on a failed
            memory access in the target process.
    """
    h = OpenProcess(PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION
                    | PROCESS_QUERY_INFORMATION, False, pid)
    if not h:
        print(f"OpenProcess({pid}) failed: err={ctypes.get_last_error()}", file=sys.stderr)
        sys.exit(2)
    try:
        backup_file = os.path.join(
            DEFAULT_BACKUP_DIR if backup_dir is None else backup_dir,
            f"runtime_patch_backup_{pid}.json",
        )
        if revert:
            _revert_slots(h, backup_file, dry_run)
            return
        _apply_patch(h, pid, backup_file, dry_run)
    finally:
        # Runs on normal return, on sys.exit and on OSError alike.
        CloseHandle(h)
def main():
    """Parse ``<pid> [--dry-run] [--revert]`` from argv and run patch_process."""
    parser = argparse.ArgumentParser()
    parser.add_argument("pid", type=int)
    # Both switches are plain booleans that default to False.
    for switch in ("--dry-run", "--revert"):
        parser.add_argument(switch, action="store_true")
    opts = parser.parse_args()
    patch_process(opts.pid, dry_run=opts.dry_run, revert=opts.revert)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()