Initial commit — leak-hunt project complete

Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
acbot 2026-05-23 21:05:17 +02:00
commit 57b5e43d0e
199 changed files with 1648333 additions and 0 deletions

221
tools/runtime_patch_v3.py Normal file
View file

@ -0,0 +1,221 @@
"""
runtime_patch_v3.py <pid> [--dry-run] [--revert]
Per-vtable thunks with class-specific field offsets, derived from
find_rendersurfaces.py diagnostic across two dumps:
vtable A 0x007caa08 (base RenderSurface): m_pSurfaceBits +0x58, sourceBits +0x40
vtable B 0x007ca0d8 (RenderSurfaceD3D-ish): buffers at +0x90 and +0x94
vtable C 0x007961e0 (another variant): buffers at +0x90 and +0x94
The v2 patch used +0x58 for ALL vtables; that was wrong for B/C and crashed
PID 17252 reading pixel data instead of a buffer pointer. This v3 emits a
separate thunk per vtable with the correct offsets.
Each thunk:
- Reads each buffer field
- Null-check, skip if zero
- Calls operator delete[] at EoR 0x005df164 (one buffer at a time)
- Nulls the field
- Returns 1 (uint8_t)
"""
import argparse, ctypes, ctypes.wintypes as wt, json, os, struct, sys
# Process access rights; OR-ed together for the OpenProcess call in patch_process.
PROCESS_VM_OPERATION = 0x0008
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_QUERY_INFORMATION = 0x0400

# Allocation-type flags passed to VirtualAllocEx / VirtualFreeEx.
MEM_COMMIT = 0x1000
MEM_RESERVE = 0x2000
MEM_RELEASE = 0x8000

# Page-protection values: RW while rewriting a vtable slot, RWX for the thunk page.
PAGE_READWRITE = 0x04
PAGE_EXECUTE_READWRITE = 0x40

# Per-vtable patch metadata: (name, vtable_addr, [field_offsets_to_free]).
# The offset lists stay lists: they are printed and JSON-serialised as-is.
VTABLES = [
    (
        "RS vtable A (base RenderSurface)",
        0x007caa08,
        [0x40, 0x58],
    ),
    (
        "RS vtable B (D3D variant)",
        0x007ca0d8,
        [0x90, 0x94],
    ),
    (
        "RS vtable C (other variant)",
        0x007961e0,
        [0x90, 0x94],
    ),
]

# Byte offset, from the start of each vtable, of the slot that gets rewritten.
RSO_SLOT = 0x2c

# Address the generated thunks call to free a buffer (operator delete[] per the
# module docstring).
OP_DELETE_ARR_EOR = 0x005df164

# Value the slot is expected to hold before patching; any other value is skipped.
NO_OP_STUB = 0x004154a0
# Load kernel32 with use_last_error=True. Every error message in this file reports
# ctypes.get_last_error(), which only reflects the real Win32 error code when the
# DLL object was created with this flag; through ctypes.windll it stays at 0.
# A private WinDLL instance also keeps the argtypes/restype declarations below
# from leaking into the process-wide ctypes.windll.kernel32 cache.
k32 = ctypes.WinDLL("kernel32", use_last_error=True)
def _setup_apis():
    """Declare ctypes argument and return types for the kernel32 calls used here.

    Each entry maps an export name to ``(restype, argtypes)``; the loop applies
    them to the corresponding attribute of the module-level ``k32`` object.
    """
    size_t = ctypes.c_size_t
    size_t_ptr = ctypes.POINTER(ctypes.c_size_t)
    dword_ptr = ctypes.POINTER(wt.DWORD)

    prototypes = {
        'OpenProcess': (wt.HANDLE, [wt.DWORD, wt.BOOL, wt.DWORD]),
        'CloseHandle': (wt.BOOL, [wt.HANDLE]),
        'ReadProcessMemory': (
            wt.BOOL, [wt.HANDLE, wt.LPCVOID, wt.LPVOID, size_t, size_t_ptr]),
        'WriteProcessMemory': (
            wt.BOOL, [wt.HANDLE, wt.LPVOID, wt.LPCVOID, size_t, size_t_ptr]),
        'VirtualAllocEx': (
            wt.LPVOID, [wt.HANDLE, wt.LPVOID, size_t, wt.DWORD, wt.DWORD]),
        'VirtualFreeEx': (
            wt.BOOL, [wt.HANDLE, wt.LPVOID, size_t, wt.DWORD]),
        'VirtualProtectEx': (
            wt.BOOL, [wt.HANDLE, wt.LPVOID, size_t, wt.DWORD, dword_ptr]),
    }
    for api_name, (result_type, arg_types) in prototypes.items():
        func = getattr(k32, api_name)
        func.argtypes = arg_types
        func.restype = result_type


# Apply the prototypes once at import time, before any helper below is called.
_setup_apis()
def read_uint32(h, addr):
    """Read one unsigned 32-bit value from another process.

    Args:
        h: process handle passed to ReadProcessMemory.
        addr: address in the target process to read from.

    Returns:
        The 32-bit value found at ``addr``.

    Raises:
        OSError: if ReadProcessMemory fails or transfers fewer than 4 bytes.
    """
    value = ctypes.c_uint32(0)
    transferred = ctypes.c_size_t(0)
    ok = k32.ReadProcessMemory(
        h, addr, ctypes.byref(value), ctypes.sizeof(value), ctypes.byref(transferred)
    )
    if ok and transferred.value == 4:
        return value.value
    raise OSError(f"read 0x{addr:x} err={ctypes.get_last_error()}")
def write_bytes(h, addr, data):
    """Write ``data`` into another process at ``addr``.

    The target range is switched to PAGE_READWRITE for the duration of the
    write and its previous protection is put back afterwards, whether or not
    the write succeeded.

    Args:
        h: process handle.
        addr: destination address in the target process.
        data: the byte values to write (sized; unpacked into a c_ubyte array).

    Raises:
        OSError: if the protection change fails, or if WriteProcessMemory fails
            or writes fewer bytes than requested.
    """
    size = len(data)
    previous = wt.DWORD(0)
    unlocked = k32.VirtualProtectEx(h, addr, size, PAGE_READWRITE, ctypes.byref(previous))
    if not unlocked:
        raise OSError(f"protect 0x{addr:x} err={ctypes.get_last_error()}")
    try:
        payload = (ctypes.c_ubyte * size)(*data)
        written = ctypes.c_size_t(0)
        ok = k32.WriteProcessMemory(h, addr, payload, size, ctypes.byref(written))
        if not (ok and written.value == size):
            raise OSError(f"write 0x{addr:x} err={ctypes.get_last_error()}")
    finally:
        # Best effort: the result of restoring the old protection is not checked.
        k32.VirtualProtectEx(h, addr, size, previous.value, ctypes.byref(previous))
def build_thunk(remote_addr, offsets, delete_fn=None):
    """Build an x86 thunk that frees the buffer stored at each ``[this + offset]``.

    The thunk expects the object pointer in ECX and takes no stack arguments.
    Generated code::

        push esi
        mov  esi, ecx
        ; for each offset:
            mov  eax, [esi + offset]
            test eax, eax
            jz   skip
            push eax
            call <delete_fn>            ; caller-cleaned: add esp, 4 follows
            add  esp, 4
            mov  dword ptr [esi + offset], 0
        skip:
        mov  al, 1
        pop  esi
        ret

    Args:
        remote_addr: address the thunk will occupy in the target process; it is
            needed because the ``call`` uses a displacement relative to its own
            location.
        offsets: iterable of integer field offsets from the object pointer.
            Offsets in [-0x80, 0x7f] use the short disp8 encoding, everything
            else that fits in 32 bits uses disp32.
        delete_fn: absolute address of the function each buffer pointer is
            passed to. Defaults to ``OP_DELETE_ARR_EOR``.

    Returns:
        The machine code as ``bytes``.

    Raises:
        ValueError: if an offset cannot be represented as a 32-bit displacement
            (previously such values were silently truncated).
    """
    if delete_fn is None:
        delete_fn = OP_DELETE_ARR_EOR

    code = bytearray()
    code += b"\x56"        # push esi
    code += b"\x8b\xf1"    # mov esi, ecx
    for off in offsets:
        if not -0x80000000 <= off <= 0xffffffff:
            raise ValueError(
                f"field offset {off:#x} does not fit in a 32-bit displacement")
        # Pick the ModRM byte and displacement once; the load and the store
        # below address the same field. disp8 is sign-extended by the CPU, so it
        # is only valid for offsets in [-0x80, 0x7f].
        if -0x80 <= off < 0x80:
            modrm = 0x46                                  # [esi + disp8]
            disp = bytes([off & 0xff])
        else:
            modrm = 0x86                                  # [esi + disp32]
            disp = struct.pack("<I", off & 0xffffffff)
        # mov eax, [esi + off]
        code += bytes([0x8b, modrm]) + disp
        # test eax, eax
        code += b"\x85\xc0"
        # jz skip (rel8 placeholder, patched once the block length is known)
        code += b"\x74\x00"
        jz_idx = len(code) - 1
        # push eax
        code += b"\x50"
        # call <delete_fn> (rel32, relative to the instruction after the call)
        call_idx = len(code)
        next_after = remote_addr + call_idx + 5
        rel = (delete_fn - next_after) & 0xffffffff
        code += b"\xe8" + struct.pack("<I", rel)
        # add esp, 4
        code += b"\x83\xc4\x04"
        # mov dword ptr [esi + off], 0
        code += bytes([0xc7, modrm]) + disp + b"\x00\x00\x00\x00"
        # Fill the jz displacement: distance from the byte after the rel8 operand
        # to the end of this per-offset block (at most 19, so it always fits).
        code[jz_idx] = (len(code) - (jz_idx + 1)) & 0xff
    code += b"\xb0\x01"    # mov al, 1
    code += b"\x5e"        # pop esi
    code += b"\xc3"        # ret
    return bytes(code)
def _save_backup(backup_file, pid, thunk_base, slots):
    """Write the revert metadata for the slots patched so far to ``backup_file``."""
    with open(backup_file, "w") as f:
        json.dump({"pid": pid, "thunk_base": thunk_base, "slots": slots}, f, indent=2)


def patch_process(pid, dry_run=False, revert=False):
    """Install (or revert) the per-vtable free thunks in process ``pid``.

    Patch mode allocates one executable page in the target, writes one thunk per
    entry of ``VTABLES`` into it, and points vtable slot ``RSO_SLOT`` at the
    thunk, but only where that slot currently holds ``NO_OP_STUB``. The
    pre-patch slot values are stored in a per-PID JSON backup file.

    Revert mode reads that backup and writes the recorded ``pre`` values back.

    Args:
        pid: id of the process to patch.
        dry_run: report what would be done without writing to the target. This
            is honoured in revert mode as well.
        revert: restore the slots recorded in the backup file instead of patching.

    Exits the interpreter with status 2 if the process cannot be opened, 3 if
    revert is requested and no backup file exists, and 4 if the thunk page
    cannot be allocated.

    Raises:
        OSError: propagated from read_uint32 / write_bytes when a vtable slot in
            the target cannot be read or written, or from file-system calls on
            the backup file.
    """
    h = k32.OpenProcess(PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION
                        | PROCESS_QUERY_INFORMATION, False, pid)
    if not h:
        print(f"OpenProcess({pid}) failed err={ctypes.get_last_error()}"); sys.exit(2)
    try:
        backup_file = f"C:\\Users\\acbot\\leakhunt\\artifacts\\soak\\runtime_patch_v3_backup_{pid}.json"
        if revert:
            if not os.path.exists(backup_file):
                print(f"no backup at {backup_file}"); sys.exit(3)
            with open(backup_file) as f:
                backup = json.load(f)
            for entry in backup["slots"]:
                slot_addr = entry["vtable"] + RSO_SLOT
                cur = read_uint32(h, slot_addr)
                # Only undo what this tool installed. If the slot no longer holds
                # the recorded thunk address, the PID may belong to a different
                # or restarted process, and overwriting would corrupt it.
                expected = entry.get("post")
                if expected is not None and cur != expected:
                    print(f"revert {entry['name']}: slot holds 0x{cur:08x}, "
                          f"expected thunk 0x{expected:08x} — SKIPPING")
                    continue
                print(f"revert {entry['name']}: 0x{cur:08x} -> 0x{entry['pre']:08x}")
                if not dry_run:
                    write_bytes(h, slot_addr, struct.pack("<I", entry["pre"]))
            # NOTE(review): the thunk page (backup["thunk_base"]) is not freed here;
            # presumably because a thread could still be executing in it — confirm.
            if dry_run:
                print("DRY-RUN: no slots were written")
            else:
                print("revert complete")
            return
        # Build thunks per vtable. The first pass only measures sizes; the real
        # code depends on the final address and is generated after allocation.
        per_vtable_thunks = []
        running_size = 0
        for name, vtbl, offs in VTABLES:
            tk = build_thunk(0, offs)  # size probe
            per_vtable_thunks.append((name, vtbl, offs, len(tk)))
            running_size += len(tk) + 4  # padding between thunks
        print(f"total thunk size estimate: {running_size} bytes")
        if dry_run:
            for n, v, o, sz in per_vtable_thunks:
                print(f"  thunk for {n}: offsets {o}, size {sz} bytes")
            print("DRY-RUN: would VirtualAllocEx + emit thunks + rewrite 3 vtables")
            return
        # Make sure the backup can be written BEFORE the target is modified, so a
        # missing directory cannot leave patched slots without revert data.
        os.makedirs(os.path.dirname(backup_file), exist_ok=True)
        # Allocate page
        remote = k32.VirtualAllocEx(h, None, max(0x200, running_size + 0x40),
                                    MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE)
        if not remote:
            print(f"VirtualAllocEx failed err={ctypes.get_last_error()}"); sys.exit(4)
        print(f"thunk page at 0x{remote:08x}")
        applied = []
        cur_addr = remote
        for name, vtbl, offs, _ in per_vtable_thunks:
            tk = build_thunk(cur_addr, offs)
            # Write thunk
            n = ctypes.c_size_t(0)
            buf = (ctypes.c_ubyte * len(tk))(*tk)
            if not k32.WriteProcessMemory(h, cur_addr, buf, len(tk), ctypes.byref(n)) or n.value != len(tk):
                print(f"  WriteProcessMemory thunk for {name} failed err={ctypes.get_last_error()}")
                # cur_addr is not advanced: the next thunk reuses this location.
                continue
            print(f"  thunk for {name} written at 0x{cur_addr:08x} ({len(tk)} bytes), offsets={offs}")
            # Verify slot, rewrite
            cur_slot = read_uint32(h, vtbl + RSO_SLOT)
            if cur_slot != NO_OP_STUB:
                print(f"  vtable {vtbl:#x} slot +0x2c is 0x{cur_slot:08x}, expected 0x{NO_OP_STUB:08x} — SKIPPING")
                cur_addr += len(tk) + 4
                continue
            write_bytes(h, vtbl + RSO_SLOT, struct.pack("<I", cur_addr))
            verify_slot = read_uint32(h, vtbl + RSO_SLOT)
            if verify_slot != cur_addr:
                print(f"  slot verify failed (read 0x{verify_slot:08x})")
                cur_addr += len(tk) + 4
                continue
            print(f"  vtable {vtbl:#x} +0x2c rewritten {cur_slot:#x} -> {cur_addr:#x}")
            applied.append(dict(name=name, vtable=vtbl, offsets=offs, pre=cur_slot, post=cur_addr,
                                thunk_addr=cur_addr, thunk_size=len(tk)))
            # Persist after every verified rewrite: if a later vtable raises, the
            # slots already patched can still be reverted.
            _save_backup(backup_file, pid, remote, applied)
            cur_addr += len(tk) + 4  # padding
        if applied:
            print(f"backup saved to {backup_file}")
        else:
            print("NO slots patched")
            # No vtable slot was recorded as pointing into the page; release it.
            k32.VirtualFreeEx(h, remote, 0, MEM_RELEASE)
    finally:
        k32.CloseHandle(h)
def main():
    """Command-line entry point: ``<pid> [--dry-run] [--revert]``.

    Parses the arguments and forwards them to patch_process.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pid", type=int)
    for switch in ("--dry-run", "--revert"):
        parser.add_argument(switch, action="store_true")
    opts = parser.parse_args()
    patch_process(opts.pid, dry_run=opts.dry_run, revert=opts.revert)


if __name__ == "__main__":
    main()