leakhunt/tools/runtime_patch_v2.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

276 lines
11 KiB
Python

"""
runtime_patch_v2.py <pid> [--dry-run] [--revert]
Proper Phase 8 prototype: injects a small custom x86 thunk into the target
process and rewrites the RenderSurface vtable `ReleaseSubObjects` slot to
point at it. The thunk frees the two heap-allocated buffers
(sourceData.sourceBits at offset 0x40 and m_pSurfaceBits at offset 0x58)
using EoR's operator delete[] at 0x005df164.
Differs from v1 in that v1 made the slot point at slot +0x3c's function,
which turned out to be a partial helper (frees only one buffer). The custom
thunk frees both, matching the body of 2013's RenderSurface::Destroy minus
the Begin() reset.
EoR addresses (from dump diagnostic + live disassembly):
vtable A: 0x007caa08
vtable B: 0x007ca0d8
slot +0x2c (RSO): current value 0x004154a0 (no-op stub)
operator delete[]: 0x005df164
offsets in struct: sourceData.sourceBits = +0x40, m_pSurfaceBits = +0x58
"""
import argparse, ctypes, ctypes.wintypes as wt, json, os, struct, sys
# --- Desired-access bits passed to OpenProcess ---
PROCESS_VM_OPERATION = 0x0008
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_QUERY_INFORMATION = 0x0400

# --- Allocation / free types for VirtualAllocEx and VirtualFreeEx ---
MEM_COMMIT = 0x1000
MEM_RESERVE = 0x2000
MEM_RELEASE = 0x8000

# --- Page protection values ---
PAGE_READWRITE = 0x04
PAGE_EXECUTE_READWRITE = 0x40

# === EoR offsets and addresses (see the module docstring for provenance) ===
# Byte offset of the ReleaseSubObjects entry inside each vtable.
RSO_SLOT = 0x2C
# Field offsets inside a RenderSurface instance.
OFF_SOURCEBITS = 0x40   # sourceData.sourceBits
OFF_SURFACEBITS = 0x58  # m_pSurfaceBits
# Absolute code addresses in the EoR client image.
OP_DELETE_ARR_EOR = 0x005DF164  # operator delete[]
NO_OP_STUB = 0x004154A0         # no-op DBObj::ReleaseSubObjects currently in the slot
# (label, vtable base address) for every vtable whose RSO slot gets rewritten.
VTABLES = [
    ("RenderSurface vtable A", 0x007CAA08),
    ("RenderSurface vtable B", 0x007CA0D8),
]
# kernel32 bindings used to read, write and allocate memory in the target.
#
# The library is loaded with use_last_error=True on purpose: every failure
# message in this file reports ctypes.get_last_error(), and ctypes only
# captures GetLastError() into that private copy for libraries loaded with
# this flag. With the default loader (ctypes.windll) the reported code would
# always be 0.
k32 = ctypes.WinDLL("kernel32", use_last_error=True)

# HANDLE OpenProcess(DWORD access, BOOL inherit, DWORD pid)
OpenProcess = k32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

# BOOL CloseHandle(HANDLE h)
CloseHandle = k32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

# BOOL ReadProcessMemory(HANDLE h, LPCVOID base, LPVOID buf, SIZE_T n, SIZE_T *read)
ReadProcessMemory = k32.ReadProcessMemory
ReadProcessMemory.argtypes = [
    wt.HANDLE,
    wt.LPCVOID,
    wt.LPVOID,
    ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
ReadProcessMemory.restype = wt.BOOL

# BOOL WriteProcessMemory(HANDLE h, LPVOID base, LPCVOID buf, SIZE_T n, SIZE_T *written)
WriteProcessMemory = k32.WriteProcessMemory
WriteProcessMemory.argtypes = [
    wt.HANDLE,
    wt.LPVOID,
    wt.LPCVOID,
    ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
WriteProcessMemory.restype = wt.BOOL

# LPVOID VirtualAllocEx(HANDLE h, LPVOID addr, SIZE_T size, DWORD type, DWORD protect)
VirtualAllocEx = k32.VirtualAllocEx
VirtualAllocEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD, wt.DWORD]
VirtualAllocEx.restype = wt.LPVOID

# BOOL VirtualFreeEx(HANDLE h, LPVOID addr, SIZE_T size, DWORD free_type)
VirtualFreeEx = k32.VirtualFreeEx
VirtualFreeEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD]
VirtualFreeEx.restype = wt.BOOL

# BOOL VirtualProtectEx(HANDLE h, LPVOID addr, SIZE_T size, DWORD new, DWORD *old)
VirtualProtectEx = k32.VirtualProtectEx
VirtualProtectEx.argtypes = [
    wt.HANDLE,
    wt.LPVOID,
    ctypes.c_size_t,
    wt.DWORD,
    ctypes.POINTER(wt.DWORD),
]
VirtualProtectEx.restype = wt.BOOL
def read_uint32(h, addr):
    """Read one 32-bit unsigned value from another process.

    Args:
        h: process handle passed straight to ReadProcessMemory.
        addr: address in the target process to read from.

    Returns:
        The 4 bytes at ``addr`` as an unsigned integer.

    Raises:
        OSError: the call failed or transferred fewer than 4 bytes.
    """
    value = ctypes.c_uint32(0)
    transferred = ctypes.c_size_t(0)
    ok = ReadProcessMemory(h, addr, ctypes.byref(value), 4, ctypes.byref(transferred))
    # A short read is treated exactly like an outright failure.
    if not (ok and transferred.value == 4):
        raise OSError(f"read 0x{addr:x} err={ctypes.get_last_error()}")
    return value.value
def write_bytes(h, addr, data):
    """Write ``data`` into another process, lifting page protection around it.

    The range is switched to PAGE_READWRITE first, the bytes are written, and
    the protection reported by the first VirtualProtectEx call is put back
    afterwards whether or not the write succeeded. The result of that
    restoring call is not checked.

    Args:
        h: process handle for the target.
        addr: destination address in the target process.
        data: the bytes to store.

    Raises:
        OSError: changing the protection failed, or the write failed or was
            short.
    """
    size = len(data)
    previous = wt.DWORD(0)
    if not VirtualProtectEx(h, addr, size, PAGE_READWRITE, ctypes.byref(previous)):
        raise OSError(f"protect 0x{addr:x} err={ctypes.get_last_error()}")
    try:
        payload = (ctypes.c_ubyte * size)(*data)
        written = ctypes.c_size_t(0)
        ok = WriteProcessMemory(h, addr, payload, size, ctypes.byref(written))
        if not ok or written.value != size:
            raise OSError(f"write 0x{addr:x} err={ctypes.get_last_error()}")
    finally:
        # Best effort: hand the pages back with their original protection.
        VirtualProtectEx(h, addr, size, previous.value, ctypes.byref(previous))
def build_thunk(remote_addr, delete_addr=None, field_offsets=None):
    """Assemble the x86 (32-bit) ReleaseSubObjects replacement thunk.

    The thunk receives ``this`` in ECX, takes no stack arguments and returns
    1 in AL. It is described as __fastcall; with no stack arguments that is
    register-compatible with a ``this``-in-ECX member call.

    For every offset in ``field_offsets`` it emits the equivalent of::

        if (*(void**)(this + off)) {
            operator_delete_arr(*(void**)(this + off));   // cdecl, 1 arg
            *(void**)(this + off) = 0;
        }

    Resulting assembly (one middle section per offset)::

        push esi
        mov  esi, ecx                 ; keep `this` across the calls
        ; --- per field ---
        mov  eax, [esi + off]
        test eax, eax
        jz   skip
        push eax
        call <delete_addr>            ; E8 rel32, relative to remote_addr
        add  esp, 4                   ; caller pops the argument
        mov  dword ptr [esi + off], 0
      skip:
        ; --- end per field ---
        mov  al, 1
        pop  esi
        ret

    Args:
        remote_addr: address in the target process where byte 0 of the
            returned code will be placed; needed because ``call rel32`` is
            position-dependent.
        delete_addr: absolute address of the deallocation routine to call.
            Defaults to ``OP_DELETE_ARR_EOR``.
        field_offsets: iterable of byte offsets (0..0x7f) of the pointer
            fields to free. Defaults to
            ``(OFF_SOURCEBITS, OFF_SURFACEBITS)``.

    Returns:
        The machine code as ``bytes`` (53 bytes for the default two fields).

    Raises:
        ValueError: an offset cannot be encoded as a non-negative disp8.
    """
    # Defaults are resolved at call time so the module constants remain the
    # single source of truth for the addresses and offsets.
    if delete_addr is None:
        delete_addr = OP_DELETE_ARR_EOR
    if field_offsets is None:
        field_offsets = (OFF_SOURCEBITS, OFF_SURFACEBITS)

    code = bytearray()
    code += b"\x56"        # push esi
    code += b"\x8b\xf1"    # mov esi, ecx

    for off in field_offsets:
        # The [esi + disp8] form sign-extends its displacement, so only
        # 0..0x7f can be expressed as a positive offset.
        if not 0 <= off <= 0x7F:
            raise ValueError(f"field offset 0x{off:x} does not fit in a positive disp8")

        code += bytes((0x8B, 0x46, off))    # mov eax, [esi + off]
        code += b"\x85\xc0"                 # test eax, eax
        code += b"\x74\x00"                 # jz skip (rel8 patched below)
        jz_idx = len(code) - 1              # index of the rel8 byte
        code += b"\x50"                     # push eax

        # call rel32: displacement is measured from the end of the 5-byte
        # instruction, at its final address inside the target process.
        next_instr = remote_addr + len(code) + 5
        rel = (delete_addr - next_instr) & 0xFFFFFFFF
        code += b"\xe8" + struct.pack("<I", rel)

        code += b"\x83\xc4\x04"                               # add esp, 4
        code += bytes((0xC7, 0x46, off)) + b"\x00\x00\x00\x00"  # mov dword ptr [esi + off], 0

        # skip: lands here; rel8 counts from the byte after the jz.
        code[jz_idx] = (len(code) - (jz_idx + 1)) & 0xFF

    code += b"\xb0\x01"    # mov al, 1
    code += b"\x5e"        # pop esi
    code += b"\xc3"        # ret
    return bytes(code)
def _revert_slots(h, backup_file, dry_run):
    """Restore the vtable slots listed in ``backup_file`` to their saved values.

    A slot is rewritten only when it still holds the thunk address recorded
    at patch time. The backup is keyed by PID alone, and a PID can be reused
    by an unrelated process, so an unexpected value means the slot is left
    untouched. With ``dry_run`` set, nothing is written at all.

    Exits with status 3 when the backup file does not exist.
    """
    if not os.path.exists(backup_file):
        print(f"no backup at {backup_file}")
        sys.exit(3)
    with open(backup_file) as f:
        backup = json.load(f)

    for entry in backup["slots"]:
        slot_addr = entry["vtable"] + RSO_SLOT
        cur = read_uint32(h, slot_addr)
        print(f"revert {entry['name']} 0x{entry['vtable']:08x}: "
              f"current 0x{cur:08x} -> pre 0x{entry['pre']:08x}")
        if cur == entry["pre"]:
            print("  already holds the pre-patch value, nothing to do")
            continue
        # Backups without a "post" field are accepted as-is.
        if cur != entry.get("post", cur):
            print("  slot does not hold the recorded thunk address — SKIPPING "
                  "(different process with a reused PID?)")
            continue
        if dry_run:
            print("  DRY-RUN: slot left untouched")
            continue
        write_bytes(h, slot_addr, struct.pack("<I", entry["pre"]))

    print("DRY-RUN: nothing was written" if dry_run else "revert complete")


def patch_process(pid, dry_run=False, revert=False):
    """Install (or remove) the ReleaseSubObjects thunk in a running client.

    Install path: allocate executable memory in the target, write and verify
    the thunk from ``build_thunk``, then point the ``RSO_SLOT`` entry of every
    vtable in ``VTABLES`` at it. A slot is rewritten only if it currently
    holds ``NO_OP_STUB``. The previous slot values are saved to a per-PID
    JSON backup so ``revert`` can undo the change.

    Args:
        pid: process id of the target.
        dry_run: report what would happen without writing to the target.
            Honoured on both the install and the revert path.
        revert: restore the slots from the backup instead of patching.

    Exit codes (via ``sys.exit``): 2 OpenProcess failed, 3 no backup to
    revert from, 4 VirtualAllocEx failed, 5 writing the thunk failed,
    6 reading the thunk back failed, 7 read-back bytes differ.

    Raises:
        OSError: a slot read/write in the target failed, or the backup
            directory/file could not be created.
    """
    h = OpenProcess(PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION
                    | PROCESS_QUERY_INFORMATION, False, pid)
    if not h:
        print(f"OpenProcess({pid}) failed err={ctypes.get_last_error()}")
        sys.exit(2)
    try:
        backup_file = f"C:\\Users\\acbot\\leakhunt\\artifacts\\soak\\runtime_patch_v2_backup_{pid}.json"
        if revert:
            _revert_slots(h, backup_file, dry_run)
            return

        # Build a sample thunk to compute size, then alloc + emit at real addr
        size_probe = len(build_thunk(0))
        print(f"thunk size: {size_probe} bytes")
        if dry_run:
            print("DRY-RUN: would VirtualAllocEx + emit thunk + rewrite both vtables")
            return

        # Make sure the backup can be written BEFORE touching the target: a
        # patched process without a backup cannot be reverted.
        os.makedirs(os.path.dirname(backup_file), exist_ok=True)

        # Allocate executable memory in target process
        remote = VirtualAllocEx(h, None, max(size_probe, 0x100),
                                MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE)
        if not remote:
            print(f"VirtualAllocEx failed err={ctypes.get_last_error()}")
            sys.exit(4)
        print(f"thunk page allocated at 0x{remote:08x}")

        applied = []
        code = b""
        try:
            code = build_thunk(remote)
            print(f"emitted thunk: {len(code)} bytes")

            # Write thunk
            n = ctypes.c_size_t(0)
            buf = (ctypes.c_ubyte * len(code))(*code)
            if not WriteProcessMemory(h, remote, buf, len(code), ctypes.byref(n)) or n.value != len(code):
                print(f"WriteProcessMemory thunk failed err={ctypes.get_last_error()}")
                sys.exit(5)
            print("thunk written")

            # Validate by reading back
            verify = (ctypes.c_ubyte * len(code))()
            if not ReadProcessMemory(h, remote, verify, len(code), ctypes.byref(n)) or n.value != len(code):
                print(f"verify read failed err={ctypes.get_last_error()}")
                sys.exit(6)
            if bytes(verify) != code:
                print("thunk bytes mismatch on readback")
                sys.exit(7)
            print("thunk verified")

            # Rewrite vtable slots
            for name, vt in VTABLES:
                cur = read_uint32(h, vt + RSO_SLOT)
                if cur != NO_OP_STUB:
                    print(f" {name} 0x{vt:08x}: slot +0x2c is 0x{cur:08x}, expected 0x{NO_OP_STUB:08x} — SKIPPING (already patched?)")
                    continue
                write_bytes(h, vt + RSO_SLOT, struct.pack("<I", remote))
                verify_slot = read_uint32(h, vt + RSO_SLOT)
                if verify_slot != remote:
                    print(f" {name}: slot verify failed (read 0x{verify_slot:08x})")
                    continue
                print(f" {name} 0x{vt:08x}: +0x2c rewritten {cur:#x} -> {remote:#x}")
                applied.append(dict(name=name, vtable=vt, pre=cur, post=remote))

            if not applied:
                print("NO slots patched")
        finally:
            # Runs on success, on sys.exit and on exceptions alike, so that
            # (a) every slot that WAS rewritten is recorded even if a later
            # step failed, and (b) an unused thunk page is always released.
            if applied:
                with open(backup_file, "w") as f:
                    json.dump({"pid": pid, "thunk": remote, "slots": applied,
                               "thunk_size": len(code)}, f, indent=2)
                print(f"backup saved to {backup_file}")
            else:
                VirtualFreeEx(h, remote, 0, MEM_RELEASE)
    finally:
        CloseHandle(h)
def main():
    """Command-line entry point: parse ``<pid> [--dry-run] [--revert]`` and run."""
    parser = argparse.ArgumentParser()
    parser.add_argument("pid", type=int)
    # Both switches are plain on/off flags that default to False.
    for switch in ("--dry-run", "--revert"):
        parser.add_argument(switch, action="store_true")
    opts = parser.parse_args()
    patch_process(opts.pid, dry_run=opts.dry_run, revert=opts.revert)


if __name__ == "__main__":
    main()