leakhunt/tools/patch_v8_thunk.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

274 lines
10 KiB
Python

"""patch_v8_thunk.py <pid> [--revert]
EXPERIMENTAL v8-thunk: actively drain UIElement_UIItem pool by hooking
the tail-call JMP at end of UIElement_ItemList::ItemList_Flush.
Mechanism:
ItemList_Flush ends with `JMP UIElement_ItemList::UpdateEmptySlots`
at EoR 0x004e4a87 (5 bytes: E9 04 F9 FF FF).
Replace with `JMP <thunk>`. Thunk walks the array backward, calls
InternalDeleteItem on every WAITING UIItem, then tail-calls
UpdateEmptySlots so resize behavior is preserved.
This is the v8-minimal followup. Where v8-minimal (3 byte changes)
prevented NEW leaks, v8-thunk actively drains pre-existing leaks too.
Thunk (86 bytes; CALL/JMP targets are absolute VAs encoded as rel32, so the bytes depend on the thunk's base address):
push ebp; push edi; push esi; push ebx
mov ebx, ecx ; ebx = this
mov esi, [ebx+0x610] ; count
dec esi ; idx = count-1
loop_top:
test esi, esi
js loop_done
push esi
mov ecx, ebx
call UIElement_ListBox::GetItem (0x0046dc50)
test eax, eax
jz skip_item
mov edi, eax
mov eax, [edi] ; vtable
push 0x10000032
mov ecx, edi
call [eax+0x94] ; vtable[37] type check
test eax, eax
jz skip_item
mov ecx, edi
call UIItem_GetState (0x004e1e20)
cmp eax, 0x1000001c
jne skip_item
push edi
mov ecx, ebx
call InternalDeleteItem (0x004e41c0)
skip_item:
dec esi
jmp loop_top
loop_done:
mov ecx, ebx
pop ebx; pop esi; pop edi; pop ebp
jmp UpdateEmptySlots (0x004e4390)
"""
import argparse
import ctypes
import ctypes.wintypes as wt
import struct
import sys
# Addresses inside the target process. The function names are the ones used
# in the thunk listing of the module docstring.
PATCH_SITE_VA = 0x004e4a87        # 5-byte tail-call JMP that gets redirected
ORIG_JMP_BYTES = bytes([0xE9, 0x04, 0xF9, 0xFF, 0xFF])  # JMP UpdateEmptySlots (relative)
GETITEM_VA = 0x0046dc50           # UIElement_ListBox::GetItem
GETSTATE_VA = 0x004e1e20          # UIItem_GetState
INTDELETE_VA = 0x004e41c0         # InternalDeleteItem
UPDATEEMPTYSLOTS_VA = 0x004e4390  # UIElement_ItemList::UpdateEmptySlots


def build_thunk(thunk_base: int) -> bytes:
    """Build the 86-byte thunk for placement at `thunk_base`.

    The machine code is emitted with zeroed branch displacements first and
    the displacements are filled in once every label offset is known.  The
    CALL/JMP rel32 operands are computed against `thunk_base`, so the result
    is only valid when written to exactly that address.

    Args:
        thunk_base: 32-bit address at which the thunk will be placed.

    Returns:
        The encoded thunk bytes.

    Raises:
        ValueError: if `thunk_base` is not a 32-bit address, or a short
            branch displacement does not fit in a signed byte.
    """
    if not 0 <= thunk_base <= 0xFFFFFFFF:
        raise ValueError(f"thunk_base 0x{thunk_base:x} is not a 32-bit address")

    out = bytearray()

    # Prologue: save the registers the loop uses, then load this/count.
    out += bytes([0x55])                                # push ebp
    out += bytes([0x57])                                # push edi
    out += bytes([0x56])                                # push esi
    out += bytes([0x53])                                # push ebx
    out += bytes([0x8B, 0xD9])                          # mov ebx, ecx
    out += bytes([0x8B, 0xB3, 0x10, 0x06, 0x00, 0x00])  # mov esi, [ebx+0x610]
    out += bytes([0x4E])                                # dec esi

    loop_top_off = len(out)                             # 13
    out += bytes([0x85, 0xF6])                          # test esi, esi
    js_loopdone_off = len(out)
    out += bytes([0x78, 0x00])                          # js loop_done (rel8 patched below)
    out += bytes([0x56])                                # push esi
    out += bytes([0x8B, 0xCB])                          # mov ecx, ebx
    call_getitem_off = len(out)
    out += bytes([0xE8, 0, 0, 0, 0])                    # call GetItem (rel32 patched below)
    out += bytes([0x85, 0xC0])                          # test eax, eax
    jz_skip1_off = len(out)
    out += bytes([0x74, 0x00])                          # jz skip_item
    out += bytes([0x8B, 0xF8])                          # mov edi, eax
    out += bytes([0x8B, 0x07])                          # mov eax, [edi]
    out += bytes([0x68, 0x32, 0x00, 0x00, 0x10])        # push 0x10000032
    out += bytes([0x8B, 0xCF])                          # mov ecx, edi
    out += bytes([0xFF, 0x90, 0x94, 0x00, 0x00, 0x00])  # call dword [eax+0x94]
    out += bytes([0x85, 0xC0])                          # test eax, eax
    jz_skip2_off = len(out)
    out += bytes([0x74, 0x00])                          # jz skip_item
    out += bytes([0x8B, 0xCF])                          # mov ecx, edi
    call_getstate_off = len(out)
    out += bytes([0xE8, 0, 0, 0, 0])                    # call GetState
    out += bytes([0x3D, 0x1C, 0x00, 0x00, 0x10])        # cmp eax, 0x1000001c
    jne_skip_off = len(out)
    out += bytes([0x75, 0x00])                          # jne skip_item
    out += bytes([0x57])                                # push edi
    out += bytes([0x8B, 0xCB])                          # mov ecx, ebx
    call_intdel_off = len(out)
    out += bytes([0xE8, 0, 0, 0, 0])                    # call InternalDeleteItem

    skip_item_off = len(out)
    out += bytes([0x4E])                                # dec esi
    jmp_top_off = len(out)
    out += bytes([0xEB, 0x00])                          # jmp loop_top

    # Epilogue: reload this into ecx, restore registers in reverse push
    # order, then tail-jump to the function the patched JMP used to reach.
    loop_done_off = len(out)
    out += bytes([0x8B, 0xCB])                          # mov ecx, ebx
    out += bytes([0x5B])                                # pop ebx
    out += bytes([0x5E])                                # pop esi
    out += bytes([0x5F])                                # pop edi
    out += bytes([0x5D])                                # pop ebp
    jmp_upd_off = len(out)
    out += bytes([0xE9, 0, 0, 0, 0])                    # jmp UpdateEmptySlots

    def patch_rel8(at, target):
        # `at` is the offset of a 2-byte short branch; the displacement is
        # relative to the end of that instruction.
        rel = target - (at + 2)
        if not -128 <= rel <= 127:
            raise ValueError(f"rel8 overflow: {rel}")
        out[at + 1] = rel & 0xFF

    def patch_rel32(at, target_va):
        # `at` is the offset of the E8/E9 opcode; rel32 occupies at+1..at+4
        # and is relative to the address of the following instruction.
        next_insn = thunk_base + at + 5
        # 32-bit EIP arithmetic wraps modulo 2**32, so encode the wrapped
        # displacement instead of requiring it to fit a signed 32-bit int
        # (which fails for thunk pages far above the targets).
        rel = (target_va - next_insn) & 0xFFFFFFFF
        out[at + 1:at + 5] = struct.pack("<I", rel)

    patch_rel8(js_loopdone_off, loop_done_off)
    patch_rel8(jz_skip1_off, skip_item_off)
    patch_rel8(jz_skip2_off, skip_item_off)
    patch_rel8(jne_skip_off, skip_item_off)
    patch_rel8(jmp_top_off, loop_top_off)
    patch_rel32(call_getitem_off, GETITEM_VA)
    patch_rel32(call_getstate_off, GETSTATE_VA)
    patch_rel32(call_intdel_off, INTDELETE_VA)
    patch_rel32(jmp_upd_off, UPDATEEMPTYSLOTS_VA)
    return bytes(out)
# Access rights requested when opening the target process.
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_VM_OPERATION = 0x0008
PROCESS_QUERY_INFORMATION = 0x0400
# Allocation type and page protection passed to VirtualAllocEx.
MEM_COMMIT_RESERVE = 0x00001000 | 0x00002000
PAGE_EXECUTE_READWRITE = 0x40

# Load kernel32 with use_last_error=True: ctypes then saves the thread's
# last-error value right after every foreign call, which is what
# ctypes.get_last_error() reads.  Through the default ctypes.windll loader
# that saved copy is never updated, so every "err=" message would print 0.
k32 = ctypes.WinDLL("kernel32", use_last_error=True)

# Explicit prototypes so handles, pointers and sizes are marshalled at their
# declared width instead of ctypes' default C int.
OpenProcess = k32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

CloseHandle = k32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

VirtualAllocEx = k32.VirtualAllocEx
VirtualAllocEx.argtypes = [wt.HANDLE, ctypes.c_void_p, ctypes.c_size_t, wt.DWORD, wt.DWORD]
VirtualAllocEx.restype = wt.LPVOID

WriteProcessMemory = k32.WriteProcessMemory
WriteProcessMemory.argtypes = [wt.HANDLE, wt.LPVOID, wt.LPCVOID, ctypes.c_size_t,
                               ctypes.POINTER(ctypes.c_size_t)]
WriteProcessMemory.restype = wt.BOOL

ReadProcessMemory = k32.ReadProcessMemory
ReadProcessMemory.argtypes = [wt.HANDLE, wt.LPCVOID, wt.LPVOID, ctypes.c_size_t,
                              ctypes.POINTER(ctypes.c_size_t)]
ReadProcessMemory.restype = wt.BOOL

VirtualProtectEx = k32.VirtualProtectEx
VirtualProtectEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD,
                             ctypes.POINTER(wt.DWORD)]
VirtualProtectEx.restype = wt.BOOL
def read_bytes(h, addr, n):
    """Read up to `n` bytes at `addr` from the process behind handle `h`.

    Returns the bytes ReadProcessMemory reported as copied; raises OSError
    if the call reports failure.
    """
    scratch = (ctypes.c_ubyte * n)()
    copied = ctypes.c_size_t(0)
    succeeded = ReadProcessMemory(h, addr, scratch, n, ctypes.byref(copied))
    if succeeded:
        # Trim to the count the API reported rather than the requested size.
        return bytes(scratch[:copied.value])
    raise OSError(f"read 0x{addr:08x} err={ctypes.get_last_error()}")
def write_bytes(h, addr, data):
    """Write `data` at `addr` in the process behind handle `h`.

    The range is switched to PAGE_EXECUTE_READWRITE for the duration of the
    write and then set back to the protection it had before.  Raises OSError
    if the protection change or the write itself fails; a failure to restore
    the old protection is not reported.
    """
    size = len(data)
    saved_protection = wt.DWORD(0)
    unlocked = VirtualProtectEx(
        h, addr, size, PAGE_EXECUTE_READWRITE, ctypes.byref(saved_protection)
    )
    if not unlocked:
        raise OSError(f"VirtualProtectEx 0x{addr:08x} err={ctypes.get_last_error()}")

    written = ctypes.c_size_t(0)
    write_ok = WriteProcessMemory(h, addr, data, size, ctypes.byref(written))
    # Grab the error code now: the protection restore below is another API
    # call and would overwrite it.
    failure_code = 0 if write_ok else ctypes.get_last_error()

    discarded = wt.DWORD(0)
    VirtualProtectEx(h, addr, size, saved_protection.value, ctypes.byref(discarded))

    if write_ok:
        return
    raise OSError(f"write 0x{addr:08x} err={failure_code}")
def _revert_patch(h, cur):
    """Put the original tail-call JMP back at the patch site."""
    if cur == ORIG_JMP_BYTES:
        print(" already original — nothing to revert")
        return
    if cur[0] != 0xE9:
        # This script only ever writes an E9 JMP here.  Anything else means
        # the site is not what we patched, so overwriting it would corrupt
        # unrelated code.
        print(" UNEXPECTED — patch site does not hold a JMP. Refusing to revert.")
        sys.exit(3)
    write_bytes(h, PATCH_SITE_VA, ORIG_JMP_BYTES)
    after = read_bytes(h, PATCH_SITE_VA, 5)
    print(f" reverted; bytes now: {after.hex()}")
    if after != ORIG_JMP_BYTES:
        print(" MISMATCH")
        sys.exit(6)


def _install_patch(h, cur):
    """Allocate and write the thunk, then redirect the patch-site JMP to it."""
    if cur != ORIG_JMP_BYTES:
        if cur[0] == 0xE9:
            print(" already has a JMP somewhere — maybe already patched. Refusing to re-patch.")
        else:
            print(" UNEXPECTED — bytes don't match expected JMP. Refusing.")
        sys.exit(3)

    thunk_page = VirtualAllocEx(h, None, 0x100, MEM_COMMIT_RESERVE, PAGE_EXECUTE_READWRITE)
    if not thunk_page:
        print(f"VirtualAllocEx failed err={ctypes.get_last_error()}")
        sys.exit(4)
    print(f" thunk page @ 0x{thunk_page:08x}")
    if thunk_page > 0xFFFFFFFF:
        # A rel32 JMP from the 32-bit patch site cannot reach such an address.
        print(" thunk page is outside the 32-bit address space. Refusing.")
        sys.exit(4)

    thunk = build_thunk(thunk_page)
    print(f" thunk size: {len(thunk)} bytes")
    print(f" thunk hex: {thunk.hex()}")

    # The thunk must be fully in place before the JMP is redirected to it.
    sz = ctypes.c_size_t(0)
    wrote = WriteProcessMemory(h, thunk_page, thunk, len(thunk), ctypes.byref(sz))
    if not wrote or sz.value != len(thunk):
        print(f"write thunk failed err={ctypes.get_last_error()}")
        sys.exit(5)

    # rel32 is relative to the instruction after the 5-byte JMP and wraps
    # modulo 2**32, so mask it instead of requiring a signed 32-bit fit.
    rel = (thunk_page - (PATCH_SITE_VA + 5)) & 0xFFFFFFFF
    new_jmp = bytes([0xE9]) + struct.pack("<I", rel)
    write_bytes(h, PATCH_SITE_VA, new_jmp)

    after = read_bytes(h, PATCH_SITE_VA, 5)
    print(f" patch site now: {after.hex()} (expected {new_jmp.hex()})")
    if after != new_jmp:
        print(" MISMATCH")
        sys.exit(6)
    print(" OK")


def main():
    """Command-line entry point: patch, or with --revert unpatch, process <pid>.

    Exit status: 2 if the process cannot be opened, 3 if the patch site holds
    unexpected bytes, 4 if the thunk page cannot be allocated or used, 5 if
    writing the thunk fails, 6 if the patch site does not read back as
    written.  The process handle is closed on every path, including error
    exits and exceptions.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pid", type=int)
    ap.add_argument("--revert", action="store_true")
    args = ap.parse_args()

    h = OpenProcess(
        PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION,
        False, args.pid,
    )
    if not h:
        print(f"OpenProcess({args.pid}) err={ctypes.get_last_error()}")
        sys.exit(2)

    try:
        cur = read_bytes(h, PATCH_SITE_VA, 5)
        print(f"PID {args.pid}")
        print(f" patch site @ 0x{PATCH_SITE_VA:08x} current: {cur.hex()}")
        if args.revert:
            _revert_patch(h, cur)
        else:
            _install_patch(h, cur)
    finally:
        # sys.exit() raises SystemExit, so this also runs on the error exits.
        CloseHandle(h)


if __name__ == "__main__":
    main()