Initial commit — leak-hunt project complete

Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
acbot 2026-05-23 21:05:17 +02:00
commit 57b5e43d0e
199 changed files with 1648333 additions and 0 deletions

288
tools/patch_v8_thunk_v2.py Normal file
View file

@ -0,0 +1,288 @@
"""patch_v8_thunk_v2.py <pid> [--revert]
v8-thunk-v2: SAFE drain of UIElement_UIItem leaked pool.
v1 (the broken one) hooked Flush, so the drain ran during normal panel
refresh and ate items needed for display.
v2 hooks OnVisibilityChanged (vis=false branch) instead, so the drain runs
only when the panel becomes hidden, NOT during Flush.
Mechanism:
At 0x004e499e, OnVisibilityChanged has 24 bytes implementing:
if ((this->+0x554 >> 0x11 & 1) && vis != 0) UpdateEmptySlots()
epilogue at 0x004e49b6: pop esi; pop ebx; ret 4
We replace those 24 bytes with `JMP <thunk>` (5 bytes) + 19 NOPs.
Thunk reproduces the original logic AND adds a drain path:
if (!(this->+0x554 >> 0x11 & 1)) return
if (vis != 0) UpdateEmptySlots() ; stock visible path
else ; vis == false: drain (NEW)
for up to 8 iterations:
idx = this->+0x610 - 1
if idx < 0: break
item_array = GetItem(this, idx)
if item_array == NULL: break ; (no more items)
real_item = item_array->vtable[37](0x10000032) ; type check
if real_item == 0: break ; (not a UIItem at end)
state = real_item->UIItem_GetState()
if state != WAITING: break ; (active item at end, stop)
InternalDeleteItem(this, real_item)
Safety:
- Only drains on vis=false (panel actually going hidden)
- Cap of 8 items per hide event (prevents catastrophic burst)
- Stops at first non-WAITING item at end (mimics UpdateEmptySlots's
own trim behavior, which never deletes active items)
- Preserves the original visible-true behavior exactly
Notable correctness:
- InternalDeleteItem is called with the result of vtable[37], NOT
the raw item from the array (mirrors UpdateEmptySlots's pattern)
"""
import argparse
import ctypes
import ctypes.wintypes as wt
import struct
import sys
# Where the detour goes inside OnVisibilityChanged, and how many bytes of
# stock code it replaces.
PATCH_SITE_VA = 0x004E499E
PATCH_LEN = 24

# Stock instruction bytes expected at the patch site, one entry per
# instruction. main() compares the live bytes against this before patching
# and writes it back on --revert.
ORIG_BYTES = b"".join((
    bytes.fromhex("8B 86 54 05 00 00"),  # mov  eax, [esi+0x554]
    bytes.fromhex("C1 E8 11"),           # shr  eax, 0x11
    bytes.fromhex("A8 01"),              # test al, 1
    bytes.fromhex("74 0B"),              # jz   +0x0B  (to 0x004e49b6)
    bytes.fromhex("84 DB"),              # test bl, bl
    bytes.fromhex("74 07"),              # jz   +0x07
    bytes.fromhex("8B CE"),              # mov  ecx, esi
    bytes.fromhex("E8 DA F9 FF FF"),     # call UpdateEmptySlots
))
assert len(ORIG_BYTES) == PATCH_LEN

# Absolute addresses of the client functions the thunk calls.
GETITEM_VA = 0x0046DC50
GETSTATE_VA = 0x004E1E20
INTDELETE_VA = 0x004E41C0
UPDATEEMPTYSLOTS_VA = 0x004E4390
def build_thunk(base: int) -> bytes:
    """Assemble the v8-thunk-v2 machine code for placement at address ``base``.

    The thunk is reached by a JMP from the patch site, so it runs inside the
    hooked function's frame: ``esi`` is used as ``this`` and ``bl`` as the
    visibility flag, exactly as in the stock bytes it replaces, and its
    epilogue repeats the stock ``pop esi; pop ebx; ret 4`` after restoring
    the ``edi`` it saved on entry.

    Args:
        base: Absolute address (in the 32-bit target process) at which the
            returned bytes will be written. It is needed because the
            ``CALL rel32`` displacements are relative to the thunk's own
            location.

    Returns:
        The fully resolved thunk as an immutable byte string.

    Raises:
        ValueError: if ``base`` is not a 32-bit address, or if a short jump
            cannot reach its label (which would indicate an editing mistake
            in the instruction list below).
    """
    if not 0 <= base <= 0xFFFFFFFF:
        raise ValueError(f"thunk base 0x{base:x} is outside the 32-bit address space")

    code = bytearray()
    labels = {}       # label name -> offset within `code`
    short_jumps = []  # (offset of the 2-byte jump, label name)
    near_calls = []   # (offset of the E8 opcode, absolute target address)

    def emit(*raw):
        code.extend(raw)

    def mark(name):
        labels[name] = len(code)

    def jump(opcode, name):
        # Two-byte short jump; the displacement byte is filled in later.
        short_jumps.append((len(code), name))
        code.extend((opcode, 0x00))

    def call(target_va):
        # Five-byte near call; the rel32 is filled in later.
        near_calls.append((len(code), target_va))
        code.extend((0xE8, 0x00, 0x00, 0x00, 0x00))

    # ------ prologue: reproduce the stock flag test ------
    emit(0x57)                                  # push edi            (save caller's edi)
    emit(0x8B, 0x86, 0x54, 0x05, 0x00, 0x00)    # mov  eax, [esi+0x554]
    emit(0xC1, 0xE8, 0x11)                      # shr  eax, 0x11
    emit(0xA8, 0x01)                            # test al, 1
    jump(0x74, "epi")                           # jz   .epi
    emit(0x84, 0xDB)                            # test bl, bl
    jump(0x75, "visible")                       # jnz  .visible
    emit(0xBF, 0x08, 0x00, 0x00, 0x00)          # mov  edi, 8         (per-hide cap)

    # ------ drain loop (vis == false) ------
    mark("loop")
    emit(0x8B, 0x86, 0x10, 0x06, 0x00, 0x00)    # mov  eax, [esi+0x610]
    emit(0x48)                                  # dec  eax            (idx = count - 1)
    jump(0x78, "epi")                           # js   .epi           (idx < 0)
    emit(0x50)                                  # push eax            (arg: idx)
    emit(0x8B, 0xCE)                            # mov  ecx, esi
    call(GETITEM_VA)                            # call GetItem
    emit(0x85, 0xC0)                            # test eax, eax
    jump(0x74, "epi")                           # jz   .epi           (no item)
    emit(0x68, 0x32, 0x00, 0x00, 0x10)          # push 0x10000032     (type id)
    emit(0x8B, 0xC8)                            # mov  ecx, eax
    emit(0x8B, 0x10)                            # mov  edx, [eax]     (vtable)
    emit(0xFF, 0x92, 0x94, 0x00, 0x00, 0x00)    # call [edx+0x94]     (slot 37)
    emit(0x85, 0xC0)                            # test eax, eax
    jump(0x74, "epi")                           # jz   .epi           (not a UIItem)
    emit(0x50)                                  # push eax            (save real_item)
    emit(0x8B, 0xC8)                            # mov  ecx, eax
    call(GETSTATE_VA)                           # call GetState
    emit(0x59)                                  # pop  ecx            (ecx = real_item)
    emit(0x3D, 0x1C, 0x00, 0x00, 0x10)          # cmp  eax, 0x1000001c (WAITING)
    jump(0x75, "epi")                           # jne  .epi           (active item: stop)
    emit(0x51)                                  # push ecx            (arg: real_item)
    emit(0x8B, 0xCE)                            # mov  ecx, esi
    call(INTDELETE_VA)                          # call InternalDeleteItem
    emit(0x4F)                                  # dec  edi            (cap--)
    jump(0x75, "loop")                          # jnz  .loop
    jump(0xEB, "epi")                           # jmp  .epi

    # ------ visible path: stock behaviour ------
    mark("visible")
    emit(0x8B, 0xCE)                            # mov  ecx, esi
    call(UPDATEEMPTYSLOTS_VA)                   # call UpdateEmptySlots

    # ------ epilogue ------
    mark("epi")
    emit(0x5F)                                  # pop  edi
    emit(0x5E)                                  # pop  esi
    emit(0x5B)                                  # pop  ebx
    emit(0xC2, 0x04, 0x00)                      # ret  4

    # ------ resolve short jumps (relative to the end of the 2-byte jump) ------
    for at, name in short_jumps:
        rel = labels[name] - (at + 2)
        if not -128 <= rel <= 127:
            raise ValueError(f"rel8 overflow {rel}")
        code[at + 1] = rel & 0xFF

    # ------ resolve near calls (relative to the end of the 5-byte call) ------
    # 32-bit displacement arithmetic wraps modulo 2**32, so the value is
    # masked rather than packed as a signed int: a thunk more than 2 GiB away
    # from its callee is still encodable.
    for at, target_va in near_calls:
        rel = (target_va - (base + at + 5)) & 0xFFFFFFFF
        struct.pack_into("<I", code, at + 1, rel)

    return bytes(code)
# Access rights requested from OpenProcess.
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_VM_OPERATION = 0x0008
PROCESS_QUERY_INFORMATION = 0x0400

# VirtualAllocEx allocation type (MEM_COMMIT | MEM_RESERVE) and page protection.
MEM_COMMIT_RESERVE = 0x00001000 | 0x00002000
PAGE_EXECUTE_READWRITE = 0x40

# Load kernel32 with use_last_error=True: only then does ctypes snapshot the
# thread's last-error value after each call, which is what
# ctypes.get_last_error() reads back. Through ctypes.windll the snapshot is
# never taken and every "err=" message in this script would be meaningless.
k32 = ctypes.WinDLL("kernel32", use_last_error=True)

# Explicit prototypes so handles, addresses and sizes are marshalled at
# pointer width instead of ctypes' default C int.
OpenProcess = k32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

CloseHandle = k32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

VirtualAllocEx = k32.VirtualAllocEx
VirtualAllocEx.argtypes = [wt.HANDLE, ctypes.c_void_p, ctypes.c_size_t, wt.DWORD, wt.DWORD]
VirtualAllocEx.restype = wt.LPVOID

WriteProcessMemory = k32.WriteProcessMemory
WriteProcessMemory.argtypes = [
    wt.HANDLE, wt.LPVOID, wt.LPCVOID, ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
WriteProcessMemory.restype = wt.BOOL

ReadProcessMemory = k32.ReadProcessMemory
ReadProcessMemory.argtypes = [
    wt.HANDLE, wt.LPCVOID, wt.LPVOID, ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
ReadProcessMemory.restype = wt.BOOL

VirtualProtectEx = k32.VirtualProtectEx
VirtualProtectEx.argtypes = [
    wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD,
    ctypes.POINTER(wt.DWORD),
]
VirtualProtectEx.restype = wt.BOOL
def read_bytes(h, addr, n):
    """Read up to ``n`` bytes at address ``addr`` from the process behind ``h``.

    Returns the bytes ReadProcessMemory reports as transferred, which can be
    fewer than ``n``. Raises OSError when the call reports failure.
    """
    scratch = ctypes.create_string_buffer(n)
    transferred = ctypes.c_size_t(0)
    succeeded = ReadProcessMemory(h, addr, scratch, n, ctypes.byref(transferred))
    if not succeeded:
        raise OSError(f"read 0x{addr:08x} err={ctypes.get_last_error()}")
    return scratch.raw[:transferred.value]
def write_bytes(h, addr, data):
    """Overwrite ``len(data)`` bytes at ``addr`` in the process behind ``h``.

    The range is switched to PAGE_EXECUTE_READWRITE for the duration of the
    write and the previous protection is put back afterwards, including when
    the write fails or raises.

    Raises:
        OSError: if the protection change fails, the write fails, or fewer
            bytes than requested were written.
    """
    size = len(data)
    old_prot = wt.DWORD(0)
    if not VirtualProtectEx(h, addr, size, PAGE_EXECUTE_READWRITE, ctypes.byref(old_prot)):
        raise OSError(f"VirtualProtectEx 0x{addr:08x} err={ctypes.get_last_error()}")

    written = ctypes.c_size_t(0)
    try:
        ok = WriteProcessMemory(h, addr, data, size, ctypes.byref(written))
        # Capture the error now; the restore call below would overwrite it.
        err = ctypes.get_last_error() if not ok else 0
    finally:
        # Always try to put the original protection back. A failure here is
        # reported but not raised, so it cannot mask the outcome of the write.
        ignored = wt.DWORD(0)
        if not VirtualProtectEx(h, addr, size, old_prot.value, ctypes.byref(ignored)):
            print(
                f"warning: could not restore protection at 0x{addr:08x} "
                f"err={ctypes.get_last_error()}",
                file=sys.stderr,
            )

    if not ok:
        raise OSError(f"write 0x{addr:08x} err={err}")
    if written.value != size:
        raise OSError(f"write 0x{addr:08x} short: {written.value}/{size} bytes")
def main():
    """Command-line entry point: install the detour in process <pid>, or undo it.

    Exit codes: 2 = OpenProcess failed, 3 = patch-site bytes are not what was
    expected, 4 = thunk allocation failed, 5 = thunk write failed,
    6 = read-back after writing the patch site did not match.
    The process handle is closed on every path, including error exits.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pid", type=int)
    ap.add_argument("--revert", action="store_true")
    args = ap.parse_args()

    h = OpenProcess(
        PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION,
        False, args.pid,
    )
    if not h:
        print(f"OpenProcess({args.pid}) err={ctypes.get_last_error()}")
        sys.exit(2)

    try:
        cur = read_bytes(h, PATCH_SITE_VA, PATCH_LEN)
        print(f"PID {args.pid}")
        print(f" patch site @ 0x{PATCH_SITE_VA:08x} current: {cur.hex()}")
        if args.revert:
            _revert_patch(h, cur)
        else:
            _install_patch(h, cur)
    finally:
        # Runs for normal return, sys.exit() and OSError alike.
        CloseHandle(h)


def _revert_patch(h, cur):
    """Restore the stock bytes at the patch site, given its current bytes ``cur``."""
    if cur == ORIG_BYTES:
        print(" already original — nothing to revert")
        return

    # Only undo what this script wrote: JMP rel32 followed by NOP padding.
    # Anything else means a different build or a different patch, and
    # writing ORIG_BYTES over it would corrupt the target's code.
    is_our_detour = cur[:1] == b"\xE9" and cur[5:] == b"\x90" * (PATCH_LEN - 5)
    if not is_our_detour:
        print(" UNEXPECTED — bytes are neither the original nor this patch; not reverting.")
        print(f" Expected original: {ORIG_BYTES.hex()}")
        sys.exit(3)

    # The thunk page is intentionally left allocated: a thread of the target
    # could still be executing inside it.
    write_bytes(h, PATCH_SITE_VA, ORIG_BYTES)
    after = read_bytes(h, PATCH_SITE_VA, PATCH_LEN)
    print(f" reverted; bytes now: {after.hex()}")
    if after != ORIG_BYTES:
        print(" MISMATCH")
        sys.exit(6)


def _install_patch(h, cur):
    """Allocate and write the thunk, then redirect the patch site to it."""
    if cur != ORIG_BYTES:
        print(" UNEXPECTED — bytes don't match expected original.")
        print(f" Expected: {ORIG_BYTES.hex()}")
        sys.exit(3)

    # Allocate thunk
    thunk_page = VirtualAllocEx(h, None, 0x200, MEM_COMMIT_RESERVE, PAGE_EXECUTE_READWRITE)
    if not thunk_page:
        print(f"VirtualAllocEx failed err={ctypes.get_last_error()}")
        sys.exit(4)
    print(f" thunk page @ 0x{thunk_page:08x}")

    thunk = build_thunk(thunk_page)
    print(f" thunk size: {len(thunk)} bytes")
    print(" thunk hex:")
    for off in range(0, len(thunk), 16):
        row = thunk[off:off + 16].hex(' ')
        print(f" +0x{off:02x}: {row}")

    # The thunk must be completely in place before the site is redirected.
    written = ctypes.c_size_t(0)
    if not WriteProcessMemory(h, thunk_page, thunk, len(thunk), ctypes.byref(written)):
        print(f"write thunk failed err={ctypes.get_last_error()}")
        sys.exit(5)
    if written.value != len(thunk):
        print(f"write thunk failed: short write {written.value}/{len(thunk)} bytes")
        sys.exit(5)

    # Build replacement: JMP thunk + NOP padding up to PATCH_LEN. The rel32
    # is masked because 32-bit displacement arithmetic wraps modulo 2**32.
    rel = (thunk_page - (PATCH_SITE_VA + 5)) & 0xFFFFFFFF
    replacement = b"\xE9" + struct.pack("<I", rel) + b"\x90" * (PATCH_LEN - 5)
    assert len(replacement) == PATCH_LEN

    write_bytes(h, PATCH_SITE_VA, replacement)
    after = read_bytes(h, PATCH_SITE_VA, PATCH_LEN)
    print(f" patch site now: {after.hex()}")
    if after != replacement:
        print(" MISMATCH")
        sys.exit(6)
    print(" OK")


if __name__ == "__main__":
    main()