Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
57b5e43d0e
199 changed files with 1648333 additions and 0 deletions
274
tools/patch_v8_thunk.py
Normal file
274
tools/patch_v8_thunk.py
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
"""patch_v8_thunk.py <pid> [--revert]
|
||||
|
||||
EXPERIMENTAL v8-thunk: actively drain UIElement_UIItem pool by hooking
|
||||
the tail-call JMP at end of UIElement_ItemList::ItemList_Flush.
|
||||
|
||||
Mechanism:
|
||||
ItemList_Flush ends with `JMP UIElement_ItemList::UpdateEmptySlots`
|
||||
at EoR 0x004e4a87 (5 bytes: E9 04 F9 FF FF).
|
||||
Replace with `JMP <thunk>`. Thunk walks the array backward, calls
|
||||
InternalDeleteItem on every WAITING UIItem, then tail-calls
|
||||
UpdateEmptySlots so resize behavior is preserved.
|
||||
|
||||
This is the v8-minimal followup. Where v8-minimal (3 byte changes)
|
||||
prevented NEW leaks, v8-thunk actively drains pre-existing leaks too.
|
||||
|
||||
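Thunk (86 bytes; NOT position-independent — the CALL/JMP rel32 displacements to the fixed client addresses are computed from the address the thunk is placed at):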
|
||||
push ebp; push edi; push esi; push ebx
|
||||
mov ebx, ecx ; ebx = this
|
||||
mov esi, [ebx+0x610] ; count
|
||||
dec esi ; idx = count-1
|
||||
loop_top:
|
||||
test esi, esi
|
||||
js loop_done
|
||||
push esi
|
||||
mov ecx, ebx
|
||||
call UIElement_ListBox::GetItem (0x0046dc50)
|
||||
test eax, eax
|
||||
jz skip_item
|
||||
mov edi, eax
|
||||
mov eax, [edi] ; vtable
|
||||
push 0x10000032
|
||||
mov ecx, edi
|
||||
call [eax+0x94] ; vtable[37] type check
|
||||
test eax, eax
|
||||
jz skip_item
|
||||
mov ecx, edi
|
||||
call UIItem_GetState (0x004e1e20)
|
||||
cmp eax, 0x1000001c
|
||||
jne skip_item
|
||||
push edi
|
||||
mov ecx, ebx
|
||||
call InternalDeleteItem (0x004e41c0)
|
||||
skip_item:
|
||||
dec esi
|
||||
jmp loop_top
|
||||
loop_done:
|
||||
mov ecx, ebx
|
||||
pop ebx; pop esi; pop edi; pop ebp
|
||||
jmp UpdateEmptySlots (0x004e4390)
|
||||
"""
|
||||
import argparse
|
||||
import ctypes
|
||||
import ctypes.wintypes as wt
|
||||
import struct
|
||||
import sys
|
||||
|
||||
|
||||
# Address of the 5-byte tail-call JMP that gets redirected to the thunk, and
# the bytes expected there before patching.
PATCH_SITE_VA = 0x004e4a87
ORIG_JMP_BYTES = bytes([0xE9, 0x04, 0xF9, 0xFF, 0xFF])  # JMP UpdateEmptySlots (relative)

# Fixed client addresses the thunk calls / jumps to.
GETITEM_VA = 0x0046dc50
GETSTATE_VA = 0x004e1e20
INTDELETE_VA = 0x004e41c0
UPDATEEMPTYSLOTS_VA = 0x004e4390


def build_thunk(thunk_base: int) -> bytes:
    """Assemble the 86-byte thunk for placement at `thunk_base`.

    The emitted code saves ebp/edi/esi/ebx, walks indices count-1 .. 0
    (count read from [this+0x610]), and for each index calls GetItem, the
    vtable slot at +0x94 with argument 0x10000032, and GetState; when the
    state equals 0x1000001c it calls InternalDeleteItem. It then restores
    the registers and jumps to UpdateEmptySlots with ecx = this.

    Args:
        thunk_base: 32-bit address the thunk will be written to. The
            CALL/JMP rel32 displacements depend on it, so the returned bytes
            are only valid at exactly this address.

    Returns:
        The machine code as an immutable bytes object.

    Raises:
        ValueError: if `thunk_base` is not a 32-bit address, or an internal
            short-jump displacement does not fit in a signed byte.
    """
    if not 0 <= thunk_base <= 0xFFFFFFFF:
        raise ValueError(f"thunk_base 0x{thunk_base:x} is not a 32-bit address")

    out = bytearray()

    # Prologue
    out += bytes([0x55])                                  # push ebp
    out += bytes([0x57])                                  # push edi
    out += bytes([0x56])                                  # push esi
    out += bytes([0x53])                                  # push ebx
    out += bytes([0x8B, 0xD9])                            # mov ebx, ecx
    out += bytes([0x8B, 0xB3, 0x10, 0x06, 0x00, 0x00])    # mov esi, [ebx+0x610]
    out += bytes([0x4E])                                  # dec esi

    loop_top_off = len(out)
    out += bytes([0x85, 0xF6])                            # test esi, esi
    # Short jumps are emitted with a zero displacement and fixed up below,
    # once the offsets of their targets are known.
    js_loopdone_off = len(out)
    out += bytes([0x78, 0x00])                            # js loop_done

    out += bytes([0x56])                                  # push esi
    out += bytes([0x8B, 0xCB])                            # mov ecx, ebx
    call_getitem_off = len(out)
    out += bytes([0xE8, 0, 0, 0, 0])                      # call GetItem

    out += bytes([0x85, 0xC0])                            # test eax, eax
    jz_skip1_off = len(out)
    out += bytes([0x74, 0x00])                            # jz skip_item

    out += bytes([0x8B, 0xF8])                            # mov edi, eax
    out += bytes([0x8B, 0x07])                            # mov eax, [edi]
    out += bytes([0x68, 0x32, 0x00, 0x00, 0x10])          # push 0x10000032
    out += bytes([0x8B, 0xCF])                            # mov ecx, edi
    out += bytes([0xFF, 0x90, 0x94, 0x00, 0x00, 0x00])    # call dword [eax+0x94]
    out += bytes([0x85, 0xC0])                            # test eax, eax
    jz_skip2_off = len(out)
    out += bytes([0x74, 0x00])                            # jz skip_item

    out += bytes([0x8B, 0xCF])                            # mov ecx, edi
    call_getstate_off = len(out)
    out += bytes([0xE8, 0, 0, 0, 0])                      # call GetState

    out += bytes([0x3D, 0x1C, 0x00, 0x00, 0x10])          # cmp eax, 0x1000001c
    jne_skip_off = len(out)
    out += bytes([0x75, 0x00])                            # jne skip_item

    out += bytes([0x57])                                  # push edi
    out += bytes([0x8B, 0xCB])                            # mov ecx, ebx
    call_intdel_off = len(out)
    out += bytes([0xE8, 0, 0, 0, 0])                      # call InternalDeleteItem

    skip_item_off = len(out)
    out += bytes([0x4E])                                  # dec esi
    jmp_top_off = len(out)
    out += bytes([0xEB, 0x00])                            # jmp loop_top

    loop_done_off = len(out)
    out += bytes([0x8B, 0xCB])                            # mov ecx, ebx
    out += bytes([0x5B])                                  # pop ebx
    out += bytes([0x5E])                                  # pop esi
    out += bytes([0x5F])                                  # pop edi
    out += bytes([0x5D])                                  # pop ebp
    jmp_upd_off = len(out)
    out += bytes([0xE9, 0, 0, 0, 0])                      # jmp UpdateEmptySlots

    def patch_rel8(at, target):
        # `at` is the offset of a 2-byte short jump; the displacement is
        # relative to the end of that instruction.
        rel = target - (at + 2)
        if not -128 <= rel <= 127:
            raise ValueError(f"rel8 overflow: {rel}")
        out[at + 1] = rel & 0xFF

    def patch_rel32(at, target_va):
        # `at` is the offset of the E8/E9 opcode; rel32 occupies at+1..at+4
        # and is relative to the address of the following instruction.
        site = thunk_base + at + 5
        # 32-bit displacements wrap modulo 2**32, so mask and pack unsigned:
        # a signed pack would raise struct.error whenever the thunk sits more
        # than 2 GiB away from the target.
        out[at + 1:at + 5] = struct.pack("<I", (target_va - site) & 0xFFFFFFFF)

    patch_rel8(js_loopdone_off, loop_done_off)
    patch_rel8(jz_skip1_off, skip_item_off)
    patch_rel8(jz_skip2_off, skip_item_off)
    patch_rel8(jne_skip_off, skip_item_off)
    patch_rel8(jmp_top_off, loop_top_off)

    patch_rel32(call_getitem_off, GETITEM_VA)
    patch_rel32(call_getstate_off, GETSTATE_VA)
    patch_rel32(call_intdel_off, INTDELETE_VA)
    patch_rel32(jmp_upd_off, UPDATEEMPTYSLOTS_VA)

    return bytes(out)
|
||||
|
||||
|
||||
# Access rights requested when opening the target process.
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_VM_OPERATION = 0x0008
PROCESS_QUERY_INFORMATION = 0x0400
# Allocation type and page protection used for the thunk page.
MEM_COMMIT_RESERVE = 0x00001000 | 0x00002000
PAGE_EXECUTE_READWRITE = 0x40


# Load kernel32 with use_last_error=True. ctypes.get_last_error() only
# reports the ctypes-private copy of the Win32 error code, and that copy is
# updated only for functions from a library loaded this way. With plain
# ctypes.windll.kernel32 every "err=" printed by this script would be 0.
k32 = ctypes.WinDLL("kernel32", use_last_error=True)

OpenProcess = k32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

CloseHandle = k32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

VirtualAllocEx = k32.VirtualAllocEx
VirtualAllocEx.argtypes = [wt.HANDLE, ctypes.c_void_p, ctypes.c_size_t, wt.DWORD, wt.DWORD]
VirtualAllocEx.restype = wt.LPVOID

WriteProcessMemory = k32.WriteProcessMemory
WriteProcessMemory.argtypes = [wt.HANDLE, wt.LPVOID, wt.LPCVOID, ctypes.c_size_t,
                               ctypes.POINTER(ctypes.c_size_t)]
WriteProcessMemory.restype = wt.BOOL

ReadProcessMemory = k32.ReadProcessMemory
ReadProcessMemory.argtypes = [wt.HANDLE, wt.LPCVOID, wt.LPVOID, ctypes.c_size_t,
                              ctypes.POINTER(ctypes.c_size_t)]
ReadProcessMemory.restype = wt.BOOL

VirtualProtectEx = k32.VirtualProtectEx
VirtualProtectEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD,
                             ctypes.POINTER(wt.DWORD)]
VirtualProtectEx.restype = wt.BOOL
|
||||
|
||||
|
||||
def read_bytes(h, addr, n):
    """Read `n` bytes at address `addr` from the process behind handle `h`.

    Returns the bytes that ReadProcessMemory reports as transferred, which
    may be fewer than `n`. Raises OSError if the call reports failure.
    """
    scratch = (ctypes.c_ubyte * n)()
    transferred = ctypes.c_size_t(0)
    succeeded = ReadProcessMemory(h, addr, scratch, n, ctypes.byref(transferred))
    if succeeded:
        # Trim to the byte count the API actually filled in.
        return bytes(scratch)[:transferred.value]
    raise OSError(f"read 0x{addr:08x} err={ctypes.get_last_error()}")
|
||||
|
||||
|
||||
def write_bytes(h, addr, data):
    """Write `data` at address `addr` in the process behind handle `h`.

    The range is switched to PAGE_EXECUTE_READWRITE for the duration of the
    write and the previous protection is put back afterwards, whether or not
    the write succeeded. Raises OSError if changing the protection or the
    write itself fails; the result of the restore call is not checked.
    """
    size = len(data)

    saved_prot = wt.DWORD(0)
    unlocked = VirtualProtectEx(h, addr, size, PAGE_EXECUTE_READWRITE,
                                ctypes.byref(saved_prot))
    if not unlocked:
        raise OSError(f"VirtualProtectEx 0x{addr:08x} err={ctypes.get_last_error()}")

    written = ctypes.c_size_t(0)
    wrote = WriteProcessMemory(h, addr, data, size, ctypes.byref(written))
    # Grab the error code now; the restore call below would overwrite it.
    failure_code = 0 if wrote else ctypes.get_last_error()

    scratch_prot = wt.DWORD(0)
    VirtualProtectEx(h, addr, size, saved_prot.value, ctypes.byref(scratch_prot))

    if wrote:
        return
    raise OSError(f"write 0x{addr:08x} err={failure_code}")
|
||||
|
||||
|
||||
def _revert_patch(h, cur):
    """Restore the original JMP at the patch site unless it is already there."""
    if cur == ORIG_JMP_BYTES:
        print(f" already original — nothing to revert")
        return
    # Restore original JMP UpdateEmptySlots
    write_bytes(h, PATCH_SITE_VA, ORIG_JMP_BYTES)
    after = read_bytes(h, PATCH_SITE_VA, 5)
    print(f" reverted; bytes now: {after.hex()}")


def _install_patch(h, cur):
    """Allocate and write the thunk, then redirect the patch-site JMP to it."""
    if cur != ORIG_JMP_BYTES:
        # Slice instead of indexing so a short or empty read cannot raise.
        if cur[:1] == b"\xE9":
            print(f" already has a JMP somewhere — maybe already patched. Refusing to re-patch.")
        else:
            print(f" UNEXPECTED — bytes don't match expected JMP. Refusing.")
        sys.exit(3)

    # Allocate thunk page
    thunk_page = VirtualAllocEx(h, None, 0x100, MEM_COMMIT_RESERVE, PAGE_EXECUTE_READWRITE)
    if not thunk_page:
        print(f"VirtualAllocEx failed err={ctypes.get_last_error()}")
        sys.exit(4)
    print(f" thunk page @ 0x{thunk_page:08x}")

    thunk = build_thunk(thunk_page)
    print(f" thunk size: {len(thunk)} bytes")
    print(f" thunk hex: {thunk.hex()}")

    sz = ctypes.c_size_t(0)
    ok = WriteProcessMemory(h, thunk_page, thunk, len(thunk), ctypes.byref(sz))
    # The thunk must be complete before the patch site is pointed at it, so
    # a short write is treated like a failed one.
    if not ok or sz.value != len(thunk):
        print(f"write thunk failed err={ctypes.get_last_error()}")
        sys.exit(5)

    # Build the JMP to thunk at the patch site. The rel32 displacement wraps
    # modulo 2**32; packing it signed would raise struct.error when the
    # thunk page is more than 2 GiB away from the patch site.
    rel = (thunk_page - (PATCH_SITE_VA + 5)) & 0xFFFFFFFF
    new_jmp = bytes([0xE9]) + struct.pack("<I", rel)
    write_bytes(h, PATCH_SITE_VA, new_jmp)

    after = read_bytes(h, PATCH_SITE_VA, 5)
    print(f" patch site now: {after.hex()} (expected {new_jmp.hex()})")
    if after != new_jmp:
        print(f" MISMATCH")
        sys.exit(6)
    print(" OK")


def main():
    """Command-line entry point: install or revert the v8-thunk patch.

    Parses `<pid> [--revert]`, opens the target process and reads the 5
    bytes at PATCH_SITE_VA. With --revert the original JMP is written back;
    this does not free any thunk page allocated by an earlier run. Without
    it, the thunk is allocated and written into the target and the
    patch-site JMP is redirected to it.

    Exit codes: 2 OpenProcess failed, 3 patch site does not hold the
    original bytes, 4 VirtualAllocEx failed, 5 thunk write failed,
    6 read-back of the patch site does not match what was written.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pid", type=int)
    ap.add_argument("--revert", action="store_true")
    args = ap.parse_args()

    h = OpenProcess(
        PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION,
        False, args.pid,
    )
    if not h:
        print(f"OpenProcess({args.pid}) err={ctypes.get_last_error()}")
        sys.exit(2)

    # try/finally so the handle is released on every path, including
    # sys.exit() (SystemExit) and OSError raised by the read/write helpers.
    try:
        cur = read_bytes(h, PATCH_SITE_VA, 5)
        print(f"PID {args.pid}")
        print(f" patch site @ 0x{PATCH_SITE_VA:08x} current: {cur.hex()}")

        if args.revert:
            _revert_patch(h, cur)
        else:
            _install_patch(h, cur)
    finally:
        CloseHandle(h)


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue