Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…), which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py.

Controlled 15-client fleet soak: the unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime.

Residual ~15 MB/h growth was traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; the conclusion is that it is unfixable from outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
288 lines
12 KiB
Python
288 lines
12 KiB
Python
"""patch_v8_thunk_v2.py <pid> [--revert]

v8-thunk-v2: SAFE drain of UIElement_UIItem leaked pool.

v1 (the broken one) hooked Flush — drain ran during normal panel
refresh and ate items needed for display.

v2 hooks OnVisibilityChanged (vis=false branch) instead — drain only
when the panel becomes hidden, NOT during Flush.

Mechanism:
  At 0x004e499e, OnVisibilityChanged has 24 bytes implementing:
    if ((this->+0x554 >> 0x11 & 1) && vis != 0) UpdateEmptySlots()
  epilogue at 0x004e49b6: pop esi; pop ebx; ret 4

  We replace those 24 bytes with `JMP <thunk>` (5 bytes) + 19 NOPs.

  Thunk reproduces the original logic AND adds a drain path:
    if (!(this->+0x554 >> 0x11 & 1)) return
    if (vis != 0) UpdateEmptySlots()          ; stock visible path
    else                                      ; vis == false: drain (NEW)
      for up to 8 iterations:
        idx = this->+0x610 - 1
        if idx < 0: break
        item_array = GetItem(this, idx)
        if item_array == NULL: break          ; (no more items)
        real_item = item_array->vtable[37](0x10000032)  ; type check
        if real_item == 0: break              ; (not a UIItem at end)
        state = real_item->UIItem_GetState()
        if state != WAITING: break            ; (active item at end, stop)
        InternalDeleteItem(this, real_item)

Safety:
  - Only drains on vis=false (panel actually going hidden)
  - Cap of 8 items per hide event (prevents catastrophic burst)
  - Stops at first non-WAITING item at end (mimics UpdateEmptySlots's
    own trim behavior — never deletes active items)
  - Preserves the original visible-true behavior exactly

Notable correctness:
  - InternalDeleteItem is called with the result of vtable[37], NOT
    the raw item from the array (mirrors UpdateEmptySlots's pattern)
"""
|
|
import argparse
|
|
import ctypes
|
|
import ctypes.wintypes as wt
|
|
import struct
|
|
import sys
|
|
|
|
|
|
# Virtual address inside OnVisibilityChanged where the hook is installed,
# and the number of bytes that get overwritten there.
PATCH_SITE_VA = 0x004E499E
PATCH_LEN = 24

# Stock code expected at the patch site, one instruction per literal.
# main() compares the live bytes against this before patching and writes
# it back on --revert.
ORIG_BYTES = (
    b"\x8b\x86\x54\x05\x00\x00"  # mov  eax, [esi+0x554]
    b"\xc1\xe8\x11"              # shr  eax, 0x11
    b"\xa8\x01"                  # test al, 1
    b"\x74\x0b"                  # jz   +0x0B (to 0x004e49b6)
    b"\x84\xdb"                  # test bl, bl
    b"\x74\x07"                  # jz   +0x07
    b"\x8b\xce"                  # mov  ecx, esi
    b"\xe8\xda\xf9\xff\xff"      # call UpdateEmptySlots
)
assert len(ORIG_BYTES) == 24
|
|
|
|
# Absolute addresses of the client functions the thunk calls.
GETITEM_VA = 0x0046dc50
GETSTATE_VA = 0x004e1e20
INTDELETE_VA = 0x004e41c0
UPDATEEMPTYSLOTS_VA = 0x004e4390


def build_thunk(base: int) -> bytes:
    """Assemble the v8-thunk-v2 machine code for placement at address `base`.

    The thunk re-implements the 24 bytes it replaces (flag test on
    [esi+0x554], then UpdateEmptySlots when bl != 0) and adds the drain loop
    for the bl == 0 case: up to 8 times it fetches the last item, type-checks
    it through vtable slot 0x94/4, and deletes it only if GetState returns
    0x1000001c. It ends with its own `pop edi; pop esi; pop ebx; ret 4`
    epilogue, so control never returns to the patch site.

    The code is position-dependent: the four `call rel32` displacements are
    computed relative to `base`.

    Args:
        base: 32-bit virtual address, in the target process, at which the
            returned bytes will be written.

    Returns:
        The assembled thunk.

    Raises:
        ValueError: if `base` is not representable as a 32-bit address.
        AssertionError: if a short jump target is out of rel8 range (would
            indicate the thunk layout was edited incorrectly).
    """
    if not 0 <= base <= 0xFFFFFFFF:
        raise ValueError(f"thunk base 0x{base:x} is not a 32-bit address")

    out = bytearray()

    def emit(*code: int) -> int:
        """Append instruction bytes; return the offset they start at."""
        at = len(out)
        out.extend(code)
        return at

    # ------ prologue ------
    emit(0x57)                                  # push edi (save caller's edi)
    emit(0x8B, 0x86, 0x54, 0x05, 0x00, 0x00)    # mov eax, [esi+0x554]
    emit(0xC1, 0xE8, 0x11)                      # shr eax, 0x11
    emit(0xA8, 0x01)                            # test al, 1
    jz_epi_1 = emit(0x74, 0x00)                 # jz .epi (patched below)
    emit(0x84, 0xDB)                            # test bl, bl
    jnz_visible = emit(0x75, 0x00)              # jnz .visible (patched below)

    emit(0xBF, 0x08, 0x00, 0x00, 0x00)          # mov edi, 8 (cap)

    # ------ loop ------
    # NOTE(review): no `add esp` follows any call, so every callee is
    # assumed to pop its own stack argument (thiscall) — confirm if a
    # call target is ever changed.
    loop_top = len(out)
    emit(0x8B, 0x86, 0x10, 0x06, 0x00, 0x00)    # mov eax, [esi+0x610]
    emit(0x48)                                  # dec eax
    js_epi_1 = emit(0x78, 0x00)                 # js .epi (idx < 0)

    emit(0x50)                                  # push eax (idx)
    emit(0x8B, 0xCE)                            # mov ecx, esi
    call_getitem = emit(0xE8, 0, 0, 0, 0)       # call GetItem
    emit(0x85, 0xC0)                            # test eax, eax
    jz_epi_2 = emit(0x74, 0x00)                 # jz .epi

    emit(0x68, 0x32, 0x00, 0x00, 0x10)          # push 0x10000032
    emit(0x8B, 0xC8)                            # mov ecx, eax
    emit(0x8B, 0x10)                            # mov edx, [eax]
    emit(0xFF, 0x92, 0x94, 0x00, 0x00, 0x00)    # call [edx+0x94]
    emit(0x85, 0xC0)                            # test eax, eax
    jz_epi_3 = emit(0x74, 0x00)                 # jz .epi

    emit(0x50)                                  # push eax (save real_item)
    emit(0x8B, 0xC8)                            # mov ecx, eax
    call_getstate = emit(0xE8, 0, 0, 0, 0)      # call GetState
    emit(0x59)                                  # pop ecx (ecx = real_item)
    emit(0x3D, 0x1C, 0x00, 0x00, 0x10)          # cmp eax, 0x1000001c
    jne_epi = emit(0x75, 0x00)                  # jne .epi

    emit(0x51)                                  # push ecx (arg = real_item)
    emit(0x8B, 0xCE)                            # mov ecx, esi
    call_intdel = emit(0xE8, 0, 0, 0, 0)        # call InternalDeleteItem

    emit(0x4F)                                  # dec edi (cap--)
    jnz_loop = emit(0x75, 0x00)                 # jnz loop_top
    jmp_epi = emit(0xEB, 0x00)                  # jmp .epi (skip visible path)

    # ------ visible path ------
    visible_label = len(out)
    emit(0x8B, 0xCE)                            # mov ecx, esi
    call_updemp = emit(0xE8, 0, 0, 0, 0)        # call UpdateEmptySlots

    # ------ epilogue ------
    epi_label = len(out)
    emit(0x5F)                                  # pop edi
    emit(0x5E)                                  # pop esi
    emit(0x5B)                                  # pop ebx
    emit(0xC2, 0x04, 0x00)                      # ret 4

    # ----- Resolve relative jumps and calls -----
    def patch_rel8(at, target_off):
        # Short jump: 2-byte instruction, displacement relative to its end.
        rel = target_off - (at + 2)
        # Explicit raise instead of `assert` so the check survives `python -O`;
        # a wrong displacement here would be injected into a live process.
        if not -128 <= rel <= 127:
            raise AssertionError(f"rel8 overflow {rel}")
        out[at + 1] = rel & 0xFF

    def patch_rel32_call(at, target_va):
        # E8 at offset `at`, rel32 at at+1..at+4, next-instr addr = base+at+5.
        # x86 adds rel32 to EIP modulo 2**32, so encode the displacement
        # wrapped to 32 bits; a signed pack would overflow when `base` lies
        # more than 2 GiB above the target.
        rel = (target_va - (base + at + 5)) & 0xFFFFFFFF
        out[at + 1:at + 5] = struct.pack("<I", rel)

    patch_rel8(jz_epi_1, epi_label)
    patch_rel8(jnz_visible, visible_label)
    patch_rel8(js_epi_1, epi_label)
    patch_rel8(jz_epi_2, epi_label)
    patch_rel8(jz_epi_3, epi_label)
    patch_rel8(jne_epi, epi_label)
    patch_rel8(jnz_loop, loop_top)
    patch_rel8(jmp_epi, epi_label)

    patch_rel32_call(call_getitem, GETITEM_VA)
    patch_rel32_call(call_getstate, GETSTATE_VA)
    patch_rel32_call(call_intdel, INTDELETE_VA)
    patch_rel32_call(call_updemp, UPDATEEMPTYSLOTS_VA)

    return bytes(out)
|
|
|
|
|
|
# Process access rights OR-ed together for the OpenProcess call in main().
PROCESS_VM_OPERATION = 1 << 3
PROCESS_VM_READ = 1 << 4
PROCESS_VM_WRITE = 1 << 5
PROCESS_QUERY_INFORMATION = 1 << 10

# Allocation type (MEM_COMMIT 0x1000 | MEM_RESERVE 0x2000) and page
# protection passed to VirtualAllocEx / VirtualProtectEx.
MEM_COMMIT_RESERVE = 0x3000
PAGE_EXECUTE_READWRITE = 0x40
|
|
|
|
|
|
# kernel32 bindings used by the patcher.
#
# The library is loaded with use_last_error=True so that ctypes saves the
# thread's Win32 last-error value immediately after each foreign call.
# ctypes.get_last_error(), which this file uses in all of its error messages,
# only returns that saved copy; with the default ctypes.windll.kernel32 loader
# it is never updated and the reported error code is meaningless.
k32 = ctypes.WinDLL("kernel32", use_last_error=True)

OpenProcess = k32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

CloseHandle = k32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

VirtualAllocEx = k32.VirtualAllocEx
VirtualAllocEx.argtypes = [wt.HANDLE, ctypes.c_void_p, ctypes.c_size_t, wt.DWORD, wt.DWORD]
VirtualAllocEx.restype = wt.LPVOID

WriteProcessMemory = k32.WriteProcessMemory
WriteProcessMemory.argtypes = [wt.HANDLE, wt.LPVOID, wt.LPCVOID, ctypes.c_size_t,
                               ctypes.POINTER(ctypes.c_size_t)]
WriteProcessMemory.restype = wt.BOOL

ReadProcessMemory = k32.ReadProcessMemory
ReadProcessMemory.argtypes = [wt.HANDLE, wt.LPCVOID, wt.LPVOID, ctypes.c_size_t,
                              ctypes.POINTER(ctypes.c_size_t)]
ReadProcessMemory.restype = wt.BOOL

VirtualProtectEx = k32.VirtualProtectEx
VirtualProtectEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD,
                             ctypes.POINTER(wt.DWORD)]
VirtualProtectEx.restype = wt.BOOL
|
|
|
|
|
|
def read_bytes(h, addr, n):
    """Read `n` bytes at address `addr` from the process behind handle `h`.

    Returns the bytes that ReadProcessMemory reports as read; raises OSError
    carrying the address and ctypes' last-error value if the call fails.
    """
    scratch = (ctypes.c_ubyte * n)()
    got = ctypes.c_size_t(0)
    succeeded = ReadProcessMemory(h, addr, scratch, n, ctypes.byref(got))
    if succeeded:
        return bytes(scratch[:got.value])
    raise OSError(f"read 0x{addr:08x} err={ctypes.get_last_error()}")
|
|
|
|
|
|
def write_bytes(h, addr, data):
    """Write `data` at address `addr` in the process behind handle `h`.

    The target range is switched to PAGE_EXECUTE_READWRITE for the write and
    then switched back to whatever protection it had before, whether or not
    the write succeeded. Raises OSError if changing the protection or the
    write itself fails.
    """
    size = len(data)

    previous = wt.DWORD(0)
    unlocked = VirtualProtectEx(h, addr, size, PAGE_EXECUTE_READWRITE, ctypes.byref(previous))
    if not unlocked:
        raise OSError(f"VirtualProtectEx 0x{addr:08x} err={ctypes.get_last_error()}")

    written = ctypes.c_size_t(0)
    succeeded = WriteProcessMemory(h, addr, data, size, ctypes.byref(written))
    # Capture the error code now: the protection restore below is another
    # API call and would overwrite it.
    failure_code = 0 if succeeded else ctypes.get_last_error()

    scratch = wt.DWORD(0)
    VirtualProtectEx(h, addr, size, previous.value, ctypes.byref(scratch))

    if succeeded:
        return
    raise OSError(f"write 0x{addr:08x} err={failure_code}")
|
|
|
|
|
|
def _is_installed_patch(cur):
    """Return True if `cur` has the shape this tool writes: JMP rel32 + NOPs."""
    return (
        len(cur) == PATCH_LEN
        and cur[0] == 0xE9
        and cur[5:] == bytes([0x90]) * (PATCH_LEN - 5)
    )


def _revert_patch(h, cur):
    """Restore ORIG_BYTES at the patch site, given the current bytes `cur`."""
    if cur == ORIG_BYTES:
        print(" already original — nothing to revert")
        return
    # Refuse to overwrite code that is neither stock nor our own hook: the
    # process may be a different client build, and blindly writing 24 bytes
    # into it would corrupt unrelated instructions.
    if not _is_installed_patch(cur):
        print(" UNEXPECTED — bytes are neither the original nor this patch; not reverting.")
        print(f" Expected original: {ORIG_BYTES.hex()}")
        sys.exit(3)
    write_bytes(h, PATCH_SITE_VA, ORIG_BYTES)
    after = read_bytes(h, PATCH_SITE_VA, PATCH_LEN)
    print(f" reverted; bytes now: {after.hex()}")


def _install_patch(h, cur):
    """Allocate and write the thunk, then redirect the patch site to it."""
    if cur != ORIG_BYTES:
        print(" UNEXPECTED — bytes don't match expected original.")
        print(f" Expected: {ORIG_BYTES.hex()}")
        sys.exit(3)

    # Allocate thunk
    thunk_page = VirtualAllocEx(h, None, 0x200, MEM_COMMIT_RESERVE, PAGE_EXECUTE_READWRITE)
    if not thunk_page:
        print(f"VirtualAllocEx failed err={ctypes.get_last_error()}")
        sys.exit(4)
    print(f" thunk page @ 0x{thunk_page:08x}")

    thunk = build_thunk(thunk_page)
    print(f" thunk size: {len(thunk)} bytes")
    print(" thunk hex:")
    for i in range(0, len(thunk), 16):
        row = thunk[i:i+16].hex(' ')
        print(f" +0x{i:02x}: {row}")

    # The thunk must be fully in place before the patch site starts jumping
    # to it, so a short write is treated as a failure too.
    sz = ctypes.c_size_t(0)
    ok = WriteProcessMemory(h, thunk_page, thunk, len(thunk), ctypes.byref(sz))
    if not ok or sz.value != len(thunk):
        print(f"write thunk failed err={ctypes.get_last_error()}")
        sys.exit(5)

    # Build replacement: JMP thunk + 19 NOPs. The displacement is wrapped to
    # 32 bits because x86 adds rel32 to EIP modulo 2**32; a signed pack would
    # raise when the thunk page is more than 2 GiB above the patch site.
    rel = (thunk_page - (PATCH_SITE_VA + 5)) & 0xFFFFFFFF
    replacement = bytes([0xE9]) + struct.pack("<I", rel) + bytes([0x90] * 19)
    assert len(replacement) == PATCH_LEN
    write_bytes(h, PATCH_SITE_VA, replacement)

    after = read_bytes(h, PATCH_SITE_VA, PATCH_LEN)
    print(f" patch site now: {after.hex()}")
    if after != replacement:
        print(" MISMATCH")
        sys.exit(6)
    print(" OK")


def main():
    """Command-line entry point: install or revert the v8-thunk-v2 patch.

    Usage: <pid> [--revert]

    Exit codes: 2 OpenProcess failed; 3 patch-site bytes not as expected;
    4 VirtualAllocEx failed; 5 writing the thunk failed; 6 read-back of the
    patch site does not match what was written. read_bytes/write_bytes
    failures propagate as OSError.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pid", type=int)
    ap.add_argument("--revert", action="store_true")
    args = ap.parse_args()

    h = OpenProcess(
        PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION,
        False, args.pid,
    )
    if not h:
        print(f"OpenProcess({args.pid}) err={ctypes.get_last_error()}")
        sys.exit(2)

    # sys.exit() raises SystemExit, so the finally clause closes the handle
    # on every exit path, including the error exits and OSError.
    try:
        cur = read_bytes(h, PATCH_SITE_VA, PATCH_LEN)
        print(f"PID {args.pid}")
        print(f" patch site @ 0x{PATCH_SITE_VA:08x} current: {cur.hex()}")

        if args.revert:
            _revert_patch(h, cur)
        else:
            _install_patch(h, cur)
    finally:
        CloseHandle(h)
|
|
|
|
|
|
# Run the patcher only when this file is executed as a script, not when it
# is imported (e.g. to reuse build_thunk).
if __name__ == "__main__":
    main()
|