Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
276 lines
11 KiB
Python
276 lines
11 KiB
Python
"""
|
|
runtime_patch_v2.py <pid> [--dry-run] [--revert]
|
|
|
|
Proper Phase 8 prototype: injects a small custom x86 thunk into the target
|
|
process and rewrites the RenderSurface vtable `ReleaseSubObjects` slot to
|
|
point at it. The thunk frees the two heap-allocated buffers
|
|
(sourceData.sourceBits at offset 0x40 and m_pSurfaceBits at offset 0x58)
|
|
using EoR's operator delete[] at 0x005df164.
|
|
|
|
Differs from v1 in that v1 made the slot point at slot +0x3c's function,
|
|
which turned out to be a partial helper (frees only one buffer). The custom
|
|
thunk frees both, matching the body of 2013's RenderSurface::Destroy minus
|
|
the Begin() reset.
|
|
|
|
EoR addresses (from dump diagnostic + live disassembly):
|
|
vtable A: 0x007caa08
|
|
vtable B: 0x007ca0d8
|
|
slot +0x2c (RSO): current value 0x004154a0 (no-op stub)
|
|
operator delete[]: 0x005df164
|
|
offsets in struct: sourceData.sourceBits = +0x40, m_pSurfaceBits = +0x58
|
|
"""
|
|
import argparse, ctypes, ctypes.wintypes as wt, json, os, struct, sys
|
|
|
|
# Process access rights requested from OpenProcess (combined in patch_process).
PROCESS_VM_OPERATION = 0x0008
PROCESS_VM_READ = 0x0010
PROCESS_VM_WRITE = 0x0020
PROCESS_QUERY_INFORMATION = 0x0400

# Allocation types (VirtualAllocEx / VirtualFreeEx) and page protections.
MEM_COMMIT = 0x1000
MEM_RESERVE = 0x2000
MEM_RELEASE = 0x8000
PAGE_READWRITE = 0x04
PAGE_EXECUTE_READWRITE = 0x40

# === EoR offsets and addresses (see the module docstring) ===
# Each entry is (human-readable name, absolute address of the vtable).
VTABLES = [
    ("RenderSurface vtable A", 0x007caa08),
    ("RenderSurface vtable B", 0x007ca0d8),
]
RSO_SLOT = 0x2c                  # byte offset of the ReleaseSubObjects slot in the vtable
OFF_SOURCEBITS = 0x40            # sourceData.sourceBits within RenderSurface
OFF_SURFACEBITS = 0x58           # m_pSurfaceBits within RenderSurface
OP_DELETE_ARR_EOR = 0x005df164   # operator delete[] in EoR
NO_OP_STUB = 0x004154a0          # the no-op DBObj::ReleaseSubObjects in EoR

# use_last_error=True makes ctypes snapshot GetLastError() right after every
# foreign call, which is what ctypes.get_last_error() reads back.  Loading the
# DLL through ctypes.windll does not do that, so every "err=..." diagnostic in
# this file would otherwise report a meaningless value (normally 0).
k32 = ctypes.WinDLL("kernel32", use_last_error=True)

OpenProcess = k32.OpenProcess
OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
OpenProcess.restype = wt.HANDLE

CloseHandle = k32.CloseHandle
CloseHandle.argtypes = [wt.HANDLE]
CloseHandle.restype = wt.BOOL

ReadProcessMemory = k32.ReadProcessMemory
ReadProcessMemory.argtypes = [wt.HANDLE, wt.LPCVOID, wt.LPVOID, ctypes.c_size_t,
                              ctypes.POINTER(ctypes.c_size_t)]
ReadProcessMemory.restype = wt.BOOL

WriteProcessMemory = k32.WriteProcessMemory
WriteProcessMemory.argtypes = [wt.HANDLE, wt.LPVOID, wt.LPCVOID, ctypes.c_size_t,
                               ctypes.POINTER(ctypes.c_size_t)]
WriteProcessMemory.restype = wt.BOOL

VirtualAllocEx = k32.VirtualAllocEx
VirtualAllocEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD, wt.DWORD]
VirtualAllocEx.restype = wt.LPVOID

VirtualFreeEx = k32.VirtualFreeEx
VirtualFreeEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD]
VirtualFreeEx.restype = wt.BOOL

VirtualProtectEx = k32.VirtualProtectEx
VirtualProtectEx.argtypes = [wt.HANDLE, wt.LPVOID, ctypes.c_size_t, wt.DWORD,
                             ctypes.POINTER(wt.DWORD)]
VirtualProtectEx.restype = wt.BOOL
|
|
|
|
|
|
def read_uint32(h, addr):
    """Read one unsigned 32-bit value at ``addr`` in the process behind ``h``.

    Args:
        h: process handle passed straight to ReadProcessMemory.
        addr: integer address inside the target process.

    Returns:
        The value read, as a Python int.

    Raises:
        OSError: the call failed or did not deliver exactly 4 bytes.
    """
    value = ctypes.c_uint32(0)
    got = ctypes.c_size_t(0)
    want = ctypes.sizeof(value)  # 4 bytes
    ok = ReadProcessMemory(h, addr, ctypes.byref(value), want, ctypes.byref(got))
    if ok and got.value == want:
        return value.value
    raise OSError(f"read 0x{addr:x} err={ctypes.get_last_error()}")
|
|
|
|
def write_bytes(h, addr, data):
    """Write ``data`` at ``addr`` in the process behind ``h``.

    The covered range is switched to PAGE_READWRITE for the duration of the
    write and then switched back to whatever protection VirtualProtectEx
    reported beforehand.

    Args:
        h: process handle passed to VirtualProtectEx / WriteProcessMemory.
        addr: integer destination address inside the target process.
        data: the byte values to write.

    Raises:
        OSError: the protection change failed, or the write failed or was short.
    """
    size = len(data)
    prev_protect = wt.DWORD(0)
    written = ctypes.c_size_t(0)

    if not VirtualProtectEx(h, addr, size, PAGE_READWRITE, ctypes.byref(prev_protect)):
        raise OSError(f"protect 0x{addr:x} err={ctypes.get_last_error()}")
    try:
        payload = (ctypes.c_ubyte * size)(*data)
        ok = WriteProcessMemory(h, addr, payload, size, ctypes.byref(written))
        if not (ok and written.value == size):
            raise OSError(f"write 0x{addr:x} err={ctypes.get_last_error()}")
    finally:
        # Best-effort restore: runs on success and failure alike, and its own
        # result is deliberately not checked.
        VirtualProtectEx(h, addr, size, prev_protect.value, ctypes.byref(prev_protect))
|
|
|
|
|
|
def build_thunk(remote_addr, delete_fn=None, offsets=None):
    """Emit x86 machine code for the ReleaseSubObjects replacement thunk.

    The thunk expects ``this`` in ECX and no stack arguments, and returns a
    uint8_t in AL.  (That register usage is what a no-argument C++ method
    receives under MSVC ``__thiscall``; it is also compatible with a
    one-argument ``__fastcall``.)  For every offset it frees the pointer
    stored at ``this + offset`` through ``delete_fn`` (called cdecl: one
    pushed argument, caller pops) and then zeroes the field.

    Equivalent C for the default two offsets:

        uint8_t RSO_thunk(void* this /* ECX */) {
            void** p1 = (void**)((char*)this + 0x40);
            void** p2 = (void**)((char*)this + 0x58);
            if (*p1) { operator_delete_arr(*p1); *p1 = 0; }
            if (*p2) { operator_delete_arr(*p2); *p2 = 0; }
            return 1;
        }

    Assembly:

            push esi
            mov  esi, ecx               ; this -> esi (survives the calls)
        ; repeated once per offset:
            mov  eax, [esi + off]
            test eax, eax
            jz   skip
            push eax
            call <delete_fn>            ; rel32, depends on remote_addr
            add  esp, 4
            mov  dword ptr [esi + off], 0
        skip:
        ; epilogue:
            mov  al, 1
            pop  esi
            ret

    Args:
        remote_addr: address the first byte of the thunk will occupy in the
            target process; needed because ``call rel32`` is position
            dependent.
        delete_fn: absolute address of the deallocation routine.  Defaults to
            ``OP_DELETE_ARR_EOR``.
        offsets: iterable of field offsets to free, each in 0..0x7f (they are
            encoded as 8-bit displacements).  Defaults to
            ``(OFF_SOURCEBITS, OFF_SURFACEBITS)``.

    Returns:
        The thunk as ``bytes`` (53 bytes for the default two offsets).

    Raises:
        ValueError: an offset does not fit the disp8 encoding used here.
    """
    # Defaults are resolved at call time so the module constants stay the
    # single source of truth for the EoR layout.
    if delete_fn is None:
        delete_fn = OP_DELETE_ARR_EOR
    if offsets is None:
        offsets = (OFF_SOURCEBITS, OFF_SURFACEBITS)

    # push esi ; mov esi, ecx
    code = bytearray(b"\x56\x8b\xf1")

    for off in offsets:
        if not 0 <= off <= 0x7f:
            raise ValueError(f"offset 0x{off:x} does not fit a positive disp8")

        # mov eax, [esi + off]
        code += bytes((0x8b, 0x46, off))
        # test eax, eax
        code += b"\x85\xc0"
        # jz skip  (rel8 placeholder, patched once the skip target is known)
        code += b"\x74\x00"
        jz_disp_idx = len(code) - 1
        # push eax
        code += b"\x50"
        # call delete_fn: e8 + rel32, relative to the address of the
        # instruction that follows the 5-byte call
        next_instr = remote_addr + len(code) + 5
        code += b"\xe8" + struct.pack("<I", (delete_fn - next_instr) & 0xffffffff)
        # add esp, 4  (cdecl: caller removes the argument)
        code += b"\x83\xc4\x04"
        # mov dword ptr [esi + off], 0
        code += bytes((0xc7, 0x46, off)) + b"\x00\x00\x00\x00"
        # skip: lands here; displacement counts from the byte after the jz
        code[jz_disp_idx] = (len(code) - (jz_disp_idx + 1)) & 0xff

    # mov al, 1 ; pop esi ; ret
    code += b"\xb0\x01\x5e\xc3"
    return bytes(code)
|
|
|
|
|
|
def _revert_from_backup(h, backup_file):
    """Write every slot recorded in ``backup_file`` back to its saved value.

    Exits with status 3 when the backup file does not exist.  The thunk page
    is not freed here: this tool cannot tell whether a thread of the target
    is still executing inside it.
    """
    if not os.path.exists(backup_file):
        print(f"no backup at {backup_file}")
        sys.exit(3)
    with open(backup_file) as f:
        backup = json.load(f)
    for entry in backup["slots"]:
        slot_addr = entry["vtable"] + RSO_SLOT
        cur = read_uint32(h, slot_addr)
        print(f"revert {entry['name']} 0x{entry['vtable']:08x}: "
              f"current 0x{cur:08x} -> pre 0x{entry['pre']:08x}")
        write_bytes(h, slot_addr, struct.pack("<I", entry["pre"]))
    print("revert complete")


def _install_thunk(h, size_hint):
    """Allocate executable memory in the target, write the thunk, verify it.

    Returns ``(remote_addr, code)``.  Exits with status 4 (allocation failed),
    5 (write failed), 6 (read-back failed) or 7 (read-back mismatch); in the
    last three cases the freshly allocated page is released first, since
    nothing references it yet.
    """
    remote = VirtualAllocEx(h, None, max(size_hint, 0x100),
                            MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE)
    if not remote:
        print(f"VirtualAllocEx failed err={ctypes.get_last_error()}")
        sys.exit(4)
    print(f"thunk page allocated at 0x{remote:08x}")

    # The call displacements depend on where the thunk lives, so the real
    # bytes can only be produced once the allocation address is known.
    code = build_thunk(remote)
    print(f"emitted thunk: {len(code)} bytes")

    n = ctypes.c_size_t(0)
    buf = (ctypes.c_ubyte * len(code))(*code)
    if not WriteProcessMemory(h, remote, buf, len(code), ctypes.byref(n)) or n.value != len(code):
        print(f"WriteProcessMemory thunk failed err={ctypes.get_last_error()}")
        VirtualFreeEx(h, remote, 0, MEM_RELEASE)
        sys.exit(5)
    print("thunk written")

    # Validate by reading back before any vtable is pointed at the thunk.
    verify = (ctypes.c_ubyte * len(code))()
    if not ReadProcessMemory(h, remote, verify, len(code), ctypes.byref(n)) or n.value != len(code):
        print(f"verify read failed err={ctypes.get_last_error()}")
        VirtualFreeEx(h, remote, 0, MEM_RELEASE)
        sys.exit(6)
    if bytes(verify) != code:
        print("thunk bytes mismatch on readback")
        VirtualFreeEx(h, remote, 0, MEM_RELEASE)
        sys.exit(7)
    print("thunk verified")
    return remote, code


def patch_process(pid, dry_run=False, revert=False):
    """Install (or revert) the ReleaseSubObjects thunk in process ``pid``.

    Normal mode allocates the thunk in the target, then repoints the
    ``RSO_SLOT`` entry of every vtable in ``VTABLES`` at it, provided the slot
    still holds ``NO_OP_STUB``.  The previous slot values are saved to a
    per-pid JSON backup file that ``revert=True`` later replays.

    Args:
        pid: id of the target process.
        dry_run: only report the thunk size; the target is opened but not
            modified.
        revert: restore the slots listed in the backup file instead of
            patching.

    Exits the interpreter (``sys.exit``) with status 2 when the process cannot
    be opened, 3 when reverting without a backup file, and 4-7 when installing
    the thunk fails.  ``OSError`` from ``read_uint32`` / ``write_bytes``
    propagates to the caller; the backup is still written first for any slot
    that was, or may have been, modified.
    """
    h = OpenProcess(PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION
                    | PROCESS_QUERY_INFORMATION, False, pid)
    if not h:
        print(f"OpenProcess({pid}) failed err={ctypes.get_last_error()}")
        sys.exit(2)
    try:
        backup_file = f"C:\\Users\\acbot\\leakhunt\\artifacts\\soak\\runtime_patch_v2_backup_{pid}.json"
        if revert:
            _revert_from_backup(h, backup_file)
            return

        # Build a sample thunk to compute size, then alloc + emit at real addr
        size_probe = len(build_thunk(0))
        print(f"thunk size: {size_probe} bytes")
        if dry_run:
            print("DRY-RUN: would VirtualAllocEx + emit thunk + rewrite both vtables")
            return

        # Fail here, before the target is touched, if the backup location
        # cannot exist -- otherwise the patch would succeed and be unrevertable.
        os.makedirs(os.path.dirname(backup_file), exist_ok=True)

        remote, code = _install_thunk(h, size_probe)

        applied = []
        pending = None  # slot whose write has started but is not yet confirmed
        try:
            for name, vt in VTABLES:
                slot_addr = vt + RSO_SLOT
                cur = read_uint32(h, slot_addr)
                if cur != NO_OP_STUB:
                    print(f" {name} 0x{vt:08x}: slot +0x2c is 0x{cur:08x}, expected 0x{NO_OP_STUB:08x} — SKIPPING (already patched?)")
                    continue
                entry = dict(name=name, vtable=vt, pre=cur, post=remote)
                pending = entry
                write_bytes(h, slot_addr, struct.pack("<I", remote))
                verify_slot = read_uint32(h, slot_addr)
                pending = None
                if verify_slot != remote:
                    print(f" {name}: slot verify failed (read 0x{verify_slot:08x})")
                    continue
                print(f" {name} 0x{vt:08x}: +0x2c rewritten {cur:#x} -> {remote:#x}")
                applied.append(entry)
        finally:
            if pending is not None:
                # An exception interrupted the write/verify pair, so the slot
                # may already point at the thunk.  Record it so --revert can
                # restore it, and (because applied is now non-empty) keep the
                # thunk page alive.
                print(f" {pending['name']}: write not confirmed; recorded in backup for revert")
                applied.append(pending)
            if applied:
                with open(backup_file, "w") as f:
                    json.dump({"pid": pid, "thunk": remote, "slots": applied,
                               "thunk_size": len(code)}, f, indent=2)
                print(f"backup saved to {backup_file}")
            else:
                # Nothing references the thunk, so it is safe to release.
                print("NO slots patched")
                VirtualFreeEx(h, remote, 0, MEM_RELEASE)
    finally:
        CloseHandle(h)
|
|
|
|
|
|
def main():
    """Command-line entry point: parse ``<pid> [--dry-run] [--revert]`` and run the patcher."""
    parser = argparse.ArgumentParser()
    parser.add_argument("pid", type=int)
    # Both switches are plain booleans that default to False.
    for flag in ("--dry-run", "--revert"):
        parser.add_argument(flag, action="store_true")
    opts = parser.parse_args()
    patch_process(opts.pid, dry_run=opts.dry_run, revert=opts.revert)


if __name__ == "__main__":
    main()
|