leakhunt/tools/probe_iter3_fast.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

125 lines
4.9 KiB
Python

"""probe_iter3_fast.py <pid>
Fast version: uses bytes.find() to locate vtable bytes in each region,
then evaluates predicate only on those candidates."""
import ctypes, ctypes.wintypes as wt, sys, struct
# Access rights requested from OpenProcess: read the target's memory and
# query its region layout.
PROCESS_VM_READ = 0x10
PROCESS_QUERY_INFORMATION = 0x400

# Load a private kernel32 instance with use_last_error=True.  Without that
# flag ctypes never captures the Win32 last-error value, so
# ctypes.get_last_error() would always report 0 after a failed call.  A
# private WinDLL also keeps these prototypes off the process-wide
# ctypes.windll.kernel32 object shared with other modules.
k = ctypes.WinDLL("kernel32", use_last_error=True)

k.OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
k.OpenProcess.restype = wt.HANDLE

k.ReadProcessMemory.argtypes = [
    wt.HANDLE,
    wt.LPCVOID,
    wt.LPVOID,
    ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
k.ReadProcessMemory.restype = wt.BOOL

k.VirtualQueryEx.argtypes = [
    wt.HANDLE,
    wt.LPCVOID,
    ctypes.c_void_p,
    ctypes.c_size_t,
]
k.VirtualQueryEx.restype = ctypes.c_size_t

# Declared explicitly so the handle is marshalled as a pointer-sized value
# rather than ctypes' default C int.
k.CloseHandle.argtypes = [wt.HANDLE]
k.CloseHandle.restype = wt.BOOL
class MBI(ctypes.Structure):
    """Output buffer handed to VirtualQueryEx for one memory region.

    The field order and types follow the Win32 MEMORY_BASIC_INFORMATION
    structure.  The scan loop reads BaseAddress, RegionSize, State, Protect
    and Type; the remaining fields exist to keep the layout correct.
    """

    _fields_ = [
        # Start address of the region (None when the pointer is NULL).
        ("BaseAddress", ctypes.c_void_p),
        ("AllocationBase", ctypes.c_void_p),
        ("AllocationProtect", wt.DWORD),
        # Length of the region in bytes.
        ("RegionSize", ctypes.c_size_t),
        # Compared against MEM_COMMIT by the scanner.
        ("State", wt.DWORD),
        # Page-protection flags; the scanner masks the low byte.
        ("Protect", wt.DWORD),
        # Compared against MEM_PRIVATE by the scanner.
        ("Type", wt.DWORD),
    ]
# Region attributes the scanner requires of a VirtualQueryEx result.
MEM_COMMIT = 0x1000
MEM_PRIVATE = 0x20000

# Little-endian encoding of the DWORD 0x007C78EC.  The scanner searches each
# region for these four bytes and treats every aligned hit as the first
# DWORD of a candidate object.
# NOTE(review): presumably the CPhysicsObj vtable address -- confirm.
CPHYS_VTABLE_BYTES = (0x007C78EC).to_bytes(4, "little")

# Byte offsets, relative to a candidate's first byte, of the DWORD fields
# the scanner reads.
# NOTE(review): field meanings are inferred from the names only.
OFF_HASH_NEXT = 0x04
OFF_ID = 0x08
OFF_PARENT = 0x40
OFF_CELL = 0x90
OFF_STATE = 0xA8
OFF_TRANSTATE = 0xAC
OFF_MOVMGR = 0xC4
OFF_WEENIE = 0x12C

# Bytes that must be available after a hit: up to and including the DWORD at
# the highest offset read (0x12C + 4 == 0x130).
OBJ_MIN_SIZE = OFF_WEENIE + 4
def rd(h, va, n):
    """Read *n* bytes at address *va* from the process behind handle *h*.

    Returns the bytes ReadProcessMemory reports as copied, or None when the
    call fails.
    """
    buf = (ctypes.c_ubyte * n)()
    copied = ctypes.c_size_t(0)
    if not k.ReadProcessMemory(h, va, buf, n, ctypes.byref(copied)):
        return None
    # Slicing a c_ubyte array directly builds a Python list with one int per
    # byte, which is very slow and memory-hungry for multi-megabyte regions.
    # Slice a memoryview instead and copy the payload exactly once.
    return bytes(memoryview(buf)[:copied.value])
def _u32(buf, pos):
    """Return the little-endian DWORD stored at byte offset *pos* of *buf*."""
    return struct.unpack_from('<I', buf, pos)[0]


def _bucket_for(state, movmgr, weenie):
    """Return the histogram label for one object's state/movmgr/weenie fields."""
    if weenie:
        return "weenie!=NULL state!=0" if state else "weenie!=NULL state=0"
    if movmgr:
        return "weenie=NULL movmgr!=NULL"
    if state:
        return "weenie=NULL movmgr=NULL state!=0"
    return "weenie=NULL movmgr=NULL state=0"


# A missing or non-numeric pid gets a usage line instead of a traceback.
try:
    pid = int(sys.argv[1])
except (IndexError, ValueError):
    print("usage: probe_iter3_fast.py <pid>", file=sys.stderr)
    sys.exit(2)

h = k.OpenProcess(PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, False, pid)
if not h:
    # NOTE: ctypes.get_last_error() returns ctypes' private copy of the
    # error code, which is only populated when the kernel32 object was
    # loaded with use_last_error=True.
    print(f"OpenProcess err={ctypes.get_last_error()}")
    sys.exit(2)

# Counters: every aligned pattern hit, and the subset whose parent, cell and
# hash_next fields are all zero (the "iter-3 triple").
n_total = 0
n_triple = 0
# Histogram of the triple-matching objects; insertion order is print order.
buckets = {
    "weenie=NULL movmgr=NULL state=0": 0,
    "weenie=NULL movmgr=NULL state!=0": 0,
    "weenie=NULL movmgr!=NULL": 0,
    "weenie!=NULL state=0": 0,
    "weenie!=NULL state!=0": 0,
}
samples = {label: [] for label in buckets}
region_scan = 0
mbi = MBI()
addr = 0

# try/finally so the process handle is released even if the scan is
# interrupted or raises part-way through.
try:
    while k.VirtualQueryEx(h, addr, ctypes.byref(mbi), ctypes.sizeof(mbi)):
        region_base = mbi.BaseAddress or 0
        region_size = mbi.RegionSize
        # Only committed, private regions whose base protection is
        # PAGE_READWRITE (0x04) or PAGE_EXECUTE_READWRITE (0x40).
        scannable = (mbi.State == MEM_COMMIT and mbi.Type == MEM_PRIVATE
                     and (mbi.Protect & 0xFF) in (0x04, 0x40))
        data = rd(h, region_base, region_size) if scannable else None
        if data:
            region_scan += 1
            # bytes.find() runs in C, so candidates are located quickly and
            # the Python-level predicate only runs on actual hits.
            off = data.find(CPHYS_VTABLE_BYTES)
            while off >= 0:
                # A real DWORD pointer is 4-byte aligned; skip other hits.
                if off & 3:
                    off = data.find(CPHYS_VTABLE_BYTES, off + 1)
                    continue
                # Too close to the end of the region to hold a full object;
                # every later hit would be too, so stop scanning it.
                if off + OBJ_MIN_SIZE > len(data):
                    break
                n_total += 1
                parent = _u32(data, off + OFF_PARENT)
                cell = _u32(data, off + OFF_CELL)
                hash_next = _u32(data, off + OFF_HASH_NEXT)
                if parent == 0 and cell == 0 and hash_next == 0:
                    n_triple += 1
                    obj_id = _u32(data, off + OFF_ID)
                    state = _u32(data, off + OFF_STATE)
                    transtate = _u32(data, off + OFF_TRANSTATE)
                    movmgr = _u32(data, off + OFF_MOVMGR)
                    weenie = _u32(data, off + OFF_WEENIE)
                    obj_va = region_base + off
                    bucket = _bucket_for(state, movmgr, weenie)
                    buckets[bucket] += 1
                    # Keep at most three example objects per bucket.
                    if len(samples[bucket]) < 3:
                        samples[bucket].append(
                            (obj_va, obj_id, state, transtate, movmgr, weenie))
                off = data.find(CPHYS_VTABLE_BYTES, off + 4)
        next_addr = region_base + region_size
        # Guard against a query that makes no forward progress.
        if next_addr <= addr:
            break
        addr = next_addr
        # Stop at 2 GiB.
        # NOTE(review): presumably the top of the target's 32-bit user
        # address space -- confirm.
        if addr >= 0x80000000:
            break
finally:
    k.CloseHandle(h)

print(f"pid {pid}: regions_scanned={region_scan} total_CPhysicsObj={n_total} iter-3-triple={n_triple}")
print()
print(f" {'bucket':45s} {'count':>6}")
for label, count in buckets.items():
    print(f" {label:45s} {count:>6}")
print()
print("Sample dumps (up to 3 per bucket):")
for label, samples_list in samples.items():
    if not samples_list:
        continue
    print(f" [{label}]")
    for obj_va, obj_id, state, transtate, movmgr, weenie in samples_list:
        print(f" @0x{obj_va:08x} id=0x{obj_id:08x} state=0x{state:08x} ts=0x{transtate:08x} movmgr=0x{movmgr:08x} weenie=0x{weenie:08x}")