leakhunt/tools/probe_260k_holders.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

175 lines
7.1 KiB
Python

"""probe_260k_holders.py <pid>
Walk all 260KB private-RW regions in the target process. For each, read the
first 32 bytes (the "header") and check:
- First DWORD: is it a pointer into d3d9.dll's image range? (= a vtable
pointer for a d3d9-managed object)
- First 32 DWORDs: any pointers back into the process's heap?
(not implemented yet: only the first DWORD is inspected)
- Find any pointer in the process's heap that points to this region's base.
Goal: determine whether the 260KB blocks are
(a) live d3d9-managed objects (have a d3d9 vtable at offset 0)
(b) raw backing buffers with no AC-side holder pointers
(c) AC-held buffers with at least one pointer from AC's heap"""
import ctypes, ctypes.wintypes as wt, sys, struct
# Access rights requested from OpenProcess: read the target's memory and
# query its region / module information.
PROCESS_VM_READ = 0x10
PROCESS_QUERY_INFORMATION = 0x400

# Every kernel32 entry point the script calls gets an explicit prototype.
# Without argtypes ctypes passes a Python int as a plain C int, which is
# narrower than a pointer-sized HANDLE on 64-bit Python; CloseHandle was the
# one call that had no prototype.
k = ctypes.windll.kernel32

k.OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]
k.OpenProcess.restype = wt.HANDLE

k.ReadProcessMemory.argtypes = [
    wt.HANDLE,
    wt.LPCVOID,
    wt.LPVOID,
    ctypes.c_size_t,
    ctypes.POINTER(ctypes.c_size_t),
]
k.ReadProcessMemory.restype = wt.BOOL

k.VirtualQueryEx.argtypes = [wt.HANDLE, wt.LPCVOID, ctypes.c_void_p, ctypes.c_size_t]
k.VirtualQueryEx.restype = ctypes.c_size_t

k.CloseHandle.argtypes = [wt.HANDLE]
k.CloseHandle.restype = wt.BOOL
class MBI(ctypes.Structure):
    """Buffer that VirtualQueryEx fills in to describe one memory region.

    The script reads BaseAddress and RegionSize to step through the address
    space, and State / Protect / Type to filter the regions it keeps.
    """

    _fields_ = [
        ("BaseAddress", ctypes.c_void_p),
        ("AllocationBase", ctypes.c_void_p),
        ("AllocationProtect", wt.DWORD),
        ("RegionSize", ctypes.c_size_t),
        ("State", wt.DWORD),
        ("Protect", wt.DWORD),
        ("Type", wt.DWORD),
    ]
def rd(h, va, n):
    """Read ``n`` bytes at address ``va`` from the process behind handle ``h``.

    Returns the bytes ReadProcessMemory reports as transferred, or None when
    the call fails.
    """
    buf = ctypes.create_string_buffer(n)
    got = ctypes.c_size_t(0)
    if not k.ReadProcessMemory(h, va, buf, n, ctypes.byref(got)):
        return None
    # Copy straight out of the buffer. Slicing a ctypes byte array first
    # builds a Python list holding one int per byte, which is a very large
    # temporary for the multi-megabyte regions this script reads.
    return ctypes.string_at(buf, got.value)
# The target PID is the only command-line argument. Report a missing or
# malformed one with a usage line instead of an IndexError/ValueError traceback.
if len(sys.argv) < 2:
    print("usage: probe_260k_holders.py <pid>", file=sys.stderr)
    sys.exit(2)
try:
    pid = int(sys.argv[1])
except ValueError:
    print(f"invalid pid: {sys.argv[1]!r}", file=sys.stderr)
    print("usage: probe_260k_holders.py <pid>", file=sys.stderr)
    sys.exit(2)

# One handle with read + query rights serves every call made below.
h = k.OpenProcess(PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, False, pid)
if not h:
    print("OpenProcess fail")
    sys.exit(2)
# Collect every committed read-write region as (base, size, type, protect).
# The walk starts at address 0 and hops from one region to the next until
# VirtualQueryEx returns 0, the address stops advancing, or it reaches 2 GB.
regions = []
mbi = MBI()
addr = 0
while True:
    if not k.VirtualQueryEx(h, addr, ctypes.byref(mbi), ctypes.sizeof(mbi)):
        break
    base = mbi.BaseAddress or 0  # c_void_p reads back as None for address 0
    sz = mbi.RegionSize
    # State 0x1000 = MEM_COMMIT; the low protect byte selects
    # PAGE_READWRITE (0x04) or PAGE_EXECUTE_READWRITE (0x40).
    if mbi.State == 0x1000:
        if (mbi.Protect & 0xFF) in (0x04, 0x40):
            regions.append((base, sz, mbi.Type, mbi.Protect))
    next_addr = base + sz
    if next_addr <= addr:
        # No forward progress; stop rather than loop forever.
        break
    addr = next_addr
    if addr >= 0x80000000:
        break
# Module image ranges (d3d9.dll, acclient.exe) are looked up through PSAPI.
# Declare the prototypes of the enumeration and file-name calls up front.
psapi = ctypes.windll.psapi

psapi.EnumProcessModulesEx.restype = wt.BOOL
psapi.EnumProcessModulesEx.argtypes = [
    wt.HANDLE,
    ctypes.POINTER(wt.HMODULE),
    wt.DWORD,
    ctypes.POINTER(wt.DWORD),
    wt.DWORD,
]

psapi.GetModuleFileNameExA.restype = wt.DWORD
psapi.GetModuleFileNameExA.argtypes = [
    wt.HANDLE,
    wt.HMODULE,
    ctypes.c_char_p,
    wt.DWORD,
]
class MODULEINFO(ctypes.Structure):
    """Record filled in by psapi.GetModuleInformation for one loaded module.

    The script uses lpBaseOfDll and SizeOfImage to compute a module's
    [base, base + size) address range.
    """

    _fields_ = [
        ("lpBaseOfDll", ctypes.c_void_p),
        ("SizeOfImage", wt.DWORD),
        ("EntryPoint", ctypes.c_void_p),
    ]
psapi.GetModuleInformation.argtypes = [wt.HANDLE, wt.HMODULE, ctypes.POINTER(MODULEINFO), wt.DWORD]
psapi.GetModuleInformation.restype = wt.BOOL

# PSAPI needs a handle with QUERY + READ rights. `h` was already opened with
# exactly those rights (0x10 | 0x400 == 0x410), so reuse it instead of opening
# a second, identical handle that would never be closed.
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000  # unused; kept so the name stays defined
h2 = h

# Address ranges [lo, hi) of the two images of interest; (0, 0) = not found.
d3d9_lo, d3d9_hi = 0, 0
ac_image_lo, ac_image_hi = 0, 0

needed = wt.DWORD(0)
hmods = (wt.HMODULE * 1024)()
# 0x03 = LIST_MODULES_ALL: enumerate both 32-bit and 64-bit modules.
if psapi.EnumProcessModulesEx(h2, hmods, ctypes.sizeof(hmods), ctypes.byref(needed), 0x03):
    # `needed` is the byte count for the complete module list, which can be
    # larger than the array that was passed in; never index past the array.
    n = min(needed.value // ctypes.sizeof(wt.HMODULE), len(hmods))
    name = ctypes.create_string_buffer(260)
    info = MODULEINFO()
    for i in range(n):
        # A failed name lookup leaves the previous module's path in `name`;
        # skip the module rather than classify it under a stale name.
        if not psapi.GetModuleFileNameExA(h2, hmods[i], name, 260):
            continue
        nm = name.value.decode(errors='replace').lower()
        if not psapi.GetModuleInformation(h2, hmods[i], ctypes.byref(info), ctypes.sizeof(info)):
            continue
        base = info.lpBaseOfDll or 0
        sz = info.SizeOfImage
        # Match on the file name only. A substring test on the whole path
        # would also hit other modules (or any module under a directory)
        # whose path merely contains "d3d9".
        leaf = nm.replace('/', '\\').rsplit('\\', 1)[-1]
        if leaf == 'd3d9.dll':
            d3d9_lo = base
            d3d9_hi = base + sz
        elif leaf == 'acclient.exe':
            ac_image_lo = base
            ac_image_hi = base + sz
else:
    print(f"EnumProcessModulesEx failed err={ctypes.GetLastError()}", file=sys.stderr)

print(f"d3d9.dll range: 0x{d3d9_lo:08x} - 0x{d3d9_hi:08x}")
print(f"acclient image: 0x{ac_image_lo:08x} - 0x{ac_image_hi:08x}")
# Pick out the private regions whose size is exactly 266240 bytes (260KB).
TARGET_SIZE = 266240 # 256K + 4K
candidates = []
for (b, s, t, p) in regions:
    if s != TARGET_SIZE:
        continue
    if t & 0x20000:  # 0x20000 = MEM_PRIVATE
        candidates.append((b, s))
print(f"\nFound {len(candidates)} regions of exactly {TARGET_SIZE} bytes (260KB) "
      f"in private RW.\n")

# Only the first N candidates (at most 20) are classified below.
N = len(candidates) if len(candidates) < 20 else 20
print(f"Sampling first {N} for content + holder counts:")
# Pre-load private RW regions as a list of (base, data) for the holder-scan.
# That's expensive, so total bytes are limited to 200 MB to avoid an OOM.
print(" - loading heap regions for holder-scan (capped at 200 MB)...")
total_loaded = 0
heap_blob = []
MAX_BYTES = 200 * 1024 * 1024
cap_reached = False
for (b, s, t, p) in regions:
    # Apply the skip filters first: a region that would never be loaded
    # (not private, or huge) must not be able to trip the cap and end the load.
    if not (t & 0x20000):
        continue
    if s > 64 * 1024 * 1024:
        continue  # skip huge regions
    if total_loaded + s > MAX_BYTES:
        cap_reached = True
        break
    data = rd(h, b, s)
    if data is not None:
        heap_blob.append((b, data))
        total_loaded += s
print(f" loaded {total_loaded/1024/1024:.1f} MB across {len(heap_blob)} regions")
if cap_reached:
    # Regions past the cap are not scanned, so a holder count of 0 is only a
    # lower bound in that case; say so instead of truncating silently.
    print(" (cap reached: remaining regions not loaded; holder counts are lower bounds)")
def count_holders(target_va, max_count=5):
    """Find DWORD-aligned occurrences of ``target_va`` in the loaded heap data.

    Scans every (base, data) pair in the module-level ``heap_blob`` for the
    little-endian 32-bit encoding of ``target_va``.

    Returns a list of (holder_va, surrounding_hex) tuples, where holder_va is
    the address of the matching DWORD and surrounding_hex is a hex dump of up
    to 8 bytes before and 8 bytes from the match. The scan stops as soon as
    ``max_count`` hits have been collected.

    Raises struct.error if ``target_va`` does not fit in an unsigned 32-bit
    integer.
    """
    tb = struct.pack('<I', target_va)
    hits = []
    for (base, data) in heap_blob:
        off = data.find(tb)
        while off >= 0:
            if (off & 3) == 0:
                hits.append((base + off, data[max(0, off-8):off+8].hex(' ')))
                if len(hits) >= max_count:
                    return hits
            # Resume at the next DWORD boundary. Stepping a fixed 4 bytes from
            # an unaligned match (e.g. 5 -> 9) can jump over an aligned match
            # that overlaps it (at 8).
            off = data.find(tb, (off & ~3) + 4)
    return hits
# Classify each sampled region by its first DWORD and count heap pointers
# that reference the region's base address.
vtable_d3d9 = 0
vtable_other = 0
no_vtable_at_0 = 0
rd_failures = 0
holder_counts = []
HOLDER_CAP = 3  # count_holders stops after this many hits per region
for (b, s) in candidates[:N]:
    head = rd(h, b, 32)
    # A read shorter than one DWORD cannot be unpacked; treat it as a failure
    # instead of letting struct.unpack raise.
    if head is None or len(head) < 4:
        print(f" 0x{b:08x}: rd fail")
        rd_failures += 1
        continue
    first = struct.unpack_from('<I', head)[0]
    is_d3d9_vt = d3d9_lo <= first < d3d9_hi
    is_ac_vt = ac_image_lo <= first < ac_image_hi
    if is_d3d9_vt:
        vtable_d3d9 += 1
        cat = "d3d9-vtable"
    elif is_ac_vt or first != 0:
        vtable_other += 1
        cat = "ac-vtable" if is_ac_vt else "non-zero"
    else:
        no_vtable_at_0 += 1
        cat = "zero"
    holders = count_holders(b, max_count=HOLDER_CAP)
    holder_counts.append(len(holders))
    print(f" 0x{b:08x}: first_dword=0x{first:08x} ({cat}) "
          f"holders={len(holders)}", end='')
    if holders:
        # Show the address of the first holder as an example.
        print(f" ex@0x{holders[0][0]:08x}", end='')
    print()

print(f"\nSummary of {N} sampled 260KB regions:")
print(f" with d3d9-vtable at offset 0: {vtable_d3d9}")
print(f" with other non-zero at offset 0: {vtable_other}")
print(f" with zero at offset 0: {no_vtable_at_0}")
if rd_failures:
    # Failed reads are excluded from every count above; report them so the
    # categories add up to N.
    print(f" unreadable (rd fail): {rd_failures}")
total_holders = sum(holder_counts)
# Each per-region count is capped, so the total is a lower bound.
print(f" total pointers TO these regions from heap: {total_holders}"
      f" (capped at {HOLDER_CAP} per region)")
print(f" regions with >=1 holder: {sum(1 for c in holder_counts if c > 0)}")
print(f" regions with 0 holders: {sum(1 for c in holder_counts if c == 0)}")

# Release the process handle(s). h2 may be the same handle as h.
if h2 and h2 != h:
    k.CloseHandle(h2)
k.CloseHandle(h)