leakhunt/tools/find_rendersurfaces.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

165 lines
7 KiB
Python

"""
find_rendersurfaces.py <dump.dmp>

Definitive diagnostic to distinguish:
  F1 (cache-freelist leak):  leaked surfaces have m_numLinks == 1 (cache-only)
  F2 (upstream-holder leak): leaked surfaces have m_numLinks > 1

Method:
  1. Enumerate the leaked 256-512 KB private RW regions (the BGRA buffers).
  2. For each leaked region's base address R, search the committed, writable,
     non-image memory of the dump for any 4-byte-aligned value == R. That
     match location L is a pointer field — most likely
     RenderSurface::m_pSurfaceBits.
  3. From L, walk backwards in 4-byte steps (0x20 to 0x13C bytes back) looking
     for a DWORD that points into acclient.exe's image range. That DWORD is
     the containing object's vtable; its address is the object's base B.
  4. Read m_numLinks at B + 0x24 (DBObj layout).
  5. Histogram by (vtable, m_numLinks). The dominant vtable is RenderSurface.
     The mode of m_numLinks for that vtable answers the question.
"""
import os
import struct
import sys
from collections import Counter, defaultdict
from minidump.minidumpfile import MinidumpFile
def _enum_int(v):
    """Coerce an enum-like dump field to a plain ``int``.

    Args:
        v: ``None``, an object exposing a ``value`` attribute (e.g. an
            ``enum.Enum`` member), or anything ``int()`` accepts.

    Returns:
        ``0`` for ``None``; otherwise ``int(v.value)`` when ``v`` has a
        ``value`` attribute, else ``int(v)``.
    """
    if v is None:
        return 0
    # Enum members carry their numeric payload in ``.value``; plain ints
    # (and other int-convertible objects) are converted directly.
    return int(getattr(v, "value", v))
# Memory-region classification values compared against the dump's
# State / Type / Protect fields.
_MEM_COMMIT = 0x1000
_MEM_PRIVATE = 0x20000
_MEM_IMAGE = 0x1000000
_RW_PROTECTIONS = (0x04, 0x40)  # READWRITE, EXECUTE_READWRITE

# Size band (bytes, half-open) of the leaked surface backing buffers.
_LEAK_MIN_SIZE = 256 * 1024
_LEAK_MAX_SIZE = 512 * 1024

# Back-walk window from a pointer field to the candidate object base, and
# the object-header layout that is sanity-checked for each candidate.
_BACKWALK_START = 0x20
_BACKWALK_STOP = 0x140       # exclusive; "max object size"
_HEADER_SIZE = 0x40
_MAINTAINER_OFFSET = 0x20    # m_pMaintainer; m_numLinks follows at +0x24


def _module_basename(name):
    """Return the lower-cased file name component of a module path."""
    # Module paths in the dump are Windows-style. os.path.basename does not
    # split on backslashes on POSIX hosts, so use ntpath, which accepts both
    # separators on every platform.
    import ntpath
    return ntpath.basename(name).lower()


def _region_attrs(info):
    """Return (state, type, low protection byte) of a memory-info entry as ints."""
    return (
        _enum_int(info.State),
        _enum_int(info.Type),
        _enum_int(info.Protect) & 0xFF,
    )


def _find_leaked_regions(infos):
    """Return the base addresses of committed private RW regions in the leak size band."""
    leaked = set()
    for info in infos:
        state, kind, protect = _region_attrs(info)
        if (state == _MEM_COMMIT and kind == _MEM_PRIVATE
                and protect in _RW_PROTECTIONS
                and _LEAK_MIN_SIZE <= info.RegionSize < _LEAK_MAX_SIZE):
            leaked.add(info.BaseAddress)
    return leaked


def _writable_scan_regions(infos):
    """Return (base, size) for every committed, writable, non-image region."""
    regions = []
    for info in infos:
        state, kind, protect = _region_attrs(info)
        # Image-backed regions are skipped: we are looking for pointers in
        # heap allocations, not file-backed data.
        if (state == _MEM_COMMIT and kind != _MEM_IMAGE
                and protect in _RW_PROTECTIONS):
            regions.append((info.BaseAddress, info.RegionSize))
    return regions


def _scan_for_pointers(reader, scan_regions, targets):
    """Find 4-byte-aligned little-endian DWORDs whose value is in ``targets``.

    Returns:
        ``(matches, scanned_bytes)`` where ``matches`` is a list of
        ``(pointer_location, pointed_region_base)`` tuples.
    """
    matches = []
    scanned_bytes = 0
    for base, size in scan_regions:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: a region listed in the memory info may not be
            # readable from the dump; skip it rather than abort the scan.
            continue
        if not buf:
            continue
        scanned_bytes += len(buf)
        # iter_unpack needs a whole number of DWORDs; drop any ragged tail.
        usable = len(buf) - len(buf) % 4
        words = struct.iter_unpack("<I", memoryview(buf)[:usable])
        for index, (value,) in enumerate(words):
            if value in targets:
                matches.append((base + 4 * index, value))
    return matches, scanned_bytes


def _resolve_owner(reader, ptr_loc, acl_lo, acl_hi):
    """Walk back from a pointer field to the object that plausibly contains it.

    Returns:
        ``(object_base, vtable, m_numLinks, m_pMaintainer, back_offset)`` for
        the nearest candidate that passes the header sanity checks, or
        ``None`` when no candidate in the window qualifies.
    """
    for back in range(_BACKWALK_START, _BACKWALK_STOP, 4):
        obj_base = ptr_loc - back
        try:
            reader.move(obj_base)
            hdr = reader.read(_HEADER_SIZE)
        except Exception:
            # Candidate header is not readable from the dump; try the next.
            continue
        if not hdr or len(hdr) < _HEADER_SIZE:
            continue
        vtbl = struct.unpack_from("<I", hdr, 0)[0]
        # A real object starts with a vtable pointer into acclient.exe.
        if not acl_lo <= vtbl < acl_hi:
            continue
        mtnr, num_links = struct.unpack_from("<II", hdr, _MAINTAINER_OFFSET)
        if not 1 <= num_links <= 1000:  # plausible refcount
            continue
        if mtnr != 0 and not 0x00010000 <= mtnr < 0x80000000:
            continue
        return obj_base, vtbl, num_links, mtnr, back
    return None


def _report(findings, acl_lo):
    """Print the per-vtable table and the verdict. ``findings`` must be non-empty."""
    by_vtable = defaultdict(list)
    for _obj_base, vtbl, num_links, _mtnr, off in findings:
        by_vtable[vtbl].append((num_links, off))

    print("vtable groups (sorted by candidate count):")
    print(f" {'vtable':>10} {'rva':>10} {'count':>6} {'mode m_numLinks':<20} {'m_pSurfaceBits offset histogram':<48}")
    for vtbl in sorted(by_vtable, key=lambda v: -len(by_vtable[v])):
        rows = by_vtable[vtbl]
        rc_counter = Counter(r[0] for r in rows)
        off_counter = Counter(r[1] for r in rows)
        rc_top = ", ".join(f"{k}->{v}" for k, v in rc_counter.most_common(5))
        # Show up to the 8 most common pointer-field offsets.
        off_full = ", ".join(f"0x{k:x}->{v}" for k, v in off_counter.most_common(8))
        print(f" 0x{vtbl:08x} 0x{vtbl-acl_lo:08x} {len(rows):>6} {rc_top:<20} {off_full}")

    # The dominant vtable is taken to be RenderSurface; report its
    # m_numLinks distribution.
    top_vtable, top_rows = max(by_vtable.items(), key=lambda kv: len(kv[1]))
    print()
    print("=== DIAGNOSTIC RESULT ===")
    print(f"dominant vtable: 0x{top_vtable:08x} RVA 0x{top_vtable-acl_lo:08x}")
    rc_counter = Counter(r[0] for r in top_rows)
    print(f"m_numLinks distribution for that vtable ({len(top_rows)} objects):")
    for rc, cnt in rc_counter.most_common(10):
        pct = 100.0 * cnt / len(top_rows)
        print(f" m_numLinks = {rc:>4} {cnt:>5} objects ({pct:.1f}%)")
    mode_rc = rc_counter.most_common(1)[0][0]
    print()
    if mode_rc == 1:
        print("VERDICT: mode m_numLinks == 1 → cache-only (FINDING_001 family — cache-freelist leak)")
    elif mode_rc > 1:
        print(f"VERDICT: mode m_numLinks == {mode_rc} → external holder(s) present (FINDING_002 family — upstream leak)")
    else:
        print(f"VERDICT: unexpected mode m_numLinks == {mode_rc}; inspect manually")


def main():
    """Classify leaked surface buffers in a minidump by their owners' m_numLinks.

    Reads the dump path from ``sys.argv[1]``, locates the leaked 256-512 KB
    private RW regions, finds pointers to them in writable non-image memory,
    resolves each pointer to a containing object and prints a histogram of
    (vtable, m_numLinks) followed by a verdict.

    Exits with status 1 (message on stderr) when the argument is missing,
    the file does not exist, acclient.exe or the memory-info list is absent
    from the dump, or no leaked region / owning object can be found.
    """
    if len(sys.argv) < 2:
        print("usage: find_rendersurfaces.py <dump.dmp>", file=sys.stderr)
        sys.exit(1)
    path = sys.argv[1]
    if not os.path.exists(path):
        print(f"not found: {path}", file=sys.stderr)
        sys.exit(1)

    md = MinidumpFile.parse(path)
    reader = md.get_reader().get_buffered_reader()

    # acclient.exe image range — used to validate vtable pointers.
    acl = next((m for m in md.modules.modules
                if _module_basename(m.name) == "acclient.exe"), None)
    if acl is None:
        print("acclient.exe not in module list", file=sys.stderr)
        sys.exit(1)
    acl_lo, acl_hi = acl.baseaddress, acl.baseaddress + acl.size
    print(f"acclient.exe: 0x{acl_lo:08x} - 0x{acl_hi:08x} size={acl.size}")

    memory_info = getattr(md, "memory_info", None)
    if memory_info is None:
        print("dump has no memory info list; cannot classify regions",
              file=sys.stderr)
        sys.exit(1)
    infos = memory_info.infos

    # Step 1: leaked 256-512 KB private RW regions.
    leaked_regions = _find_leaked_regions(infos)
    print(f"leaked 256-512 KB private regions: {len(leaked_regions)}")
    if not leaked_regions:
        # Nothing to look for; skip the (slow) full-memory scan.
        print("no leaked regions found; nothing to diagnose", file=sys.stderr)
        sys.exit(1)

    # Step 2: scan every committed writable non-image region for 4-byte LE
    # values equal to a leaked-region base address.
    scan_regions = _writable_scan_regions(infos)
    print(f"scanning {len(scan_regions)} writable regions...")
    matches, total_scanned_bytes = _scan_for_pointers(
        reader, scan_regions, leaked_regions)
    print(f"scanned {total_scanned_bytes/(1024*1024):.1f} MB")
    print(f"pointers to leaked regions found: {len(matches)}")

    # Steps 3 + 4: resolve each pointer location to its containing object
    # and read m_numLinks from the object header.
    findings = []
    for ptr_loc, _region_val in matches:
        owner = _resolve_owner(reader, ptr_loc, acl_lo, acl_hi)
        if owner is not None:
            findings.append(owner)
    print(f"resolved RenderSurface objects: {len(findings)}")
    print()
    if not findings:
        # Without this guard the histogram's max() over an empty mapping
        # would raise ValueError.
        print("no candidate objects resolved; cannot determine a verdict",
              file=sys.stderr)
        sys.exit(1)

    # Step 5: histogram by (vtable, m_numLinks) and verdict.
    _report(findings, acl_lo)
# Run the diagnostic only when executed as a script, not when imported.
if __name__ == "__main__":
    main()