Five bugs identified and patched in the retail Asheron's Call client:

- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…), which is loaded by
acclient.exe at process start via PE import table patching performed by
tools/install_leakfix.py.

Controlled 15-client fleet soak: the unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point and
reached ≥5-day uptime.

Residual ~15 MB/h growth was traced to d3d9.dll's internal slab allocator
(260 KB surface backing buffers retained after Release). See REPORT.md §10
for the full investigation; the conclusion is that it is unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
165 lines
7 KiB
Python
165 lines
7 KiB
Python
"""
|
|
find_rendersurfaces.py <dump.dmp>
|
|
|
|
Definitive diagnostic to distinguish:
|
|
F1 (cache-freelist leak): leaked surfaces have m_numLinks == 1 (cache-only)
|
|
F2 (upstream-holder leak): leaked surfaces have m_numLinks > 1
|
|
|
|
Method:
|
|
1. Enumerate the leaked 256-512 KB private RW regions (the BGRA buffers).
|
|
2. For each leaked region's base address R, search the entire committed
|
|
memory of the dump for any 4-byte value == R. That match location L
|
|
is a pointer field — most likely RenderSurface::m_pSurfaceBits.
|
|
3. From L, walk backwards in 4-byte steps looking for a DWORD that points
|
|
into acclient.exe's image range. That DWORD is the containing object's
|
|
vtable; its address is the object's base B.
|
|
4. Read m_numLinks at B + 0x24 (DBObj layout).
|
|
5. Histogram by (vtable, m_numLinks). The dominant vtable is RenderSurface.
|
|
The mode of m_numLinks for that vtable answers the question.
|
|
"""
|
|
import os
|
|
import struct
|
|
import sys
|
|
from collections import Counter, defaultdict
|
|
|
|
from minidump.minidumpfile import MinidumpFile
|
|
|
|
|
|
def _enum_int(v):
    """Coerce a minidump field to a plain ``int``.

    ``None`` maps to 0. Objects exposing a ``.value`` attribute (for
    example enum members) are converted through that attribute; anything
    else is handed to ``int()`` directly.
    """
    if v is None:
        return 0
    # Fall back to the object itself when it carries no ``.value``.
    payload = getattr(v, "value", v)
    return int(payload)
|
|
|
|
|
|
# Win32 MEMORY_BASIC_INFORMATION values used to classify dump regions.
_MEM_COMMIT = 0x1000
_MEM_PRIVATE = 0x20000
_MEM_IMAGE = 0x1000000
_PAGE_READWRITE = 0x04
_PAGE_EXECUTE_READWRITE = 0x40
_WRITABLE_PROTECTS = (_PAGE_READWRITE, _PAGE_EXECUTE_READWRITE)

# Size window (bytes, upper bound exclusive) of the regions treated as leaked.
_LEAK_MIN_SIZE = 256 * 1024
_LEAK_MAX_SIZE = 512 * 1024

# Object-header probing parameters (DBObj layout: m_pMaintainer at +0x20,
# immediately followed by m_numLinks at +0x24).
_HEADER_SIZE = 0x40
_MAINTAINER_OFFSET = 0x20
_MIN_BACK = 0x20   # closest distance from pointer field back to object base
_MAX_BACK = 0x140  # assumed maximum object size (exclusive)


def _fail(message):
    """Print *message* to stderr and terminate the process with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def _module_basename(name):
    """Return the lower-cased file name of a module path.

    Module paths in a Windows dump use backslashes, which ``os.path.basename``
    does not split on when the script runs on a POSIX host; both separators
    are handled here.
    """
    return name.replace("\\", "/").rpartition("/")[2].lower()


def _classify_regions(infos):
    """Split memory-info records into leak candidates and regions to scan.

    Returns ``(leaked, scan)`` where *leaked* is the set of base addresses of
    committed, private, writable regions sized 256 KB up to (not including)
    512 KB, and *scan* is the list of ``(base, size)`` for every committed,
    writable, non-image region, in the order the records were listed.
    """
    leaked = set()
    scan = []
    for info in infos:
        state = _enum_int(info.State)
        kind = _enum_int(info.Type)
        protect = _enum_int(info.Protect) & 0xFF
        if state != _MEM_COMMIT or protect not in _WRITABLE_PROTECTS:
            continue
        if kind == _MEM_PRIVATE and _LEAK_MIN_SIZE <= info.RegionSize < _LEAK_MAX_SIZE:
            leaked.add(info.BaseAddress)
        # Image sections (DLL .data) are unlikely to hold the heap pointers
        # we are after, so they are left out of the scan.
        if kind != _MEM_IMAGE:
            scan.append((info.BaseAddress, info.RegionSize))
    return leaked, scan


def _scan_for_pointers(reader, scan_regions, targets):
    """Find every 4-byte-aligned little-endian DWORD whose value is in *targets*.

    Returns ``(matches, scanned_bytes, unreadable)``: *matches* is a list of
    ``(pointer_location, pointed_to_value)``, *scanned_bytes* the number of
    bytes actually read, and *unreadable* the count of regions the reader
    could not provide.
    """
    matches = []
    scanned_bytes = 0
    unreadable = 0
    for base, size in scan_regions:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: regions listed in the memory-info stream are not
            # necessarily captured in the dump. The original code caught
            # Exception here, so the reader's failure type is presumably
            # generic; count the miss instead of hiding it.
            unreadable += 1
            continue
        if not buf:
            continue
        scanned_bytes += len(buf)
        usable = len(buf) - len(buf) % 4  # iter_unpack needs whole DWORDs
        dwords = struct.iter_unpack("<I", memoryview(buf)[:usable])
        for index, (value,) in enumerate(dwords):
            if value in targets:
                matches.append((base + index * 4, value))
    return matches, scanned_bytes, unreadable


def _resolve_objects(reader, matches, image_lo, image_hi):
    """Locate the object that contains each matched pointer field.

    For every match, step backwards in 4-byte increments and accept the first
    candidate base whose first DWORD lies inside ``[image_lo, image_hi)`` (a
    plausible vtable) and whose maintainer / link-count fields look sane.

    Returns a list of
    ``(object_base, vtable, m_numLinks, m_pMaintainer, pointer_field_offset)``.
    """
    findings = []
    for ptr_loc, _region_val in matches:
        for back in range(_MIN_BACK, _MAX_BACK, 4):
            obj_base = ptr_loc - back
            try:
                reader.move(obj_base)
                hdr = reader.read(_HEADER_SIZE)
            except Exception:
                # Candidate base falls outside captured memory; try the next.
                continue
            if not hdr or len(hdr) < _HEADER_SIZE:
                continue
            vtbl = struct.unpack_from("<I", hdr, 0)[0]
            if not image_lo <= vtbl < image_hi:
                continue
            mtnr, num_links = struct.unpack_from("<II", hdr, _MAINTAINER_OFFSET)
            if not 1 <= num_links <= 1000:  # plausible refcount
                continue
            # Maintainer must be NULL or look like a user-mode address.
            if mtnr != 0 and not 0x00010000 <= mtnr < 0x80000000:
                continue
            findings.append((obj_base, vtbl, num_links, mtnr, back))
            break
    return findings


def _report(findings, image_lo):
    """Print the per-vtable histogram and the verdict for the dominant vtable.

    *findings* must be non-empty.
    """
    by_vtable = defaultdict(list)
    for _obj_base, vtbl, num_links, _mtnr, off in findings:
        by_vtable[vtbl].append((num_links, off))

    # Stable sort: among equally sized groups the first one seen stays first.
    ranked = sorted(by_vtable, key=lambda v: -len(by_vtable[v]))

    print("vtable groups (sorted by candidate count):")
    print(f" {'vtable':>10} {'rva':>10} {'count':>6} {'mode m_numLinks':<20} {'m_pSurfaceBits offset histogram':<48}")
    for vtbl in ranked:
        rows = by_vtable[vtbl]
        rc_counter = Counter(r[0] for r in rows)
        off_counter = Counter(r[1] for r in rows)
        rc_top = ", ".join(f"{k}->{v}" for k, v in rc_counter.most_common(5))
        # Up to the eight most common pointer-field offsets.
        off_full = ", ".join(f"0x{k:x}->{v}" for k, v in off_counter.most_common(8))
        print(f" 0x{vtbl:08x} 0x{vtbl - image_lo:08x} {len(rows):>6} {rc_top:<20} {off_full}")

    # The dominant vtable is taken to be RenderSurface; report its
    # m_numLinks distribution.
    top_vtable = ranked[0]
    top_rows = by_vtable[top_vtable]
    print()
    print("=== DIAGNOSTIC RESULT ===")
    print(f"dominant vtable: 0x{top_vtable:08x} RVA 0x{top_vtable - image_lo:08x}")
    rc_counter = Counter(r[0] for r in top_rows)
    print(f"m_numLinks distribution for that vtable ({len(top_rows)} objects):")
    for rc, cnt in rc_counter.most_common(10):
        pct = 100.0 * cnt / len(top_rows)
        print(f" m_numLinks = {rc:>4} {cnt:>5} objects ({pct:.1f}%)")
    mode_rc = rc_counter.most_common(1)[0][0]
    print()
    if mode_rc == 1:
        print("VERDICT: mode m_numLinks == 1 → cache-only (FINDING_001 family — cache-freelist leak)")
    elif mode_rc > 1:
        print(f"VERDICT: mode m_numLinks == {mode_rc} → external holder(s) present (FINDING_002 family — upstream leak)")
    else:
        print(f"VERDICT: unexpected mode m_numLinks == {mode_rc}; inspect manually")


def main():
    """Classify the surface leak in the minidump named by ``sys.argv[1]``.

    Steps: locate acclient.exe's image range, collect the leaked 256-512 KB
    private writable regions, scan writable non-image memory for pointers to
    them, walk back from each pointer to the owning object's vtable, and
    print a histogram of ``m_numLinks`` per vtable followed by a verdict.

    Exits with status 1 (message on stderr) when the argument is missing,
    the file does not exist, acclient.exe is not in the module list, the dump
    carries no memory-info records, no region matches the leak signature, or
    no owning object can be resolved.
    """
    if len(sys.argv) < 2:
        _fail("usage: find_rendersurfaces.py <dump.dmp>")
    path = sys.argv[1]
    if not os.path.exists(path):
        _fail(f"not found: {path}")

    md = MinidumpFile.parse(path)
    reader = md.get_reader().get_buffered_reader()

    # acclient.exe image range — used to validate vtable pointers.
    # getattr guards against a dump whose module stream is absent.
    modules = getattr(md.modules, "modules", None) or []
    acl = next(
        (m for m in modules if _module_basename(m.name) == "acclient.exe"),
        None,
    )
    if acl is None:
        _fail("acclient.exe not in module list")
    acl_lo = acl.baseaddress
    acl_hi = acl_lo + acl.size
    print(f"acclient.exe: 0x{acl_lo:08x} - 0x{acl_hi:08x} size={acl.size}")

    # Steps 1 + 2 (selection): leaked regions and the regions worth scanning.
    infos = getattr(md.memory_info, "infos", None)
    if not infos:
        _fail("dump has no memory-info records; cannot classify regions")
    leaked_regions, scan_regions = _classify_regions(infos)
    print(f"leaked 256-512 KB private regions: {len(leaked_regions)}")
    if not leaked_regions:
        # Nothing to look for, so skip the (potentially very large) scan.
        _fail("no regions match the leak signature; nothing to scan for")

    # Step 2 (scan): pointers to any leaked-region base address.
    print(f"scanning {len(scan_regions)} writable regions...")
    matches, scanned_bytes, unreadable = _scan_for_pointers(
        reader, scan_regions, leaked_regions
    )
    print(f"scanned {scanned_bytes/(1024*1024):.1f} MB")
    if unreadable:
        print(f"skipped {unreadable} unreadable regions", file=sys.stderr)
    print(f"pointers to leaked regions found: {len(matches)}")

    # Steps 3 + 4: resolve the containing objects and read m_numLinks.
    findings = _resolve_objects(reader, matches, acl_lo, acl_hi)
    print(f"resolved RenderSurface objects: {len(findings)}")
    print()
    if not findings:
        # Without any resolved object there is no dominant vtable to report.
        _fail("VERDICT: no candidate objects resolved; cannot classify the leak")

    # Step 5: histogram and verdict.
    _report(findings, acl_lo)
|
|
|
|
|
|
# Run the diagnostic only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|