Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
123 lines
4.7 KiB
Python
123 lines
4.7 KiB
Python
"""
scan_rendersurface_refcounts.py <dump.dmp>

Walks every committed private RW region in the dump, looking for objects
that look like RenderSurface instances:

  * Located inside any heap region (covered by minidump memory64).
  * First DWORD is a vtable pointer into acclient.exe's .rdata range.
  * Looks like a DBObj-derived object: m_pMaintainer (offset 0x20) is a
    pointer that itself looks like a heap object.

For each candidate, reads m_numLinks at offset 0x24.

Why: the leak hypothesis fork is

    cache-freelist hypothesis  → m_numLinks == 1 for leaked surfaces
    UI-held-ref hypothesis     → m_numLinks > 1 for leaked surfaces

We can answer this empirically from a single dump.

Strategy:
  1. Find the RenderSurface vtable in EoR by clustering. Look at objects
     across the heap; whatever vtable value is the most popular among
     objects of size ~0x120 is RenderSurface's vtable.
  2. Histogram m_numLinks across those.
"""
|
|
import argparse
import ntpath
import os
import struct
import sys
from collections import Counter, defaultdict

from minidump.minidumpfile import MinidumpFile
|
|
|
|
|
|
def _enum_int(v):
    """Coerce an enum-like or plain value to ``int``.

    ``None`` maps to 0.  Objects exposing a ``value`` attribute (as enum
    members do) are converted via that attribute; anything else is handed
    to ``int()`` directly.
    """
    if v is None:
        return 0
    # Fall back to the object itself when it carries no ``value`` attribute.
    raw = getattr(v, 'value', v)
    return int(raw)
|
|
|
|
|
|
# Windows MEMORY_BASIC_INFORMATION values used to pick heap-like regions.
_MEM_COMMIT = 0x1000
_MEM_PRIVATE = 0x20000
_RW_PROTECTS = (0x04, 0x40)  # PAGE_READWRITE, PAGE_EXECUTE_READWRITE

# Bytes that must be readable from a candidate header: the vtable DWORD at
# +0x00 through m_numLinks at +0x24 (inclusive), i.e. 0x28 bytes.
_HEADER_SPAN = 0x28


def _find_module(modules, exe_name):
    """Return the first module whose file name equals *exe_name*, else None.

    The comparison is case-insensitive.  Module names stored in a minidump
    are Windows paths, so ``ntpath`` is used to split them: ``os.path`` on a
    POSIX host does not treat backslashes as separators and would never
    match.
    """
    wanted = exe_name.lower()
    for mod in modules:
        if ntpath.basename(mod.name).lower() == wanted:
            return mod
    return None


def _scan_headers(buf, img_lo, img_hi, step):
    """Yield ``(vtable, m_numLinks)`` for each DBObj-like header in *buf*.

    A header is accepted at an offset (multiple of *step*) when:
      * the DWORD at +0x00 lies in ``[img_lo, img_hi)`` (vtable in the image),
      * the DWORD at +0x24 (m_numLinks) is in ``1..10000``,
      * the DWORD at +0x20 (m_pMaintainer) is 0 or a 32-bit user-mode
        pointer in ``[0x00010000, 0x80000000)``.

    *step* must be positive.  Buffers shorter than a full header yield
    nothing.
    """
    # Last offset at which the whole header still fits; +1 below because
    # range() excludes its stop value.
    last = len(buf) - _HEADER_SPAN
    for off in range(0, last + 1, step):
        vtbl = struct.unpack_from('<I', buf, off)[0]
        if not (img_lo <= vtbl < img_hi):
            continue
        maintainer, num_links = struct.unpack_from('<II', buf, off + 0x20)
        # m_numLinks should be a small positive int
        if not (1 <= num_links <= 10000):
            continue
        # m_pMaintainer should be a user-mode pointer (or null for non-cached objs)
        if maintainer != 0 and not (0x00010000 <= maintainer < 0x80000000):
            continue
        yield vtbl, num_links


def main():
    """Scan a minidump for DBObj-like headers and histogram their refcounts.

    Parses the command line, locates acclient.exe in the dump's module
    list, then scans the start of every committed, private, read-write
    region for object headers (see ``_scan_headers``).  Prints the 20 most
    common vtable values together with the distribution of m_numLinks seen
    under each.

    Exits with status 1 if acclient.exe is not in the module list or the
    dump carries no memory-info stream, and with status 2 (argparse usage
    error) on an invalid ``--scan-step`` / ``--max-scan``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("dump")
    # NOTE: --surface-size is accepted for CLI compatibility but is not used
    # by the scan; no size constraint is imposed on candidates because the
    # heap large-block path may pad differently.
    ap.add_argument("--surface-size", type=lambda x: int(x, 0), default=0x120,
                    help="expected RenderSurface size (default 0x120)")
    ap.add_argument("--scan-step", type=lambda x: int(x, 0), default=8,
                    help="alignment step for object header search (default 8)")
    ap.add_argument("--max-scan", type=lambda x: int(x, 0), default=0x4000,
                    help="max bytes scanned from the start of each region; "
                         "0 scans whole regions (default 0x4000)")
    args = ap.parse_args()
    if args.scan_step <= 0:
        # range() rejects a zero step and a negative one would scan nothing.
        ap.error("--scan-step must be a positive integer")
    if args.max_scan < 0:
        ap.error("--max-scan must be zero or a positive integer")

    md = MinidumpFile.parse(args.dump)
    reader = md.get_reader().get_buffered_reader()

    # Find acclient.exe range
    acl = _find_module(getattr(md.modules, "modules", None) or [], "acclient.exe")
    if acl is None:
        print("acclient.exe not in module list", file=sys.stderr)
        sys.exit(1)
    acl_lo = acl.baseaddress
    acl_hi = acl.baseaddress + acl.size
    print(f"acclient.exe: 0x{acl_lo:08x} - 0x{acl_hi:08x}")

    if md.memory_info is None:
        print("dump has no memory info stream; cannot enumerate regions",
              file=sys.stderr)
        sys.exit(1)

    # Iterate committed private RW regions, scan for object-headers
    region_count = 0
    unreadable = 0   # matching regions whose bytes could not be read
    truncated = 0    # regions scanned only up to --max-scan bytes
    vtable_hits = Counter()                 # vtable -> count of objects with that first-DWORD
    by_vtable_refcounts = defaultdict(Counter)  # vtable -> Counter of m_numLinks values

    for r in md.memory_info.infos:
        st = _enum_int(r.State)
        ty = _enum_int(r.Type)
        pr = _enum_int(r.Protect) & 0xFF  # drop modifier bits above the base protection
        if st != _MEM_COMMIT or ty != _MEM_PRIVATE or pr not in _RW_PROTECTS:
            continue

        region_size = r.RegionSize
        # Cap scan to avoid huge regions (0 disables the cap).
        scan_size = region_size if args.max_scan == 0 else min(region_size, args.max_scan)
        try:
            reader.move(r.BaseAddress)
            buf = reader.read(scan_size)
        except Exception:
            # Best effort: the region is described in the dump but its bytes
            # are not (fully) available.  Count it instead of hiding it.
            unreadable += 1
            continue
        if not buf:
            continue
        region_count += 1
        if scan_size < region_size:
            truncated += 1

        for vtbl, num_links in _scan_headers(buf, acl_lo, acl_hi, args.scan_step):
            vtable_hits[vtbl] += 1
            by_vtable_refcounts[vtbl][num_links] += 1

    if unreadable or truncated:
        print(f"note: {unreadable} matching regions were unreadable and "
              f"{truncated} were only partially scanned (see --max-scan)",
              file=sys.stderr)

    print(f"scanned {region_count} regions, found {sum(vtable_hits.values())} candidate DBObj-like headers")
    print()
    print("top 20 vtable signatures (by candidate count):")
    print(f" {'vtbl_abs':>10} {'vtbl_rva':>10} {'count':>6} {'refcount distrib (mode -> count)':<40}")
    for vt, n in vtable_hits.most_common(20):
        rc_counter = by_vtable_refcounts[vt]
        rc_str = ", ".join(f"{k}->{v}" for k, v in rc_counter.most_common(6))
        print(f" 0x{vt:08x} 0x{vt-acl_lo:08x} {n:>6} {rc_str}")
|
|
|
|
|
|
# Script entry point: run the scan only when executed directly, not on import.
# main() returns None, so this exits with status 0 unless main() itself exits.
if __name__ == "__main__":
    sys.exit(main())
|