Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…), which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
57b5e43d0e
199 changed files with 1648333 additions and 0 deletions
123
tools/scan_rendersurface_refcounts.py
Normal file
123
tools/scan_rendersurface_refcounts.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
"""
|
||||
scan_rendersurface_refcounts.py <dump.dmp>
|
||||
|
||||
Walks every committed private RW region in the dump (only the first 0x4000 bytes of each region are scanned), looking for objects
|
||||
that look like RenderSurface instances:
|
||||
|
||||
* Located inside any heap region (covered by minidump memory64).
|
||||
* First DWORD is a vtable pointer into acclient.exe's mapped image range (the whole module is accepted, not just .rdata).
|
||||
* Looks like a DBObj-derived object: m_pMaintainer (offset 0x20) is a
|
||||
pointer that itself looks like a heap object.
|
||||
|
||||
For each candidate, reads m_numLinks at offset 0x24.
|
||||
|
||||
Why: the leak hypothesis fork is
|
||||
|
||||
cache-freelist hypothesis → m_numLinks == 1 for leaked surfaces
|
||||
UI-held-ref hypothesis → m_numLinks > 1 for leaked surfaces
|
||||
|
||||
We can answer this empirically from a single dump.
|
||||
|
||||
Strategy:
|
||||
1. Find the RenderSurface vtable in EoR by clustering. Look at objects
|
||||
across the heap; whatever vtable value is the most popular among
|
||||
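objects of size ~0x120 is RenderSurface's vtable. (The scan itself applies no size filter; it prints the most common vtables so the right one can be picked by hand.)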
|
||||
2. Histogram m_numLinks across those.
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import struct
|
||||
import sys
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from minidump.minidumpfile import MinidumpFile
|
||||
|
||||
|
||||
def _enum_int(v):
|
||||
if v is None: return 0
|
||||
if hasattr(v, 'value'): return int(v.value)
|
||||
return int(v)
|
||||
|
||||
|
||||
# Windows MEMORY_BASIC_INFORMATION values used to pick heap-like regions.
_MEM_COMMIT = 0x1000
_MEM_PRIVATE = 0x20000
_PAGE_READWRITE = 0x04
_PAGE_EXECUTE_READWRITE = 0x40

# A candidate header spans the vtable DWORD at +0x00 through m_numLinks at
# +0x24 inclusive, i.e. 0x28 bytes must be available in the buffer.
_HEADER_SPAN = 0x28
_MAINTAINER_OFFSET = 0x20

# Only this many leading bytes of each region are read, to bound the work
# done on very large regions.
_REGION_SCAN_CAP = 0x4000


def _scan_buffer(buf, acl_lo, acl_hi, step):
    """Yield ``(vtable, m_numLinks)`` for each DBObj-like header in ``buf``.

    A header qualifies when its first DWORD lies in ``[acl_lo, acl_hi)``,
    the DWORD at +0x24 is in ``1..10000`` and the DWORD at +0x20 is either
    zero or in ``[0x00010000, 0x80000000)``.
    """
    read_vtbl = struct.Struct('<I').unpack_from
    read_tail = struct.Struct('<II').unpack_from
    # `+ 1` so the final slot that still fits entirely in the buffer is
    # examined; the bound also guarantees every unpack below stays in range.
    for off in range(0, len(buf) - _HEADER_SPAN + 1, step):
        vtbl = read_vtbl(buf, off)[0]
        if not (acl_lo <= vtbl < acl_hi):
            continue
        maintainer, num_links = read_tail(buf, off + _MAINTAINER_OFFSET)
        # m_numLinks should be a small positive int
        if not (1 <= num_links <= 10000):
            continue
        # m_pMaintainer should be a user-mode pointer (or null for non-cached objs)
        if maintainer != 0 and not (0x00010000 <= maintainer < 0x80000000):
            continue
        yield vtbl, num_links


def main():
    """Scan a minidump for DBObj-like headers and histogram their refcounts.

    Command line: ``<dump> [--surface-size N] [--scan-step N]``.
    ``--surface-size`` is accepted for compatibility but is not currently
    used to filter candidates. ``--scan-step`` must be positive.

    Prints the acclient.exe address range, the number of regions scanned and
    candidates found, then the 20 most common vtable values with the six
    most common m_numLinks values for each.

    Exits with status 1 when acclient.exe is not in the module list or the
    dump carries no memory-info list, and with status 2 (argparse) on an
    invalid ``--scan-step``.
    """
    # Local import: Windows dumps store module paths with backslashes, which
    # os.path.basename does not split on POSIX hosts.
    import ntpath

    ap = argparse.ArgumentParser()
    ap.add_argument("dump")
    ap.add_argument("--surface-size", type=lambda x: int(x, 0), default=0x120,
                    help="expected RenderSurface size (default 0x120)")
    ap.add_argument("--scan-step", type=lambda x: int(x, 0), default=8,
                    help="alignment step for object header search (default 8)")
    args = ap.parse_args()
    # range() rejects a zero step and a negative step would scan nothing.
    if args.scan_step <= 0:
        ap.error("--scan-step must be a positive integer")

    md = MinidumpFile.parse(args.dump)
    reader = md.get_reader().get_buffered_reader()

    # Find acclient.exe range
    module_list = getattr(md, "modules", None)
    acl = None
    for m in (module_list.modules if module_list is not None else ()):
        if ntpath.basename(m.name).lower() == "acclient.exe":
            acl = m
            break
    if acl is None:
        print("acclient.exe not in module list", file=sys.stderr)
        sys.exit(1)
    acl_lo = acl.baseaddress
    acl_hi = acl.baseaddress + acl.size
    print(f"acclient.exe: 0x{acl_lo:08x} - 0x{acl_hi:08x}")

    mem_info = getattr(md, "memory_info", None)
    if mem_info is None:
        print("dump has no memory info list; cannot enumerate regions",
              file=sys.stderr)
        sys.exit(1)

    # Iterate committed private RW regions, scan for object-headers
    region_count = 0
    unreadable = 0
    vtable_hits = Counter()  # vtable -> count of objects with that first-DWORD
    by_vtable_refcounts = defaultdict(list)  # vtable -> list of m_numLinks values
    rw_protections = (_PAGE_READWRITE, _PAGE_EXECUTE_READWRITE)

    for r in mem_info.infos:
        st = _enum_int(r.State)
        ty = _enum_int(r.Type)
        # Keep only the low byte so modifier flags do not defeat the match.
        pr = _enum_int(r.Protect) & 0xFF
        if st != _MEM_COMMIT or ty != _MEM_PRIVATE or pr not in rw_protections:
            continue
        # No object-size constraint is imposed because the heap large-block
        # path may pad differently.
        scan_size = min(r.RegionSize, _REGION_SCAN_CAP)
        try:
            reader.move(r.BaseAddress)
            buf = reader.read(scan_size)
        except Exception:
            # Best effort: the region may not be backed by data in the dump.
            unreadable += 1
            continue
        if not buf:
            continue
        region_count += 1
        for vtbl, num_links in _scan_buffer(buf, acl_lo, acl_hi, args.scan_step):
            vtable_hits[vtbl] += 1
            by_vtable_refcounts[vtbl].append(num_links)

    if unreadable:
        print(f"note: skipped {unreadable} unreadable regions", file=sys.stderr)

    print(f"scanned {region_count} regions, found {sum(vtable_hits.values())} candidate DBObj-like headers")
    print()
    print("top 20 vtable signatures (by candidate count):")
    print(f" {'vtbl_abs':>10} {'vtbl_rva':>10} {'count':>6} {'refcount distrib (mode -> count)':<40}")
    for vt, n in vtable_hits.most_common(20):
        rc_counter = Counter(by_vtable_refcounts[vt])
        rc_str = ", ".join(f"{k}->{v}" for k, v in rc_counter.most_common(6))
        print(f" 0x{vt:08x} 0x{vt-acl_lo:08x} {n:>6} {rc_str}")


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue