Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
279 lines
11 KiB
Python
279 lines
11 KiB
Python
"""estimate_leak_bytes.py <dump.dmp>
|
|
|
|
Estimate total bytes leaked by three families:

  1. gm*UI panels -- NoticeHandler sub-vtable 0x007ccb60 at offset 0x5f8 of outer obj
  2. CObjCell/CEnvCell ClipPlaneList -- primary 0x007c98e8 / 0x007c9a60,
     teardown 0x0079385c at +0x30/+0x54
  3. CPhysicsObj stranded -- primary vtable 0x007c78ec, plus inner allocations
     at +0x98 and +0x108

Method:

  - Scan all private RW regions for vtable signatures.
  - For each match, peek at the heap header at (addr - 8) for the user-block size.
    Win32 NT-heap LFH blocks store the size as header[0] * granularity
    (8 bytes on x86), but the stored value is XOR'd with HeapKey, so it cannot
    be read directly. We instead approximate sizes by:
      (a) reading a few candidate offsets in heap headers, picking plausible values
      (b) for CObjCell/CPhysicsObj, FOLLOWING the inner-buffer pointer and reading
          ITS heap header similarly, summing
      (c) fallback: use known per-class size hints from ctor allocation analysis.
    NOTE: only (c) is implemented at present; read_heap_user_size() always
    returns None, so every per-instance size comes from the fallback constants.
  - Print a comparison table.
|
|
"""
|
|
import struct, sys, os
|
|
from collections import Counter
|
|
from minidump.minidumpfile import MinidumpFile
|
|
|
|
|
|
# --- vtables of interest -----

# Family 1: gm*UI panels, found through their embedded NoticeHandler.
GM_NOTICE_VT = 0x007ccb60      # NoticeHandler sub-vtable
GM_NOTICE_OFFSET = 0x5f8       # offset of that sub-vtable inside the outer gm*UI object

# Family 2: CObjCell / CEnvCell and the ClipPlaneList they leak.
COBJCELL_PRIMARY_VT = 0x007c98e8
CENVCELL_PRIMARY_VT = 0x007c9a60
COBJCELL_TEARDOWN_VT = 0x0079385c      # found at +0x30 and +0x54 after Destroy()
COBJCELL_CLIPPLANE_PTR_OFFSET = 0xdc   # pointer to the ClipPlaneList allocation

# Family 3: stranded CPhysicsObj instances and their inner allocations.
CPHYSICSOBJ_PRIMARY_VT = 0x007c78ec
CPHYSICSOBJ_CHILDLIST_OFFSET = 0x98
CPHYSICSOBJ_BUFFER_OFFSET = 0x108

# --- fallback per-class sizes, in bytes -----
# Educated guesses, used when the heap header cannot be read.
GM_UI_DEFAULT_SIZE = 2 * 1024   # outer object alone: NoticeHandler at 0x5f8 + tail
CPHYSICSOBJ_DEFAULT = 384       # 0x180, the instance proper
CHILDLIST_DEFAULT = 100         # `new(100)` per the spec
CPHYSICSOBJ_BUF_DEF = 64        # 0x40, rough; param_1[0x42] init
COBJCELL_DEFAULT = 512          # 0x200
CLIPPLANELIST_HDR = 24          # 0x18, list header
CLIPPLANE_SIZE = 20             # 0x14, one ClipPlane entry
CLIPPLANE_COUNT_AVG = 8         # assumed average number of planes per list
|
|
|
|
|
|
def _ei(v):
    """Coerce a minidump field to a plain ``int``.

    ``None`` becomes 0.  Objects exposing a ``.value`` attribute (enum-like
    wrappers) are unwrapped first; anything else goes straight to ``int()``.
    """
    if v is None:
        return 0
    # Fall back to the object itself when there is no ``.value`` to unwrap.
    unwrapped = getattr(v, "value", v)
    return int(unwrapped)
|
|
|
|
|
|
def get_scan_regions(md):
    """Return ``(base_address, region_size)`` for every region worth scanning.

    A region from ``md.memory_info.infos`` is kept when its State is 0x1000,
    its Type is not 0x1000000, and the low byte of its Protect is 0x04 or
    0x40.  In Win32 terms these values are MEM_COMMIT, MEM_IMAGE,
    PAGE_READWRITE and PAGE_EXECUTE_READWRITE, i.e. committed, non-image,
    writable memory.
    """

    def _is_candidate(info):
        state = _ei(info.State)
        kind = _ei(info.Type)
        protect = _ei(info.Protect) & 0xff  # drop modifier bits above the low byte
        return state == 0x1000 and kind != 0x1000000 and protect in (0x04, 0x40)

    return [
        (info.BaseAddress, info.RegionSize)
        for info in md.memory_info.infos
        if _is_candidate(info)
    ]
|
|
|
|
|
|
def read_heap_user_size(reader, addr):
    """Placeholder for recovering the heap user-block size at `addr`.

    Always returns ``None``: no size recovery is implemented.

    The x86 heap entry header in front of a user block is 8 bytes:
    [size:WORD][prevSize:WORD][segment_idx:BYTE][flags:BYTE][unused:BYTE][tag:BYTE]
    but `size` is XOR-encoded with the heap's encoding key, so a value read
    straight out of the dump cannot be trusted.  Rather than return a
    plausible-looking but wrong number, this function reports "unknown" and
    callers are expected to fall back to the per-class size constants.

    Args:
        reader: dump reader; kept for interface compatibility, not used.
        addr: address of the user block; kept for interface compatibility,
            not used.

    Returns:
        None, unconditionally.
    """
    # The previous version read the 16 bytes before `addr` and then discarded
    # them on every path; that dead I/O has been removed.
    return None
|
|
|
|
|
|
def estimate_region_allocation(reader, regions_by_base, addr, default):
    """Return the per-instance size estimate for the object at `addr`.

    This always returns `default`.  A containing region of 64KB or more is
    shared by many objects, so its size cannot be attributed to a single
    instance, and an address outside every known region gives nothing better
    to go on.  The earlier implementation searched `regions_by_base` for the
    containing region and then returned `default` whether or not it found
    one; that search had no effect on the result and has been dropped.

    Args:
        reader: dump reader; kept for interface compatibility, not used.
        regions_by_base: iterable of (base, size) pairs; kept for interface
            compatibility, not used.
        addr: address of the object; kept for interface compatibility, not used.
        default: authored per-instance size estimate for the object's family.

    Returns:
        `default`, unchanged.
    """
    return default
|
|
|
|
|
|
def scan_vtable(reader, scan, target_vt):
    """Find the first occurrence of `target_vt` in each scanned region.

    Every region is read in full and examined one little-endian DWORD at a
    time on 4-byte boundaries relative to the region base.  Only the first
    match per region is reported; use scan_vtable_all() for every match.

    Args:
        reader: object with ``move(addr)`` and ``read(size)``.
        scan: iterable of (base, size) region descriptors.
        target_vt: 32-bit value to look for.

    Returns:
        List of ``(region_base, offset, abs_addr, buf)`` tuples, one per
        region that contains a match; `buf` is the region's raw contents.
        Regions that cannot be read, or read back empty, are skipped.
    """
    out = []
    for base, size in scan:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: a region the dump cannot serve is simply skipped.
            continue
        if not buf:
            continue
        # Ignore a trailing partial dword.  `end` is a multiple of 4, so the
        # last offset visited is end - 4 -- the final complete dword, which
        # the old `end - 4` bound left out.
        end = (len(buf) // 4) * 4
        for off in range(0, end, 4):
            if struct.unpack_from("<I", buf, off)[0] == target_vt:
                out.append((base, off, base + off, buf))
                break  # first hit per region only
    return out
|
|
|
|
|
|
def scan_vtable_all(reader, scan, target_vt):
    """Find every occurrence of `target_vt` in the scanned regions.

    Every region is read in full and examined one little-endian DWORD at a
    time on 4-byte boundaries relative to the region base.

    Args:
        reader: object with ``move(addr)`` and ``read(size)``.
        scan: iterable of (base, size) region descriptors.
        target_vt: 32-bit value to look for.

    Returns:
        List of ``(abs_addr, buf, offset)`` tuples in scan order, where `buf`
        is the raw contents of the region holding the match.  Regions that
        cannot be read, or read back empty, are skipped.
    """
    hits = []
    for base, size in scan:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: a region the dump cannot serve is simply skipped.
            continue
        if not buf:
            continue
        # Ignore a trailing partial dword.  `end` is a multiple of 4, so the
        # last offset visited is end - 4 -- the final complete dword, which
        # the old `end - 4` bound left out.
        end = (len(buf) // 4) * 4
        for off in range(0, end, 4):
            if struct.unpack_from("<I", buf, off)[0] == target_vt:
                hits.append((base + off, buf, off))
    return hits
|
|
|
|
|
|
def read_dword(reader, addr):
    """Read one little-endian 32-bit value at `addr`.

    Returns the value as an int, or ``None`` when the address cannot be
    reached, fewer than 4 bytes come back, or anything else goes wrong
    while reading or decoding.
    """
    try:
        reader.move(addr)
        chunk = reader.read(4)
        if chunk and len(chunk) >= 4:
            return struct.unpack("<I", chunk)[0]
    except Exception:
        # Best effort: any failure is reported as "no value".
        pass
    return None
|
|
|
|
|
|
def _find_teardown_instances(reader, scan):
    """Return base addresses of objects in the post-Destroy() state.

    An object counts when COBJCELL_TEARDOWN_VT is present at BOTH +0x30 and
    +0x54 from a 4-byte-aligned candidate base.  Requiring the pair means one
    object is counted once; matching a single dword would count the same
    object twice, once for each slot.
    """
    found = []
    for base, size in scan:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: a region the dump cannot serve is simply skipped.
            continue
        if not buf:
            continue
        end = (len(buf) // 4) * 4
        # The dword at +0x54 must fit inside the buffer: off + 0x58 <= end.
        for off in range(0, end - 0x57, 4):
            if struct.unpack_from("<I", buf, off + 0x30)[0] != COBJCELL_TEARDOWN_VT:
                continue
            if struct.unpack_from("<I", buf, off + 0x54)[0] != COBJCELL_TEARDOWN_VT:
                continue
            found.append(base + off)
    return found


def _print_comparison_table(rows):
    """Print one line per (name, instances, bytes_each, bytes_total) row."""
    print()
    print("=" * 72)
    print(f"{'Family':<28} {'Inst':>6} {'AvgB':>8} {'TotalB':>10} {'TotalKB':>10}")
    print("-" * 72)
    # `or 1` keeps the percentage column defined when nothing was found.
    grand = sum(r[3] for r in rows) or 1
    for name, n, per, tot in rows:
        pct = 100.0 * tot / grand
        print(f"{name:<28} {n:>6} {per:>8} {tot:>10} {tot/1024:>9.1f} {pct:5.1f}%")
    print("=" * 72)


def main(argv=None):
    """Scan a minidump and print estimated leaked bytes per object family.

    Args:
        argv: optional argument list (without the program name); defaults to
            ``sys.argv[1:]``.  The first element is the dump path; any
            further elements are ignored.

    Raises:
        SystemExit: with a usage message when no dump path is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        raise SystemExit("usage: estimate_leak_bytes.py <dump.dmp>")

    # Leak counts established earlier in the investigation ("project memory").
    # Instances beyond these numbers are treated as live, not leaked.
    known_leaked_cells = 132
    known_leaked_phys = 90

    md = MinidumpFile.parse(args[0])
    reader = md.get_reader().get_buffered_reader()
    scan = get_scan_regions(md)
    print(f"scanning {len(scan)} private RW regions")

    # --------------------------------------------------------------
    # Family 1: gm*UI via NoticeHandler sub-vtable 0x007ccb60
    # NoticeHandler sits at offset 0x5f8 inside the outer gm*UI object.
    # --------------------------------------------------------------
    notice_hits = scan_vtable_all(reader, scan, GM_NOTICE_VT)
    print(f"\nNoticeHandler vt 0x{GM_NOTICE_VT:08x}: {len(notice_hits)} matches")

    gm_subclass = Counter()
    gm_count = 0
    for abs_addr, _buf, _off in notice_hits:
        # Step back to the outer object and read its primary vtable; a hit
        # whose outer object is unreadable is not counted.
        outer_vt = read_dword(reader, abs_addr - GM_NOTICE_OFFSET)
        if outer_vt is None:
            continue
        gm_subclass[outer_vt] += 1
        gm_count += 1

    print(f" unique outer vtables: {len(gm_subclass)}")
    for vt, n in gm_subclass.most_common(10):
        print(f" 0x{vt:08x} x{n}")

    # Per-instance size: gm*UI panels are full UI widgets. The NoticeHandler
    # at 0x5f8 means the outer object is AT LEAST 0x5f8 + sizeof(NoticeHandler).
    # A typical NoticeHandler is ~0x50. Plus child allocations (text buffers,
    # control list arrays, etc). Conservative: 0x800 = 2KB per instance.
    # The spec says ~352 instances; we measure however many we actually find.
    gm_per = GM_UI_DEFAULT_SIZE
    gm_total = gm_count * gm_per

    # --------------------------------------------------------------
    # Family 2: CObjCell ClipPlaneList
    # Instances are found by primary vtable. Each contributes:
    #   - the CObjCell instance memory itself (~512B)
    #   - the leaked ClipPlaneList pointed to by +0xdc:
    #       hdr (~24B) + DArray<ClipPlane>(N * 20B)
    # --------------------------------------------------------------
    cobjcell_count = 0
    for vt_target in (COBJCELL_PRIMARY_VT, CENVCELL_PRIMARY_VT):
        hits = scan_vtable_all(reader, scan, vt_target)
        cobjcell_count += len(hits)
        print(f"\nCObjCell-family vt 0x{vt_target:08x}: {len(hits)} matches")

    # Also count instances already in the post-Destroy() state.
    teardown_hits = _find_teardown_instances(reader, scan)
    print(f"teardown vt 0x{COBJCELL_TEARDOWN_VT:08x} matches: {len(teardown_hits)}")

    if cobjcell_count == 0:
        # No primary-vtable hits: use the teardown instances as a proxy.
        cobjcell_count = len(teardown_hits)

    clipplane_per = CLIPPLANELIST_HDR + (CLIPPLANE_COUNT_AVG * CLIPPLANE_SIZE)
    cobjcell_per = COBJCELL_DEFAULT + clipplane_per
    # Cap at the known leak count; when nothing at all was found, fall back
    # to that known count.
    cobjcell_leaked = (
        min(cobjcell_count, known_leaked_cells)
        if cobjcell_count > 0
        else known_leaked_cells
    )
    cobjcell_total = cobjcell_leaked * cobjcell_per

    # --------------------------------------------------------------
    # Family 3: CPhysicsObj
    # Per-instance contribution:
    #   - the instance itself
    #   - CHILDLIST at +0x98 (100 bytes per spec)
    #   - buffer at +0x108 (~64 bytes for param_1[0x42])
    # --------------------------------------------------------------
    phys_hits = scan_vtable_all(reader, scan, CPHYSICSOBJ_PRIMARY_VT)
    print(f"\nCPhysicsObj vt 0x{CPHYSICSOBJ_PRIMARY_VT:08x}: {len(phys_hits)} matches")

    phys_count = len(phys_hits)
    phys_per = CPHYSICSOBJ_DEFAULT + CHILDLIST_DEFAULT + CPHYSICSOBJ_BUF_DEF
    # Cap to the known-leaked count (the rest are live).
    phys_leaked = (
        min(phys_count, known_leaked_phys) if phys_count > 0 else known_leaked_phys
    )
    phys_total = phys_leaked * phys_per

    # --------------------------------------------------------------
    # Comparison table
    # --------------------------------------------------------------
    _print_comparison_table([
        ("gm*UI (NoticeHandler)", gm_count, gm_per, gm_total),
        ("CObjCell+ClipPlaneList", cobjcell_leaked, cobjcell_per, cobjcell_total),
        ("CPhysicsObj stranded", phys_leaked, phys_per, phys_total),
    ])


if __name__ == "__main__":
    main()
|