leakhunt/tools/find_ust_backtraces.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

148 lines
5.6 KiB
Python

"""
find_ust_backtraces.py <dump.dmp>
Scans writable memory in the dump for runs of consecutive 4-byte values that
all look like return addresses in acclient.exe's executable image range —
these are UST backtraces stored in the heap's UserStackTraceDB.
The Win10 x86 UST database is a `STACK_TRACE_DATABASE` allocated by ntdll;
each entry is a `RTL_STACK_TRACE_ENTRY` with:
void* HashChain
ULONG TraceCount
USHORT Index
USHORT Depth
PVOID BackTrace[Depth] // depth typically 12-16
We don't try to parse the DB structure (too version-dependent). Instead we
detect entries by their backtrace shape: 6+ consecutive pointers (MIN_DEPTH)
into the acclient code range, surrounded by non-pointer data.
Output: a histogram of detected backtraces by leaf frame (to find the
dominant allocation site), followed by up to 5 sample backtraces for the
most common leaf, showing the first 12 frames of each.
"""
import os, struct, sys
from collections import Counter, defaultdict
from minidump.minidumpfile import MinidumpFile
def _ei(v):
if v is None: return 0
if hasattr(v, 'value'): return int(v.value)
return int(v)
def _looks_like_code_addr(v, lo, hi):
    """Return True if the 32-bit word *v* plausibly points into [lo, hi).

    Words that decode as two printable-ASCII UTF-16LE code units (bytes 1 and
    3 zero, bytes 0 and 2 in 0x20-0x7F, e.g. 0x0066006f = "of") are rejected
    even when they fall inside the range: string data otherwise dominates
    the matches.
    """
    if not lo <= v < hi:
        return False
    b0 = v & 0xFF
    b1 = (v >> 8) & 0xFF
    b2 = (v >> 16) & 0xFF
    b3 = (v >> 24) & 0xFF
    is_utf16_ascii = (
        b1 == 0 and b3 == 0 and 0x20 <= b0 <= 0x7F and 0x20 <= b2 <= 0x7F
    )
    return not is_utf16_ascii


def _find_backtraces(words, base, lo, hi, min_depth, max_depth):
    """Yield ``(address, frames)`` for each backtrace-shaped run in *words*.

    *words* is a sequence of little-endian uint32 values read from a region
    starting at virtual address *base*. A run is a maximal stretch of
    consecutive words accepted by ``_looks_like_code_addr``, cut at
    *max_depth* words; scanning resumes at the cut, so the remainder of a
    longer run is evaluated as a separate candidate. A run is reported when
    it has at least *min_depth* words and at least ``max(4, int(depth * 0.7))``
    distinct values (repeated-value runs such as pixel data are dropped).
    """
    n = len(words)
    i = 0
    while i < n:
        if not _looks_like_code_addr(words[i], lo, hi):
            i += 1
            continue
        start = i
        j = i
        while (
            j < n
            and (j - start) < max_depth
            and _looks_like_code_addr(words[j], lo, hi)
        ):
            j += 1
        i = j
        depth = j - start
        if depth < min_depth:
            continue
        frames = words[start:j]
        if len(set(frames)) < max(4, int(depth * 0.7)):
            continue
        yield base + start * 4, frames


def main(argv=None):
    """Scan a minidump for UST-style backtraces and print a leaf histogram.

    Args:
        argv: Argument vector; ``argv[1]`` is the dump path. Defaults to
            ``sys.argv`` when omitted.

    Exits with a message (via ``sys.exit``) when the dump path is missing,
    when the dump has no memory-info stream, or when acclient.exe is not in
    the module list. All results are printed to stdout.
    """
    # Module names in a dump are Windows paths; ntpath splits them correctly
    # even when this script runs on a non-Windows host.
    import ntpath

    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: find_ust_backtraces.py <dump.dmp>")

    md = MinidumpFile.parse(argv[1])
    reader = md.get_reader().get_buffered_reader()

    # Find acclient.exe in the module list.
    acl = next((m for m in md.modules.modules
                if ntpath.basename(m.name).lower() == "acclient.exe"), None)
    if acl is None:
        sys.exit("acclient.exe not in modules")

    # RVAs are reported relative to the image base, NOT to the start of the
    # scanned text range (the two differ by 0x1000).
    image_base = acl.baseaddress

    # Constrain matches to an estimated code-only range. The bounds are not
    # parsed from the PE headers: text is assumed to start at base+0x1000
    # (typical) and function RVAs are assumed to end by 0x6c0000, with data
    # sections following. Using the whole image produced false positives
    # from .rdata UTF-16 strings (e.g. 0x0066006f = "of").
    # NOTE(review): confirm 0x6c0000 against the section table of the build
    # being analysed.
    text_lo = image_base + 0x1000
    text_hi = image_base + 0x6c0000
    print(f"acclient.exe text range (estimated): 0x{text_lo:08x} - 0x{text_hi:08x}")

    memory_info = getattr(md, "memory_info", None)
    if memory_info is None:
        sys.exit("dump has no memory info stream")

    # Collect writable, committed, non-image regions (heap-like).
    regions = []
    for r in memory_info.infos:
        state, kind, protect = _ei(r.State), _ei(r.Type), _ei(r.Protect) & 0xff
        if state != 0x1000:  # MEM_COMMIT
            continue
        if kind == 0x1000000:  # MEM_IMAGE
            continue
        if protect in (0x04, 0x40):  # PAGE_READWRITE, PAGE_EXECUTE_READWRITE
            regions.append((r.BaseAddress, r.RegionSize))
    print(f"scanning {len(regions)} writable regions")

    MIN_DEPTH = 6   # min consecutive acclient pointers to call it a backtrace
    MAX_DEPTH = 32  # cap scan length per candidate

    leaf_hist = Counter()
    backtraces = []  # list of (location, depth, frames)
    total_scanned = 0
    unreadable = 0
    for base, size in regions:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: a region listed in the memory info may not be
            # backed by data in the dump. Skip it, but keep count.
            unreadable += 1
            continue
        if not buf:
            continue
        total_scanned += len(buf)
        # Decode every whole 4-byte word up front; trailing bytes are ignored.
        n = len(buf) // 4
        words = struct.unpack_from(f"<{n}I", buf, 0)
        for location, frames in _find_backtraces(
                words, base, text_lo, text_hi, MIN_DEPTH, MAX_DEPTH):
            backtraces.append((location, len(frames), frames))
            leaf_hist[frames[0]] += 1

    print(f"scanned {total_scanned/(1024*1024):.1f} MB")
    if unreadable:
        print(f"skipped {unreadable} unreadable regions")
    print(f"backtraces detected (>= {MIN_DEPTH} frames): {len(backtraces)}")
    print()

    # Histogram by leaf frame (first word of each detected run).
    print("top 30 leaf frames (most common 'top-of-call-stack' address):")
    print(f" {'leaf abs':>10} {'leaf rva':>10} {'count':>6} ")
    for leaf, cnt in leaf_hist.most_common(30):
        print(f" 0x{leaf:08x} 0x{leaf - image_base:08x} {cnt:>6}")
    print()

    # For the most common leaf, show up to five sample backtraces.
    if leaf_hist:
        top_leaf, _ = leaf_hist.most_common(1)[0]
        print(f"=== sample backtraces with top leaf 0x{top_leaf:08x} "
              f"(RVA 0x{top_leaf - image_base:08x}) ===")
        samples = [bt for bt in backtraces if bt[2][0] == top_leaf][:5]
        for loc, depth, frames in samples:
            print(f" at 0x{loc:08x} depth={depth}")
            for k, fr in enumerate(frames[:12]):
                print(f" [{k:2d}] 0x{fr:08x} RVA 0x{fr - image_base:08x}")
# Script entry point: run the scan only when executed directly, not on import.
if __name__ == "__main__":
    main()