""" find_ust_backtraces.py Scans writable memory in the dump for runs of consecutive 4-byte values that all look like return addresses in acclient.exe's executable image range — these are UST backtraces stored in the heap's UserStackTraceDB. The Win10 x86 UST database is a `STACK_TRACE_DATABASE` allocated by ntdll; each entry is a `RTL_STACK_TRACE_ENTRY` with: void* HashChain ULONG TraceCount USHORT Index USHORT Depth PVOID BackTrace[Depth] // depth typically 12-16 We don't try to parse the DB structure (too version-dependent). Instead we detect entries by their backtrace shape: 8+ consecutive pointers into the acclient code range, surrounded by non-pointer data. For each detected backtrace, report (top frame, depth, first 8 frames). Histogram by leaf frame to find the dominant allocation site. """ import os, struct, sys from collections import Counter, defaultdict from minidump.minidumpfile import MinidumpFile def _ei(v): if v is None: return 0 if hasattr(v, 'value'): return int(v.value) return int(v) def main(): md = MinidumpFile.parse(sys.argv[1]) reader = md.get_reader().get_buffered_reader() # Find acclient.exe image range acl = next((m for m in md.modules.modules if os.path.basename(m.name).lower() == "acclient.exe"), None) if acl is None: sys.exit("acclient.exe not in modules") acl_lo, acl_hi = acl.baseaddress, acl.baseaddress + acl.size # Constrain to the .text section. Without PE-header parsing here we # bound: text starts at base+0x1000 (typical), ends around 0x800000 in # EoR (the highest 2013 function RVAs land near 0x6c000; data sections # follow). False positives from .rdata UTF-16 strings (e.g., 0x0066006f # = "of") dominated the first run; tightening to a code-only range # eliminates them. text_lo = acl.baseaddress + 0x1000 text_hi = acl.baseaddress + 0x6c0000 # estimated end of .text acl_lo, acl_hi = text_lo, text_hi print(f"acclient.exe text range (estimated): 0x{acl_lo:08x} - 0x{acl_hi:08x}") # Iterate writable committed regions (heap-like) regions = [] for r in md.memory_info.infos: st, ty, pr = _ei(r.State), _ei(r.Type), _ei(r.Protect) & 0xff if st != 0x1000: continue if ty == 0x1000000: continue # skip Image if pr in (0x04, 0x40): regions.append((r.BaseAddress, r.RegionSize)) print(f"scanning {len(regions)} writable regions") MIN_DEPTH = 6 # min consecutive acclient pointers to call it a backtrace MAX_DEPTH = 32 # cap scan length per candidate leaf_hist = Counter() backtraces = [] # list of (location, depth, frames) total_scanned = 0 for base, size in regions: try: reader.move(base) buf = reader.read(size) except Exception: continue if not buf: continue total_scanned += len(buf) # Walk 4-byte aligned positions end = (len(buf) // 4) * 4 n = end // 4 # Pre-decode as uint32 array for speed words = struct.unpack_from(f"<{n}I", buf, 0) def looks_like_code_addr(v): """True if v looks like an actual code-section pointer, not data.""" if not (acl_lo <= v < acl_hi): return False # UTF-16 ASCII pattern: bytes 1 and 3 are 0x00, bytes 0 and 2 are # printable ASCII (0x20-0x7F). Rejects values like 0x0066006f # ("of"), 0x00610074 ("ta"), etc. b0 = v & 0xFF; b1 = (v >> 8) & 0xFF b2 = (v >> 16) & 0xFF; b3 = (v >> 24) & 0xFF if b1 == 0 and b3 == 0 and (0x20 <= b0 <= 0x7F) and (0x20 <= b2 <= 0x7F): return False return True i = 0 while i < n: # Find run of consecutive acclient pointers if not looks_like_code_addr(words[i]): i += 1; continue j = i while j < n and looks_like_code_addr(words[j]) and (j - i) < MAX_DEPTH: j += 1 depth = j - i if depth >= MIN_DEPTH: frames = words[i:j] # Uniqueness check — real backtraces have >70% unique frames. # Repeated-value runs (e.g. pixel data 0x00a000a0 repeated) # have low uniqueness. uniq = len(set(frames)) if uniq < max(4, int(depth * 0.7)): i = j; continue location = base + i * 4 backtraces.append((location, depth, frames)) leaf_hist[frames[0]] += 1 i = j print(f"scanned {total_scanned/(1024*1024):.1f} MB") print(f"backtraces detected (>= {MIN_DEPTH} frames): {len(backtraces)}") print() # Histogram by leaf frame (top of stack) print(f"top 30 leaf frames (most common 'top-of-call-stack' address):") print(f" {'leaf abs':>10} {'leaf rva':>10} {'count':>6} ") for leaf, cnt in leaf_hist.most_common(30): print(f" 0x{leaf:08x} 0x{leaf-acl_lo:08x} {cnt:>6}") print() # For the top leaf, show a few sample backtraces if leaf_hist: top_leaf, _ = leaf_hist.most_common(1)[0] print(f"=== sample backtraces with top leaf 0x{top_leaf:08x} (RVA 0x{top_leaf-acl_lo:08x}) ===") shown = 0 for loc, depth, frames in backtraces: if frames[0] != top_leaf: continue print(f" at 0x{loc:08x} depth={depth}") for k, fr in enumerate(frames[:12]): print(f" [{k:2d}] 0x{fr:08x} RVA 0x{fr-acl_lo:08x}") shown += 1 if shown >= 5: break if __name__ == "__main__": main()