"""analyze_dump.py

Parse a Windows minidump and compute VA-region stats with no PDB dependency:

  * total committed memory, broken down by Type (Private/Mapped/Image)
  * top-N largest committed regions with module/path attribution
  * size-bucket histogram of committed regions
  * module list with image base and size

Output: writes ``<dump path>.stats.json`` next to the dump and prints a short
human summary to stdout.
"""

import json
import ntpath
import os
import sys
from collections import Counter, defaultdict

# Win32 MEMORY_BASIC_INFORMATION State values.
MEM_COMMIT = 0x1000
MEM_RESERVE = 0x2000
MEM_FREE = 0x10000

# Win32 MEMORY_BASIC_INFORMATION Type values.
MEM_PRIVATE = 0x20000
MEM_MAPPED = 0x40000
MEM_IMAGE = 0x1000000

# Base page-protection values (low byte of the Protect field).
PROT_NAMES = {
    0x01: "NOACCESS",
    0x02: "READONLY",
    0x04: "READWRITE",
    0x08: "WRITECOPY",
    0x10: "EXECUTE",
    0x20: "EXECUTE_READ",
    0x40: "EXECUTE_READWRITE",
    0x80: "EXECUTE_WRITECOPY",
}

# Modifier bits that may be OR-ed onto a base protection value.
_PROT_MODIFIERS = (
    (0x100, "GUARD"),
    (0x200, "NOCACHE"),
    (0x400, "WRITECOMBINE"),
)

_STATE_NAMES = {MEM_COMMIT: "COMMIT", MEM_RESERVE: "RESERVE", MEM_FREE: "FREE"}
_TYPE_NAMES = {MEM_PRIVATE: "Private", MEM_MAPPED: "Mapped", MEM_IMAGE: "Image"}

# Thresholds for the "heap suspect" report: large private RW / RWX regions.
_HEAP_SUSPECT_PROTS = (0x04, 0x40)  # READWRITE / EXECUTE_READWRITE
_HEAP_SUSPECT_MIN_SIZE = 64 * 1024

_KB = 1024
_MB = 1024 * _KB
_GB = 1024 * _MB


def _enum_int(v):
    """Normalize a State/Type/Protect value to a plain int.

    The minidump library may return these as an Enum member or as an int;
    ``None`` is mapped to 0.
    """
    if v is None:
        return 0
    if hasattr(v, "value"):
        return int(v.value)
    return int(v)


def fmt_prot(p):
    """Return a readable name for protection value *p*.

    The low byte is looked up in ``PROT_NAMES`` (falling back to hex), and
    any GUARD / NOCACHE / WRITECOMBINE modifier bits are appended with ``|``.
    """
    base = p & 0xFF
    parts = [PROT_NAMES.get(base, f"0x{base:02x}")]
    parts.extend(label for bit, label in _PROT_MODIFIERS if p & bit)
    return "|".join(parts)


def fmt_state(s):
    """Return 'COMMIT' / 'RESERVE' / 'FREE', or hex for any other value."""
    return _STATE_NAMES.get(s, f"0x{s:x}")


def fmt_type(t):
    """Return 'Private' / 'Mapped' / 'Image', or hex for any other value."""
    return _TYPE_NAMES.get(t, f"0x{t:x}")


def _fmt_pow2(n):
    """Format a byte count with an integer B/KB/MB/GB suffix (floor division)."""
    if n >= _GB:
        return f"{n // _GB}GB"
    if n >= _MB:
        return f"{n // _MB}MB"
    if n >= _KB:
        return f"{n // _KB}KB"
    return f"{n}B"


def power_of_2_bucket(sz):
    """Return the power-of-two size bucket for *sz*, e.g. '64KB-128KB'.

    The bucket is ``[2**p, 2**(p+1))`` where ``2**p`` is the largest power of
    two not exceeding *sz*. Non-positive sizes map to the string '0'.
    """
    if sz <= 0:
        return "0"
    lo = 1 << (sz.bit_length() - 1)
    return f"{_fmt_pow2(lo)}-{_fmt_pow2(lo << 1)}"


def _h(n):
    """Format a byte count for humans: 'N B', 'x.x KB', 'x.xx MB', 'x.xx GB'."""
    if n >= _GB:
        return f"{n / _GB:.2f} GB"
    if n >= _MB:
        return f"{n / _MB:.2f} MB"
    if n >= _KB:
        return f"{n / _KB:.1f} KB"
    return f"{n} B"


def _collect_modules(md):
    """Return one dict (name/base/size/ts) per module listed in the dump."""
    if not md.modules:
        return []
    return [
        {
            # Module names inside a dump are Windows paths. ntpath splits on
            # both '\\' and '/', so the leaf name is extracted correctly even
            # when this script runs on a POSIX host (os.path would not).
            "name": ntpath.basename(m.name),
            "base": m.baseaddress,
            "size": m.size,
            "ts": m.timestamp,
        }
        for m in md.modules.modules
    ]


def _module_owning(mods, addr):
    """Return the name of the first module whose image range contains *addr*."""
    for m in mods:
        if m["base"] <= addr < m["base"] + m["size"]:
            return m["name"]
    return None


def _collect_regions(md, mods):
    """Return one dict per memory-info region, with ints for state/type/protect.

    ``owner`` is the module containing the region's base address, or None.
    """
    if not (md.memory_info and md.memory_info.infos):
        return []
    return [
        {
            "base": r.BaseAddress,
            "size": r.RegionSize,
            "state": _enum_int(r.State),
            "type": _enum_int(r.Type),
            "protect": _enum_int(r.Protect),
            "owner": _module_owning(mods, r.BaseAddress),
        }
        for r in md.memory_info.infos
    ]


def _by_size_desc(regions):
    """Return *regions* sorted largest-first (stable for equal sizes)."""
    return sorted(regions, key=lambda r: r["size"], reverse=True)


def build_stats(md):
    """Compute all dump-derived statistics from a parsed minidump object.

    *md* must expose ``sysinfo``, ``modules`` (with ``.modules``) and
    ``memory_info`` (with ``.infos``); any of them may be None/empty.

    Returns a JSON-serializable dict. Keys are inserted in the order they
    appear in the output file: optional ``sysinfo``, ``modules``,
    ``modules_count``, ``top20_committed``, ``regions_count``, the
    ``committed_*`` totals, ``top_image_modules``, ``committed_size_buckets``
    and the ``heap_suspect_*`` entries.
    """
    out = {}

    si = md.sysinfo
    if si is not None:
        out["sysinfo"] = {
            "ProcessorArchitecture": str(si.ProcessorArchitecture),
            "ProductType": str(si.ProductType),
            "MajorVersion": si.MajorVersion,
            "MinorVersion": si.MinorVersion,
            "BuildNumber": si.BuildNumber,
        }

    mods = _collect_modules(md)
    out["modules"] = mods
    out["modules_count"] = len(mods)

    regions = _collect_regions(md, mods)
    committed = [r for r in regions if r["state"] == MEM_COMMIT]

    # Single pass over committed regions for every aggregate we report.
    total_by_type = Counter()
    bucket_bytes = Counter()
    bucket_count = Counter()
    by_module_image = defaultdict(int)
    for r in committed:
        size = r["size"]
        total_by_type[r["type"]] += size
        bucket = power_of_2_bucket(size)
        bucket_bytes[bucket] += size
        bucket_count[bucket] += 1
        if r["type"] == MEM_IMAGE and r["owner"]:
            by_module_image[r["owner"]] += size

    # Largest committed regions
    out["top20_committed"] = [
        {
            "base": f"0x{r['base']:08x}",
            "size": r["size"],
            "size_h": _h(r["size"]),
            "type": fmt_type(r["type"]),
            "prot": fmt_prot(r["protect"]),
            "owner": r["owner"],
        }
        for r in _by_size_desc(committed)[:20]
    ]
    out["regions_count"] = len(regions)
    out["committed_total"] = sum(r["size"] for r in committed)
    out["committed_private_total"] = total_by_type[MEM_PRIVATE]
    out["committed_image_total"] = total_by_type[MEM_IMAGE]
    out["committed_mapped_total"] = total_by_type[MEM_MAPPED]

    # Per-module image commit (sums all committed Image regions per owner)
    out["top_image_modules"] = sorted(
        [{"module": k, "image_bytes": v} for k, v in by_module_image.items()],
        key=lambda x: x["image_bytes"],
        reverse=True,
    )[:15]

    # Per-bucket committed (mostly interesting for private), largest first
    out["committed_size_buckets"] = [
        {"bucket": k, "bytes": v, "count": bucket_count[k]}
        for k, v in sorted(
            bucket_bytes.items(), key=lambda kv: kv[1], reverse=True
        )
    ]

    # Large private committed regions with RW / RWX protection (heap suspects)
    heap_suspects = _by_size_desc(
        r
        for r in committed
        if r["type"] == MEM_PRIVATE
        and (r["protect"] & 0xFF) in _HEAP_SUSPECT_PROTS
        and r["size"] >= _HEAP_SUSPECT_MIN_SIZE
    )
    out["heap_suspect_regions"] = [
        {
            "base": f"0x{r['base']:08x}",
            "size": r["size"],
            "size_h": _h(r["size"]),
            "prot": fmt_prot(r["protect"]),
        }
        for r in heap_suspects[:50]
    ]
    out["heap_suspect_total"] = sum(r["size"] for r in heap_suspects)
    out["heap_suspect_count"] = len(heap_suspects)
    return out


def _print_summary(out, path, out_path):
    """Print the short human-readable summary of *out* to stdout."""
    print(f"=== {os.path.basename(path)} ===")
    print(
        f"file: {out['file_size_mb']} MB regions: {out['regions_count']}"
        f" modules: {out['modules_count']}"
    )
    print(f" committed_total {_h(out['committed_total'])}")
    print(f" private {_h(out['committed_private_total'])}")
    print(f" image {_h(out['committed_image_total'])}")
    print(f" mapped {_h(out['committed_mapped_total'])}")
    print(
        f" heap_suspect (private RW, >=64KB): {_h(out['heap_suspect_total'])}"
        f" across {out['heap_suspect_count']} regions"
    )
    print()
    print(" top 10 image modules by committed size:")
    for m in out["top_image_modules"][:10]:
        print(f" {_h(m['image_bytes']):>10} {m['module']}")
    print()
    print(" top 10 committed regions:")
    for r in out["top20_committed"][:10]:
        own = r["owner"] or ""
        print(
            f" {r['size_h']:>10} {r['base']} {r['type']:>8}"
            f" {r['prot']:<28} {own}"
        )
    print()
    print(f" wrote {out_path}")


def main():
    """CLI entry point: ``analyze_dump.py <dump-file>``.

    Parses the dump named by ``sys.argv[1]``, writes ``<path>.stats.json`` and
    prints a summary. Exits with status 1 if the argument is missing or the
    file does not exist.
    """
    if len(sys.argv) < 2:
        print("usage: analyze_dump.py <dump-file>", file=sys.stderr)
        sys.exit(1)
    path = sys.argv[1]
    if not os.path.exists(path):
        print(f"not found: {path}", file=sys.stderr)
        sys.exit(1)

    # Imported here, at its only point of use, so the pure formatting and
    # aggregation helpers can be imported without the third-party parser.
    from minidump.minidumpfile import MinidumpFile

    md = MinidumpFile.parse(path)

    out = {
        "path": path,
        "file_size_mb": round(os.path.getsize(path) / (1024 * 1024), 1),
    }
    out.update(build_stats(md))

    # Write JSON
    out_path = path + ".stats.json"
    with open(out_path, "w", encoding="utf8") as f:
        json.dump(out, f, indent=2)

    # Pretty summary to stdout
    _print_summary(out, path, out_path)


if __name__ == "__main__":
    main()