leakhunt/tools/analyze_dump.py
acbot 57b5e43d0e Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client:
- v3b: palette refcount over-increment (3-byte NOP at two sites)
- v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk)
- v11: two dangling-pointer crash guards (NULL-check + reorder)
- v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk)
- v22: unpacker stale-pointer SEH guard (whole-function __try/__except)

All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded
by acclient.exe at process start via PE import table patching by
tools/install_leakfix.py.

Controlled 15-client fleet soak: unpatched control died at 26h with
palette exhaustion; all 14 patched clients survived past that point
and reached ≥5-day uptime.

Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator
(260KB surface backing buffers retained after Release). See REPORT.md
§10 for the full investigation; conclusion is that it's unfixable from
outside d3d9.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:07:58 +02:00

239 lines
8.4 KiB
Python

"""
analyze_dump.py <dump.dmp>
Parses a Windows minidump and computes VA-region stats with no PDB
dependency:
* total committed memory, broken down by Type (Private/Mapped/Image)
* top-N largest committed regions with module/path attribution
* size-bucket histogram of committed regions
* module list with image base and size
Output: writes <dump.dmp>.stats.json next to the dump and prints a
short human summary to stdout.
"""
import json
import os
import sys
from collections import Counter, defaultdict
from minidump.minidumpfile import MinidumpFile
# Flag values used to classify memory regions read from the dump.
# MEM_COMMIT / MEM_RESERVE / MEM_FREE are matched against a region's State;
# MEM_PRIVATE / MEM_MAPPED / MEM_IMAGE are matched against a region's Type.
MEM_COMMIT = 0x1000
MEM_RESERVE = 0x2000
MEM_FREE = 0x10000
MEM_PRIVATE = 0x20000
MEM_MAPPED = 0x40000
MEM_IMAGE = 0x1000000
def _enum_int(v):
"""minidump library may return State/Type as Enum or int — normalize to int."""
if v is None:
return 0
if hasattr(v, 'value'):
return int(v.value)
return int(v)
# Names for the low byte of a region's protection value.
PROT_NAMES = {
    0x01: "NOACCESS",
    0x02: "READONLY",
    0x04: "READWRITE",
    0x08: "WRITECOPY",
    0x10: "EXECUTE",
    0x20: "EXECUTE_READ",
    0x40: "EXECUTE_READWRITE",
    0x80: "EXECUTE_WRITECOPY",
}


def fmt_prot(p):
    """Render a protection value as text, e.g. ``READWRITE|GUARD``.

    The low byte is looked up in PROT_NAMES (unknown values are shown as
    two-digit hex); the 0x100 / 0x200 / 0x400 modifier bits are appended
    in that order, separated by ``|``.
    """
    low = p & 0xFF
    pieces = [PROT_NAMES[low] if low in PROT_NAMES else f"0x{low:02x}"]
    modifiers = ((0x100, "GUARD"), (0x200, "NOCACHE"), (0x400, "WRITECOMBINE"))
    for bit, label in modifiers:
        if p & bit:
            pieces.append(label)
    return "|".join(pieces)
def fmt_state(s):
    """Return 'COMMIT', 'RESERVE' or 'FREE' for a region State value.

    Any other value is rendered as lowercase hex with a ``0x`` prefix.
    """
    known = (
        (MEM_COMMIT, "COMMIT"),
        (MEM_RESERVE, "RESERVE"),
        (MEM_FREE, "FREE"),
    )
    for flag, label in known:
        if s == flag:
            return label
    return f"0x{s:x}"
def fmt_type(t):
    """Return 'Private', 'Mapped' or 'Image' for a region Type value.

    Any other value is rendered as lowercase hex with a ``0x`` prefix.
    """
    known = (
        (MEM_PRIVATE, "Private"),
        (MEM_MAPPED, "Mapped"),
        (MEM_IMAGE, "Image"),
    )
    for flag, label in known:
        if t == flag:
            return label
    return f"0x{t:x}"
def power_of_2_bucket(sz):
    """Name the power-of-two size bucket containing ``sz``, e.g. '64KB-128KB'.

    The bucket runs from the largest power of two not exceeding ``sz`` up to
    twice that value. Sizes of zero or below all map to the bucket ``"0"``.
    """
    if sz <= 0:
        return "0"

    def label(n):
        # Largest unit first; whole-number division, no fractional part shown.
        for shift, unit in ((30, "GB"), (20, "MB"), (10, "KB")):
            if n >= (1 << shift):
                return f"{n >> shift}{unit}"
        return f"{n}B"

    lower = 1 << (sz.bit_length() - 1)
    upper = lower * 2
    return f"{label(lower)}-{label(upper)}"
def main():
    """Analyze the minidump named on the command line and report region stats.

    Takes the dump path from ``sys.argv[1]``, parses it with ``MinidumpFile``,
    writes ``<dump>.stats.json`` next to the dump and prints a short summary
    to stdout. Exits with status 1 (message on stderr) when the argument is
    missing or the path does not exist.
    """
    # Module paths stored in a dump are Windows paths. os.path.basename only
    # splits on "\\" when the script itself runs on Windows, so use ntpath to
    # get the bare file name on every host OS.
    import ntpath

    if len(sys.argv) < 2:
        print("usage: analyze_dump.py <dump.dmp>", file=sys.stderr)
        sys.exit(1)
    path = sys.argv[1]
    if not os.path.exists(path):
        print(f"not found: {path}", file=sys.stderr)
        sys.exit(1)

    md = MinidumpFile.parse(path)
    out = {
        "path": path,
        "file_size_mb": round(os.path.getsize(path) / (1024 * 1024), 1),
    }

    # System info
    si = md.sysinfo
    if si is not None:
        out["sysinfo"] = {
            "ProcessorArchitecture": str(si.ProcessorArchitecture),
            "ProductType": str(si.ProductType),
            "MajorVersion": si.MajorVersion,
            "MinorVersion": si.MinorVersion,
            "BuildNumber": si.BuildNumber,
        }

    # Modules
    mods = []
    if md.modules:
        for m in md.modules.modules:
            mods.append({
                "name": ntpath.basename(m.name),
                "base": m.baseaddress,
                "size": m.size,
                "ts": m.timestamp,
            })
    out["modules"] = mods
    out["modules_count"] = len(mods)

    def mod_owning(addr):
        """Return the name of the first module whose image range holds addr."""
        for m in mods:
            if m["base"] <= addr < m["base"] + m["size"]:
                return m["name"]
        return None

    # Memory info: one entry per region with its state/type/protection.
    regions = []
    bucket_committed = Counter()        # bucket label -> committed bytes
    bucket_committed_count = Counter()  # bucket label -> committed regions
    if md.memory_info and md.memory_info.infos:
        for r in md.memory_info.infos:
            base = r.BaseAddress
            sz = r.RegionSize
            st = _enum_int(r.State)
            ty = _enum_int(r.Type)
            pr = _enum_int(r.Protect)
            regions.append({
                "base": base,
                "size": sz,
                "state": st,
                "type": ty,
                "protect": pr,
                # Attribution is by the region's base address only.
                "owner": mod_owning(base),
            })
            if st == MEM_COMMIT:
                bucket = power_of_2_bucket(sz)
                bucket_committed[bucket] += sz
                bucket_committed_count[bucket] += 1

    def committed_bytes(region_type=None):
        """Sum committed bytes, optionally restricted to one region Type."""
        return sum(
            r["size"] for r in regions
            if r["state"] == MEM_COMMIT
            and (region_type is None or r["type"] == region_type)
        )

    # Largest committed regions
    committed = sorted(
        [r for r in regions if r["state"] == MEM_COMMIT],
        key=lambda r: r["size"],
        reverse=True,
    )
    out["top20_committed"] = [
        {
            "base": f"0x{r['base']:08x}",
            "size": r["size"],
            "size_h": _h(r["size"]),
            "type": fmt_type(r["type"]),
            "prot": fmt_prot(r["protect"]),
            "owner": r["owner"],
        }
        for r in committed[:20]
    ]
    out["regions_count"] = len(regions)
    out["committed_total"] = committed_bytes()
    out["committed_private_total"] = committed_bytes(MEM_PRIVATE)
    out["committed_image_total"] = committed_bytes(MEM_IMAGE)
    out["committed_mapped_total"] = committed_bytes(MEM_MAPPED)

    # Per-module image commit (sums all committed Image regions per owner module)
    by_module_image = defaultdict(int)
    for r in regions:
        if r["state"] == MEM_COMMIT and r["type"] == MEM_IMAGE and r["owner"]:
            by_module_image[r["owner"]] += r["size"]
    out["top_image_modules"] = sorted(
        [{"module": k, "image_bytes": v} for k, v in by_module_image.items()],
        key=lambda x: x["image_bytes"],
        reverse=True,
    )[:15]

    # Per-bucket committed (mostly interesting for private), largest bytes first.
    out["committed_size_buckets"] = [
        {"bucket": k, "bytes": v, "count": bucket_committed_count[k]}
        for k, v in sorted(bucket_committed.items(), key=lambda x: x[1], reverse=True)
    ]

    # Large private committed regions with RW / EXECUTE_READWRITE protection
    # (heap suspects); only regions of at least 64 KB are considered.
    heap_suspects = [
        r for r in regions
        if r["state"] == MEM_COMMIT
        and r["type"] == MEM_PRIVATE
        and (r["protect"] & 0xFF) in (0x04, 0x40)
        and r["size"] >= 64 * 1024
    ]
    heap_suspects.sort(key=lambda r: r["size"], reverse=True)
    out["heap_suspect_regions"] = [
        {
            "base": f"0x{r['base']:08x}",
            "size": r["size"],
            "size_h": _h(r["size"]),
            "prot": fmt_prot(r["protect"]),
        }
        for r in heap_suspects[:50]
    ]
    out["heap_suspect_total"] = sum(r["size"] for r in heap_suspects)
    out["heap_suspect_count"] = len(heap_suspects)

    # Write JSON
    out_path = path + ".stats.json"
    with open(out_path, "w", encoding="utf8") as f:
        json.dump(out, f, indent=2)

    # Pretty summary to stdout
    print(f"=== {os.path.basename(path)} ===")
    print(f"file: {out['file_size_mb']} MB regions: {out['regions_count']} modules: {out['modules_count']}")
    print(f" committed_total {_h(out['committed_total'])}")
    print(f" private {_h(out['committed_private_total'])}")
    print(f" image {_h(out['committed_image_total'])}")
    print(f" mapped {_h(out['committed_mapped_total'])}")
    print(f" heap_suspect (private RW, >=64KB): {_h(out['heap_suspect_total'])} across {out['heap_suspect_count']} regions")
    print()
    print(" top 10 image modules by committed size:")
    for m in out["top_image_modules"][:10]:
        print(f" {_h(m['image_bytes']):>10} {m['module']}")
    print()
    print(" top 10 committed regions:")
    for r in out["top20_committed"][:10]:
        own = r["owner"] or ""
        print(f" {r['size_h']:>10} {r['base']} {r['type']:>8} {r['prot']:<28} {own}")
    print()
    print(f" wrote {out_path}")
def _h(n):
if n >= 1024*1024*1024: return f"{n/(1024*1024*1024):.2f} GB"
if n >= 1024*1024: return f"{n/(1024*1024):.2f} MB"
if n >= 1024: return f"{n/1024:.1f} KB"
return f"{n} B"
# Script entry point: run the analysis only when executed directly, not on import.
if __name__ == "__main__":
    main()