"""sample_big_regions.py Sample the first 64 bytes of every 256K-1M private RW region. Classify by content pattern: - Texture data: every 4th byte looks like alpha (mostly 0xFF or 0x00) - Vtable-headed: first DWORD is in image range (0x00400000-0x00900000) - Heap header: starts with ptr-like values into ranges we recognize - Random/unknown Then for vtable-headed regions, report which vtable.""" import ctypes, ctypes.wintypes as wt, sys, struct, collections PROCESS_VM_READ = 0x10 PROCESS_QUERY_INFORMATION = 0x400 k = ctypes.windll.kernel32 k.OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD]; k.OpenProcess.restype = wt.HANDLE k.ReadProcessMemory.argtypes = [wt.HANDLE, wt.LPCVOID, wt.LPVOID, ctypes.c_size_t, ctypes.POINTER(ctypes.c_size_t)] k.ReadProcessMemory.restype = wt.BOOL k.VirtualQueryEx.argtypes = [wt.HANDLE, wt.LPCVOID, ctypes.c_void_p, ctypes.c_size_t] k.VirtualQueryEx.restype = ctypes.c_size_t class MBI(ctypes.Structure): _fields_ = [ ("BaseAddress", ctypes.c_void_p), ("AllocationBase", ctypes.c_void_p), ("AllocationProtect", wt.DWORD), ("RegionSize", ctypes.c_size_t), ("State", wt.DWORD), ("Protect", wt.DWORD), ("Type", wt.DWORD), ] def rd(h, va, n): buf = (ctypes.c_ubyte * n)(); sz = ctypes.c_size_t(0) if not k.ReadProcessMemory(h, va, buf, n, ctypes.byref(sz)): return None return bytes(buf[:sz.value]) def classify_sample(b, region_size): if not b or len(b) < 32: return ("unreadable", None) # Vtable-headed: first DWORD points into image range first_dw = struct.unpack_from('= 8: # Count how many are 0xFF (opaque) or 0x00 (transparent) if alphas.count(0xFF) >= 12 or alphas.count(0x00) >= 12: return ("texture_rgba", None) # All-zero if b.count(0) >= 56: return ("mostly_zero", None) # Float-heavy: many 4-byte words look like reasonable floats floats_in_range = 0 for i in range(0, 32, 4): f = struct.unpack_from('= 6: return ("float_data", None) return ("unknown", None) pid = int(sys.argv[1]) h = k.OpenProcess(PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, False, pid) if not h: print(f"OpenProcess err={ctypes.get_last_error()}"); sys.exit(2) classes = collections.Counter() class_bytes = collections.Counter() vtable_counts = collections.Counter() size_distribution = collections.Counter() mbi = MBI() addr = 0 total_regions = 0 while k.VirtualQueryEx(h, addr, ctypes.byref(mbi), ctypes.sizeof(mbi)): region_base = mbi.BaseAddress or 0 region_size = mbi.RegionSize if (mbi.State == 0x1000 and mbi.Type == 0x20000 and (mbi.Protect & 0xFF) in (0x04, 0x40) and 256*1024 <= region_size <= 1024*1024): sample = rd(h, region_base, 64) cls, vt = classify_sample(sample, region_size) classes[cls] += 1 class_bytes[cls] += region_size size_distribution[region_size] += 1 if cls == "vtable": vtable_counts[vt] += 1 total_regions += 1 next_addr = region_base + region_size if next_addr <= addr: break addr = next_addr if addr >= 0x80000000: break k.CloseHandle(h) def mb(n): return f"{n/(1024*1024):,.1f}" print(f"=== pid {pid} — 256K-1M private RW region content classification ===") print(f"Total regions in size range: {total_regions}") print() print(f" {'class':<16} {'count':>6} {'MB':>8}") for cls, count in classes.most_common(): print(f" {cls:<16} {count:>6} {mb(class_bytes[cls]):>8}") print() print("Top region sizes (look for size buckets that match texture dimensions):") for sz, n in size_distribution.most_common(15): print(f" {sz:>10} bytes ({mb(sz):>6} MB) × {n} regions total={mb(sz*n):>6} MB") print() if vtable_counts: print("Vtable-headed regions (top 10 vtables):") for vt, n in vtable_counts.most_common(10): print(f" 0x{vt:08x} × {n}")