Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
179 lines
7.5 KiB
Python
179 lines
7.5 KiB
Python
"""
|
|
rendersurface_refcount_v2.py <dump.dmp>
|
|
|
|
Definitive empirical test of F2 hypothesis using the CORRECT EoR
|
|
RenderSurface addresses/offsets found in Phase 7:
|
|
|
|
- RenderSurface vtable (full): 0x0079bfe8
|
|
- RenderSurface vtable (sub): 0x0079bffc
|
|
- sizeof(RenderSurface): 0x14c bytes
|
|
- refcount offset: +0x04
|
|
- m_pSurfaceBits offset: +0x128
|
|
- sourceData.sourceBits offset: +0xa0
|
|
|
|
Method:
|
|
  1. Scan all committed writable memory for DWORDs == 0x0079bfe8,
     0x0079bffc, or 0x007e68a4 (the RenderSurfaceD3D vtable, which is the
     one used at runtime) — those are vtable pointers at offset 0 of
     RenderSurface instances.
|
|
2. For each match (the object's base address), read:
|
|
- refcount at +0x04
|
|
- m_pSurfaceBits at +0x128
|
|
- sourceData.sourceBits at +0xa0
|
|
3. Histogram refcounts.
|
|
4. Cross-reference with the leaked 256-512 KB private regions: how
|
|
many RenderSurface instances actually have a leaked-region
|
|
pointer in their m_pSurfaceBits field?
|
|
|
|
Verdict:
|
|
- If refcount mode is 1: surfaces are unreferenced (could still leak if
|
|
nothing destroys them, but unlikely)
|
|
- If refcount mode > 1: external holders exist (F2 confirmed)
|
|
- If most surfaces have leaked m_pSurfaceBits AND refcount > 1: that's
|
|
the smoking gun for F2 — the surface is alive (held) AND its buffer
|
|
is in the leaked set
|
|
"""
|
|
import os, struct, sys
|
|
from collections import Counter, defaultdict
|
|
from minidump.minidumpfile import MinidumpFile
|
|
|
|
|
|
# --- vtable addresses matched at offset 0 of candidate objects -------------
# Base RenderSurface (sizeof 0x14c) — not used at runtime.
VTABLE_FULL = 0x0079BFE8
# Base RenderSurface subobject vtable.
VTABLE_SUB = 0x0079BFFC
# RenderSurfaceD3D (sizeof 0x164) — THE RUNTIME VTABLE.
VTABLE_D3D = 0x007E68A4

# --- field offsets, relative to the object's base address ------------------
OFF_REFCOUNT = 0x04        # refcount
OFF_SOURCEBITS = 0xA0      # sourceData.sourceBits
OFF_SOURCE_FLAGS = 0xA4    # DWORD immediately after sourceBits
OFF_SURFACEBITS = 0x128    # m_pSurfaceBits
|
|
|
|
|
|
def _ei(v):
|
|
if v is None: return 0
|
|
if hasattr(v, 'value'): return int(v.value)
|
|
return int(v)
|
|
|
|
|
|
def _nearest_base_at_or_below(sorted_bases, addr):
    """Return the largest entry of *sorted_bases* that is <= *addr*, or None."""
    from bisect import bisect_right  # local import keeps this block self-contained

    pos = bisect_right(sorted_bases, addr)
    return sorted_bases[pos - 1] if pos else None


def _scan_buffer(buf, base, target_vtables):
    """Yield one record per 4-byte-aligned vtable match found in *buf*.

    *buf* holds the bytes of a region starting at virtual address *base*.
    Every little-endian DWORD in the scanned window is compared against
    *target_vtables*; on a match the DWORD's address is treated as an
    object base and the refcount / sourceBits / source-flag /
    m_pSurfaceBits fields are read relative to it.
    """
    usable = (len(buf) // 4) * 4
    limit = usable - 0x130  # leave room to read up to +0x130
    if limit <= 0:
        return
    # iter_unpack walks the window in one C-level pass instead of calling
    # unpack_from once per DWORD.
    words = struct.iter_unpack("<I", memoryview(buf)[:limit])
    for index, (vtbl,) in enumerate(words):
        if vtbl not in target_vtables:
            continue
        off = index * 4
        # off + 0x130 < len(buf) is guaranteed by `limit`, so these reads
        # (highest byte touched: off + 0x12b) cannot run off the buffer.
        refcount = struct.unpack_from("<I", buf, off + OFF_REFCOUNT)[0]
        surfacebits = struct.unpack_from("<I", buf, off + OFF_SURFACEBITS)[0]
        sourcebits = struct.unpack_from("<I", buf, off + OFF_SOURCEBITS)[0]
        source_flag = struct.unpack_from("<I", buf, off + OFF_SOURCE_FLAGS)[0]
        # Also read D3D extra fields (0x14c-0x164) for the D3D variant.
        # They may lie past the end of the buffer; fall back to zeros then.
        # NOTE(review): these are read for every matched vtable, not only
        # the D3D one, so for other variants they are whatever follows the
        # object in memory.
        if off + 0x164 <= len(buf):
            d3d_fields = list(struct.unpack_from("<6I", buf, off + 0x14c))
        else:
            d3d_fields = [0] * 6
        yield {
            "addr": base + off,
            "vtbl": vtbl,
            "refcount": refcount,
            "surfacebits": surfacebits,
            "sourcebits": sourcebits,
            "source_flag": source_flag,
            "d3d_fields": d3d_fields,
        }


def _print_refcount_rows(records, top):
    """Print the *top* most common refcount values among *records*."""
    histogram = Counter(rec["refcount"] for rec in records)
    for rc, cnt in histogram.most_common(top):
        pct = 100.0 * cnt / len(records)
        print(f" refcount={rc:<6} {cnt:>5} ({pct:.1f}%)")


def main():
    """Scan a minidump for RenderSurface objects and print refcount statistics.

    The dump path is taken from ``sys.argv[1]``.  The function:

    1. classifies the dump's memory regions: committed + private +
       writable regions of 256-512 KB form the "leaked" set; every
       committed, non-image, writable region is queued for scanning;
    2. scans the queued regions for DWORDs equal to one of the three
       RenderSurface vtable addresses and records the fields of each hit;
    3. prints, per vtable and overall, a refcount histogram and how many
       objects have ``m_pSurfaceBits`` (or one of the six D3D extra
       DWORDs) pointing into the leaked set.

    Exits via ``sys.exit`` with a message when no dump path is given or
    the dump carries no memory-info list.  Regions whose bytes cannot be
    read from the dump are skipped and counted on stderr.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: rendersurface_refcount_v2.py <dump.dmp>")

    md = MinidumpFile.parse(sys.argv[1])
    # Presumably the parser leaves memory_info unset/None when the dump was
    # written without region attributes — verify against the minidump package.
    if getattr(md, "memory_info", None) is None:
        sys.exit("error: dump has no memory-info list; cannot classify regions")
    reader = md.get_reader().get_buffered_reader()

    # Win32 memory constants used in the region records.
    mem_commit = 0x1000                 # State: MEM_COMMIT
    mem_private = 0x20000               # Type:  MEM_PRIVATE
    mem_image = 0x1000000               # Type:  MEM_IMAGE
    writable = (0x04, 0x40)             # PAGE_READWRITE, PAGE_EXECUTE_READWRITE

    # Single pass over the region list: build the leaked set (base -> size)
    # and the list of regions to scan.
    leaked_sizes = {}
    scan_regions = []
    for r in md.memory_info.infos:
        st = _ei(r.State)
        ty = _ei(r.Type)
        pr = _ei(r.Protect) & 0xff  # drop modifier bits above the low byte
        if st != mem_commit or pr not in writable:
            continue
        # Find leaked 256-512 KB regions
        if ty == mem_private and 256*1024 <= r.RegionSize < 512*1024:
            leaked_sizes[r.BaseAddress] = r.RegionSize
        # Scan all committed writable memory except mapped images
        if ty != mem_image:
            scan_regions.append((r.BaseAddress, r.RegionSize))
    leaked_regions = leaked_sizes.keys()
    sorted_bases = sorted(leaked_sizes)
    print(f"leaked 256-512KB regions: {len(leaked_regions)}")
    print(f"scanning {len(scan_regions)} writable regions")

    def in_leaked_region(ptr):
        # True when ptr lies inside a leaked region, using the region's real
        # size rather than a fixed 512 KB window (which over-counts pointers
        # that land past the end of a smaller region).
        rb = _nearest_base_at_or_below(sorted_bases, ptr)
        return rb is not None and ptr < rb + leaked_sizes[rb]

    def near_leaked_region(ptr):
        # True when ptr is within 0x80000 bytes above some leaked base.  Only
        # the closest base at or below ptr can satisfy that, so one bisect
        # lookup replaces a scan over every region.
        rb = _nearest_base_at_or_below(sorted_bases, ptr)
        return rb is not None and ptr - rb < 0x80000

    target_vtables = {VTABLE_FULL, VTABLE_SUB, VTABLE_D3D}
    objects = []
    total = 0
    unreadable = 0
    for base, size in scan_regions:
        try:
            reader.move(base)
            buf = reader.read(size)
        except Exception:
            # Best effort: the dump may not contain this region's bytes.
            # Count it so the coverage gap is visible instead of silent.
            unreadable += 1
            continue
        if not buf:
            continue
        total += len(buf)
        objects.extend(_scan_buffer(buf, base, target_vtables))
    if unreadable:
        print(f"skipped {unreadable} regions that could not be read from the dump",
              file=sys.stderr)
    print(f"scanned {total/(1024*1024):.1f} MB")
    print(f"RenderSurface-like objects found (vtbl match): {len(objects)}")

    # Histogram by vtable
    by_vt = defaultdict(list)
    for o in objects:
        by_vt[o["vtbl"]].append(o)

    for vt, objs in by_vt.items():
        print(f"\n=== vtable 0x{vt:08x} ({len(objs)} objects) ===")
        print(" refcount histogram (top 10):")
        _print_refcount_rows(objs, 10)

        # How many have m_pSurfaceBits pointing into the leaked set?
        leaked_owners = [o for o in objs if in_leaked_region(o["surfacebits"])]
        # The pointer may be region_base + 0x30 (heap header offset), so check
        # if surfacebits is just past any leaked region's base
        precise = [o for o in objs
                   if o["surfacebits"] - 0x30 in leaked_regions
                   or o["surfacebits"] in leaked_regions]
        print(f" RenderSurfaces with m_pSurfaceBits in leaked range: {len(leaked_owners)}")
        print(f" RenderSurfaces with m_pSurfaceBits == leaked_base or +0x30: {len(precise)}")

        if precise:
            print(" refcount distribution for LEAKED-buffer owners:")
            _print_refcount_rows(precise, 8)

    # Check D3D extra fields for leaked-region pointers
    print("\n=== D3D extra fields (+0x14c..+0x160) leaked-region check ===")
    for i in range(6):
        offset = 0x14c + 4*i
        values = [o["d3d_fields"][i] for o in objects]
        non_null = sum(1 for v in values if v != 0)
        in_leaked = sum(1 for v in values
                        if v - 0x30 in leaked_regions or v in leaked_regions)
        # Also check the field is just any address in heap of leaked region size
        near_leaked = sum(1 for v in values if near_leaked_region(v))
        print(f" +0x{offset:03x}: non-null={non_null}/{len(objects)}, in_leaked={in_leaked}, near_leaked_region={near_leaked}")

    # Total RenderSurfaces
    if objects:
        print("\n=== ALL RenderSurfaces combined ===")
        _print_refcount_rows(objects, 10)
        # Pointer status
        with_bits = sum(1 for o in objects if o["surfacebits"] != 0)
        print(f" RenderSurfaces with non-null m_pSurfaceBits: {with_bits} / {len(objects)}")
        with_source = sum(1 for o in objects if o["sourcebits"] != 0)
        print(f" RenderSurfaces with non-null sourceData.sourceBits: {with_source} / {len(objects)}")
|
|
|
|
|
|
# Script entry point: run the scan only when executed directly, not on import.
if __name__ == "__main__":
    main()
|