Initial commit — leak-hunt project complete
Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
57b5e43d0e
199 changed files with 1648333 additions and 0 deletions
46
templates/activity-phases.json
Normal file
46
templates/activity-phases.json
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
{
|
||||
"phases": [
|
||||
{
|
||||
"name": "idle",
|
||||
"duration_min": 60,
|
||||
"description": "Sit at lifestone. No input. Establishes baseline allocator noise.",
|
||||
"actions": []
|
||||
},
|
||||
{
|
||||
"name": "wander",
|
||||
"duration_min": 60,
|
||||
"description": "Walk a fixed route around Holtburg town. Targets streaming + landblock loads.",
|
||||
"actions": [
|
||||
{ "type": "walk_route", "waypoints": ["lifestone", "town-square", "marketplace", "south-gate", "lifestone"], "loop": true }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "chat",
|
||||
"duration_min": 60,
|
||||
"description": "Spam /say and /tell. Targets chat-log buffers.",
|
||||
"actions": [
|
||||
{ "type": "send_chat", "channel": "say", "message_template": "test {counter}", "interval_sec": 2 }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "target-cycle",
|
||||
"duration_min": 60,
|
||||
"description": "Tab through nearby targetables. No combat. Targets selection + tooltip allocation.",
|
||||
"actions": [
|
||||
{ "type": "press_key", "key": "Tab", "interval_sec": 3 }
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "ui-cycle",
|
||||
"duration_min": 60,
|
||||
"description": "Open/close inventory, character pane, spells pane. Targets UI-widget allocation.",
|
||||
"actions": [
|
||||
{ "type": "press_key", "key": "i", "interval_sec": 5 },
|
||||
{ "type": "press_key", "key": "c", "interval_sec": 7 },
|
||||
{ "type": "press_key", "key": "s", "interval_sec": 9 }
|
||||
]
|
||||
}
|
||||
],
|
||||
"snapshot_interval_min": 15,
|
||||
"notes": "Phase 2 schedule. Run one phase per session, fresh from bench-verified snapshot. Compare growth rates across phases to localize the leak's subsystem."
|
||||
}
|
||||
52
templates/login.ahk
Normal file
52
templates/login.ahk
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
; AutoHotkey v2 — login skeleton for retail acclient.exe
|
||||
;
|
||||
; Drives the launcher login screen. Fills in test credentials, clicks
|
||||
; through character select. Adjust ImageSearch / Click coordinates after
|
||||
; first manual run — UI layouts depend on resolution and skin.
|
||||
;
|
||||
; Usage: launch this after supervisor.ps1 starts acclient.exe
|
||||
|
||||
#Requires AutoHotkey v2.0
|
||||
#SingleInstance Force
|
||||
|
||||
; --- config ---
|
||||
USERNAME := "testaccount"
|
||||
PASSWORD := "testpassword"
|
||||
CHAR_SLOT := 1
|
||||
WAIT_TIMEOUT_S := 60
|
||||
; --- end config ---
|
||||
|
||||
WinTitle := "Asheron's Call"
|
||||
|
||||
; Wait for the AC window
|
||||
if not WinWait(WinTitle, , WAIT_TIMEOUT_S) {
|
||||
MsgBox "AC window not found within " WAIT_TIMEOUT_S "s — aborting"
|
||||
ExitApp 1
|
||||
}
|
||||
WinActivate WinTitle
|
||||
Sleep 2000
|
||||
|
||||
; Type username
|
||||
; SendText types each credential literally. Plain Send would interpret
; characters such as ! # ^ + { } inside a credential as modifiers or key names.
SendText USERNAME
Send "{Tab}"
SendText PASSWORD
|
||||
Send "{Enter}"
|
||||
|
||||
; Wait for character select screen — adjust the wait for your skin
|
||||
Sleep 8000
|
||||
|
||||
; Select character (slot 1 is top of list)
|
||||
Loop CHAR_SLOT - 1 {
|
||||
Send "{Down}"
|
||||
Sleep 200
|
||||
}
|
||||
Send "{Enter}"
|
||||
|
||||
; Wait for in-world load
|
||||
Sleep 15000
|
||||
|
||||
; If you got here, you're in-world.
|
||||
; The supervisor doesn't need anything else from us; the controller DLL
|
||||
; (Phase 3) drives in-game activity.
|
||||
|
||||
ExitApp 0
|
||||
26
templates/snapshot.ps1
Normal file
26
templates/snapshot.ps1
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# Take one UMDH stack-tagged heap snapshot of a running process.
|
||||
#
|
||||
# Requirements:
|
||||
# - gflags /i acclient.exe +ust (one-time, registry-set)
|
||||
# - _NT_SYMBOL_PATH pointing at acclient.pdb directory
|
||||
# - umdh.exe on PATH (Windows Debugging Tools)
|
||||
|
||||
param(
|
||||
[Parameter(Mandatory=$true)][int]$ProcessId,
|
||||
[Parameter(Mandatory=$true)][string]$Out
|
||||
)
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
if (-not $env:_NT_SYMBOL_PATH) {
|
||||
Write-Warning "_NT_SYMBOL_PATH not set — symbols may not resolve"
|
||||
}
|
||||
|
||||
# Remove any stale file left by an earlier run so that the existence check
# that follows reflects whether THIS umdh invocation produced output.
if (Test-Path -LiteralPath $Out) {
    Remove-Item -LiteralPath $Out -Force
}

& umdh.exe -p:$ProcessId -f:$Out
|
||||
|
||||
if (-not (Test-Path $Out)) {
|
||||
throw "umdh produced no output at $Out"
|
||||
}
|
||||
|
||||
$size = (Get-Item $Out).Length
|
||||
Write-Host "snapshot: $Out ($size bytes)"
|
||||
119
templates/supervisor.ps1
Normal file
119
templates/supervisor.ps1
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
# Supervisor harness for the AC client memory-leak hunt.
|
||||
#
|
||||
# What this does:
|
||||
# - Sets _NT_SYMBOL_PATH so umdh/cdb resolve symbols against acclient.pdb
|
||||
# - Verifies gflags +ust is enabled (required for stack-tagged allocations)
|
||||
# - Starts ACE (optionally) and the AC client
|
||||
# - Periodically calls snapshot.ps1 to capture UMDH snapshots
|
||||
# - Watches for process exit; on crash, captures procdump + final snapshot
|
||||
#
|
||||
# Skeleton — flesh out at Phase 1 time. Configurable up top.
|
||||
|
||||
param([Parameter(Mandatory=$true)][string]$Phase)
|
||||
|
||||
#region Config
|
||||
|
||||
$AcExe = "C:\Turbine\Asheron's Call\acclient.exe"
|
||||
$PdbDir = "C:\Users\acbot\leakhunt\pdb"
|
||||
$OutRoot = "C:\Users\acbot\leakhunt\artifacts"
|
||||
$LauncherPs1 = "C:\Users\acbot\leakhunt\bin\launch_acclient.ps1" # gitignored, has creds
|
||||
$UmdhExe = "C:\Program Files (x86)\Windows Kits\10\Debuggers\x86\umdh.exe"
|
||||
$GflagsExe = "C:\Program Files (x86)\Windows Kits\10\Debuggers\x86\gflags.exe"
|
||||
$SnapshotEvery = 1800 # seconds (30 min — Phase 1 default; bump down for Phase 4)
|
||||
$MaxDuration = 14400 # seconds (4h default — bump for Phase 4)
|
||||
$AceCwd = $null # Coldeve real server in use; no local ACE
|
||||
|
||||
#endregion
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function Write-Step([string]$msg) {
|
||||
Write-Host "[$(Get-Date -Format HH:mm:ss)] $msg" -ForegroundColor Cyan
|
||||
}
|
||||
|
||||
$phaseDir = Join-Path $OutRoot $Phase
|
||||
New-Item -ItemType Directory -Path $phaseDir -Force | Out-Null
|
||||
Write-Step "Output: $phaseDir"
|
||||
|
||||
# 1. Symbol path for umdh / cdb
|
||||
$env:_NT_SYMBOL_PATH = $PdbDir
|
||||
Write-Step "_NT_SYMBOL_PATH = $env:_NT_SYMBOL_PATH"
|
||||
|
||||
# 2. Confirm gflags +ust is set on acclient.exe (via IFEO registry — no admin needed)
|
||||
$ifeoFlag = (Get-ItemProperty "HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Image File Execution Options\acclient.exe" -ErrorAction SilentlyContinue).GlobalFlag
|
||||
if (-not $ifeoFlag -or -not ($ifeoFlag -band 0x1000)) {
|
||||
throw "gflags +ust NOT set on acclient.exe (GlobalFlag=$ifeoFlag). Run elevated: gflags /i acclient.exe +ust"
|
||||
}
|
||||
Write-Step "gflags +ust verified (GlobalFlag=0x$([Convert]::ToString($ifeoFlag,16)))"
|
||||
|
||||
# 3. (Optional) start ACE
|
||||
# $null goes on the left so the comparison stays a scalar test even if
# $AceCwd were ever assigned a collection.
if ($null -ne $AceCwd) {
    # $AceCmd is not assigned anywhere in the visible script. Fail with an
    # actionable message instead of handing Start-Process a null argument.
    if (-not $AceCmd) {
        throw 'AceCwd is set but $AceCmd is not defined. Add the ACE launch command to the Config region.'
    }
    Write-Step "Starting ACE in $AceCwd ..."
    $aceProc = Start-Process -FilePath "pwsh" -ArgumentList "-c", $AceCmd `
        -WorkingDirectory $AceCwd -PassThru -WindowStyle Minimized
    # Give ACE a few seconds, then make sure it did not exit immediately.
    Start-Sleep -Seconds 8
    if ($aceProc.HasExited) {
        throw "ACE exited during startup. Check $AceCwd."
    }
} else {
    Write-Step "ACE not auto-started — assume user/operator has it running"
}
|
||||
|
||||
# 4. Launch acclient via the credentialed launcher (auto-login via -a/-v/-h CLI args)
|
||||
Write-Step "Launching acclient via $LauncherPs1 ..."
|
||||
& $LauncherPs1
|
||||
Start-Sleep -Seconds 5
|
||||
$acProc = Get-Process -Name acclient -ErrorAction Stop | Sort-Object StartTime -Descending | Select-Object -First 1
|
||||
$pid_ac = $acProc.Id
|
||||
Write-Step "acclient pid = $pid_ac"
|
||||
|
||||
# 5. Wait for in-world plateau (working set typically settles past ~500 MB once cell data loads)
Write-Step "Waiting for in-world plateau (working set >= 500 MB) ..."
$plateauDeadline = (Get-Date).AddSeconds(180)
$plateauReached = $false
while ((Get-Date) -lt $plateauDeadline) {
    Start-Sleep -Seconds 5
    if ($acProc.HasExited) { throw "acclient exited during login. ExitCode=$($acProc.ExitCode)" }
    $ws = (Get-Process -Id $pid_ac).WorkingSet64
    # -ge matches the ">= 500 MB" threshold announced in the log line above.
    if ($ws -ge 500MB) {
        Write-Step "Plateau detected: WS=$([math]::Round($ws/1MB,1)) MB"
        $plateauReached = $true
        break
    }
}
if (-not $plateauReached) {
    # The wait is best-effort: keep going, but make it visible that the
    # snapshots that follow may start before the working set has settled.
    Write-Warning "No in-world plateau within 180s (last WS=$([math]::Round($ws/1MB,1)) MB); continuing anyway"
}
|
||||
|
||||
# 6. Snapshot loop
|
||||
$start = Get-Date
|
||||
$snapIdx = 1
|
||||
$snapshotScript = Join-Path $PSScriptRoot "snapshot.ps1"
# snapshot.ps1 invokes a bare "umdh.exe", so make the directory of the
# configured $UmdhExe resolvable through PATH for this session.
$umdhDir = Split-Path -Path $UmdhExe -Parent
if (($env:PATH -split ';') -notcontains $umdhDir) {
    $env:PATH = "$umdhDir;$env:PATH"
}
|
||||
|
||||
while ($true) {
|
||||
Start-Sleep -Seconds $SnapshotEvery
|
||||
|
||||
if ($acProc.HasExited) {
|
||||
Write-Step "acclient EXITED — code $($acProc.ExitCode)"
|
||||
# Capture dump if process still around (sometimes lingers briefly)
|
||||
# & procdump -ma $pid_ac "$phaseDir\crash.dmp" 2>&1 | Out-Null
|
||||
break
|
||||
}
|
||||
|
||||
$snapPath = Join-Path $phaseDir ("snap_{0:D3}.txt" -f $snapIdx)
|
||||
Write-Step "snapshot $snapIdx -> $snapPath"
|
||||
& $snapshotScript -ProcessId $pid_ac -Out $snapPath
|
||||
|
||||
$snapIdx++
|
||||
|
||||
if (((Get-Date) - $start).TotalSeconds -gt $MaxDuration) {
|
||||
Write-Step "Max duration reached. Final snapshot done."
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
# 7. Final diff
|
||||
if ($snapIdx -gt 2) {
|
||||
$first = Join-Path $phaseDir ("snap_001.txt")
|
||||
$last = Join-Path $phaseDir ("snap_{0:D3}.txt" -f ($snapIdx - 1))
|
||||
$diff = Join-Path $phaseDir "diff_first_to_last.txt"
|
||||
Write-Step "Diff: $first -> $last"
|
||||
& $UmdhExe -d $first $last -f:$diff
|
||||
}
|
||||
|
||||
Write-Step "Supervisor done."
|
||||
48
templates/trace.cdb
Normal file
48
templates/trace.cdb
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
$$ cdb scripting template — attach to acclient.exe, set non-blocking
|
||||
$$ breakpoints on suspected allocator functions, count hits, auto-detach.
|
||||
$$
|
||||
$$ Usage:
|
||||
$$ cdb.exe -pn acclient.exe -cf <this-file> -logo <output.log>
|
||||
$$
|
||||
$$ Or attach by PID:
|
||||
$$ cdb.exe -p <pid> -cf <this-file> -logo <output.log>
|
||||
$$
|
||||
$$ Tips:
|
||||
$$ - `gc` = "go from conditional breakpoint" — resumes the debuggee in the same mode (go / step / trace) it was in when the breakpoint hit
|
||||
$$ - `qd` = "quit detached" — leaves the debuggee running, exits cdb
|
||||
$$ - Counter $t0..$t19 are persistent across breakpoint hits
|
||||
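$$ - Semicolons separate the commands inside a quoted breakpoint action, as in
$$ the bp lines below. Nested double quotes and backslashes must be escaped.
$$ - A $$ comment ends at the first semicolon, so never type that character
$$ inside one. Use a * comment for text that has to contain it.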
|
||||
|
||||
.logopen /t leak-trace.log
|
||||
|
||||
$$ Symbol path — local PDB only, no symbol server.
|
||||
.sympath C:\leak-hunt\pdb
|
||||
.symopt+ 0x40
|
||||
.reload /f acclient.exe
|
||||
|
||||
$$ Verify the symbol we care about resolves (replace as needed)
|
||||
$$ x acclient!CChatManager::AddLine
|
||||
|
||||
$$ ============================================================
|
||||
$$ Counters
|
||||
$$ ============================================================
|
||||
$$ Each r command ends with a semicolon so that the trailing $$ begins a new
$$ (comment) command instead of being parsed as part of the value expression.
r $t0 = 0; $$ alloc-site hits
r $t1 = 0; $$ free-site hits
r $t2 = 0; $$ unmatched (leak candidate) hits
|
||||
|
||||
$$ ============================================================
|
||||
$$ Breakpoint pattern: increment counter, log every Nth, auto-detach at M
|
||||
$$ ============================================================
|
||||
$$ Replace <ALLOC_FN> and <FREE_FN> with the suspected function names.
|
||||
|
||||
bp acclient!<ALLOC_FN> "r $t0 = @$t0 + 1; .if (@$t0 % 1000 == 0) { .printf \"alloc hits: %d\\n\", @$t0 }; .if (@$t0 >= 100000) { .printf \"AUTO-DETACH at %d\\n\", @$t0; qd } .else { gc }"
|
||||
|
||||
bp acclient!<FREE_FN> "r $t1 = @$t1 + 1; .if (@$t1 % 1000 == 0) { .printf \"free hits: %d\\n\", @$t1 }; gc"
|
||||
|
||||
$$ Optional: dump `this` struct on first hit
|
||||
* bp acclient!<ALLOC_FN> "r $t0 = @$t0 + 1; .if (@$t0 == 1) { dt acclient!<ClassName> @ecx }; gc"
|
||||
|
||||
g
|
||||
|
||||
.logclose
|
||||
Loading…
Add table
Add a link
Reference in a new issue