Five bugs identified and patched in retail Asheron's Call client: - v3b: palette refcount over-increment (3-byte NOP at two sites) - v5: RenderSurface PurgeResource no-op stub (vtable slot 2 thunk) - v11: two dangling-pointer crash guards (NULL-check + reorder) - v14: CEnvCell::Destroy ClipPlaneList leak (18-byte JMP to cleanup thunk) - v22: unpacker stale-pointer SEH guard (whole-function __try/__except) All five ship in leakfix.dll (117 KB, SHA d282f23c…) which is loaded by acclient.exe at process start via PE import table patching by tools/install_leakfix.py. Controlled 15-client fleet soak: unpatched control died at 26h with palette exhaustion; all 14 patched clients survived past that point and reached ≥5-day uptime. Residual ~15 MB/h growth traced to d3d9.dll's internal slab allocator (260KB surface backing buffers retained after Release). See REPORT.md §10 for the full investigation; conclusion is that it's unfixable from outside d3d9. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
119 lines
4.4 KiB
PowerShell
119 lines
4.4 KiB
PowerShell
# Supervisor harness for the AC client memory-leak hunt.
|
|
#
|
|
# What this does:
|
|
# - Sets _NT_SYMBOL_PATH so umdh/cdb resolve symbols against acclient.pdb
|
|
# - Verifies gflags +ust is enabled (required for stack-tagged allocations)
|
|
# - Starts ACE (optionally) and the AC client
|
|
# - Periodically calls snapshot.ps1 to capture UMDH snapshots
|
|
# - Watches for process exit and stops the snapshot loop (crash-dump capture via procdump is still a commented-out stub — TODO)
|
|
#
|
|
# Skeleton — flesh out at Phase 1 time. Configurable up top.
|
|
|
|
# -Phase (required): label for this run; it is used as the name of the
# artifact subdirectory created under $OutRoot.
param(
    [Parameter(Mandatory = $true)]
    [string]
    $Phase
)
|
|
|
|
#region Config

# --- Paths -------------------------------------------------------------------
# $AcExe and $GflagsExe are not referenced by the supervisor code in this file;
# they are kept for operators / sibling scripts.
$AcExe       = "C:\Turbine\Asheron's Call\acclient.exe"
$PdbDir      = "C:\Users\acbot\leakhunt\pdb"                        # becomes _NT_SYMBOL_PATH
$OutRoot     = "C:\Users\acbot\leakhunt\artifacts"                  # parent of the per-phase output dir
$LauncherPs1 = "C:\Users\acbot\leakhunt\bin\launch_acclient.ps1"    # gitignored, has creds
$UmdhExe     = "C:\Program Files (x86)\Windows Kits\10\Debuggers\x86\umdh.exe"
$GflagsExe   = "C:\Program Files (x86)\Windows Kits\10\Debuggers\x86\gflags.exe"

# --- Timing ------------------------------------------------------------------
$SnapshotEvery = 1800     # seconds (30 min — Phase 1 default; bump down for Phase 4)
$MaxDuration   = 14400    # seconds (4h default — bump for Phase 4)

# --- Optional local ACE server -----------------------------------------------
# Leave $AceCwd as $null to skip auto-starting ACE. When $AceCwd is set,
# $AceCmd must also be set: it is the command string handed to `pwsh -c`
# with $AceCwd as the working directory.
$AceCwd = $null           # Coldeve real server in use; no local ACE
$AceCmd = $null           # previously referenced by the ACE step but never defined

#endregion

# Promote non-terminating cmdlet errors to terminating ones so that any
# failed step aborts the supervisor instead of being silently skipped.
$ErrorActionPreference = "Stop"
|
|
|
|
# Prints one progress line to the host in cyan, prefixed with the current
# wall-clock time as [HH:mm:ss].
#   $msg - text to display after the timestamp.
function Write-Step {
    param([string]$msg)

    $stamp = Get-Date -Format HH:mm:ss
    Write-Host "[$stamp] $msg" -ForegroundColor Cyan
}
|
|
|
|
# Per-phase artifact directory: <OutRoot>\<Phase>. -Force is passed so an
# already-existing directory does not raise an error; the returned
# DirectoryInfo object is discarded.
$phaseDir = Join-Path -Path $OutRoot -ChildPath $Phase
$null = New-Item -Path $phaseDir -ItemType Directory -Force
Write-Step "Output: $phaseDir"

# 1. Symbol path for umdh / cdb
#    Set in this process's environment so child tools started later inherit it.
$env:_NT_SYMBOL_PATH = $PdbDir
Write-Step "_NT_SYMBOL_PATH = $env:_NT_SYMBOL_PATH"
|
|
|
|
# 2. Confirm gflags +ust is set on acclient.exe (via IFEO registry — no admin needed)
#    Reads the GlobalFlag value under the image's IFEO key and requires bit
#    0x1000 to be set. A missing key or value yields $null and is treated the
#    same as "flag not set".
$ifeoKey  = "HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Image File Execution Options\acclient.exe"
$ifeoItem = Get-ItemProperty $ifeoKey -ErrorAction SilentlyContinue
$ifeoFlag = $ifeoItem.GlobalFlag

$ustEnabled = $ifeoFlag -and ($ifeoFlag -band 0x1000)
if (-not $ustEnabled) {
    throw "gflags +ust NOT set on acclient.exe (GlobalFlag=$ifeoFlag). Run elevated: gflags /i acclient.exe +ust"
}

$flagHex = [Convert]::ToString($ifeoFlag, 16)
Write-Step "gflags +ust verified (GlobalFlag=0x$flagHex)"
|
|
|
|
# 3. (Optional) start ACE
#    Runs only when $AceCwd is configured. The server is started as
#    `pwsh -c $AceCmd` in $AceCwd, minimized; after an 8 s grace period the
#    script aborts if that process has already exited.
if ($null -ne $AceCwd) {   # $null on the left: safe even if $AceCwd is a collection
    # $AceCmd used to be passed straight to Start-Process even when it had
    # never been defined; fail with an actionable message instead.
    if ([string]::IsNullOrWhiteSpace($AceCmd)) {
        throw "`$AceCwd is set ($AceCwd) but `$AceCmd is empty. Define `$AceCmd in the Config region."
    }

    Write-Step "Starting ACE in $AceCwd ..."
    $aceProc = Start-Process -FilePath "pwsh" -ArgumentList "-c", $AceCmd `
        -WorkingDirectory $AceCwd -PassThru -WindowStyle Minimized

    Start-Sleep -Seconds 8
    if ($aceProc.HasExited) {
        throw "ACE exited during startup. Check $AceCwd."
    }
} else {
    Write-Step "ACE not auto-started — assume user/operator has it running"
}
|
|
|
|
# 4. Launch acclient via the credentialed launcher (auto-login via -a/-v/-h CLI args)
Write-Step "Launching acclient via $LauncherPs1 ..."

# Remember clients that were already running so the instance started by the
# launcher can be told apart from them on a multi-client machine.
$preexistingIds = @(
    Get-Process -Name acclient -ErrorAction SilentlyContinue |
        ForEach-Object { $_.Id }
)

& $LauncherPs1

# Poll for the new process rather than relying on one fixed sleep: a slow
# launcher used to make the lookup fail outright.
$launchDeadline = (Get-Date).AddSeconds(30)
$acProc = $null
while (-not $acProc -and (Get-Date) -lt $launchDeadline) {
    Start-Sleep -Seconds 1
    $acProc = Get-Process -Name acclient -ErrorAction SilentlyContinue |
        Where-Object { $preexistingIds -notcontains $_.Id } |
        Sort-Object StartTime -Descending |
        Select-Object -First 1
}

if (-not $acProc) {
    # Fall back to the previous selection rule (newest running instance);
    # -ErrorAction Stop still aborts the run when no client exists at all.
    Write-Warning "No new acclient process appeared within 30 s; falling back to the newest running instance."
    $acProc = Get-Process -Name acclient -ErrorAction Stop |
        Sort-Object StartTime -Descending |
        Select-Object -First 1
}

$pid_ac = $acProc.Id
Write-Step "acclient pid = $pid_ac"
|
|
|
|
# 5. Wait for in-world plateau (working set typically settles past ~500 MB once cell data loads)
#    Polls every 5 s for up to 180 s. Aborts if the client exits; if the
#    threshold is never reached the run continues, but now says so.
Write-Step "Waiting for in-world plateau (working set >= 500 MB) ..."
$plateauDeadline = (Get-Date).AddSeconds(180)
$plateauReached  = $false

while ((Get-Date) -lt $plateauDeadline) {
    Start-Sleep -Seconds 5

    # Re-query by PID for a fresh working-set reading. A failed lookup means
    # the client vanished between polls, so report it like an exit instead of
    # surfacing a raw "cannot find process" error.
    $live = Get-Process -Id $pid_ac -ErrorAction SilentlyContinue
    if ($acProc.HasExited -or -not $live) {
        throw "acclient exited during login. ExitCode=$($acProc.ExitCode)"
    }

    $ws = $live.WorkingSet64
    if ($ws -ge 500MB) {   # -ge matches the ">=" promised in the log line above
        Write-Step "Plateau detected: WS=$([math]::Round($ws/1MB,1)) MB"
        $plateauReached = $true
        break
    }
}

if (-not $plateauReached) {
    # Previously the timeout fell through silently, which made early
    # snapshots hard to interpret.
    Write-Warning "Working set did not reach 500 MB within 180 s; continuing without a confirmed plateau."
}
|
|
|
|
# 6. Snapshot loop
#    Every $SnapshotEvery seconds, invoke snapshot.ps1 (expected next to this
#    script) against the client PID and write snap_NNN.txt into $phaseDir.
#    The loop ends when the client exits or once $MaxDuration has elapsed
#    (checked after each snapshot).
$start = Get-Date
$snapIdx = 1
$snapshotScript = Join-Path $PSScriptRoot "snapshot.ps1"

# Fail fast: without this check a missing helper is only discovered after
# the first full snapshot interval has been spent waiting.
if (-not (Test-Path -LiteralPath $snapshotScript)) {
    throw "Snapshot helper not found: $snapshotScript"
}

while ($true) {
    # Wait-Process returns as soon as the client exits, or after the timeout
    # (seconds). Both the timeout and an already-gone PID are reported as
    # non-terminating errors, hence SilentlyContinue. A plain Start-Sleep here
    # noticed a crash only at the next snapshot tick.
    Wait-Process -Id $pid_ac -Timeout $SnapshotEvery -ErrorAction SilentlyContinue

    if ($acProc.HasExited) {
        Write-Step "acclient EXITED — code $($acProc.ExitCode)"
        # Capture dump if process still around (sometimes lingers briefly)
        # & procdump -ma $pid_ac "$phaseDir\crash.dmp" 2>&1 | Out-Null
        break
    }

    $snapPath = Join-Path $phaseDir ("snap_{0:D3}.txt" -f $snapIdx)
    Write-Step "snapshot $snapIdx -> $snapPath"
    & $snapshotScript -ProcessId $pid_ac -Out $snapPath

    # After the increment, $snapIdx is always "next index to write"; the
    # final-diff step relies on ($snapIdx - 1) being the last snapshot taken.
    $snapIdx++

    if (((Get-Date) - $start).TotalSeconds -gt $MaxDuration) {
        Write-Step "Max duration reached. Final snapshot done."
        break
    }
}
|
|
|
|
# 7. Final diff
#    With at least two snapshots on record ($snapIdx is one past the last
#    index written), ask umdh to diff the first against the last and write
#    the result to diff_first_to_last.txt in $phaseDir.
if ($snapIdx -gt 2) {
    $first = Join-Path $phaseDir "snap_001.txt"
    $last  = Join-Path $phaseDir ("snap_{0:D3}.txt" -f ($snapIdx - 1))
    $diff  = Join-Path $phaseDir "diff_first_to_last.txt"

    if ((Test-Path -LiteralPath $first) -and (Test-Path -LiteralPath $last)) {
        Write-Step "Diff: $first -> $last"
        # Pass -f:<path> as one quoted token so a phase name containing
        # spaces cannot split the argument handed to the native executable.
        & $UmdhExe -d $first $last "-f:$diff"
        if ($LASTEXITCODE -ne 0) {
            Write-Warning "umdh exited with code $LASTEXITCODE; $diff may be missing or incomplete."
        }
    } else {
        # The loop counted a snapshot that the helper never wrote to disk.
        Write-Warning "Snapshot file(s) missing ($first / $last); skipping diff."
    }
} else {
    Write-Step "Fewer than two snapshots captured; skipping diff."
}

Write-Step "Supervisor done."
|