fix(agent): use --resume on existing sessions, --session-id only for new

Claude Code rejects --session-id for a session that already exists on disk
('Session ID ... is already in use'). The first message of a conversation
must use --session-id to create the session; every later message must use
--resume.

Detect an existing session by checking whether
~/.claude/projects/<encoded-cwd>/<uuid>.jsonl exists. As a
belt-and-suspenders fallback, if --session-id still fails with the
'already in use' error, automatically retry with --resume.

This was the bug that caused chat windows to fail on the second message.
This commit is contained in:
Erik 2026-04-25 20:51:46 +02:00
parent 0745aefdb9
commit 6d5819d297

View file

@ -44,8 +44,23 @@ class ClaudeError(RuntimeError):
"""Raised when the claude CLI returns a non-zero exit or unparseable output."""
def _session_exists(session_id: str) -> bool:
    """Report whether a session JSONL for `session_id` is already on disk.

    Per the layout this module relies on, Claude Code keeps each session at
    ``~/.claude/projects/<encoded-cwd>/<uuid>.jsonl``, where ``<encoded-cwd>``
    is the working directory with every non-alphanumeric character replaced
    by a hyphen.

    Args:
        session_id: The session UUID used as the JSONL file stem.

    Returns:
        True when the expected JSONL path exists and is a regular file,
        False otherwise.
    """
    # Build the per-project directory name from the configured cwd.
    # NOTE(review): str.isalnum() is also true for non-ASCII letters/digits;
    # confirm this matches Claude Code's own encoding for such paths.
    project_dir = "".join(ch if ch.isalnum() else "-" for ch in CLAUDE_CWD)
    transcript = Path.home().joinpath(
        ".claude", "projects", project_dir, f"{session_id}.jsonl"
    )
    return transcript.is_file()
async def ask_claude(message: str, session_id: str) -> ClaudeResult:
"""Send `message` to `claude -p` resuming session_id; return parsed result.
"""Send `message` to `claude -p` for `session_id`; return parsed result.
On the FIRST message of a session uses `--session-id <uuid>` to create it.
On subsequent messages uses `--resume <uuid>` because claude rejects
`--session-id` on existing sessions ("Session ID ... is already in use").
Raises ClaudeError on subprocess failure, JSON parse failure, or timeout.
"""
@ -75,10 +90,15 @@ async def ask_claude(message: str, session_id: str) -> ClaudeResult:
]
)
# Pick --session-id (creates) vs --resume (continues) based on whether
# the session JSONL already exists on disk.
is_new = not _session_exists(session_id)
session_flag = "--session-id" if is_new else "--resume"
args = [
CLAUDE_BIN,
"-p",
"--session-id",
session_flag,
session_id,
"--output-format",
"json",
@ -90,7 +110,11 @@ async def ask_claude(message: str, session_id: str) -> ClaudeResult:
]
logger.info(
"claude exec: session=%s msg_len=%d cwd=%s", session_id, len(message), CLAUDE_CWD
"claude exec: session=%s mode=%s msg_len=%d cwd=%s",
session_id,
"new" if is_new else "resume",
len(message),
CLAUDE_CWD,
)
proc = await asyncio.create_subprocess_exec(
@ -114,8 +138,40 @@ async def ask_claude(message: str, session_id: str) -> ClaudeResult:
raise ClaudeError(f"claude timed out after {CLAUDE_TIMEOUT_S}s")
if proc.returncode != 0:
stderr_text = stderr.decode("utf-8", "replace")
# If we picked the wrong flag (e.g. JSONL deleted from disk between
# our check and exec, or a never-flushed session), claude prints
# "Session ID … is already in use." Re-issue with --resume.
if is_new and "already in use" in stderr_text:
logger.info("session %s actually exists; retrying with --resume", session_id)
args2 = list(args)
args2[2] = "--resume"
proc2 = await asyncio.create_subprocess_exec(
*args2,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=CLAUDE_CWD,
)
try:
stdout, stderr = await asyncio.wait_for(
proc2.communicate(input=message.encode("utf-8")),
timeout=CLAUDE_TIMEOUT_S,
)
except asyncio.TimeoutError:
try:
proc2.kill()
except ProcessLookupError:
pass
raise ClaudeError(f"claude timed out after {CLAUDE_TIMEOUT_S}s")
if proc2.returncode != 0:
raise ClaudeError(
f"claude exited {proc.returncode}: {stderr.decode('utf-8', 'replace')[:500]}"
f"claude exited {proc2.returncode} after retry: "
f"{stderr.decode('utf-8', 'replace')[:500]}"
)
else:
raise ClaudeError(
f"claude exited {proc.returncode}: {stderr_text[:500]}"
)
raw_text = stdout.decode("utf-8", "replace").strip()