fix(agent): block Agent + Gmail/Drive/Calendar tools, brief model not to probe
Two complementary changes after observing the model probe boundaries (it tried mcp__claude_ai_Gmail__search_threads, then tried to delegate to a subagent via the Agent tool, then suggested the user edit settings.local.json to add Gmail tools):

1. claude_wrapper.py adds to --disallowed-tools:
   - Agent (subagent spawning — should never delegate)
   - WebFetch (already blocked; settings.json re-allows acpedia.org only)
   - Every Gmail/Calendar/Drive connector tool name we know about

2. CLAUDE.md adds a 'Non-negotiable scope rules' section:
   - Be a read-only game-state QA service, nothing else
   - Don't attempt tools outside your role
   - Don't explain how to bypass restrictions
   - Don't suggest settings.json edits
   - Don't enumerate hidden tools when asked

Soft (system-prompt) + hard (CLI flag) defenses combined.
This commit is contained in:
parent
e780f249d1
commit
0633865598
2 changed files with 34 additions and 0 deletions
|
|
@@ -110,6 +110,10 @@ async def ask_claude(message: str, session_id: str) -> ClaudeResult:
|
|||
"NotebookEdit",
|
||||
# Network built-ins
|
||||
"WebSearch",
|
||||
"WebFetch", # blocked here; settings.json re-allows acpedia.org
|
||||
# Subagent spawning — the assistant must NEVER delegate to a
|
||||
# general-purpose subagent (which would have its own tool set).
|
||||
"Agent",
|
||||
# Tool / session meta-tools — these can list, load, or chain
|
||||
# into other tools and must NOT be reachable.
|
||||
"ToolSearch",
|
||||
|
|
@@ -132,6 +136,24 @@ async def ask_claude(message: str, session_id: str) -> ClaudeResult:
|
|||
"CronDelete",
|
||||
"ScheduleWakeup",
|
||||
"RemoteTrigger",
|
||||
# Anthropic first-party connectors from the user's claude.ai
|
||||
# account. These are off-mission for an Overlord assistant and
|
||||
# would leak personal data outside the game-state domain.
|
||||
"mcp__claude_ai_Gmail__create_draft",
|
||||
"mcp__claude_ai_Gmail__create_label",
|
||||
"mcp__claude_ai_Gmail__get_message",
|
||||
"mcp__claude_ai_Gmail__get_thread",
|
||||
"mcp__claude_ai_Gmail__list_drafts",
|
||||
"mcp__claude_ai_Gmail__list_labels",
|
||||
"mcp__claude_ai_Gmail__label_message",
|
||||
"mcp__claude_ai_Gmail__label_thread",
|
||||
"mcp__claude_ai_Gmail__search_messages",
|
||||
"mcp__claude_ai_Gmail__search_threads",
|
||||
"mcp__claude_ai_Gmail__send_message",
|
||||
"mcp__claude_ai_Gmail__unlabel_message",
|
||||
"mcp__claude_ai_Gmail__unlabel_thread",
|
||||
"mcp__claude_ai_Google_Calendar__authenticate",
|
||||
"mcp__claude_ai_Google_Drive__authenticate",
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue