From d82a69b624b97948207182df38c9a25bbd21f3b8 Mon Sep 17 00:00:00 2001 From: nikshepsvn Date: Sun, 17 May 2026 17:53:41 +0530 Subject: [PATCH] fix(tools): prune acp_command from delegate_task schema when no ACP CLI is on PATH Defense-in-depth follow-up to the runtime guard added in the previous commit. Models on headless hosts (Railway / Fly / Docker / fresh VPS) without any ACP CLI installed occasionally hallucinate ``acp_command="copilot"`` from the schema description, despite the explicit "Do NOT set" instruction. The runtime guard prevented the crash but the model still wasted a tool turn and got an opaque silent fallback. This commit removes the temptation at its source: ``_build_dynamic_schema_overrides`` now strips ``acp_command`` and ``acp_args`` from both the top-level and per-task schemas when none of the known ACP CLIs (``copilot``, ``claude``, ``codex``) are detectable on PATH. The model literally never sees the fields, so it cannot pass them. The runtime guard from the previous commit stays in place as defense-in-depth for internal callers, tests, and any future code path that bypasses the schema. ``_acp_binary_available`` is intentionally NOT cached: ``shutil.which`` is cheap, and avoiding the cache means the schema reacts to mid-session installs without requiring a process restart. Tests: - ``test_schema_prunes_acp_command_when_no_acp_binary`` - ``test_schema_keeps_acp_command_when_binary_available`` - ``test_acp_binary_available_checks_known_clis`` Full ``test_delegate.py`` suite: 136/136 pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/tools/test_delegate.py | 63 ++++++++++++++++++++++++++++++++++++ tools/delegate_tool.py | 43 ++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 6231e03030a..0fa8a965cb6 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -602,6 +602,69 @@ class TestToolNamePreservation(unittest.TestCase): self.assertEqual(captured["provider"], "copilot-acp") self.assertEqual(captured["acp_command"], "copilot") + def test_schema_prunes_acp_command_when_no_acp_binary(self): + """Schema-level defense: delegate_task tool schema must NOT advertise + acp_command / acp_args to the model when no ACP binary is installed. + + Headless deploys (Railway / Fly / Docker / fresh VPS) typically have + none of copilot / claude / codex. Without the schema prune, models + occasionally hallucinate ``acp_command="copilot"`` from the field's + description and crash subagent runs. + """ + from tools.delegate_tool import _build_dynamic_schema_overrides + + with patch("tools.delegate_tool._acp_binary_available", return_value=False): + overrides = _build_dynamic_schema_overrides() + + props = overrides["parameters"]["properties"] + self.assertNotIn("acp_command", props, "top-level acp_command must be pruned") + self.assertNotIn("acp_args", props, "top-level acp_args must be pruned") + + task_item_props = props["tasks"]["items"]["properties"] + self.assertNotIn( + "acp_command", task_item_props, "per-task acp_command must be pruned" + ) + self.assertNotIn( + "acp_args", task_item_props, "per-task acp_args must be pruned" + ) + + def test_schema_keeps_acp_command_when_binary_available(self): + """Backward compat: when an ACP CLI IS on PATH, schema is unchanged. + Users with working ACP setups must still be able to invoke it. + """ + from tools.delegate_tool import _build_dynamic_schema_overrides + + with patch("tools.delegate_tool._acp_binary_available", return_value=True): + overrides = _build_dynamic_schema_overrides() + + props = overrides["parameters"]["properties"] + self.assertIn("acp_command", props) + self.assertIn("acp_args", props) + + task_item_props = props["tasks"]["items"]["properties"] + self.assertIn("acp_command", task_item_props) + self.assertIn("acp_args", task_item_props) + + def test_acp_binary_available_checks_known_clis(self): + """_acp_binary_available must check the known ACP CLI names via + shutil.which — guards against typos or accidental list trimming. + """ + from tools.delegate_tool import _KNOWN_ACP_BINARIES, _acp_binary_available + + self.assertIn("copilot", _KNOWN_ACP_BINARIES) + + calls = [] + + def fake_which(name): + calls.append(name) + return None + + with patch("shutil.which", side_effect=fake_which): + self.assertFalse(_acp_binary_available()) + + for name in _KNOWN_ACP_BINARIES: + self.assertIn(name, calls) + def test_saved_tool_names_set_on_child_before_run(self): """_run_single_child must set _delegate_saved_tool_names on the child from model_tools._last_resolved_tool_names before run_conversation.""" diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 72559b1a52f..8a5a060fd48 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -3271,6 +3271,30 @@ def _build_role_param_description() -> str: ) +# Known ACP-compatible CLIs that delegate_task can shell out to. Kept +# narrow on purpose: only the ones agent/copilot_acp_client.py and friends +# actually understand. Add new entries here when a new ACP CLI ships. +_KNOWN_ACP_BINARIES: tuple[str, ...] = ("copilot", "claude", "codex") + + +def _acp_binary_available() -> bool: + """True iff at least one known ACP CLI is on PATH. + + Used to gate inclusion of ``acp_command`` / ``acp_args`` in the + delegate_task schema. On headless hosts (Railway / Fly / Docker / + fresh VPS) without any of these binaries, exposing the fields invites + the model to hallucinate ``acp_command="copilot"`` from the schema's + description, which used to crash subagent runs and take the gateway + down. Pruning the fields from the schema removes the temptation. + + Not cached: ``shutil.which`` is cheap and we want the schema to react + to mid-session installs without forcing a process restart. + """ + import shutil as _shutil + + return any(_shutil.which(name) for name in _KNOWN_ACP_BINARIES) + + def _build_dynamic_schema_overrides() -> dict: """Return per-call schema overrides reflecting current config. @@ -3287,6 +3311,25 @@ def _build_dynamic_schema_overrides() -> dict: } overrides_params["properties"]["tasks"]["description"] = _build_tasks_param_description() overrides_params["properties"]["role"]["description"] = _build_role_param_description() + + # Prune ACP overrides from the schema when no known ACP CLI is on PATH. + # The runtime guard in _build_child_agent remains as defense-in-depth for + # internal callers / tests / future code paths that skip the schema layer. + if not _acp_binary_available(): + overrides_params["properties"].pop("acp_command", None) + overrides_params["properties"].pop("acp_args", None) + tasks_schema = dict(overrides_params["properties"].get("tasks", {})) + if "items" in tasks_schema: + items = dict(tasks_schema["items"]) + if "properties" in items: + items["properties"] = { + k: v + for k, v in items["properties"].items() + if k not in ("acp_command", "acp_args") + } + tasks_schema["items"] = items + overrides_params["properties"]["tasks"] = tasks_schema + return { "description": _build_top_level_description(), "parameters": overrides_params,