fix(tools): prune acp_command from delegate_task schema when no ACP CLI is on PATH

Defense-in-depth follow-up to the runtime guard added in the previous commit.
Models on headless hosts (Railway / Fly / Docker / fresh VPS) without any ACP
CLI installed occasionally hallucinate ``acp_command="copilot"`` from the
schema description, despite the explicit "Do NOT set" instruction. The runtime
guard prevented the crash but the model still wasted a tool turn and got an
opaque silent fallback.

This commit removes the temptation at its source: ``_build_dynamic_schema_overrides``
now strips ``acp_command`` and ``acp_args`` from both the top-level and per-task
schemas when none of the known ACP CLIs (``copilot``, ``claude``, ``codex``) are
detectable on PATH. The model literally never sees the fields, so it cannot
pass them.

The runtime guard from the previous commit stays in place as defense-in-depth
for internal callers, tests, and any future code path that bypasses the schema.

``_acp_binary_available`` is intentionally NOT cached: ``shutil.which`` is
cheap, and avoiding the cache means the schema reacts to mid-session installs
without requiring a process restart.

Tests:
- ``test_schema_prunes_acp_command_when_no_acp_binary``
- ``test_schema_keeps_acp_command_when_binary_available``
- ``test_acp_binary_available_checks_known_clis``

Full ``test_delegate.py`` suite: 136/136 pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
nikshepsvn 2026-05-17 17:53:41 +05:30 committed by Teknium
parent 2e0b591076
commit d82a69b624
2 changed files with 106 additions and 0 deletions

View file

@ -602,6 +602,69 @@ class TestToolNamePreservation(unittest.TestCase):
self.assertEqual(captured["provider"], "copilot-acp")
self.assertEqual(captured["acp_command"], "copilot")
def test_schema_prunes_acp_command_when_no_acp_binary(self):
"""Schema-level defense: delegate_task tool schema must NOT advertise
acp_command / acp_args to the model when no ACP binary is installed.
Headless deploys (Railway / Fly / Docker / fresh VPS) typically have
none of copilot / claude / codex. Without the schema prune, models
occasionally hallucinate ``acp_command="copilot"`` from the field's
description and crash subagent runs.
"""
from tools.delegate_tool import _build_dynamic_schema_overrides
with patch("tools.delegate_tool._acp_binary_available", return_value=False):
overrides = _build_dynamic_schema_overrides()
props = overrides["parameters"]["properties"]
self.assertNotIn("acp_command", props, "top-level acp_command must be pruned")
self.assertNotIn("acp_args", props, "top-level acp_args must be pruned")
task_item_props = props["tasks"]["items"]["properties"]
self.assertNotIn(
"acp_command", task_item_props, "per-task acp_command must be pruned"
)
self.assertNotIn(
"acp_args", task_item_props, "per-task acp_args must be pruned"
)
def test_schema_keeps_acp_command_when_binary_available(self):
"""Backward compat: when an ACP CLI IS on PATH, schema is unchanged.
Users with working ACP setups must still be able to invoke it.
"""
from tools.delegate_tool import _build_dynamic_schema_overrides
with patch("tools.delegate_tool._acp_binary_available", return_value=True):
overrides = _build_dynamic_schema_overrides()
props = overrides["parameters"]["properties"]
self.assertIn("acp_command", props)
self.assertIn("acp_args", props)
task_item_props = props["tasks"]["items"]["properties"]
self.assertIn("acp_command", task_item_props)
self.assertIn("acp_args", task_item_props)
def test_acp_binary_available_checks_known_clis(self):
"""_acp_binary_available must check the known ACP CLI names via
shutil.which guards against typos or accidental list trimming.
"""
from tools.delegate_tool import _KNOWN_ACP_BINARIES, _acp_binary_available
self.assertIn("copilot", _KNOWN_ACP_BINARIES)
calls = []
def fake_which(name):
calls.append(name)
return None
with patch("shutil.which", side_effect=fake_which):
self.assertFalse(_acp_binary_available())
for name in _KNOWN_ACP_BINARIES:
self.assertIn(name, calls)
def test_saved_tool_names_set_on_child_before_run(self):
"""_run_single_child must set _delegate_saved_tool_names on the child
from model_tools._last_resolved_tool_names before run_conversation."""

View file

@ -3271,6 +3271,30 @@ def _build_role_param_description() -> str:
)
# Known ACP-compatible CLIs that delegate_task can shell out to. Kept
# narrow on purpose: only the ones agent/copilot_acp_client.py and friends
# actually understand. Add new entries here when a new ACP CLI ships.
_KNOWN_ACP_BINARIES: tuple[str, ...] = ("copilot", "claude", "codex")
def _acp_binary_available() -> bool:
"""True iff at least one known ACP CLI is on PATH.
Used to gate inclusion of ``acp_command`` / ``acp_args`` in the
delegate_task schema. On headless hosts (Railway / Fly / Docker /
fresh VPS) without any of these binaries, exposing the fields invites
the model to hallucinate ``acp_command="copilot"`` from the schema's
description, which used to crash subagent runs and take the gateway
down. Pruning the fields from the schema removes the temptation.
Not cached: ``shutil.which`` is cheap and we want the schema to react
to mid-session installs without forcing a process restart.
"""
import shutil as _shutil
return any(_shutil.which(name) for name in _KNOWN_ACP_BINARIES)
def _build_dynamic_schema_overrides() -> dict:
"""Return per-call schema overrides reflecting current config.
@ -3287,6 +3311,25 @@ def _build_dynamic_schema_overrides() -> dict:
}
overrides_params["properties"]["tasks"]["description"] = _build_tasks_param_description()
overrides_params["properties"]["role"]["description"] = _build_role_param_description()
# Prune ACP overrides from the schema when no known ACP CLI is on PATH.
# The runtime guard in _build_child_agent remains as defense-in-depth for
# internal callers / tests / future code paths that skip the schema layer.
if not _acp_binary_available():
overrides_params["properties"].pop("acp_command", None)
overrides_params["properties"].pop("acp_args", None)
tasks_schema = dict(overrides_params["properties"].get("tasks", {}))
if "items" in tasks_schema:
items = dict(tasks_schema["items"])
if "properties" in items:
items["properties"] = {
k: v
for k, v in items["properties"].items()
if k not in ("acp_command", "acp_args")
}
tasks_schema["items"] = items
overrides_params["properties"]["tasks"] = tasks_schema
return {
"description": _build_top_level_description(),
"parameters": overrides_params,