mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-04 12:33:08 +00:00
fix(tools): prune acp_command from delegate_task schema when no ACP CLI is on PATH
Defense-in-depth follow-up to the runtime guard added in the previous commit. Models on headless hosts (Railway / Fly / Docker / fresh VPS) without any ACP CLI installed occasionally hallucinate ``acp_command="copilot"`` from the schema description, despite the explicit "Do NOT set" instruction. The runtime guard prevented the crash but the model still wasted a tool turn and got an opaque silent fallback. This commit removes the temptation at its source: ``_build_dynamic_schema_overrides`` now strips ``acp_command`` and ``acp_args`` from both the top-level and per-task schemas when none of the known ACP CLIs (``copilot``, ``claude``, ``codex``) are detectable on PATH. The model literally never sees the fields, so it cannot pass them. The runtime guard from the previous commit stays in place as defense-in-depth for internal callers, tests, and any future code path that bypasses the schema. ``_acp_binary_available`` is intentionally NOT cached: ``shutil.which`` is cheap, and avoiding the cache means the schema reacts to mid-session installs without requiring a process restart. Tests: - ``test_schema_prunes_acp_command_when_no_acp_binary`` - ``test_schema_keeps_acp_command_when_binary_available`` - ``test_acp_binary_available_checks_known_clis`` Full ``test_delegate.py`` suite: 136/136 pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2e0b591076
commit
d82a69b624
2 changed files with 106 additions and 0 deletions
|
|
@ -602,6 +602,69 @@ class TestToolNamePreservation(unittest.TestCase):
|
|||
self.assertEqual(captured["provider"], "copilot-acp")
|
||||
self.assertEqual(captured["acp_command"], "copilot")
|
||||
|
||||
def test_schema_prunes_acp_command_when_no_acp_binary(self):
|
||||
"""Schema-level defense: delegate_task tool schema must NOT advertise
|
||||
acp_command / acp_args to the model when no ACP binary is installed.
|
||||
|
||||
Headless deploys (Railway / Fly / Docker / fresh VPS) typically have
|
||||
none of copilot / claude / codex. Without the schema prune, models
|
||||
occasionally hallucinate ``acp_command="copilot"`` from the field's
|
||||
description and crash subagent runs.
|
||||
"""
|
||||
from tools.delegate_tool import _build_dynamic_schema_overrides
|
||||
|
||||
with patch("tools.delegate_tool._acp_binary_available", return_value=False):
|
||||
overrides = _build_dynamic_schema_overrides()
|
||||
|
||||
props = overrides["parameters"]["properties"]
|
||||
self.assertNotIn("acp_command", props, "top-level acp_command must be pruned")
|
||||
self.assertNotIn("acp_args", props, "top-level acp_args must be pruned")
|
||||
|
||||
task_item_props = props["tasks"]["items"]["properties"]
|
||||
self.assertNotIn(
|
||||
"acp_command", task_item_props, "per-task acp_command must be pruned"
|
||||
)
|
||||
self.assertNotIn(
|
||||
"acp_args", task_item_props, "per-task acp_args must be pruned"
|
||||
)
|
||||
|
||||
def test_schema_keeps_acp_command_when_binary_available(self):
|
||||
"""Backward compat: when an ACP CLI IS on PATH, schema is unchanged.
|
||||
Users with working ACP setups must still be able to invoke it.
|
||||
"""
|
||||
from tools.delegate_tool import _build_dynamic_schema_overrides
|
||||
|
||||
with patch("tools.delegate_tool._acp_binary_available", return_value=True):
|
||||
overrides = _build_dynamic_schema_overrides()
|
||||
|
||||
props = overrides["parameters"]["properties"]
|
||||
self.assertIn("acp_command", props)
|
||||
self.assertIn("acp_args", props)
|
||||
|
||||
task_item_props = props["tasks"]["items"]["properties"]
|
||||
self.assertIn("acp_command", task_item_props)
|
||||
self.assertIn("acp_args", task_item_props)
|
||||
|
||||
def test_acp_binary_available_checks_known_clis(self):
|
||||
"""_acp_binary_available must check the known ACP CLI names via
|
||||
shutil.which — guards against typos or accidental list trimming.
|
||||
"""
|
||||
from tools.delegate_tool import _KNOWN_ACP_BINARIES, _acp_binary_available
|
||||
|
||||
self.assertIn("copilot", _KNOWN_ACP_BINARIES)
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_which(name):
|
||||
calls.append(name)
|
||||
return None
|
||||
|
||||
with patch("shutil.which", side_effect=fake_which):
|
||||
self.assertFalse(_acp_binary_available())
|
||||
|
||||
for name in _KNOWN_ACP_BINARIES:
|
||||
self.assertIn(name, calls)
|
||||
|
||||
def test_saved_tool_names_set_on_child_before_run(self):
|
||||
"""_run_single_child must set _delegate_saved_tool_names on the child
|
||||
from model_tools._last_resolved_tool_names before run_conversation."""
|
||||
|
|
|
|||
|
|
@ -3271,6 +3271,30 @@ def _build_role_param_description() -> str:
|
|||
)
|
||||
|
||||
|
||||
# Known ACP-compatible CLIs that delegate_task can shell out to. Kept
|
||||
# narrow on purpose: only the ones agent/copilot_acp_client.py and friends
|
||||
# actually understand. Add new entries here when a new ACP CLI ships.
|
||||
_KNOWN_ACP_BINARIES: tuple[str, ...] = ("copilot", "claude", "codex")
|
||||
|
||||
|
||||
def _acp_binary_available() -> bool:
|
||||
"""True iff at least one known ACP CLI is on PATH.
|
||||
|
||||
Used to gate inclusion of ``acp_command`` / ``acp_args`` in the
|
||||
delegate_task schema. On headless hosts (Railway / Fly / Docker /
|
||||
fresh VPS) without any of these binaries, exposing the fields invites
|
||||
the model to hallucinate ``acp_command="copilot"`` from the schema's
|
||||
description, which used to crash subagent runs and take the gateway
|
||||
down. Pruning the fields from the schema removes the temptation.
|
||||
|
||||
Not cached: ``shutil.which`` is cheap and we want the schema to react
|
||||
to mid-session installs without forcing a process restart.
|
||||
"""
|
||||
import shutil as _shutil
|
||||
|
||||
return any(_shutil.which(name) for name in _KNOWN_ACP_BINARIES)
|
||||
|
||||
|
||||
def _build_dynamic_schema_overrides() -> dict:
|
||||
"""Return per-call schema overrides reflecting current config.
|
||||
|
||||
|
|
@ -3287,6 +3311,25 @@ def _build_dynamic_schema_overrides() -> dict:
|
|||
}
|
||||
overrides_params["properties"]["tasks"]["description"] = _build_tasks_param_description()
|
||||
overrides_params["properties"]["role"]["description"] = _build_role_param_description()
|
||||
|
||||
# Prune ACP overrides from the schema when no known ACP CLI is on PATH.
|
||||
# The runtime guard in _build_child_agent remains as defense-in-depth for
|
||||
# internal callers / tests / future code paths that skip the schema layer.
|
||||
if not _acp_binary_available():
|
||||
overrides_params["properties"].pop("acp_command", None)
|
||||
overrides_params["properties"].pop("acp_args", None)
|
||||
tasks_schema = dict(overrides_params["properties"].get("tasks", {}))
|
||||
if "items" in tasks_schema:
|
||||
items = dict(tasks_schema["items"])
|
||||
if "properties" in items:
|
||||
items["properties"] = {
|
||||
k: v
|
||||
for k, v in items["properties"].items()
|
||||
if k not in ("acp_command", "acp_args")
|
||||
}
|
||||
tasks_schema["items"] = items
|
||||
overrides_params["properties"]["tasks"] = tasks_schema
|
||||
|
||||
return {
|
||||
"description": _build_top_level_description(),
|
||||
"parameters": overrides_params,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue