mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: make tool-use enforcement configurable via agent.tool_use_enforcement (#3551)
The TOOL_USE_ENFORCEMENT_GUIDANCE injection (added in #3528) was hardcoded to match only gpt/codex model names. This commit makes it a config option so users can turn it on for any model family. New config key: agent.tool_use_enforcement - "auto" (default): matches gpt/codex (existing behavior) - true: inject for all models - false: never inject - list of strings: custom model-name substrings to match, e.g. ["gpt", "codex", "deepseek", "qwen"]. No version bump needed — the deep merge provides the default automatically for existing installs. 12 new tests covering all config modes.
This commit is contained in:
parent
d26ee20659
commit
901494d728
3 changed files with 160 additions and 7 deletions
|
|
@@ -138,6 +138,12 @@ DEFAULT_CONFIG = {
|
|||
"toolsets": ["hermes-cli"],
|
||||
"agent": {
|
||||
"max_turns": 90,
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
# Values: "auto" (default — applies to gpt/codex models), true/false
|
||||
# (force on/off for all models), or a list of model-name substrings
|
||||
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
|
||||
"tool_use_enforcement": "auto",
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
|
|
|
|||
35
run_agent.py
35
run_agent.py
|
|
@@ -1080,6 +1080,13 @@ class AIAgent:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Tool-use enforcement config: "auto" (default — matches hardcoded
|
||||
# model list), true (always), false (never), or list of substrings.
|
||||
_agent_section = _agent_cfg.get("agent", {})
|
||||
if not isinstance(_agent_section, dict):
|
||||
_agent_section = {}
|
||||
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# Initialize context compressor for automatic context management
|
||||
# Compresses conversation when approaching model's context limit
|
||||
# Configuration via config.yaml (compression section)
|
||||
|
|
@@ -2510,14 +2517,28 @@ class AIAgent:
|
|||
if tool_guidance:
|
||||
prompt_parts.append(" ".join(tool_guidance))
|
||||
|
||||
# Some model families benefit from explicit tool-use enforcement.
|
||||
# Without this, they tend to describe intended actions as text
|
||||
# ("I will run the tests") instead of actually making tool calls.
|
||||
# TOOL_USE_ENFORCEMENT_MODELS is a tuple of substrings to match.
|
||||
# Inject only when the model has tools available.
|
||||
# Tool-use enforcement: tells the model to actually call tools instead
|
||||
# of describing intended actions. Controlled by config.yaml
|
||||
# agent.tool_use_enforcement:
|
||||
# "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
|
||||
# true — always inject (all models)
|
||||
# false — never inject
|
||||
# list — custom model-name substrings to match
|
||||
if self.valid_tool_names:
|
||||
model_lower = (self.model or "").lower()
|
||||
if any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS):
|
||||
_enforce = self._tool_use_enforcement
|
||||
_inject = False
|
||||
if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")):
|
||||
_inject = True
|
||||
elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")):
|
||||
_inject = False
|
||||
elif isinstance(_enforce, list):
|
||||
model_lower = (self.model or "").lower()
|
||||
_inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
|
||||
else:
|
||||
# "auto" or any unrecognised value — use hardcoded defaults
|
||||
model_lower = (self.model or "").lower()
|
||||
_inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
|
||||
if _inject:
|
||||
prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
|
||||
|
||||
# Honcho CLI awareness: tell Hermes about its own management commands
|
||||
|
|
|
|||
|
|
@@ -617,6 +617,132 @@ class TestBuildSystemPrompt:
|
|||
assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}
|
||||
|
||||
|
||||
class TestToolUseEnforcementConfig:
|
||||
"""Tests for the agent.tool_use_enforcement config option."""
|
||||
|
||||
def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
|
||||
"""Create an agent with tools and a specific enforcement config."""
|
||||
with (
|
||||
patch(
|
||||
"run_agent.get_tool_definitions",
|
||||
return_value=_make_tool_defs("terminal", "web_search"),
|
||||
),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("run_agent.OpenAI"),
|
||||
patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}},
|
||||
),
|
||||
):
|
||||
a = AIAgent(
|
||||
model=model,
|
||||
api_key="test-key-1234567890",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
a.client = MagicMock()
|
||||
return a
|
||||
|
||||
def test_auto_injects_for_gpt(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_auto_injects_for_codex(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="openai/codex-mini", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_auto_skips_for_claude(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="auto")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
def test_true_forces_for_all_models(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_string_true_forces_for_all_models(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="true")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_always_forces_for_all_models(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="always")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_false_disables_for_gpt(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement=False)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
def test_string_false_disables(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="off")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
def test_custom_list_matches(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(
|
||||
model="deepseek/deepseek-r1",
|
||||
tool_use_enforcement=["deepseek", "gemini"],
|
||||
)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_custom_list_no_match(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(
|
||||
model="anthropic/claude-sonnet-4",
|
||||
tool_use_enforcement=["deepseek", "gemini"],
|
||||
)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
def test_custom_list_case_insensitive(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(
|
||||
model="openai/GPT-4.1",
|
||||
tool_use_enforcement=["GPT", "Codex"],
|
||||
)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
|
||||
|
||||
def test_no_tools_never_injects(self):
|
||||
"""Even with enforcement=true, no injection when agent has no tools."""
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
with (
|
||||
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("run_agent.OpenAI"),
|
||||
patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": {"tool_use_enforcement": True}},
|
||||
),
|
||||
):
|
||||
a = AIAgent(
|
||||
api_key="test-key-1234567890",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
enabled_toolsets=[],
|
||||
)
|
||||
a.client = MagicMock()
|
||||
prompt = a._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
|
||||
class TestInvalidateSystemPrompt:
|
||||
def test_clears_cache(self, agent):
|
||||
agent._cached_system_prompt = "cached value"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue