mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-30 01:41:43 +00:00
Merge branch 'main' into rewbs/tool-use-charge-to-subscription
This commit is contained in:
commit
6e4598ce1e
269 changed files with 33678 additions and 2273 deletions
|
|
@ -589,6 +589,164 @@ class TestBuildSystemPrompt:
|
|||
prompt = agent._build_system_prompt()
|
||||
assert "NOUS SUBSCRIPTION BLOCK" in prompt
|
||||
|
||||
def test_skills_prompt_derives_available_toolsets_from_loaded_tools(self):
    """The skills prompt receives tools and toolsets derived from loaded tools.

    ``run_agent.check_toolset_requirements`` is patched to raise, so the
    test fails if agent construction or prompt building calls it while the
    patches are active.
    """
    # Insertion order matters only for readability; it mirrors the order in
    # which the tool definitions are created.
    tool_to_toolset = {"web_search": "web"}
    tool_to_toolset.update(
        dict.fromkeys(("skills_list", "skill_view", "skill_manage"), "skills")
    )
    loaded_tools = _make_tool_defs(*tool_to_toolset)
    recheck_guard = AssertionError("should not re-check toolset requirements")

    with (
        patch("run_agent.get_tool_definitions", return_value=loaded_tools),
        patch("run_agent.check_toolset_requirements", side_effect=recheck_guard),
        patch(
            "run_agent.get_toolset_for_tool",
            create=True,
            side_effect=tool_to_toolset.get,
        ),
        patch(
            "run_agent.build_skills_system_prompt",
            return_value="SKILLS_PROMPT",
        ) as skills_builder,
        patch("run_agent.OpenAI"),
    ):
        agent = AIAgent(
            api_key="test-k...7890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )

        # Built while the patches are active so the mocked skills builder is
        # the one that gets called.
        prompt = agent._build_system_prompt()

    assert "SKILLS_PROMPT" in prompt
    skills_kwargs = skills_builder.call_args.kwargs
    assert skills_kwargs["available_tools"] == set(tool_to_toolset)
    assert skills_kwargs["available_toolsets"] == {"web", "skills"}
|
||||
|
||||
|
||||
class TestToolUseEnforcementConfig:
    """Exercise the ``agent.tool_use_enforcement`` config option.

    Each test builds an agent whose patched config carries one enforcement
    value and checks whether ``TOOL_USE_ENFORCEMENT_GUIDANCE`` appears in the
    system prompt the agent builds.
    """

    def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
        """Return an agent with two tools and the given enforcement setting."""
        tool_defs = _make_tool_defs("terminal", "web_search")
        config = {"agent": {"tool_use_enforcement": tool_use_enforcement}}
        with (
            patch("run_agent.get_tool_definitions", return_value=tool_defs),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch("hermes_cli.config.load_config", return_value=config),
        ):
            built = AIAgent(
                model=model,
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
        built.client = MagicMock()
        return built

    def _guidance_and_prompt(self, **agent_kwargs):
        """Return ``(guidance, prompt)`` for an agent built by ``_make_agent``.

        The guidance constant is imported before the agent is created, which
        keeps the import/build/prompt order used by every test in this class.
        """
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        built = self._make_agent(**agent_kwargs)
        return TOOL_USE_ENFORCEMENT_GUIDANCE, built._build_system_prompt()

    def test_auto_injects_for_gpt(self):
        guidance, prompt = self._guidance_and_prompt(
            model="openai/gpt-4.1", tool_use_enforcement="auto"
        )
        assert guidance in prompt

    def test_auto_injects_for_codex(self):
        guidance, prompt = self._guidance_and_prompt(
            model="openai/codex-mini", tool_use_enforcement="auto"
        )
        assert guidance in prompt

    def test_auto_skips_for_claude(self):
        guidance, prompt = self._guidance_and_prompt(
            model="anthropic/claude-sonnet-4", tool_use_enforcement="auto"
        )
        assert guidance not in prompt

    def test_true_forces_for_all_models(self):
        guidance, prompt = self._guidance_and_prompt(
            model="anthropic/claude-sonnet-4", tool_use_enforcement=True
        )
        assert guidance in prompt

    def test_string_true_forces_for_all_models(self):
        guidance, prompt = self._guidance_and_prompt(
            model="anthropic/claude-sonnet-4", tool_use_enforcement="true"
        )
        assert guidance in prompt

    def test_always_forces_for_all_models(self):
        guidance, prompt = self._guidance_and_prompt(
            model="deepseek/deepseek-r1", tool_use_enforcement="always"
        )
        assert guidance in prompt

    def test_false_disables_for_gpt(self):
        guidance, prompt = self._guidance_and_prompt(
            model="openai/gpt-4.1", tool_use_enforcement=False
        )
        assert guidance not in prompt

    def test_string_false_disables(self):
        # The string value exercised here is "off".
        guidance, prompt = self._guidance_and_prompt(
            model="openai/gpt-4.1", tool_use_enforcement="off"
        )
        assert guidance not in prompt

    def test_custom_list_matches(self):
        guidance, prompt = self._guidance_and_prompt(
            model="deepseek/deepseek-r1",
            tool_use_enforcement=["deepseek", "gemini"],
        )
        assert guidance in prompt

    def test_custom_list_no_match(self):
        guidance, prompt = self._guidance_and_prompt(
            model="anthropic/claude-sonnet-4",
            tool_use_enforcement=["deepseek", "gemini"],
        )
        assert guidance not in prompt

    def test_custom_list_case_insensitive(self):
        guidance, prompt = self._guidance_and_prompt(
            model="openai/GPT-4.1",
            tool_use_enforcement=["GPT", "Codex"],
        )
        assert guidance in prompt

    def test_no_tools_never_injects(self):
        """With enforcement set to True, an agent without tools gets no guidance."""
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

        forced_config = {"agent": {"tool_use_enforcement": True}}
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch("hermes_cli.config.load_config", return_value=forced_config),
        ):
            toolless = AIAgent(
                api_key="test-key-1234567890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
                enabled_toolsets=[],
            )
            toolless.client = MagicMock()
            # NOTE(review): the prompt is built while the patches are still
            # active; confirm this matches the upstream file's nesting.
            prompt = toolless._build_system_prompt()

        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
|
||||
class TestInvalidateSystemPrompt:
|
||||
def test_clears_cache(self, agent):
|
||||
|
|
@ -610,7 +768,7 @@ class TestBuildApiKwargs:
|
|||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["model"] == agent.model
|
||||
assert kwargs["messages"] is messages
|
||||
assert kwargs["timeout"] == 900.0
|
||||
assert kwargs["timeout"] == 1800.0
|
||||
|
||||
def test_provider_preferences_injected(self, agent):
|
||||
agent.providers_allowed = ["Anthropic"]
|
||||
|
|
@ -1345,19 +1503,11 @@ class TestRunConversation:
|
|||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("first_content", "second_content", "expected_final"),
|
||||
[
|
||||
("Part 1 ", "Part 2", "Part 1 Part 2"),
|
||||
("<think>internal reasoning</think>", "Recovered final answer", "Recovered final answer"),
|
||||
],
|
||||
)
|
||||
def test_length_finish_reason_requests_continuation(
|
||||
self, agent, first_content, second_content, expected_final
|
||||
):
|
||||
def test_length_finish_reason_requests_continuation(self, agent):
|
||||
"""Normal truncation (partial real content) triggers continuation."""
|
||||
self._setup_agent(agent)
|
||||
first = _mock_response(content=first_content, finish_reason="length")
|
||||
second = _mock_response(content=second_content, finish_reason="stop")
|
||||
first = _mock_response(content="Part 1 ", finish_reason="length")
|
||||
second = _mock_response(content="Part 2", finish_reason="stop")
|
||||
agent.client.chat.completions.create.side_effect = [first, second]
|
||||
|
||||
with (
|
||||
|
|
@ -1369,12 +1519,58 @@ class TestRunConversation:
|
|||
|
||||
assert result["completed"] is True
|
||||
assert result["api_calls"] == 2
|
||||
assert result["final_response"] == expected_final
|
||||
assert result["final_response"] == "Part 1 Part 2"
|
||||
|
||||
second_call_messages = agent.client.chat.completions.create.call_args_list[1].kwargs["messages"]
|
||||
assert second_call_messages[-1]["role"] == "user"
|
||||
assert "truncated by the output length limit" in second_call_messages[-1]["content"]
|
||||
|
||||
def test_length_thinking_exhausted_skips_continuation(self, agent):
    """A length-truncated reply holding only a think block is not continued."""
    self._setup_agent(agent)
    agent.client.chat.completions.create.return_value = _mock_response(
        content="<think>internal reasoning</think>",
        finish_reason="length",
    )

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hello")

    # Exactly one API call: the run stopped without asking for a continuation.
    assert outcome["completed"] is False
    assert outcome["api_calls"] == 1

    error_text = outcome["error"].lower()
    assert "reasoning" in error_text
    assert "output tokens" in error_text

    # The caller still gets a readable message instead of None.
    reply = outcome["final_response"]
    assert reply is not None
    assert "Thinking Budget Exhausted" in reply
    assert "/thinkon" in reply
|
||||
|
||||
def test_length_empty_content_detected_as_thinking_exhausted(self, agent):
    """A length-truncated reply with ``content=None`` is reported as exhaustion."""
    self._setup_agent(agent)
    empty_reply = _mock_response(content=None, finish_reason="length")
    agent.client.chat.completions.create.return_value = empty_reply

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hello")

    assert outcome["completed"] is False
    assert outcome["api_calls"] == 1
    assert "reasoning" in outcome["error"].lower()

    # A readable message is returned rather than None.
    reply = outcome["final_response"]
    assert reply is not None
    assert "Thinking Budget Exhausted" in reply
|
||||
|
||||
|
||||
class TestRetryExhaustion:
|
||||
"""Regression: retry_count > max_retries was dead code (off-by-one).
|
||||
|
|
@ -2316,6 +2512,8 @@ class TestFallbackAnthropicProvider:
|
|||
def test_fallback_to_anthropic_sets_api_mode(self, agent):
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}
|
||||
agent._fallback_chain = [agent._fallback_model]
|
||||
agent._fallback_index = 0
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.base_url = "https://api.anthropic.com/v1"
|
||||
|
|
@ -2337,6 +2535,8 @@ class TestFallbackAnthropicProvider:
|
|||
def test_fallback_to_anthropic_enables_prompt_caching(self, agent):
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"}
|
||||
agent._fallback_chain = [agent._fallback_model]
|
||||
agent._fallback_index = 0
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.base_url = "https://api.anthropic.com/v1"
|
||||
|
|
@ -2354,6 +2554,8 @@ class TestFallbackAnthropicProvider:
|
|||
def test_fallback_to_openrouter_uses_openai_client(self, agent):
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_model = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
|
||||
agent._fallback_chain = [agent._fallback_model]
|
||||
agent._fallback_index = 0
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.base_url = "https://openrouter.ai/api/v1"
|
||||
|
|
@ -2602,6 +2804,50 @@ class TestStreamingApiCall:
|
|||
assert tc[0].function.name == "search"
|
||||
assert tc[1].function.name == "read"
|
||||
|
||||
def test_ollama_reused_index_separate_tool_calls(self, agent):
    """Two tool-call deltas sharing index 0 but with different ids stay separate.

    The stream mimics Ollama, which reports every tool call at index 0. The
    assembled response must contain two tool calls rather than one slot with
    concatenated names and arguments.
    """
    # (id, function name, arguments) for each expected call, in stream order.
    expected_calls = [
        ("call_a", "search", '{"q":"hello"}'),
        ("call_b", "read_file", '{"path":"x.py"}'),
    ]
    stream = [
        _make_chunk(tool_calls=[_make_tc_delta(0, call_id, fn_name, fn_args)])
        for call_id, fn_name, fn_args in expected_calls
    ]
    stream.append(_make_chunk(finish_reason="tool_calls"))
    agent.client.chat.completions.create.return_value = iter(stream)

    resp = agent._interruptible_streaming_api_call({"messages": []})

    tc = resp.choices[0].message.tool_calls
    assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
    for actual, (call_id, fn_name, fn_args) in zip(tc, expected_calls):
        assert actual.function.name == fn_name
        assert actual.function.arguments == fn_args
        assert actual.id == call_id
|
||||
|
||||
def test_ollama_reused_index_streamed_args(self, agent):
    """Arguments streamed over several chunks at index 0 join the right call."""
    deltas = [
        (0, "call_a", "search", '{"q":'),
        (0, None, None, '"hello"}'),  # continuation: no id, no name
        (0, "call_b", "read", '{}'),  # new id at the same index 0
    ]
    stream = [_make_chunk(tool_calls=[_make_tc_delta(*delta)]) for delta in deltas]
    stream.append(_make_chunk(finish_reason="tool_calls"))
    agent.client.chat.completions.create.return_value = iter(stream)

    resp = agent._interruptible_streaming_api_call({"messages": []})

    calls = resp.choices[0].message.tool_calls
    assert len(calls) == 2
    search_call, read_call = calls[0], calls[1]
    assert search_call.function.name == "search"
    assert search_call.function.arguments == '{"q":"hello"}'
    assert read_call.function.name == "read"
    assert read_call.function.arguments == '{}'
|
||||
|
||||
def test_content_and_tool_calls_together(self, agent):
|
||||
chunks = [
|
||||
_make_chunk(content="I'll search"),
|
||||
|
|
@ -3003,6 +3249,8 @@ class TestFallbackSetsOAuthFlag:
|
|||
def test_fallback_to_anthropic_oauth_sets_flag(self, agent):
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
|
||||
agent._fallback_chain = [agent._fallback_model]
|
||||
agent._fallback_index = 0
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.base_url = "https://api.anthropic.com/v1"
|
||||
|
|
@ -3024,6 +3272,8 @@ class TestFallbackSetsOAuthFlag:
|
|||
def test_fallback_to_anthropic_api_key_clears_flag(self, agent):
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
|
||||
agent._fallback_chain = [agent._fallback_model]
|
||||
agent._fallback_index = 0
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.base_url = "https://api.anthropic.com/v1"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue