mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
Merge remote-tracking branch 'origin/main' into sid/types-and-lints
# Conflicts: # gateway/platforms/base.py # gateway/platforms/qqbot/adapter.py # gateway/platforms/slack.py # hermes_cli/main.py # scripts/batch_runner.py # tools/skills_tool.py # uv.lock
This commit is contained in:
commit
a9ed7cb3b4
117 changed files with 7791 additions and 611 deletions
170
tests/acp/test_approval_isolation.py
Normal file
170
tests/acp/test_approval_isolation.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr.
|
||||
|
||||
Two related ACP approval-flow issues:
|
||||
- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards`
|
||||
took the non-interactive auto-approve path and never consulted the
|
||||
ACP-supplied callback.
|
||||
- qg5c: `_approval_callback` was a module-global in terminal_tool;
|
||||
overlapping ACP sessions overwrote each other's callback slot.
|
||||
|
||||
Both fixed together by:
|
||||
1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call).
|
||||
2. Storing the callback in thread-local state so concurrent executor
|
||||
threads don't collide.
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestThreadLocalApprovalCallback:
    """GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so
    concurrent ACP sessions don't stomp on each other's handlers."""

    def test_set_and_get_in_same_thread(self):
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )

        def handler(cmd, desc):
            return "once"

        # A callback installed in this thread must be readable in this thread.
        set_approval_callback(handler)
        assert _get_approval_callback() is handler

    def test_callback_not_visible_in_different_thread(self):
        """Thread A's callback is NOT visible to Thread B."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        import time

        def cb_a(cmd, desc):
            return "thread_a"

        def cb_b(cmd, desc):
            return "thread_b"

        observed = {}

        def make_worker(label, callback):
            # Each worker installs its own callback, waits long enough for
            # the sibling thread to install *its* callback, then records
            # what it can still see.
            def body():
                set_approval_callback(callback)
                time.sleep(0.05)
                observed[label] = _get_approval_callback()

            return threading.Thread(target=body)

        workers = [make_worker("a", cb_a), make_worker("b", cb_b)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        # Each thread must see ONLY its own callback — not the other's
        assert observed["a"] is cb_a
        assert observed["b"] is cb_b

    def test_main_thread_callback_not_leaked_to_worker(self):
        """A callback set in the main thread does NOT leak into a
        freshly-spawned worker thread."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )

        def cb_main(cmd, desc):
            return "main"

        set_approval_callback(cb_main)

        worker_saw = []
        worker = threading.Thread(
            target=lambda: worker_saw.append(_get_approval_callback())
        )
        worker.start()
        worker.join()

        # Worker thread has no callback set — TLS is empty for it
        assert worker_saw == [None]
        # Main thread still has its callback
        assert _get_approval_callback() is cb_main

    def test_sudo_password_callback_also_thread_local(self):
        """Same protection applies to the sudo password callback."""
        from tools.terminal_tool import (
            set_sudo_password_callback,
            _get_sudo_password_callback,
        )

        def cb_main():
            return "main-password"

        set_sudo_password_callback(cb_main)

        worker_saw = []
        worker = threading.Thread(
            target=lambda: worker_saw.append(_get_sudo_password_callback())
        )
        worker.start()
        worker.join()

        assert worker_saw == [None]
        assert _get_sudo_password_callback() is cb_main
||||
class TestAcpExecAskGate:
    """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
    that tools.approval.check_all_command_guards takes the CLI-interactive
    path (consults the registered callback via prompt_dangerous_approval)
    instead of the non-interactive auto-approve shortcut.

    (HERMES_EXEC_ASK takes the gateway-queue path which requires a
    notify_cb registered in _gateway_notify_cbs — not applicable to ACP,
    which uses a direct callback shape.)"""

    def test_interactive_env_var_routes_to_callback(self, monkeypatch):
        """When HERMES_INTERACTIVE is set and an approval callback is
        registered, a dangerous command must route through the callback."""
        # Clean env: strip every mode flag that could steer the guard path.
        for var in (
            "HERMES_INTERACTIVE",
            "HERMES_GATEWAY_SESSION",
            "HERMES_EXEC_ASK",
            "HERMES_YOLO_MODE",
        ):
            monkeypatch.delenv(var, raising=False)

        from tools.approval import check_all_command_guards

        invocations = []

        def fake_cb(command, description, *, allow_permanent=True):
            invocations.append((command, description))
            return "once"

        # Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert result["approved"] is True
        assert invocations == [], (
            "without HERMES_INTERACTIVE the non-interactive auto-approve "
            "path should fire without consulting the callback"
        )

        # With HERMES_INTERACTIVE: callback IS called, approval flows through it
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
        invocations.clear()
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert invocations, (
            "with HERMES_INTERACTIVE the approval path should consult the "
            "registered callback — this was the ACP bypass in "
            "GHSA-96vc-wcxf-jjff"
        )
        assert result["approved"] is True
||||
|
|
@ -73,3 +73,17 @@ class TestApprovalMapping:
|
|||
result = cb("rm -rf /", "dangerous")
|
||||
|
||||
assert result == "deny"
|
||||
|
||||
def test_approval_none_response_returns_deny(self):
|
||||
"""When request_permission resolves to None, the callback should return 'deny'."""
|
||||
loop = MagicMock(spec=asyncio.AbstractEventLoop)
|
||||
mock_rp = MagicMock(name="request_permission")
|
||||
|
||||
future = MagicMock(spec=Future)
|
||||
future.result.return_value = None
|
||||
|
||||
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
|
||||
cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
|
||||
result = cb("echo hi", "demo")
|
||||
|
||||
assert result == "deny"
|
||||
|
|
|
|||
|
|
@ -95,19 +95,37 @@ class TestInitialize:
|
|||
|
||||
class TestAuthenticate:
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_with_provider_configured(self, agent, monkeypatch):
|
||||
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.has_provider",
|
||||
lambda: True,
|
||||
"acp_adapter.server.detect_provider",
|
||||
lambda: "openrouter",
|
||||
)
|
||||
resp = await agent.authenticate(method_id="openrouter")
|
||||
assert isinstance(resp, AuthenticateResponse)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.detect_provider",
|
||||
lambda: "openrouter",
|
||||
)
|
||||
resp = await agent.authenticate(method_id="OpenRouter")
|
||||
assert isinstance(resp, AuthenticateResponse)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.detect_provider",
|
||||
lambda: "openrouter",
|
||||
)
|
||||
resp = await agent.authenticate(method_id="totally-invalid-method")
|
||||
assert resp is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_authenticate_without_provider(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"acp_adapter.server.has_provider",
|
||||
lambda: False,
|
||||
"acp_adapter.server.detect_provider",
|
||||
lambda: None,
|
||||
)
|
||||
resp = await agent.authenticate(method_id="openrouter")
|
||||
assert resp is None
|
||||
|
|
@ -252,6 +270,57 @@ class TestListAndFork:
|
|||
|
||||
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_pagination_first_page(self, agent):
|
||||
from acp_adapter import server as acp_server
|
||||
|
||||
infos = [
|
||||
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
|
||||
for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5)
|
||||
]
|
||||
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
|
||||
resp = await agent.list_sessions()
|
||||
|
||||
assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE
|
||||
assert resp.next_cursor == resp.sessions[-1].session_id
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_pagination_no_more(self, agent):
|
||||
infos = [
|
||||
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
|
||||
for i in range(3)
|
||||
]
|
||||
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
|
||||
resp = await agent.list_sessions()
|
||||
|
||||
assert len(resp.sessions) == 3
|
||||
assert resp.next_cursor is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_cursor_resumes_after_match(self, agent):
|
||||
infos = [
|
||||
{"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
|
||||
{"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
|
||||
{"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0},
|
||||
]
|
||||
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
|
||||
resp = await agent.list_sessions(cursor="s1")
|
||||
|
||||
assert [s.session_id for s in resp.sessions] == ["s2", "s3"]
|
||||
assert resp.next_cursor is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_sessions_unknown_cursor_returns_empty(self, agent):
|
||||
infos = [
|
||||
{"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
|
||||
{"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
|
||||
]
|
||||
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
|
||||
resp = await agent.list_sessions(cursor="does-not-exist")
|
||||
|
||||
assert resp.sessions == []
|
||||
assert resp.next_cursor is None
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# session configuration / model routing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -414,7 +414,11 @@ class TestRunOauthSetupToken:
|
|||
token = run_oauth_setup_token()
|
||||
|
||||
assert token == "from-cred-file"
|
||||
mock_run.assert_called_once()
|
||||
# Don't assert exact call count — the contract is "credentials flow
|
||||
# through", not "exactly one subprocess call". xdist cross-test
|
||||
# pollution (other tests shimming subprocess via plugins) has flaked
|
||||
# assert_called_once() in CI.
|
||||
assert mock_run.called
|
||||
|
||||
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
|
||||
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""
|
||||
|
|
|
|||
238
tests/agent/test_anthropic_normalize_v2.py
Normal file
238
tests/agent/test_anthropic_normalize_v2.py
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
"""Regression tests: normalize_anthropic_response_v2 vs v1.
|
||||
|
||||
Constructs mock Anthropic responses and asserts that the v2 function
|
||||
(returning NormalizedResponse) produces identical field values to the
|
||||
original v1 function (returning SimpleNamespace + finish_reason).
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.anthropic_adapter import (
|
||||
normalize_anthropic_response,
|
||||
normalize_anthropic_response_v2,
|
||||
)
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers to build mock Anthropic SDK responses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _text_block(text: str):
|
||||
return SimpleNamespace(type="text", text=text)
|
||||
|
||||
|
||||
def _thinking_block(thinking: str, signature: str = "sig_abc"):
|
||||
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
|
||||
|
||||
|
||||
def _tool_use_block(id: str, name: str, input: dict):
|
||||
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
|
||||
|
||||
|
||||
def _response(content_blocks, stop_reason="end_turn"):
|
||||
return SimpleNamespace(
|
||||
content=content_blocks,
|
||||
stop_reason=stop_reason,
|
||||
usage=SimpleNamespace(
|
||||
input_tokens=10,
|
||||
output_tokens=5,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTextOnly:
    """Text-only response — no tools, no thinking."""

    def setup_method(self):
        # Normalize the same mock response through both code paths.
        response = _response([_text_block("Hello world")])
        self.resp = response
        self.v1_msg, self.v1_finish = normalize_anthropic_response(response)
        self.v2 = normalize_anthropic_response_v2(response)

    def test_type(self):
        assert isinstance(self.v2, NormalizedResponse)

    def test_content_matches(self):
        assert self.v1_msg.content == self.v2.content

    def test_finish_reason_matches(self):
        assert self.v1_finish == self.v2.finish_reason

    def test_no_tool_calls(self):
        for candidate in (self.v2.tool_calls, self.v1_msg.tool_calls):
            assert candidate is None

    def test_no_reasoning(self):
        for candidate in (self.v2.reasoning, self.v1_msg.reasoning):
            assert candidate is None
class TestWithToolCalls:
    """Response with tool calls."""

    def setup_method(self):
        blocks = [
            _text_block("I'll check that"),
            _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
            _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_finish_reason(self):
        assert self.v1_finish == "tool_calls"
        assert self.v2.finish_reason == "tool_calls"

    def test_tool_call_count(self):
        assert len(self.v1_msg.tool_calls) == 2
        assert len(self.v2.tool_calls) == 2

    def test_tool_call_ids_match(self):
        for v2_call, v1_call in zip(self.v2.tool_calls, self.v1_msg.tool_calls):
            assert v2_call.id == v1_call.id

    def test_tool_call_names_match(self):
        for call, expected in zip(self.v2.tool_calls, ("terminal", "read_file")):
            assert call.name == expected
        for v2_call, v1_call in zip(self.v2.tool_calls, self.v1_msg.tool_calls):
            assert v2_call.name == v1_call.function.name

    def test_tool_call_arguments_match(self):
        for v2_call, v1_call in zip(self.v2.tool_calls, self.v1_msg.tool_calls):
            assert v2_call.arguments == v1_call.function.arguments

    def test_content_preserved(self):
        assert self.v1_msg.content == self.v2.content
        assert "check that" in self.v2.content
class TestWithThinking:
    """Response with thinking blocks (Claude 3.5+ extended thinking)."""

    def setup_method(self):
        blocks = [
            _thinking_block("Let me think about this carefully..."),
            _text_block("The answer is 42."),
        ]
        self.resp = _response(blocks)
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_reasoning_matches(self):
        assert self.v1_msg.reasoning == self.v2.reasoning
        assert "think about this" in self.v2.reasoning

    def test_reasoning_details_in_provider_data(self):
        v1_details = self.v1_msg.reasoning_details
        provider_data = self.v2.provider_data
        v2_details = provider_data.get("reasoning_details") if provider_data else None
        assert v1_details is not None
        assert v2_details is not None
        assert len(v1_details) == len(v2_details)

    def test_content_excludes_thinking(self):
        # Thinking text must not leak into the user-visible content.
        assert self.v2.content == "The answer is 42."
class TestMixed:
    """Response with thinking + text + tool calls."""

    def setup_method(self):
        blocks = [
            _thinking_block("Planning my approach..."),
            _text_block("I'll run the command"),
            _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_all_fields_present(self):
        for field in (self.v2.content, self.v2.tool_calls, self.v2.reasoning):
            assert field is not None
        assert self.v2.finish_reason == "tool_calls"

    def test_content_matches(self):
        assert self.v1_msg.content == self.v2.content

    def test_reasoning_matches(self):
        assert self.v1_msg.reasoning == self.v2.reasoning

    def test_tool_call_matches(self):
        v1_first = self.v1_msg.tool_calls[0]
        v2_first = self.v2.tool_calls[0]
        assert v2_first.id == v1_first.id
        assert v2_first.name == v1_first.function.name
class TestStopReasons:
    """Verify finish_reason mapping matches between v1 and v2."""

    # (anthropic stop_reason, expected normalized finish_reason)
    CASES = [
        ("end_turn", "stop"),
        ("tool_use", "tool_calls"),
        ("max_tokens", "length"),
        ("stop_sequence", "stop"),
        ("refusal", "content_filter"),
        ("model_context_window_exceeded", "length"),
        ("unknown_future_reason", "stop"),
    ]

    @pytest.mark.parametrize("stop_reason,expected", CASES)
    def test_stop_reason_mapping(self, stop_reason, expected):
        resp = _response([_text_block("x")], stop_reason=stop_reason)
        _, v1_finish = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        # Both normalizers must agree with the expected mapping.
        assert v1_finish == expected
        assert v2.finish_reason == expected
class TestStripToolPrefix:
    """Verify mcp_ prefix stripping works identically."""

    @staticmethod
    def _mcp_response():
        # Single tool_use block whose name carries the mcp_ prefix.
        return _response(
            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
            stop_reason="tool_use",
        )

    def test_prefix_stripped(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
        assert v1_msg.tool_calls[0].function.name == "terminal"
        assert v2.tool_calls[0].name == "terminal"

    def test_prefix_kept(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
        assert v2.tool_calls[0].name == "mcp_terminal"
class TestEdgeCases:
    """Edge cases: empty content, no blocks, etc."""

    def test_empty_content_blocks(self):
        resp = _response([])
        v1_msg, _ = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        assert v1_msg.content == v2.content
        assert v2.content is None

    def test_no_reasoning_details_means_none_provider_data(self):
        v2 = normalize_anthropic_response_v2(_response([_text_block("hi")]))
        assert v2.provider_data is None

    def test_v2_returns_dataclass_not_namespace(self):
        v2 = normalize_anthropic_response_v2(_response([_text_block("hi")]))
        assert isinstance(v2, NormalizedResponse)
        assert not isinstance(v2, SimpleNamespace)
146
tests/agent/test_copilot_acp_client.py
Normal file
146
tests/agent/test_copilot_acp_client.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Focused regressions for the Copilot ACP shim safety layer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
|
||||
|
||||
class _FakeProcess:
|
||||
def __init__(self) -> None:
|
||||
self.stdin = io.StringIO()
|
||||
|
||||
|
||||
class CopilotACPClientSafetyTests(unittest.TestCase):
    """Regression coverage for the Copilot ACP shim's permission and
    filesystem guards."""

    def setUp(self) -> None:
        self.client = CopilotACPClient(acp_cwd="/tmp")

    @staticmethod
    def _rpc(req_id: int, method: str, params: dict) -> dict:
        """Build a JSON-RPC 2.0 request envelope."""
        return {"jsonrpc": "2.0", "id": req_id, "method": method, "params": params}

    def _dispatch(self, message: dict, *, cwd: str) -> dict:
        """Feed *message* through the client's server-message handler and
        return the decoded JSON-RPC response it wrote to the fake stdin."""
        fake_proc = _FakeProcess()
        handled = self.client._handle_server_message(
            message,
            process=fake_proc,
            cwd=cwd,
            text_parts=[],
            reasoning_parts=[],
        )
        self.assertTrue(handled)
        raw = fake_proc.stdin.getvalue().strip()
        self.assertTrue(raw)
        return json.loads(raw)

    def test_request_permission_is_not_auto_allowed(self) -> None:
        response = self._dispatch(
            self._rpc(1, "session/request_permission", {}),
            cwd="/tmp",
        )

        result = response.get("result") or {}
        outer = result.get("outcome") or {}
        self.assertEqual(outer.get("outcome"), "cancelled")

    def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
            blocked.parent.mkdir(parents=True, exist_ok=True)
            blocked.write_text('{"token":"sk-test-secret-1234567890"}')

            overrides = {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")}
            with patch.dict(os.environ, overrides, clear=False):
                response = self._dispatch(
                    self._rpc(2, "fs/read_text_file", {"path": str(blocked)}),
                    cwd=str(home),
                )

            self.assertIn("error", response)

    def test_read_text_file_redacts_sensitive_content(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            secret_file = root / "config.env"
            secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")

            response = self._dispatch(
                self._rpc(3, "fs/read_text_file", {"path": str(secret_file)}),
                cwd=str(root),
            )

            body = (response.get("result") or {}).get("content") or ""
            # Secret material must be redacted while the key name survives.
            self.assertNotIn("abc123def456", body)
            self.assertIn("OPENAI_API_KEY=", body)

    def test_write_text_file_reuses_write_denylist(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            target = home / ".ssh" / "id_rsa"
            target.parent.mkdir(parents=True, exist_ok=True)

            deny_patch = patch(
                "agent.copilot_acp_client.is_write_denied",
                return_value=True,
                create=True,
            )
            with deny_patch:
                response = self._dispatch(
                    self._rpc(
                        4,
                        "fs/write_text_file",
                        {"path": str(target), "content": "fake-private-key"},
                    ),
                    cwd=str(home),
                )

            self.assertIn("error", response)
            self.assertFalse(target.exists())

    def test_write_text_file_respects_safe_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            safe_root = root / "workspace"
            safe_root.mkdir()
            outside = root / "outside.txt"

            with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
                response = self._dispatch(
                    self._rpc(
                        5,
                        "fs/write_text_file",
                        {"path": str(outside), "content": "should-not-write"},
                    ),
                    cwd=str(root),
                )

            self.assertIn("error", response)
            self.assertFalse(outside.exists())
||||
if __name__ == "__main__":
    # Allow running this test module directly with the unittest runner.
    unittest.main()
|
||||
|
|
@ -516,13 +516,12 @@ class TestGatewayFormatting:
|
|||
assert "**" in text # Markdown bold
|
||||
|
||||
def test_gateway_format_hides_cost(self, populated_db):
|
||||
"""Gateway format omits dollar figures and internal cache details."""
|
||||
engine = InsightsEngine(populated_db)
|
||||
report = engine.generate(days=30)
|
||||
text = engine.format_gateway(report)
|
||||
|
||||
assert "$" in text
|
||||
assert "Top Skills" in text
|
||||
assert "Est. cost" in text
|
||||
assert "$" not in text
|
||||
assert "cache" not in text.lower()
|
||||
|
||||
def test_gateway_format_shows_models(self, populated_db):
|
||||
|
|
|
|||
|
|
@ -84,38 +84,6 @@ class TestMinimaxAuxModel:
|
|||
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
|
||||
|
||||
|
||||
class TestMinimaxModelCatalog:
|
||||
"""Verify the model catalog matches official Anthropic-compat endpoint models.
|
||||
|
||||
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
|
||||
"""
|
||||
|
||||
def test_catalog_includes_current_models(self):
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
for provider in ("minimax", "minimax-cn"):
|
||||
models = _PROVIDER_MODELS[provider]
|
||||
assert "MiniMax-M2.7" in models
|
||||
assert "MiniMax-M2.5" in models
|
||||
assert "MiniMax-M2.1" in models
|
||||
assert "MiniMax-M2" in models
|
||||
|
||||
def test_catalog_excludes_m1_family(self):
|
||||
"""M1 models are not available on the /anthropic endpoint."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
for provider in ("minimax", "minimax-cn"):
|
||||
models = _PROVIDER_MODELS[provider]
|
||||
assert "MiniMax-M1" not in models
|
||||
|
||||
def test_catalog_excludes_highspeed(self):
|
||||
"""Highspeed variants are available but not shown in default catalog
|
||||
(users can still specify them manually)."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
for provider in ("minimax", "minimax-cn"):
|
||||
models = _PROVIDER_MODELS[provider]
|
||||
assert "MiniMax-M2.7-highspeed" not in models
|
||||
assert "MiniMax-M2.5-highspeed" not in models
|
||||
|
||||
|
||||
class TestMinimaxBetaHeaders:
|
||||
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed.
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
|
||||
|
|
@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch):
|
|||
_validate_proxy_env_urls() # should not raise
|
||||
|
||||
|
||||
def test_proxy_env_normalizes_socks_alias(monkeypatch):
|
||||
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
|
||||
_validate_proxy_env_urls()
|
||||
assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("key", [
|
||||
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
|
||||
"http_proxy", "https_proxy", "all_proxy",
|
||||
|
|
|
|||
|
|
@ -405,3 +405,191 @@ class TestPlanSkillHelpers:
|
|||
assert "Add a /plan command" in msg
|
||||
assert ".hermes/plans/plan.md" in msg
|
||||
assert "Runtime note:" in msg
|
||||
|
||||
|
||||
class TestSkillDirectoryHeader:
|
||||
"""The activation message must expose the absolute skill directory and
|
||||
explain how to resolve relative paths, so skills with bundled scripts
|
||||
don't force the agent into a second ``skill_view()`` round-trip."""
|
||||
|
||||
def test_header_contains_absolute_skill_dir(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
skill_dir = _make_skill(tmp_path, "abs-dir-skill")
|
||||
scan_skill_commands()
|
||||
msg = build_skill_invocation_message("/abs-dir-skill", "go")
|
||||
|
||||
assert msg is not None
|
||||
assert f"[Skill directory: {skill_dir}]" in msg
|
||||
assert "Resolve any relative paths" in msg
|
||||
|
||||
def test_supporting_files_shown_with_absolute_paths(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
skill_dir = _make_skill(tmp_path, "scripted-skill")
|
||||
(skill_dir / "scripts").mkdir()
|
||||
(skill_dir / "scripts" / "run.js").write_text("console.log('hi')")
|
||||
scan_skill_commands()
|
||||
msg = build_skill_invocation_message("/scripted-skill")
|
||||
|
||||
assert msg is not None
|
||||
# The supporting-files block must emit both the relative form (so the
|
||||
# agent can call skill_view on it) and the absolute form (so it can
|
||||
# run the script directly via terminal).
|
||||
assert "scripts/run.js" in msg
|
||||
assert str(skill_dir / "scripts" / "run.js") in msg
|
||||
assert f"node {skill_dir}/scripts/foo.js" in msg
|
||||
|
||||
|
||||
class TestTemplateVarSubstitution:
    """``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body
    are replaced before the agent sees the content."""

    def test_substitutes_skill_dir(self, tmp_path):
        """${HERMES_SKILL_DIR} is replaced with the skill's directory path."""
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(
                tmp_path,
                "templated",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/templated")

        assert msg is not None
        assert f"node {skill_dir}/scripts/foo.js" in msg
        # The literal template token must not leak through.
        assert "${HERMES_SKILL_DIR}" not in msg.split("[Skill directory:")[0]

    def test_substitutes_session_id_when_available(self, tmp_path):
        """${HERMES_SESSION_ID} is replaced when a task_id is supplied."""
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-templated",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message(
                "/sess-templated", task_id="abc-123"
            )

        assert msg is not None
        assert "Session: abc-123" in msg

    def test_leaves_session_id_token_when_missing(self, tmp_path):
        """Without a task_id the token survives verbatim."""
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-missing",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/sess-missing", task_id=None)

        assert msg is not None
        # No session — token left intact so the author can spot it.
        assert "Session: ${HERMES_SESSION_ID}" in msg

    def test_disable_template_vars_via_config(self, tmp_path):
        """template_vars=False in config turns substitution off entirely."""
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": False},
            ),
        ):
            _make_skill(
                tmp_path,
                "no-sub",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/no-sub")

        assert msg is not None
        # Template token must survive when substitution is disabled.
        assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg
|
||||
|
||||
|
||||
class TestInlineShellExpansion:
    """Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the
    content — but only when the user has opted in via config."""

    def test_inline_shell_is_off_by_default(self, tmp_path):
        """Without explicit opt-in, snippets are left as literal text."""
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "dyn-default-off",
                body="Today is !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-default-off")

        assert msg is not None
        # Default config has inline_shell=False — snippet must stay literal.
        assert "!`echo INLINE_RAN`" in msg
        assert "Today is INLINE_RAN." not in msg

    def test_inline_shell_runs_when_enabled(self, tmp_path):
        """With inline_shell=True the snippet's stdout replaces the token."""
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 5},
            ),
        ):
            _make_skill(
                tmp_path,
                "dyn-on",
                body="Marker: !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-on")

        assert msg is not None
        assert "Marker: INLINE_RAN." in msg
        assert "!`echo INLINE_RAN`" not in msg

    def test_inline_shell_runs_in_skill_directory(self, tmp_path):
        """Inline snippets get the skill dir as CWD so relative paths work."""
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 5},
            ),
        ):
            skill_dir = _make_skill(
                tmp_path,
                "dyn-cwd",
                body="Here: !`pwd`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-cwd")

        assert msg is not None
        assert f"Here: {skill_dir}" in msg

    def test_inline_shell_timeout_does_not_break_message(self, tmp_path):
        """A snippet exceeding inline_shell_timeout degrades gracefully."""
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 1},
            ),
        ):
            _make_skill(
                tmp_path,
                "dyn-slow",
                body="Slow: !`sleep 5 && printf DYN_MARKER`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-slow")

        assert msg is not None
        # Timeout is surfaced as a marker instead of propagating as an error,
        # and the rest of the skill message still renders.
        assert "inline-shell timeout" in msg
        # The command's intended stdout never made it through — only the
        # timeout marker (which echoes the command text) survives.
        assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "")
|
||||
|
|
|
|||
0
tests/agent/transports/__init__.py
Normal file
0
tests/agent/transports/__init__.py
Normal file
220
tests/agent/transports/test_transport.py
Normal file
220
tests/agent/transports/test_transport.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Tests for the transport ABC, registry, and AnthropicTransport."""
|
||||
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
from agent.transports import get_transport, register_transport, _REGISTRY
|
||||
|
||||
|
||||
# ── ABC contract tests ──────────────────────────────────────────────────
|
||||
|
||||
class TestProviderTransportABC:
    """Verify the ABC contract is enforceable."""

    def test_cannot_instantiate_abc(self):
        """The abstract base itself refuses direct instantiation."""
        with pytest.raises(TypeError):
            ProviderTransport()

    def test_concrete_must_implement_all_abstract(self):
        """Implementing only ``api_mode`` leaves the subclass abstract."""
        class Incomplete(ProviderTransport):
            @property
            def api_mode(self):
                return "test"
        with pytest.raises(TypeError):
            Incomplete()

    def test_minimal_concrete(self):
        """A subclass implementing every abstract member instantiates and
        inherits the non-abstract defaults for the optional hooks."""
        class Minimal(ProviderTransport):
            @property
            def api_mode(self):
                return "test_minimal"
            def convert_messages(self, messages, **kw):
                return messages
            def convert_tools(self, tools):
                return tools
            def build_kwargs(self, model, messages, tools=None, **params):
                return {"model": model, "messages": messages}
            def normalize_response(self, response, **kw):
                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")

        t = Minimal()
        assert t.api_mode == "test_minimal"
        assert t.validate_response(None) is True  # default
        assert t.extract_cache_stats(None) is None  # default
        assert t.map_finish_reason("end_turn") == "end_turn"  # default passthrough
|
||||
|
||||
|
||||
# ── Registry tests ───────────────────────────────────────────────────────
|
||||
|
||||
class TestTransportRegistry:
    """The module-level transport registry: lookup misses, eager
    self-registration on import, and manual register/lookup round-trips."""

    def test_get_unregistered_returns_none(self):
        """An unknown api-mode key yields ``None`` rather than raising."""
        assert get_transport("nonexistent_mode") is None

    def test_anthropic_registered_on_import(self):
        """Importing the anthropic module registers its transport."""
        import agent.transports.anthropic  # noqa: F401

        transport = get_transport("anthropic_messages")
        assert transport is not None
        assert transport.api_mode == "anthropic_messages"

    def test_register_and_get(self):
        """A hand-registered transport round-trips through get_transport."""

        class _StubTransport(ProviderTransport):
            @property
            def api_mode(self):
                return "dummy_test"

            def convert_messages(self, messages, **kw):
                return messages

            def convert_tools(self, tools):
                return tools

            def build_kwargs(self, model, messages, tools=None, **params):
                return {}

            def normalize_response(self, response, **kw):
                return NormalizedResponse(
                    content=None, tool_calls=None, finish_reason="stop"
                )

        register_transport("dummy_test", _StubTransport)
        fetched = get_transport("dummy_test")
        assert fetched.api_mode == "dummy_test"
        # Cleanup so the registry doesn't leak into sibling tests.
        _REGISTRY.pop("dummy_test", None)
|
||||
|
||||
|
||||
# ── AnthropicTransport tests ────────────────────────────────────────────
|
||||
|
||||
class TestAnthropicTransport:
    """Behavioral checks for the registered ``anthropic_messages`` transport."""

    @pytest.fixture
    def transport(self):
        """Import (and thereby register) the anthropic transport, return it."""
        import agent.transports.anthropic  # noqa: F401
        return get_transport("anthropic_messages")

    def test_api_mode(self, transport):
        assert transport.api_mode == "anthropic_messages"

    def test_convert_tools_simple(self, transport):
        """OpenAI-style function tools are reshaped into Anthropic's
        name/input_schema layout."""
        tools = [{
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test",
                "parameters": {"type": "object", "properties": {}},
            }
        }]
        result = transport.convert_tools(tools)
        assert len(result) == 1
        assert result[0]["name"] == "test_tool"
        assert "input_schema" in result[0]

    def test_validate_response_none(self, transport):
        """A missing response object is invalid."""
        assert transport.validate_response(None) is False

    def test_validate_response_empty_content(self, transport):
        """A response with an empty content list is invalid."""
        r = SimpleNamespace(content=[])
        assert transport.validate_response(r) is False

    def test_validate_response_valid(self, transport):
        """A response with at least one text block passes validation."""
        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
        assert transport.validate_response(r) is True

    def test_map_finish_reason(self, transport):
        """Anthropic stop reasons map onto canonical finish reasons;
        unknown values fall back to 'stop'."""
        assert transport.map_finish_reason("end_turn") == "stop"
        assert transport.map_finish_reason("tool_use") == "tool_calls"
        assert transport.map_finish_reason("max_tokens") == "length"
        assert transport.map_finish_reason("stop_sequence") == "stop"
        assert transport.map_finish_reason("refusal") == "content_filter"
        assert transport.map_finish_reason("model_context_window_exceeded") == "length"
        assert transport.map_finish_reason("unknown") == "stop"

    def test_extract_cache_stats_none_usage(self, transport):
        """No usage payload means no cache stats."""
        r = SimpleNamespace(usage=None)
        assert transport.extract_cache_stats(r) is None

    def test_extract_cache_stats_with_cache(self, transport):
        """Non-zero cache counters come back as a cached/creation dict."""
        usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
        r = SimpleNamespace(usage=usage)
        result = transport.extract_cache_stats(r)
        assert result == {"cached_tokens": 100, "creation_tokens": 50}

    def test_extract_cache_stats_zero(self, transport):
        """All-zero cache counters are treated as 'no cache activity'."""
        usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
        r = SimpleNamespace(usage=usage)
        assert transport.extract_cache_stats(r) is None

    def test_normalize_response_text(self, transport):
        """Test normalization of a simple text response."""
        r = SimpleNamespace(
            content=[SimpleNamespace(type="text", text="Hello world")],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello world"
        assert nr.tool_calls is None or nr.tool_calls == []
        assert nr.finish_reason == "stop"

    def test_normalize_response_tool_calls(self, transport):
        """Test normalization of a tool-use response."""
        r = SimpleNamespace(
            content=[
                SimpleNamespace(
                    type="tool_use",
                    id="toolu_123",
                    name="terminal",
                    input={"command": "ls"},
                ),
            ],
            stop_reason="tool_use",
            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        assert nr.finish_reason == "tool_calls"
        assert len(nr.tool_calls) == 1
        tc = nr.tool_calls[0]
        assert tc.name == "terminal"
        assert tc.id == "toolu_123"
        # Arguments are carried as a JSON string, not a dict.
        assert '"command"' in tc.arguments

    def test_normalize_response_thinking(self, transport):
        """Test normalization preserves thinking content."""
        r = SimpleNamespace(
            content=[
                SimpleNamespace(type="thinking", thinking="Let me think..."),
                SimpleNamespace(type="text", text="The answer is 42"),
            ],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        # Thinking blocks land in .reasoning, text blocks in .content.
        assert nr.content == "The answer is 42"
        assert nr.reasoning == "Let me think..."

    def test_build_kwargs_returns_dict(self, transport):
        """Test build_kwargs produces a usable kwargs dict."""
        messages = [{"role": "user", "content": "Hello"}]
        kw = transport.build_kwargs(
            model="claude-sonnet-4-6",
            messages=messages,
            max_tokens=1024,
        )
        assert isinstance(kw, dict)
        assert "model" in kw
        assert "max_tokens" in kw
        assert "messages" in kw

    def test_convert_messages_extracts_system(self, transport):
        """Test convert_messages separates system from messages."""
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi"},
        ]
        system, msgs = transport.convert_messages(messages)
        # System should be extracted
        assert system is not None
        # Messages should only have user
        assert len(msgs) >= 1
|
||||
151
tests/agent/transports/test_types.py
Normal file
151
tests/agent/transports/test_types.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
"""Tests for agent/transports/types.py — dataclass construction + helpers."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from agent.transports.types import (
|
||||
NormalizedResponse,
|
||||
ToolCall,
|
||||
Usage,
|
||||
build_tool_call,
|
||||
map_finish_reason,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ToolCall
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestToolCall:
    """Construction semantics of the ToolCall dataclass."""

    def test_basic_construction(self):
        """Explicit fields land as given; provider_data defaults to None."""
        call = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')
        assert call.id == "call_abc"
        assert call.name == "terminal"
        assert call.arguments == '{"cmd": "ls"}'
        assert call.provider_data is None

    def test_none_id(self):
        """An id of None is accepted and preserved."""
        call = ToolCall(id=None, name="read_file", arguments="{}")
        assert call.id is None

    def test_provider_data(self):
        """provider_data is stored verbatim and stays key-addressable."""
        extras = {"call_id": "call_x", "response_item_id": "fc_x"}
        call = ToolCall(
            id="call_x",
            name="t",
            arguments="{}",
            provider_data=extras,
        )
        assert call.provider_data["call_id"] == "call_x"
        assert call.provider_data["response_item_id"] == "fc_x"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Usage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUsage:
    """Default and explicit construction of the Usage counters."""

    def test_defaults(self):
        """Every counter starts at zero when nothing is supplied."""
        usage = Usage()
        for counter in (
            usage.prompt_tokens,
            usage.completion_tokens,
            usage.total_tokens,
            usage.cached_tokens,
        ):
            assert counter == 0

    def test_explicit(self):
        """Explicit keyword values are stored as given."""
        usage = Usage(
            prompt_tokens=100,
            completion_tokens=50,
            total_tokens=150,
            cached_tokens=80,
        )
        assert usage.total_tokens == 150
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# NormalizedResponse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNormalizedResponse:
    """Field handling of the NormalizedResponse dataclass."""

    def test_text_only(self):
        """Optional fields (reasoning, usage, provider_data) default to None."""
        r = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
        assert r.content == "hello"
        assert r.tool_calls is None
        assert r.finish_reason == "stop"
        assert r.reasoning is None
        assert r.usage is None
        assert r.provider_data is None

    def test_with_tool_calls(self):
        """A tool_calls list is stored as-is alongside the finish reason."""
        tcs = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
        r = NormalizedResponse(content=None, tool_calls=tcs, finish_reason="tool_calls")
        assert r.finish_reason == "tool_calls"
        assert len(r.tool_calls) == 1
        assert r.tool_calls[0].name == "terminal"

    def test_with_reasoning(self):
        """Reasoning text rides next to the final content."""
        r = NormalizedResponse(
            content="answer",
            tool_calls=None,
            finish_reason="stop",
            reasoning="I thought about it",
        )
        assert r.reasoning == "I thought about it"

    def test_with_provider_data(self):
        """provider_data keeps arbitrary provider-specific structures intact."""
        r = NormalizedResponse(
            content=None,
            tool_calls=None,
            finish_reason="stop",
            provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]},
        )
        assert r.provider_data["reasoning_details"][0]["type"] == "thinking"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_tool_call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildToolCall:
    """build_tool_call helper: argument serialization + provider fields."""

    def test_dict_arguments_serialized(self):
        """Dict arguments are JSON-serialized into the arguments string."""
        tc = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
        assert tc.arguments == json.dumps({"cmd": "ls"})
        assert tc.provider_data is None

    def test_string_arguments_passthrough(self):
        """Already-serialized string arguments pass through untouched."""
        tc = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}')
        assert tc.arguments == '{"path": "/tmp"}'

    def test_provider_fields(self):
        """call_id / response_item_id kwargs are folded into provider_data."""
        tc = build_tool_call(
            id="call_3",
            name="terminal",
            arguments="{}",
            call_id="call_3",
            response_item_id="fc_3",
        )
        assert tc.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}

    def test_none_id(self):
        """A None id is preserved, mirroring the ToolCall dataclass."""
        tc = build_tool_call(id=None, name="t", arguments="{}")
        assert tc.id is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# map_finish_reason
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMapFinishReason:
    """map_finish_reason resolves provider stop reasons through a lookup
    table, falling back to 'stop' for anything unmapped (including None)."""

    ANTHROPIC_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
    }

    def test_known_reason(self):
        """Each mapped provider reason resolves to its canonical value."""
        expectations = {
            "end_turn": "stop",
            "tool_use": "tool_calls",
            "max_tokens": "length",
            "refusal": "content_filter",
        }
        for provider_reason, canonical in expectations.items():
            assert map_finish_reason(provider_reason, self.ANTHROPIC_MAP) == canonical

    def test_unknown_reason_defaults_to_stop(self):
        """A reason absent from the table falls back to 'stop'."""
        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"

    def test_none_reason(self):
        """None behaves like any other unknown reason."""
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
|
||||
146
tests/cli/test_cli_steer_busy_path.py
Normal file
146
tests/cli/test_cli_steer_busy_path.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Regression tests for classic-CLI mid-run /steer dispatch.
|
||||
|
||||
Background
|
||||
----------
|
||||
/steer sent while the agent is running used to be queued through
|
||||
``self._pending_input`` alongside ordinary user input. ``process_loop``
|
||||
pulls from that queue and calls ``process_command()`` — but while the
|
||||
agent is running, ``process_loop`` is blocked inside ``self.chat()``.
|
||||
By the time the queued /steer was pulled, ``_agent_running`` had
|
||||
already flipped back to False, so ``process_command()`` took the idle
|
||||
fallback (``"No agent running; queued as next turn"``) and delivered
|
||||
the steer as an ordinary next-turn message.
|
||||
|
||||
The fix dispatches /steer inline on the UI thread when the agent is
|
||||
running — matching the existing pattern for /model — so the steer
|
||||
reaches ``agent.steer()`` (thread-safe) without touching the queue.
|
||||
|
||||
These tests exercise the detector + inline dispatch without starting a
|
||||
prompt_toolkit app.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
def _make_cli():
    """Create a HermesCLI instance with prompt_toolkit stubbed out."""
    # Deterministic config so the CLI under test doesn't read the user's
    # real settings.
    _clean_config = {
        "model": {
            "default": "anthropic/claude-opus-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "provider": "auto",
        },
        "display": {"compact": False, "tool_progress": "all"},
        "agent": {},
        "terminal": {"env_type": "local"},
    }
    # Blank out env vars that would otherwise override the config above.
    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
    # Every prompt_toolkit submodule the cli module imports must be stubbed
    # BEFORE the (re)load below, or the real package would be pulled in.
    prompt_toolkit_stubs = {
        "prompt_toolkit": MagicMock(),
        "prompt_toolkit.history": MagicMock(),
        "prompt_toolkit.styles": MagicMock(),
        "prompt_toolkit.patch_stdout": MagicMock(),
        "prompt_toolkit.application": MagicMock(),
        "prompt_toolkit.layout": MagicMock(),
        "prompt_toolkit.layout.processors": MagicMock(),
        "prompt_toolkit.filters": MagicMock(),
        "prompt_toolkit.layout.dimension": MagicMock(),
        "prompt_toolkit.layout.menus": MagicMock(),
        "prompt_toolkit.widgets": MagicMock(),
        "prompt_toolkit.key_binding": MagicMock(),
        "prompt_toolkit.completion": MagicMock(),
        "prompt_toolkit.formatted_text": MagicMock(),
        "prompt_toolkit.auto_suggest": MagicMock(),
    }
    with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
        "os.environ", clean_env, clear=False
    ):
        import cli as _cli_mod

        # Reload so cli's module-level state is rebuilt under the stubbed
        # modules and cleaned environment.
        _cli_mod = importlib.reload(_cli_mod)
        with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict(
            _cli_mod.__dict__, {"CLI_CONFIG": _clean_config}
        ):
            # NOTE(review): the patches deactivate when this returns —
            # presumably HermesCLI captures everything it needs in
            # __init__; confirm if tests start touching CLI_CONFIG later.
            return _cli_mod.HermesCLI()
|
||||
|
||||
|
||||
class TestSteerInlineDetector:
    """_should_handle_steer_command_inline gates the busy-path fast dispatch."""

    def test_detects_steer_when_agent_running(self):
        """Mid-run /steer with a payload is exactly the inline fast path."""
        cli = _make_cli()
        cli._agent_running = True
        decision = cli._should_handle_steer_command_inline("/steer focus on error handling")
        assert decision is True

    def test_ignores_steer_when_agent_idle(self):
        """Idle-path /steer should fall through to the normal process_loop
        dispatch so the queue-style fallback message is emitted."""
        cli = _make_cli()
        cli._agent_running = False
        assert cli._should_handle_steer_command_inline("/steer do something") is False

    def test_ignores_non_slash_input(self):
        """Plain text (or nothing at all) never triggers the fast path."""
        cli = _make_cli()
        cli._agent_running = True
        for text in ("steer without slash", ""):
            assert cli._should_handle_steer_command_inline(text) is False

    def test_ignores_other_slash_commands(self):
        """Only /steer qualifies; sibling slash commands take the queue."""
        cli = _make_cli()
        cli._agent_running = True
        for command in ("/queue hello", "/stop", "/help"):
            assert cli._should_handle_steer_command_inline(command) is False

    def test_ignores_steer_with_attached_images(self):
        """Image payloads take the normal path; steer doesn't accept images."""
        cli = _make_cli()
        cli._agent_running = True
        decision = cli._should_handle_steer_command_inline("/steer text", has_images=True)
        assert decision is False
|
||||
|
||||
|
||||
class TestSteerBusyPathDispatch:
    """When the detector fires, process_command('/steer ...') must call
    agent.steer() directly rather than the idle-path fallback."""

    def test_process_command_routes_to_agent_steer(self):
        """With _agent_running=True and agent.steer present, /steer reaches
        agent.steer(payload), NOT _pending_input."""
        cli = _make_cli()
        cli._agent_running = True
        cli.agent = MagicMock()
        cli.agent.steer = MagicMock(return_value=True)
        # Make sure the idle-path fallback would be observable if taken
        cli._pending_input = MagicMock()

        cli.process_command("/steer focus on errors")

        # The steer payload is the command text minus the "/steer " prefix.
        cli.agent.steer.assert_called_once_with("focus on errors")
        cli._pending_input.put.assert_not_called()

    def test_idle_path_queues_as_next_turn(self):
        """Control — when the agent is NOT running, /steer correctly falls
        back to next-turn queue semantics. Demonstrates why the fix was
        needed: the queue path only works when you can actually drain it."""
        cli = _make_cli()
        cli._agent_running = False
        cli.agent = MagicMock()
        cli.agent.steer = MagicMock(return_value=True)
        cli._pending_input = MagicMock()

        cli.process_command("/steer would-be-next-turn")

        # Idle path does NOT call agent.steer
        cli.agent.steer.assert_not_called()
        # It puts the payload in the queue as a normal next-turn message
        cli._pending_input.put.assert_called_once_with("would-be-next-turn")
|
||||
|
||||
|
||||
# Allow running this test module directly, outside a pytest invocation.
if __name__ == "__main__":  # pragma: no cover
    import pytest

    pytest.main([__file__, "-v"])
|
||||
|
|
@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
|||
"HERMES_HOME_MODE",
|
||||
"BROWSER_CDP_URL",
|
||||
"CAMOFOX_URL",
|
||||
# Platform allowlists — not credentials, but if set from any source
|
||||
# (user shell, earlier leaky test, CI env), they change gateway auth
|
||||
# behavior and flake button-authorization tests.
|
||||
"TELEGRAM_ALLOWED_USERS",
|
||||
"DISCORD_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS",
|
||||
"SLACK_ALLOWED_USERS",
|
||||
"SIGNAL_ALLOWED_USERS",
|
||||
"SIGNAL_GROUP_ALLOWED_USERS",
|
||||
"EMAIL_ALLOWED_USERS",
|
||||
"SMS_ALLOWED_USERS",
|
||||
"MATTERMOST_ALLOWED_USERS",
|
||||
"MATRIX_ALLOWED_USERS",
|
||||
"DINGTALK_ALLOWED_USERS",
|
||||
"FEISHU_ALLOWED_USERS",
|
||||
"WECOM_ALLOWED_USERS",
|
||||
"GATEWAY_ALLOWED_USERS",
|
||||
"GATEWAY_ALLOW_ALL_USERS",
|
||||
"TELEGRAM_ALLOW_ALL_USERS",
|
||||
"DISCORD_ALLOW_ALL_USERS",
|
||||
"WHATSAPP_ALLOW_ALL_USERS",
|
||||
"SLACK_ALLOW_ALL_USERS",
|
||||
"SIGNAL_ALLOW_ALL_USERS",
|
||||
"EMAIL_ALLOW_ALL_USERS",
|
||||
"SMS_ALLOW_ALL_USERS",
|
||||
})
|
||||
|
||||
|
||||
|
|
@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment):
|
|||
return None
|
||||
|
||||
|
||||
# ── Module-level state reset ───────────────────────────────────────────────
|
||||
#
|
||||
# Python modules are singletons per process, and pytest-xdist workers are
|
||||
# long-lived. Module-level dicts/sets (tool registries, approval state,
|
||||
# interrupt flags) and ContextVars persist across tests in the same worker,
|
||||
# causing tests that pass alone to fail when run with siblings.
|
||||
#
|
||||
# Each entry in this fixture clears state that belongs to a specific module.
|
||||
# New state buckets go here too — this is the single gate that prevents
|
||||
# "works alone, flakes in CI" bugs from state leakage.
|
||||
#
|
||||
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
|
||||
# this closes; the running example was `test_command_guards` failing 12/15
|
||||
# CI runs because ``tools.approval._session_approved`` carried approvals
|
||||
# from one test's session into another's.
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_module_state():
    """Clear module-level mutable state and ContextVars between tests.

    Keeps state from leaking across tests on the same xdist worker. Modules
    that don't exist yet (test collection before production import) are
    skipped silently — production import later creates fresh empty state.
    Runs before every test (autouse); the trailing ``yield`` means cleanup
    happens on entry, not teardown.
    """
    # --- tools.approval — the single biggest source of cross-test pollution ---
    try:
        from tools import approval as _approval_mod
        _approval_mod._session_approved.clear()
        _approval_mod._session_yolo.clear()
        _approval_mod._permanent_approved.clear()
        _approval_mod._pending.clear()
        _approval_mod._gateway_queues.clear()
        _approval_mod._gateway_notify_cbs.clear()
        # ContextVar: reset to empty string so get_current_session_key()
        # falls through to the env var / default path, matching a fresh
        # process.
        _approval_mod._approval_session_key.set("")
    except Exception:
        # Module not imported yet / attribute missing — nothing to reset.
        pass

    # --- tools.interrupt — per-thread interrupt flag set ---
    try:
        from tools import interrupt as _interrupt_mod
        with _interrupt_mod._lock:
            _interrupt_mod._interrupted_threads.clear()
    except Exception:
        pass

    # --- gateway.session_context — 9 ContextVars that represent
    # the active gateway session. If set in one test and not reset,
    # the next test's get_session_env() reads stale values.
    try:
        from gateway import session_context as _sc_mod
        for _cv in (
            _sc_mod._SESSION_PLATFORM,
            _sc_mod._SESSION_CHAT_ID,
            _sc_mod._SESSION_CHAT_NAME,
            _sc_mod._SESSION_THREAD_ID,
            _sc_mod._SESSION_USER_ID,
            _sc_mod._SESSION_USER_NAME,
            _sc_mod._SESSION_KEY,
            _sc_mod._CRON_AUTO_DELIVER_PLATFORM,
            _sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
            _sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
        ):
            _cv.set(_sc_mod._UNSET)
    except Exception:
        pass

    # --- tools.env_passthrough — ContextVar<set[str]> with no default ---
    # LookupError is normal if the test never set it. Setting it to an
    # empty set unconditionally normalizes the starting state.
    try:
        from tools import env_passthrough as _envp_mod
        _envp_mod._allowed_env_vars_var.set(set())
    except Exception:
        pass

    # --- tools.credential_files — ContextVar<dict> ---
    try:
        from tools import credential_files as _credf_mod
        _credf_mod._registered_files_var.set({})
    except Exception:
        pass

    # --- tools.file_tools — per-task read history + file-ops cache ---
    # _read_tracker accumulates per-task_id read history for loop detection,
    # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
    # cap is hit faster than expected and capacity-related tests flake.
    try:
        from tools import file_tools as _ft_mod
        with _ft_mod._read_tracker_lock:
            _ft_mod._read_tracker.clear()
        with _ft_mod._file_ops_lock:
            _ft_mod._file_ops_cache.clear()
    except Exception:
        pass

    yield
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tmp_dir(tmp_path):
|
||||
"""Provide a temporary directory that is cleaned up automatically."""
|
||||
|
|
|
|||
|
|
@ -1580,3 +1580,128 @@ class TestParallelTick:
|
|||
end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0]
|
||||
start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0]
|
||||
assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1"
|
||||
|
||||
|
||||
class TestDeliverResultTimeoutCancelsFuture:
    """When future.result(timeout=60) raises TimeoutError in the live
    adapter delivery path, _deliver_result must cancel the orphan
    coroutine so it cannot duplicate-send after the standalone fallback.
    """

    def test_live_adapter_timeout_cancels_future_and_falls_back(self):
        """End-to-end: live adapter hangs past the 60s budget, _deliver_result
        patches the timeout down to a fast value, confirms future.cancel() fires,
        and verifies the standalone fallback path still delivers."""
        from gateway.config import Platform
        from concurrent.futures import Future

        # Live adapter whose send() coroutine never resolves within the budget
        adapter = AsyncMock()
        adapter.send.return_value = MagicMock(success=True)

        pconfig = MagicMock()
        pconfig.enabled = True
        mock_cfg = MagicMock()
        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}

        # Event loop stub that reports itself as running so _deliver_result
        # takes the run_coroutine_threadsafe path.
        loop = MagicMock()
        loop.is_running.return_value = True

        # A real concurrent.futures.Future so .cancel() has real semantics,
        # but we override .result() to raise TimeoutError exactly like the
        # 60s wait firing in production.
        captured_future = Future()
        cancel_calls = []
        original_cancel = captured_future.cancel

        def tracking_cancel():
            # Record the call, then delegate to the real Future.cancel().
            cancel_calls.append(True)
            return original_cancel()

        captured_future.cancel = tracking_cancel
        captured_future.result = MagicMock(side_effect=TimeoutError("timed out"))

        def fake_run_coro(coro, _loop):
            # Close the coroutine to avoid "never awaited" warnings, then
            # hand back our instrumented future.
            coro.close()
            return captured_future

        job = {
            "id": "timeout-job",
            "deliver": "origin",
            "origin": {"platform": "telegram", "chat_id": "123"},
        }

        standalone_send = AsyncMock(return_value={"success": True})

        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
             patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \
             patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \
             patch("tools.send_message_tool._send_to_platform", new=standalone_send):
            result = _deliver_result(
                job,
                "Hello world",
                adapters={Platform.TELEGRAM: adapter},
                loop=loop,
            )

        # 1. The orphan future was cancelled on timeout (the bug fix)
        assert cancel_calls == [True], "future.cancel() must fire on TimeoutError"
        # 2. The standalone fallback delivered — no double send, no silent drop
        assert result is None, f"expected successful delivery, got error: {result!r}"
        standalone_send.assert_awaited_once()
||||
|
||||
|
||||
class TestSendMediaTimeoutCancelsFuture:
|
||||
"""Same orphan-coroutine guarantee for _send_media_via_adapter's
|
||||
future.result(timeout=30) call. If this times out mid-batch, the
|
||||
in-flight coroutine must be cancelled before the next file is tried.
|
||||
"""
|
||||
|
||||
def test_media_send_timeout_cancels_future_and_continues(self):
|
||||
"""End-to-end: _send_media_via_adapter with a future whose .result()
|
||||
raises TimeoutError. Assert cancel() fires and the loop proceeds
|
||||
to the next file rather than hanging or crashing."""
|
||||
from concurrent.futures import Future
|
||||
|
||||
adapter = MagicMock()
|
||||
adapter.send_image_file = AsyncMock()
|
||||
adapter.send_video = AsyncMock()
|
||||
|
||||
# First file: future that times out. Second file: future that resolves OK.
|
||||
timeout_future = Future()
|
||||
timeout_cancel_calls = []
|
||||
original_cancel = timeout_future.cancel
|
||||
|
||||
def tracking_cancel():
|
||||
timeout_cancel_calls.append(True)
|
||||
return original_cancel()
|
||||
|
||||
timeout_future.cancel = tracking_cancel
|
||||
timeout_future.result = MagicMock(side_effect=TimeoutError("timed out"))
|
||||
|
||||
ok_future = Future()
|
||||
ok_future.set_result(MagicMock(success=True))
|
||||
|
||||
futures_iter = iter([timeout_future, ok_future])
|
||||
|
||||
def fake_run_coro(coro, _loop):
|
||||
coro.close()
|
||||
return next(futures_iter)
|
||||
|
||||
media_files = [
|
||||
("/tmp/slow.png", False), # times out
|
||||
("/tmp/fast.mp4", False), # succeeds
|
||||
]
|
||||
|
||||
loop = MagicMock()
|
||||
job = {"id": "media-timeout"}
|
||||
|
||||
with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
|
||||
# Should not raise — the except Exception clause swallows the timeout
|
||||
_send_media_via_adapter(adapter, "chat-1", media_files, None, loop, job)
|
||||
|
||||
# 1. The timed-out future was cancelled (the bug fix)
|
||||
assert timeout_cancel_calls == [True], "future.cancel() must fire on TimeoutError"
|
||||
# 2. Second file still got dispatched — one timeout doesn't abort the batch
|
||||
adapter.send_video.assert_called_once()
|
||||
assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"
|
||||
|
|
|
|||
|
|
@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path
|
|||
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
|
||||
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
|
||||
import gateway.run as gateway_run
|
||||
import gateway.pairing as pairing_mod
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
|
||||
# import time from whichever HERMES_HOME was set then. Per-test
|
||||
# HERMES_HOME redirection in conftest doesn't retroactively move it.
|
||||
# Override directly so pairing rate-limit state lives in this test's
|
||||
# tmp_path (and so stale state from prior xdist workers can't leak in).
|
||||
pairing_dir = tmp_path / "pairing"
|
||||
pairing_dir.mkdir()
|
||||
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
|
||||
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
||||
|
||||
# Clear env vars that could let all users through (loaded by
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|||
import pytest
|
||||
|
||||
from gateway.config import Platform, StreamingConfig
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
|
@ -133,6 +134,15 @@ class TestGetProxyUrl:
|
|||
assert runner._get_proxy_url() is None
|
||||
|
||||
|
||||
class TestResolveProxyUrl:
|
||||
def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
|
||||
assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
class TestRunAgentProxyDispatch:
|
||||
"""Test that _run_agent() delegates to proxy when configured."""
|
||||
|
||||
|
|
|
|||
|
|
@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
|
|||
async def stop(self):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
|
||||
# get_running_pid returns 42 before we kill the old gateway, then None
|
||||
# after remove_pid_file() clears the record (reflects real behavior).
|
||||
_pid_state = {"alive": True}
|
||||
def _mock_get_running_pid():
|
||||
return 42 if _pid_state["alive"] else None
|
||||
def _mock_remove_pid_file():
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
|
||||
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
|
||||
|
|
@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
|
|||
async def stop(self):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
|
||||
_pid_state = {"alive": True}
|
||||
def _mock_get_running_pid():
|
||||
return 42 if _pid_state["alive"] else None
|
||||
def _mock_remove_pid_file():
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)
|
||||
|
|
|
|||
|
|
@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt:
|
|||
assert "**User:** Alice" in prompt
|
||||
assert "Multi-user thread" not in prompt
|
||||
|
||||
def test_shared_non_thread_group_prompt_hides_single_user(self):
|
||||
"""Shared non-thread group sessions should avoid pinning one user."""
|
||||
config = GatewayConfig(
|
||||
platforms={
|
||||
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
|
||||
},
|
||||
group_sessions_per_user=False,
|
||||
)
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="-1002285219667",
|
||||
chat_name="Test Group",
|
||||
chat_type="group",
|
||||
user_name="Alice",
|
||||
)
|
||||
ctx = build_session_context(source, config)
|
||||
prompt = build_session_context_prompt(ctx)
|
||||
|
||||
assert "Multi-user session" in prompt
|
||||
assert "[sender name]" in prompt
|
||||
assert "**User:** Alice" not in prompt
|
||||
|
||||
def test_dm_thread_shows_user_not_multi(self):
|
||||
"""DM threads are single-user and should show User, not multi-user note."""
|
||||
config = GatewayConfig(
|
||||
|
|
|
|||
70
tests/gateway/test_shared_group_sender_prefix.py
Normal file
70
tests/gateway/test_shared_group_sender_prefix.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _make_runner(config: GatewayConfig) -> GatewayRunner:
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = config
|
||||
runner.adapters = {}
|
||||
runner._model = "openai/gpt-4.1-mini"
|
||||
runner._base_url = None
|
||||
return runner
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session():
|
||||
runner = _make_runner(
|
||||
GatewayConfig(
|
||||
platforms={
|
||||
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
|
||||
},
|
||||
group_sessions_per_user=False,
|
||||
)
|
||||
)
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="-1002285219667",
|
||||
chat_name="Test Group",
|
||||
chat_type="group",
|
||||
user_name="Alice",
|
||||
)
|
||||
event = MessageEvent(text="hello", source=source)
|
||||
|
||||
result = await runner._prepare_inbound_message_text(
|
||||
event=event,
|
||||
source=source,
|
||||
history=[],
|
||||
)
|
||||
|
||||
assert result == "[Alice] hello"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_preprocess_keeps_plain_text_for_default_group_sessions():
|
||||
runner = _make_runner(
|
||||
GatewayConfig(
|
||||
platforms={
|
||||
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
|
||||
},
|
||||
)
|
||||
)
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="-1002285219667",
|
||||
chat_name="Test Group",
|
||||
chat_type="group",
|
||||
user_name="Alice",
|
||||
)
|
||||
event = MessageEvent(text="hello", source=source)
|
||||
|
||||
result = await runner._prepare_inbound_message_text(
|
||||
event=event,
|
||||
source=source,
|
||||
history=[],
|
||||
)
|
||||
|
||||
assert result == "hello"
|
||||
|
|
@ -306,7 +306,13 @@ class TestSignalSessionSource:
|
|||
class TestSignalPhoneRedaction:
|
||||
@pytest.fixture(autouse=True)
|
||||
def _ensure_redaction_enabled(self, monkeypatch):
|
||||
# agent.redact snapshots _REDACT_ENABLED at import time from the
|
||||
# HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late —
|
||||
# the module was already imported during test collection with
|
||||
# whatever value was in the env then. Force the flag directly.
|
||||
# See skill: xdist-cross-test-pollution Pattern 5.
|
||||
monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
|
||||
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
|
||||
|
||||
def test_us_number(self):
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
|
|
|||
|
|
@ -19,6 +19,30 @@ class TestGatewayPidState:
|
|||
assert isinstance(payload["argv"], list)
|
||||
assert payload["argv"]
|
||||
|
||||
def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch):
|
||||
"""Regression: two concurrent --replace invocations must not both win.
|
||||
|
||||
Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s
|
||||
termination-wait would both write to gateway.pid, silently overwriting
|
||||
each other and leaving multiple gateway instances alive (#11718).
|
||||
"""
|
||||
import pytest
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
# First write wins.
|
||||
status.write_pid_file()
|
||||
assert (tmp_path / "gateway.pid").exists()
|
||||
|
||||
# Second write (simulating a racing --replace that missed the earlier
|
||||
# guards) must raise FileExistsError rather than clobber the record.
|
||||
with pytest.raises(FileExistsError):
|
||||
status.write_pid_file()
|
||||
|
||||
# Original record is preserved.
|
||||
payload = json.loads((tmp_path / "gateway.pid").read_text())
|
||||
assert payload["pid"] == os.getpid()
|
||||
|
||||
def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
pid_path = tmp_path / "gateway.pid"
|
||||
|
|
|
|||
|
|
@ -71,7 +71,17 @@ def test_group_messages_can_require_direct_trigger_via_config():
|
|||
assert adapter._should_process_message(_group_message("hello everyone")) is False
|
||||
assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True
|
||||
assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True
|
||||
assert adapter._should_process_message(_group_message("/status"), is_command=True) is True
|
||||
# Commands must also respect require_mention when it is enabled
|
||||
assert adapter._should_process_message(_group_message("/status"), is_command=True) is False
|
||||
# But commands with @mention still pass (Telegram emits a MENTION entity
|
||||
# for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler
|
||||
# rely on this same mechanism)
|
||||
assert adapter._should_process_message(
|
||||
_group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")])
|
||||
) is True
|
||||
# And commands still pass unconditionally when require_mention is disabled
|
||||
adapter_no_mention = _make_adapter(require_mention=False)
|
||||
assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True
|
||||
|
||||
|
||||
def test_free_response_chats_bypass_mention_requirement():
|
||||
|
|
|
|||
100
tests/gateway/test_telegram_webhook_secret.py
Normal file
100
tests/gateway/test_telegram_webhook_secret.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required.
|
||||
|
||||
Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET
|
||||
was not, python-telegram-bot received secret_token=None and the webhook
|
||||
endpoint accepted any HTTP POST.
|
||||
|
||||
The fix refuses to start the adapter in webhook mode without the secret.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
_repo = str(Path(__file__).resolve().parents[2])
|
||||
if _repo not in sys.path:
|
||||
sys.path.insert(0, _repo)
|
||||
|
||||
|
||||
class TestTelegramWebhookSecretRequired:
|
||||
"""Direct source-level check of the webhook-secret guard.
|
||||
|
||||
The guard is embedded in TelegramAdapter.connect() and hard to isolate
|
||||
via mocks (requires a full python-telegram-bot ApplicationBuilder
|
||||
chain). These tests exercise it via source inspection — verifying the
|
||||
check exists, raises RuntimeError with the advisory link, and only
|
||||
fires in webhook mode. End-to-end validation is covered by CI +
|
||||
manual deployment tests.
|
||||
"""
|
||||
|
||||
def _get_source(self) -> str:
|
||||
path = Path(_repo) / "gateway" / "platforms" / "telegram.py"
|
||||
return path.read_text(encoding="utf-8")
|
||||
|
||||
def test_webhook_branch_checks_secret(self):
|
||||
"""The webhook-mode branch of connect() must read
|
||||
TELEGRAM_WEBHOOK_SECRET and refuse when empty."""
|
||||
src = self._get_source()
|
||||
# The guard must appear after TELEGRAM_WEBHOOK_URL is set
|
||||
assert re.search(
|
||||
r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:',
|
||||
src, re.DOTALL,
|
||||
), (
|
||||
"TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET "
|
||||
"and raise when the secret is empty — see GHSA-3vpc-7q5r-276h"
|
||||
)
|
||||
|
||||
def test_guard_raises_runtime_error(self):
|
||||
"""The guard raises RuntimeError (not a silent log) so operators
|
||||
see the failure at startup."""
|
||||
src = self._get_source()
|
||||
# Between the "if not webhook_secret:" line and the next blank
|
||||
# line block, we should see a RuntimeError being raised
|
||||
guard_match = re.search(
|
||||
r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(',
|
||||
src,
|
||||
)
|
||||
assert guard_match, (
|
||||
"Missing webhook secret must raise RuntimeError — silent "
|
||||
"fall-through was the original GHSA-3vpc-7q5r-276h bypass"
|
||||
)
|
||||
|
||||
def test_guard_message_includes_advisory_link(self):
|
||||
"""The RuntimeError message should reference the advisory so
|
||||
operators can read the full context."""
|
||||
src = self._get_source()
|
||||
assert "GHSA-3vpc-7q5r-276h" in src, (
|
||||
"Guard error message must cite the advisory for operator context"
|
||||
)
|
||||
|
||||
def test_guard_message_explains_remediation(self):
|
||||
"""The error should tell the operator how to fix it."""
|
||||
src = self._get_source()
|
||||
# Should mention how to generate a secret
|
||||
assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, (
|
||||
"Guard error message should show operators how to set "
|
||||
"TELEGRAM_WEBHOOK_SECRET"
|
||||
)
|
||||
|
||||
def test_polling_branch_has_no_secret_guard(self):
|
||||
"""Polling mode (else-branch) must NOT require the webhook secret —
|
||||
polling authenticates via the bot token, not a webhook secret."""
|
||||
src = self._get_source()
|
||||
# The guard should appear inside the `if webhook_url:` branch,
|
||||
# not the `else:` polling branch. Rough check: the raise is
|
||||
# followed (within ~60 lines) by an `else:` that starts the
|
||||
# polling branch, and there's no secret-check in that polling
|
||||
# branch.
|
||||
webhook_block = re.search(
|
||||
r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n',
|
||||
src, re.DOTALL,
|
||||
)
|
||||
if webhook_block:
|
||||
webhook_body = webhook_block.group(1)
|
||||
polling_body = webhook_block.group(2)
|
||||
assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body
|
||||
assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body
|
||||
|
|
@ -175,3 +175,79 @@ class TestUsageCachedAgent:
|
|||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "Cost: included" in result
|
||||
|
||||
|
||||
class TestUsageAccountSection:
|
||||
"""Account-limits section appended to /usage output (PR #2486)."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_usage_command_includes_account_section(self, monkeypatch):
|
||||
agent = _make_mock_agent(provider="openai-codex")
|
||||
agent.base_url = "https://chatgpt.com/backend-api/codex"
|
||||
agent.api_key = "unused"
|
||||
runner = _make_runner(SK, cached_agent=agent)
|
||||
event = MagicMock()
|
||||
|
||||
monkeypatch.setattr(
|
||||
"gateway.run.fetch_account_usage",
|
||||
lambda provider, base_url=None, api_key=None: object(),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"gateway.run.render_account_usage_lines",
|
||||
lambda snapshot, markdown=False: [
|
||||
"📈 **Account limits**",
|
||||
"Provider: openai-codex (Pro)",
|
||||
"Session: 85% remaining (15% used)",
|
||||
],
|
||||
)
|
||||
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
|
||||
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
|
||||
mock_cost.return_value = MagicMock(amount_usd=None, status="included")
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "📊 **Session Token Usage**" in result
|
||||
assert "📈 **Account limits**" in result
|
||||
assert "Provider: openai-codex (Pro)" in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch):
|
||||
runner = _make_runner(SK)
|
||||
runner._session_db = MagicMock()
|
||||
runner._session_db.get_session.return_value = {
|
||||
"billing_provider": "openai-codex",
|
||||
"billing_base_url": "https://chatgpt.com/backend-api/codex",
|
||||
}
|
||||
session_entry = MagicMock()
|
||||
session_entry.session_id = "sess-1"
|
||||
runner.session_store.get_or_create_session.return_value = session_entry
|
||||
runner.session_store.load_transcript.return_value = [
|
||||
{"role": "user", "content": "earlier"},
|
||||
]
|
||||
|
||||
calls = {}
|
||||
|
||||
async def _fake_to_thread(fn, *args, **kwargs):
|
||||
calls["args"] = args
|
||||
calls["kwargs"] = kwargs
|
||||
return fn(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread)
|
||||
monkeypatch.setattr(
|
||||
"gateway.run.fetch_account_usage",
|
||||
lambda provider, base_url=None, api_key=None: object(),
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"gateway.run.render_account_usage_lines",
|
||||
lambda snapshot, markdown=False: [
|
||||
"📈 **Account limits**",
|
||||
"Provider: openai-codex (Pro)",
|
||||
],
|
||||
)
|
||||
|
||||
event = MagicMock()
|
||||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert calls["args"] == ("openai-codex",)
|
||||
assert calls["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert "📊 **Session Info**" in result
|
||||
assert "📈 **Account limits**" in result
|
||||
|
|
|
|||
|
|
@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation:
|
|||
leaked = set(moonshot_models) & coding_plan_only
|
||||
assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
|
||||
|
||||
def test_moonshot_list_contains_shared_models(self):
|
||||
def test_moonshot_list_non_empty(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
moonshot_models = _PROVIDER_MODELS["moonshot"]
|
||||
assert "kimi-k2.5" in moonshot_models
|
||||
assert "kimi-k2-thinking" in moonshot_models
|
||||
assert len(_PROVIDER_MODELS["moonshot"]) >= 1
|
||||
|
||||
def test_coding_plan_list_contains_plan_specific_models(self):
|
||||
def test_coding_plan_list_non_empty(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
coding_models = _PROVIDER_MODELS["kimi-coding"]
|
||||
assert "kimi-for-coding" in coding_models
|
||||
assert "kimi-k2-thinking-turbo" in coding_models
|
||||
assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -944,14 +940,12 @@ class TestHuggingFaceModels:
|
|||
def test_main_provider_models_has_huggingface(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
assert "huggingface" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["huggingface"]
|
||||
assert len(models) >= 6, "Expected at least 6 curated HF models"
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
|
||||
|
||||
def test_models_py_has_huggingface(self):
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "huggingface" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["huggingface"]
|
||||
assert len(models) >= 6
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
|
||||
|
||||
def test_model_lists_match(self):
|
||||
"""Model lists in main.py and models.py should be identical."""
|
||||
|
|
|
|||
|
|
@ -115,12 +115,12 @@ class TestArceeCredentials:
|
|||
|
||||
class TestArceeModelCatalog:
|
||||
def test_static_model_list(self):
|
||||
"""Arcee has a static _PROVIDER_MODELS catalog entry. Specific model
|
||||
names change with releases and don't belong in tests.
|
||||
"""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "arcee" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["arcee"]
|
||||
assert "trinity-large-thinking" in models
|
||||
assert "trinity-large-preview" in models
|
||||
assert "trinity-mini" in models
|
||||
assert len(_PROVIDER_MODELS["arcee"]) >= 1
|
||||
|
||||
def test_canonical_provider_entry(self):
|
||||
from hermes_cli.models import CANONICAL_PROVIDERS
|
||||
|
|
|
|||
|
|
@ -1011,3 +1011,466 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch):
|
|||
# Verify the auth store was NOT modified (no auto-import happened)
|
||||
after = json.loads((hermes_home / "auth.json").read_text())
|
||||
assert "openai-codex" not in after.get("providers", {})
|
||||
|
||||
|
||||
def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys):
|
||||
"""`hermes auth remove xai 1` must stick even when the env var is exported
|
||||
by the shell (not written into ~/.hermes/.env). Before PR for #13371 the
|
||||
removal silently restored on next load_pool() because _seed_from_env()
|
||||
re-read os.environ. Now env:<VAR> is suppressed in auth.json.
|
||||
"""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
# Simulate shell export (NOT written to .env)
|
||||
monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
|
||||
(hermes_home / ".env").write_text("")
|
||||
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"xai": [{
|
||||
"id": "env-1",
|
||||
"label": "XAI_API_KEY",
|
||||
"auth_type": "api_key",
|
||||
"priority": 0,
|
||||
"source": "env:XAI_API_KEY",
|
||||
"access_token": "sk-xai-shell-export",
|
||||
"base_url": "https://api.x.ai/v1",
|
||||
}]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from types import SimpleNamespace
|
||||
from hermes_cli.auth_commands import auth_remove_command
|
||||
auth_remove_command(SimpleNamespace(provider="xai", target="1"))
|
||||
|
||||
# Suppression marker written
|
||||
after = json.loads((hermes_home / "auth.json").read_text())
|
||||
assert "env:XAI_API_KEY" in after.get("suppressed_sources", {}).get("xai", [])
|
||||
|
||||
# Diagnostic printed pointing at the shell
|
||||
out = capsys.readouterr().out
|
||||
assert "still set in your shell environment" in out
|
||||
assert "Cleared XAI_API_KEY from .env" not in out # wasn't in .env
|
||||
|
||||
# Fresh simulation: shell re-exports, reload pool
|
||||
monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool("xai")
|
||||
assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed"
|
||||
|
||||
|
||||
def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys):
|
||||
"""When the env var lives only in ~/.hermes/.env (not the shell), the
|
||||
shell-hint should NOT be printed — avoid scaring the user about a
|
||||
non-existent shell export.
|
||||
"""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
# Key ONLY in .env, shell must not have it
|
||||
monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False)
|
||||
(hermes_home / ".env").write_text("DEEPSEEK_API_KEY=sk-ds-only\n")
|
||||
# Mimic load_env() populating os.environ
|
||||
monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only")
|
||||
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"deepseek": [{
|
||||
"id": "env-1",
|
||||
"label": "DEEPSEEK_API_KEY",
|
||||
"auth_type": "api_key",
|
||||
"priority": 0,
|
||||
"source": "env:DEEPSEEK_API_KEY",
|
||||
"access_token": "sk-ds-only",
|
||||
}]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from types import SimpleNamespace
|
||||
from hermes_cli.auth_commands import auth_remove_command
|
||||
auth_remove_command(SimpleNamespace(provider="deepseek", target="1"))
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "Cleared DEEPSEEK_API_KEY from .env" in out
|
||||
assert "still set in your shell environment" not in out
|
||||
assert (hermes_home / ".env").read_text().strip() == ""
|
||||
|
||||
|
||||
def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch):
|
||||
"""Re-adding a credential via `hermes auth add <provider>` clears any
|
||||
env:<VAR> suppression marker — strong signal the user wants auth back.
|
||||
Matches the Codex device_code re-link behaviour.
|
||||
"""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
monkeypatch.delenv("XAI_API_KEY", raising=False)
|
||||
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"providers": {},
|
||||
"suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
|
||||
},
|
||||
)
|
||||
|
||||
from types import SimpleNamespace
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
from hermes_cli.auth_commands import auth_add_command
|
||||
|
||||
assert is_source_suppressed("xai", "env:XAI_API_KEY") is True
|
||||
auth_add_command(SimpleNamespace(
|
||||
provider="xai", auth_type="api_key",
|
||||
api_key="sk-xai-manual", label="manual",
|
||||
))
|
||||
assert is_source_suppressed("xai", "env:XAI_API_KEY") is False
|
||||
|
||||
|
||||
def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch):
|
||||
"""_seed_from_env() must skip env:<VAR> sources that the user suppressed
|
||||
via `hermes auth remove`. This is the gate that prevents shell-exported
|
||||
keys from resurrecting removed credentials.
|
||||
"""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
|
||||
|
||||
(hermes_home / "auth.json").write_text(json.dumps({
|
||||
"version": 1,
|
||||
"providers": {},
|
||||
"suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
|
||||
}))
|
||||
|
||||
from agent.credential_pool import _seed_from_env
|
||||
|
||||
entries = []
|
||||
changed, active = _seed_from_env("xai", entries)
|
||||
assert changed is False
|
||||
assert entries == []
|
||||
assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch):
|
||||
"""OpenRouter is the special-case branch in _seed_from_env; verify it
|
||||
honours suppression too.
|
||||
"""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir(parents=True, exist_ok=True)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export")
|
||||
|
||||
(hermes_home / "auth.json").write_text(json.dumps({
|
||||
"version": 1,
|
||||
"providers": {},
|
||||
"suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]},
|
||||
}))
|
||||
|
||||
from agent.credential_pool import _seed_from_env
|
||||
|
||||
entries = []
|
||||
changed, active = _seed_from_env("openrouter", entries)
|
||||
assert changed is False
|
||||
assert entries == []
|
||||
assert active == set()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Unified credential-source stickiness — every source Hermes reads from has a
|
||||
# registered RemovalStep in agent.credential_sources, and every seeding path
|
||||
# gates on is_source_suppressed. Below: one test per source proving remove
|
||||
# sticks across a fresh load_pool() call.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch):
    """nous device_code must not re-seed from auth.json when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    # Live-looking nous tokens in auth.json, but device_code is suppressed.
    store = {
        "version": 1,
        "providers": {"nous": {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}},
        "suppressed_sources": {"nous": ["device_code"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_singletons

    pool = []
    changed, active = _seed_from_singletons("nous", pool)
    # Suppression wins even though valid tokens are present.
    assert changed is False
    assert pool == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch):
    """copilot gh_cli must not re-seed when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"copilot": ["gh_cli"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    # Stub resolve_copilot_token to return a live token — suppression must
    # win even when a token is resolvable.
    import hermes_cli.copilot_auth as ca
    monkeypatch.setattr(ca, "resolve_copilot_token", lambda: ("ghp_fake", "gh auth token"))

    from agent.credential_pool import _seed_from_singletons

    pool = []
    changed, active = _seed_from_singletons("copilot", pool)
    assert changed is False
    assert pool == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch):
    """qwen-oauth qwen-cli must not re-seed from ~/.qwen/oauth_creds.json when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"qwen-oauth": ["qwen-cli"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    # Pretend the qwen CLI creds resolve fine; suppression must still win.
    import hermes_cli.auth as ha
    fake_creds = {"api_key": "tok", "source": "qwen-cli", "base_url": "https://q"}
    monkeypatch.setattr(ha, "resolve_qwen_runtime_credentials", lambda **kw: fake_creds)

    from agent.credential_pool import _seed_from_singletons

    pool = []
    changed, active = _seed_from_singletons("qwen-oauth", pool)
    assert changed is False
    assert pool == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch):
    """anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    import yaml
    (home / "config.yaml").write_text(yaml.dump({"model": {"provider": "anthropic", "model": "claude"}}))
    (home / "auth.json").write_text(json.dumps({
        "version": 1,
        "providers": {},
        "suppressed_sources": {"anthropic": ["hermes_pkce"]},
    }))

    # Stub the readers so only hermes_pkce is "available"; claude_code returns None.
    import agent.anthropic_adapter as aa
    pkce_creds = {"accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000}
    monkeypatch.setattr(aa, "read_hermes_oauth_credentials", lambda: pkce_creds)
    monkeypatch.setattr(aa, "read_claude_code_credentials", lambda: None)

    from agent.credential_pool import _seed_from_singletons

    pool = []
    changed, active = _seed_from_singletons("anthropic", pool)
    # hermes_pkce suppressed, claude_code returns None → nothing should be seeded.
    assert pool == []
    assert "hermes_pkce" not in active
|
||||
|
||||
|
||||
def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch):
    """Custom provider config:<name> source must not re-seed when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    import yaml
    config = {
        "model": {},
        "custom_providers": [
            {"name": "my", "base_url": "https://c.example.com", "api_key": "sk-custom"},
        ],
    }
    (home / "config.yaml").write_text(yaml.dump(config))

    from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key
    pool_key = get_custom_provider_pool_key("https://c.example.com")

    # Suppress the config:<name> source for this pool key.
    (home / "auth.json").write_text(json.dumps({
        "version": 1,
        "providers": {},
        "suppressed_sources": {pool_key: ["config:my"]},
    }))

    entries = []
    changed, active = _seed_custom_pool(pool_key, entries)
    assert changed is False
    assert entries == []
    assert "config:my" not in active
|
||||
|
||||
|
||||
def test_credential_sources_registry_has_expected_steps():
    """Sanity check — the registry contains the expected RemovalSteps.

    Guards against accidentally dropping a step during future refactors.
    If you add a new credential source, add it to the expected set below.
    """
    from agent.credential_sources import _REGISTRY

    expected = {
        "gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
        "Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
        "~/.claude/.credentials.json",
        "~/.hermes/.anthropic_oauth.json",
        "auth.json providers.nous",
        "auth.json providers.openai-codex + ~/.codex/auth.json",
        "~/.qwen/oauth_creds.json",
        "Custom provider config.yaml api_key field",
    }
    descriptions = {step.description for step in _REGISTRY}
    assert descriptions == expected, f"Registry mismatch. Got: {descriptions}"
|
||||
|
||||
|
||||
def test_credential_sources_find_step_returns_none_for_manual():
    """Manual entries have nothing external to clean up — no step registered."""
    from agent.credential_sources import find_removal_step

    # No provider has a removal step for the "manual" source.
    for provider in ("openrouter", "xai"):
        assert find_removal_step(provider, "manual") is None
|
||||
|
||||
|
||||
def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch):
    """copilot env:GH_TOKEN must dispatch to the copilot step, not the
    generic env-var step. The copilot step handles the duplicate-source
    problem (same token seeded as both gh_cli and env:<VAR>); the generic
    env step would only suppress one of the variants.
    """
    from agent.credential_sources import find_removal_step

    copilot_step = find_removal_step("copilot", "env:GH_TOKEN")
    assert copilot_step is not None
    desc = copilot_step.description.lower()
    assert "copilot" in desc or "gh" in desc

    # Generic step still matches any other provider's env var.
    generic_step = find_removal_step("xai", "env:XAI_API_KEY")
    assert generic_step is not None
    assert "env-seeded" in generic_step.description.lower()
|
||||
|
||||
|
||||
def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch):
    """Removing any copilot source must suppress gh_cli + all env:* variants
    so the duplicate-seed paths don't resurrect the credential.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Seed the store with a single gh_cli-sourced copilot credential.
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "copilot": [{
                    "id": "c1",
                    "label": "gh auth token",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "gh_cli",
                    "access_token": "ghp_fake",
                }]
            },
        },
    )

    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    # Remove entry #1 — the only copilot credential in the pool.
    auth_remove_command(SimpleNamespace(provider="copilot", target="1"))

    # Every seeding variant must now be suppressed, not just the one removed.
    assert is_source_suppressed("copilot", "gh_cli")
    assert is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
    assert is_source_suppressed("copilot", "env:GH_TOKEN")
    assert is_source_suppressed("copilot", "env:GITHUB_TOKEN")
|
||||
|
||||
|
||||
def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch):
    """Re-adding a credential via `hermes auth add <provider>` clears ALL
    suppression markers for the provider, not just env:*. This matches
    the single "re-engage" semantic — the user wants auth back, period.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Start from a store where every copilot source is suppressed.
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {},
            "suppressed_sources": {
                "copilot": ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"],
            },
        },
    )

    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    # Manual add re-engages the provider...
    auth_add_command(SimpleNamespace(
        provider="copilot", auth_type="api_key",
        api_key="ghp-manual", label="m",
    ))

    # ...and every suppression marker — including the non-env gh_cli one —
    # must be cleared.
    assert not is_source_suppressed("copilot", "gh_cli")
    assert not is_source_suppressed("copilot", "env:GH_TOKEN")
    assert not is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
|
||||
|
||||
|
||||
def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch):
    """Removing a manual:device_code entry (from `hermes auth add openai-codex`)
    must suppress the canonical ``device_code`` key, not ``manual:device_code``.
    The re-seed gate in _seed_from_singletons checks ``device_code``.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Store holds both the singleton tokens and a pool entry whose source is
    # the manual:-prefixed variant of device_code.
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}},
            "credential_pool": {
                "openai-codex": [{
                    "id": "cdx",
                    "label": "manual-codex",
                    "auth_type": "oauth",
                    "priority": 0,
                    "source": "manual:device_code",
                    "access_token": "t",
                }]
            },
        },
    )

    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="openai-codex", target="1"))
    # Canonical key — this is what the re-seed gate actually checks.
    assert is_source_suppressed("openai-codex", "device_code")
|
||||
|
|
|
|||
|
|
@ -459,7 +459,8 @@ class TestCustomProviderCompatibility:
|
|||
migrate_config(interactive=False, quiet=True)
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
|
||||
assert raw["_config_version"] == 21
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
assert raw["providers"]["openai-direct"] == {
|
||||
"api": "https://api.openai.com/v1",
|
||||
"api_key": "test-key",
|
||||
|
|
@ -501,7 +502,8 @@ class TestCustomProviderCompatibility:
|
|||
assert compatible[0]["provider_key"] == "openai-direct"
|
||||
assert compatible[0]["api_mode"] == "codex_responses"
|
||||
|
||||
def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
|
||||
def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path):
|
||||
"""URL field precedence is base_url > url > api (PR #9332)."""
|
||||
config_path = tmp_path / "config.yaml"
|
||||
config_path.write_text(
|
||||
yaml.safe_dump(
|
||||
|
|
@ -526,7 +528,7 @@ class TestCustomProviderCompatibility:
|
|||
assert compatible == [
|
||||
{
|
||||
"name": "My Provider",
|
||||
"base_url": "https://api.example.com/v1",
|
||||
"base_url": "https://base.example.com/v1",
|
||||
"provider_key": "my-provider",
|
||||
}
|
||||
]
|
||||
|
|
@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig:
|
|||
migrate_config(interactive=False, quiet=True)
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
|
||||
assert raw["_config_version"] == 21
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
assert raw["display"]["tool_progress"] == "off"
|
||||
assert raw["display"]["interim_assistant_messages"] is True
|
||||
|
||||
|
|
@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig:
|
|||
migrate_config(interactive=False, quiet=True)
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
|
||||
assert raw["_config_version"] == 21
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
assert raw["discord"]["auto_thread"] is True
|
||||
assert raw["discord"]["channel_prompts"] == {}
|
||||
|
||||
|
|
|
|||
|
|
@ -125,18 +125,12 @@ class TestGeminiCredentials:
|
|||
# ── Model Catalog ──
|
||||
|
||||
class TestGeminiModelCatalog:
|
||||
def test_provider_models_exist(self):
|
||||
def test_provider_entry_exists(self):
|
||||
"""Gemini provider has a model catalog entry. Specific model names
|
||||
are data that changes with Google releases and don't belong in tests.
|
||||
"""
|
||||
assert "gemini" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["gemini"]
|
||||
assert "gemini-2.5-pro" in models
|
||||
assert "gemini-2.5-flash" in models
|
||||
assert "gemma-4-31b-it" not in models
|
||||
|
||||
def test_provider_models_has_3x(self):
|
||||
models = _PROVIDER_MODELS["gemini"]
|
||||
assert "gemini-3.1-pro-preview" in models
|
||||
assert "gemini-3-flash-preview" in models
|
||||
assert "gemini-3.1-flash-lite-preview" in models
|
||||
assert len(_PROVIDER_MODELS["gemini"]) >= 1
|
||||
|
||||
def test_provider_label(self):
|
||||
assert "gemini" in _PROVIDER_LABELS
|
||||
|
|
|
|||
|
|
@ -457,29 +457,62 @@ class TestValidateApiNotFound:
|
|||
assert "not found" in result["message"]
|
||||
|
||||
|
||||
# -- validate — API unreachable — reject with guidance ----------------
|
||||
# -- validate — API unreachable — soft-accept via catalog or warning --------
|
||||
|
||||
class TestValidateApiFallback:
|
||||
def test_any_model_rejected_when_api_down(self):
|
||||
result = _validate("anthropic/claude-opus-4.6", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
"""When /models is unreachable, the validator must accept the model (with
|
||||
a warning) rather than reject it outright — otherwise provider switches
|
||||
fail in the gateway for any provider whose /models endpoint is down or
|
||||
doesn't exist (e.g. opencode-go returns 404 HTML).
|
||||
|
||||
def test_unknown_model_also_rejected_when_api_down(self):
|
||||
result = _validate("anthropic/claude-next-gen", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
assert "could not reach" in result["message"].lower()
|
||||
Two paths:
|
||||
1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch):
|
||||
validate against it (recognized=True for known models,
|
||||
recognized=False with 'Note:' for unknown).
|
||||
2. Provider has no catalog: accept with a generic 'Note:' warning.
|
||||
|
||||
def test_zai_model_rejected_when_api_down(self):
|
||||
In both cases ``accepted`` and ``persist`` must be True so the gateway can
|
||||
write the ``_session_model_overrides`` entry.
|
||||
"""
|
||||
|
||||
def test_known_model_accepted_via_catalog_when_api_down(self):
|
||||
# Force the openrouter catalog lookup to return a deterministic list.
|
||||
with patch(
|
||||
"hermes_cli.models.provider_model_ids",
|
||||
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
|
||||
):
|
||||
result = _validate("anthropic/claude-opus-4.6", api_models=None)
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is True
|
||||
|
||||
def test_unknown_model_accepted_with_note_when_api_down(self):
|
||||
with patch(
|
||||
"hermes_cli.models.provider_model_ids",
|
||||
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
|
||||
):
|
||||
result = _validate("anthropic/claude-next-gen", api_models=None)
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is False
|
||||
# Message flags it as unverified against the catalog.
|
||||
assert "not found" in result["message"].lower() or "note" in result["message"].lower()
|
||||
|
||||
def test_zai_known_model_accepted_via_catalog_when_api_down(self):
|
||||
# glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]).
|
||||
result = _validate("glm-5", provider="zai", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is True
|
||||
|
||||
def test_unknown_provider_rejected_when_api_down(self):
|
||||
result = _validate("some-model", provider="totally-unknown", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
def test_unknown_provider_soft_accepted_when_api_down(self):
|
||||
# No catalog for unknown providers — soft-accept with a Note.
|
||||
with patch("hermes_cli.models.provider_model_ids", return_value=[]):
|
||||
result = _validate("some-model", provider="totally-unknown", api_models=None)
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is False
|
||||
assert "note" in result["message"].lower()
|
||||
|
||||
def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
|
||||
with patch(
|
||||
|
|
|
|||
|
|
@ -88,6 +88,131 @@ class TestFetchOpenRouterModels:
|
|||
|
||||
assert models == OPENROUTER_MODELS
|
||||
|
||||
    def test_filters_out_models_without_tool_support(self, monkeypatch):
        """Models whose supported_parameters omits 'tools' must not appear in the picker.

        hermes-agent is tool-calling-first — surfacing a non-tool model leads to
        immediate runtime failures when the user selects it. Ported from
        Kilo-Org/kilocode#9068.
        """
        # Minimal urlopen stand-in: context manager whose read() returns the
        # raw /models JSON payload.
        class _Resp:
            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def read(self):
                # opus-4.6 advertises tools → kept
                # nano-image has explicit supported_parameters that OMITS tools → dropped
                # qwen3.6-plus advertises tools → kept
                return (
                    b'{"data":['
                    b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},'
                    b'"supported_parameters":["temperature","tools","tool_choice"]},'
                    b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},'
                    b'"supported_parameters":["temperature","response_format"]},'
                    b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},'
                    b'"supported_parameters":["tools","temperature"]}'
                    b']}'
                )

        # Include the image-only id in the curated list so it has a chance to be surfaced.
        monkeypatch.setattr(
            _models_mod,
            "OPENROUTER_MODELS",
            [
                ("anthropic/claude-opus-4.6", ""),
                ("google/gemini-3-pro-image-preview", ""),
                ("qwen/qwen3.6-plus", ""),
            ],
        )
        # Clear the module-level cache so the patched urlopen payload is used.
        monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
        with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
            models = fetch_openrouter_models(force_refresh=True)

        ids = [mid for mid, _ in models]
        assert "anthropic/claude-opus-4.6" in ids
        assert "qwen/qwen3.6-plus" in ids
        # Image-only model advertised supported_parameters WITHOUT tools → must be dropped.
        assert "google/gemini-3-pro-image-preview" not in ids
|
||||
|
||||
    def test_permissive_when_supported_parameters_missing(self, monkeypatch):
        """Models missing the supported_parameters field keep appearing in the picker.

        Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older
        catalog snapshots) don't populate supported_parameters. Treating missing
        as 'unknown → allow' prevents the picker from silently emptying on
        those gateways.
        """
        # Minimal urlopen stand-in (context manager + read()).
        class _Resp:
            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def read(self):
                # No supported_parameters field at all on either entry.
                return (
                    b'{"data":['
                    b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},'
                    b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}'
                    b']}'
                )

        # Clear the module-level cache so the patched payload is actually parsed.
        monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
        with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
            models = fetch_openrouter_models(force_refresh=True)

        # Both entries survive: a missing field is treated as permissive.
        ids = [mid for mid, _ in models]
        assert "anthropic/claude-opus-4.6" in ids
        assert "qwen/qwen3.6-plus" in ids
|
||||
|
||||
|
||||
class TestOpenRouterToolSupportHelper:
    """Unit tests for _openrouter_model_supports_tools (Kilo port #9068)."""

    def test_tools_in_supported_parameters(self):
        from hermes_cli.models import _openrouter_model_supports_tools
        entry = {"id": "x", "supported_parameters": ["temperature", "tools"]}
        assert _openrouter_model_supports_tools(entry) is True

    def test_tools_missing_from_supported_parameters(self):
        from hermes_cli.models import _openrouter_model_supports_tools
        entry = {"id": "x", "supported_parameters": ["temperature", "response_format"]}
        assert _openrouter_model_supports_tools(entry) is False

    def test_supported_parameters_absent_is_permissive(self):
        """Missing field → allow (so older / non-OR gateways still work)."""
        from hermes_cli.models import _openrouter_model_supports_tools
        assert _openrouter_model_supports_tools({"id": "x"}) is True

    def test_supported_parameters_none_is_permissive(self):
        from hermes_cli.models import _openrouter_model_supports_tools
        entry = {"id": "x", "supported_parameters": None}
        assert _openrouter_model_supports_tools(entry) is True

    def test_supported_parameters_malformed_is_permissive(self):
        """Malformed (non-list) value → allow rather than silently drop."""
        from hermes_cli.models import _openrouter_model_supports_tools
        entry = {"id": "x", "supported_parameters": "tools,temperature"}
        assert _openrouter_model_supports_tools(entry) is True

    def test_non_dict_item_is_permissive(self):
        from hermes_cli.models import _openrouter_model_supports_tools
        for item in (None, "anthropic/claude-opus-4.6"):
            assert _openrouter_model_supports_tools(item) is True

    def test_empty_supported_parameters_list_drops_model(self):
        """Explicit empty list → no tools → drop."""
        from hermes_cli.models import _openrouter_model_supports_tools
        assert _openrouter_model_supports_tools({"id": "x", "supported_parameters": []}) is False
|
||||
|
||||
|
||||
class TestFindOpenrouterSlug:
|
||||
def test_exact_match(self):
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set():
|
|||
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
|
||||
|
||||
assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
|
||||
assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
|
||||
assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
|
||||
# opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
|
||||
# models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
|
||||
# the API is unavailable, e.g. in CI).
|
||||
|
|
|
|||
133
tests/hermes_cli/test_opencode_go_validation_fallback.py
Normal file
133
tests/hermes_cli/test_opencode_go_validation_fallback.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
"""Tests for the static-catalog fallback in validate_requested_model.
|
||||
|
||||
OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do
|
||||
NOT expose ``/models`` (the path returns the marketing site's HTML 404). This
|
||||
caused ``validate_requested_model`` to return ``accepted=False`` for every
|
||||
model on those providers, which in turn made ``switch_model()`` fail and the
|
||||
gateway's ``/model <name> --provider opencode-go`` command never write to
|
||||
``_session_model_overrides``.
|
||||
|
||||
These tests cover the catalog-fallback path: when ``fetch_api_models`` returns
|
||||
``None``, the validator must consult ``provider_model_ids()`` for the provider
|
||||
(populated from ``_PROVIDER_MODELS``) rather than rejecting outright.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.models import validate_requested_model
|
||||
|
||||
|
||||
# Canned probe_api_models() result representing an unreachable /models
# endpoint: no model list came back, no alternate base URL was suggested.
_UNREACHABLE_PROBE = {
    "models": None,
    "probed_url": "https://opencode.ai/zen/go/v1/models",
    "resolved_base_url": "https://opencode.ai/zen/go/v1",
    "suggested_base_url": None,
    "used_fallback": False,
}
|
||||
|
||||
|
||||
def _patched(func):
|
||||
"""Decorator: force fetch_api_models / probe_api_models to simulate an
|
||||
unreachable /models endpoint, proving the catalog path is used."""
|
||||
def wrapper(*args, **kwargs):
|
||||
with patch("hermes_cli.models.fetch_api_models", return_value=None), \
|
||||
patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE):
|
||||
return func(*args, **kwargs)
|
||||
wrapper.__name__ = func.__name__
|
||||
return wrapper
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# opencode-go: curated catalog in _PROVIDER_MODELS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_known_model_accepted():
    """A model present in the opencode-go curated catalog must be accepted
    even when /models is unreachable."""
    verdict = validate_requested_model("kimi-k2.6", "opencode-go")
    # Catalog hit → fully accepted, persisted, recognized, no warning.
    assert verdict["accepted"] is True
    assert verdict["persist"] is True
    assert verdict["recognized"] is True
    assert verdict["message"] is None
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_known_model_case_insensitive():
    """Catalog lookup is case-insensitive."""
    verdict = validate_requested_model("KIMI-K2.6", "opencode-go")
    assert verdict["accepted"] is True
    assert verdict["recognized"] is True
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_typo_auto_corrected():
    """A close typo (>= 0.9 similarity) is auto-corrected to the catalog
    entry."""
    # 'kimi-k2.55' vs 'kimi-k2.5' ratio ≈ 0.95 — within the 0.9 cutoff.
    verdict = validate_requested_model("kimi-k2.55", "opencode-go")
    assert verdict["accepted"] is True
    assert verdict["recognized"] is True
    assert verdict.get("corrected_model") == "kimi-k2.5"
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_unknown_model_accepted_with_suggestion():
    """An unknown model that has a medium-similarity match (>= 0.5 but < 0.9)
    is accepted with recognized=False and a 'similar models' hint. The key
    invariant: the gateway MUST be able to persist this override, so
    accepted/persist must both be True."""
    # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest, not to auto-correct.
    verdict = validate_requested_model("kimi-k3-preview", "opencode-go")
    assert verdict["accepted"] is True
    assert verdict["persist"] is True
    assert verdict["recognized"] is False
    message = verdict["message"]
    assert "kimi-k3-preview" in message
    assert "curated catalog" in message
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_totally_unknown_model_still_accepted():
    """A model with zero similarity to the catalog is still accepted (no
    suggestion line) so the user can try a model that hasn't made it into the
    curated list yet."""
    verdict = validate_requested_model("some-brand-new-model", "opencode-go")
    assert verdict["accepted"] is True
    assert verdict["persist"] is True
    assert verdict["recognized"] is False
    # No suggestion text (no close matches)
    message = verdict["message"]
    assert "Similar models" not in message
    assert "opencode" in message.lower() or "opencode go" in message.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# opencode-zen: same pattern as opencode-go
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_patched
def test_opencode_zen_known_model_accepted():
    """opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog."""
    verdict = validate_requested_model("kimi-k2", "opencode-zen")
    assert verdict["accepted"] is True
    assert verdict["recognized"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unknown provider with no catalog: soft-accept (honors the comment's intent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_patched
def test_provider_without_catalog_accepts_with_warning():
    """When a provider has no entry in _PROVIDER_MODELS and /models is
    unreachable, accept the model with a 'Note:' warning rather than reject.
    This matches the in-code comment: 'Accept and persist, but warn so typos
    don't silently break things.'"""
    # Use a made-up provider name that won't resolve to any catalog.
    verdict = validate_requested_model("some-model", "provider-that-does-not-exist")
    assert verdict["accepted"] is True
    assert verdict["persist"] is True
    assert verdict["recognized"] is False
    assert "Note:" in verdict["message"]
|
||||
|
|
@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch):
|
|||
|
||||
resolved = rp.resolve_runtime_provider(requested="my-server")
|
||||
assert "model" not in resolved
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GHSA-76xc-57q6-vm5m — Ollama URL substring leak
|
||||
#
|
||||
# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter).
|
||||
# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY
|
||||
# when the base_url "looks like" ollama.com. Previous implementation used
|
||||
# raw substring match; a custom base_url whose PATH or look-alike host
|
||||
# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint.
|
||||
# Fix: use base_url_host_matches (same helper as the OpenRouter sweep).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestOllamaUrlSubstringLeak:
|
||||
"""Call-site regression tests for the fix in _resolve_openrouter_runtime."""
|
||||
|
||||
def _make_cfg(self, base_url):
|
||||
return {"base_url": base_url, "api_key": "", "provider": "custom"}
|
||||
|
||||
def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch):
|
||||
"""http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with
|
||||
ollama.com in PATH. Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY."""
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret")
|
||||
monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
|
||||
"http://127.0.0.1:9000/ollama.com/v1"
|
||||
))
|
||||
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
|
||||
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert "ol-SECRET" not in resolved["api_key"], (
|
||||
"OLLAMA_API_KEY must not be sent to an endpoint whose "
|
||||
"hostname is not ollama.com (GHSA-76xc-57q6-vm5m)"
|
||||
)
|
||||
assert resolved["api_key"] == "oa-secret"
|
||||
|
||||
def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch):
|
||||
"""ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY
|
||||
must not be sent."""
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
|
||||
monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
|
||||
"http://ollama.com.attacker.test:9000/v1"
|
||||
))
|
||||
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
|
||||
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert "ol-SECRET" not in resolved["api_key"]
|
||||
assert resolved["api_key"] == "oa-secret"
|
||||
|
||||
def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch):
|
||||
"""https://ollama.com/v1 — legit Ollama Cloud. OLLAMA_API_KEY
|
||||
should be used."""
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
|
||||
monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
|
||||
"https://ollama.com/v1"
|
||||
))
|
||||
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
|
||||
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["api_key"] == "ol-legit-key"
|
||||
|
||||
def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch):
|
||||
"""https://api.ollama.com/v1 — legit subdomain."""
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
|
||||
monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
|
||||
"https://api.ollama.com/v1"
|
||||
))
|
||||
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
|
||||
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["api_key"] == "ol-legit-key"
|
||||
|
|
|
|||
148
tests/hermes_cli/test_web_server_host_header.py
Normal file
148
tests/hermes_cli/test_web_server_host_header.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation.
|
||||
|
||||
DNS rebinding defence: a victim browser that has the dashboard open
|
||||
could be tricked into fetching from an attacker-controlled hostname
|
||||
that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help —
|
||||
the browser now treats the attacker origin as same-origin. Validating
|
||||
the Host header at the application layer rejects the attack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
_repo = str(Path(__file__).resolve().parents[1])
|
||||
if _repo not in sys.path:
|
||||
sys.path.insert(0, _repo)
|
||||
|
||||
|
||||
class TestHostHeaderValidator:
|
||||
"""Unit test the _is_accepted_host helper directly — cheaper and
|
||||
more thorough than spinning up the full FastAPI app."""
|
||||
|
||||
def test_loopback_bind_accepts_loopback_names(self):
|
||||
from hermes_cli.web_server import _is_accepted_host
|
||||
|
||||
for bound in ("127.0.0.1", "localhost", "::1"):
|
||||
for host_header in (
|
||||
"127.0.0.1", "127.0.0.1:9119",
|
||||
"localhost", "localhost:9119",
|
||||
"[::1]", "[::1]:9119",
|
||||
):
|
||||
assert _is_accepted_host(host_header, bound), (
|
||||
f"bound={bound} must accept host={host_header}"
|
||||
)
|
||||
|
||||
def test_loopback_bind_rejects_attacker_hostnames(self):
|
||||
"""The core rebinding defence: attacker-controlled hosts that
|
||||
TTL-flip to 127.0.0.1 must be rejected."""
|
||||
from hermes_cli.web_server import _is_accepted_host
|
||||
|
||||
for bound in ("127.0.0.1", "localhost"):
|
||||
for attacker in (
|
||||
"evil.example",
|
||||
"evil.example:9119",
|
||||
"rebind.attacker.test:80",
|
||||
"localhost.attacker.test", # subdomain trick
|
||||
"127.0.0.1.evil.test", # lookalike IP prefix
|
||||
"", # missing Host
|
||||
):
|
||||
assert not _is_accepted_host(attacker, bound), (
|
||||
f"bound={bound} must reject attacker host={attacker!r}"
|
||||
)
|
||||
|
||||
def test_zero_zero_bind_accepts_anything(self):
|
||||
"""0.0.0.0 means operator explicitly opted into all-interfaces
|
||||
(requires --insecure). No Host-layer defence is possible — rely
|
||||
on operator network controls."""
|
||||
from hermes_cli.web_server import _is_accepted_host
|
||||
|
||||
for host in ("10.0.0.5", "evil.example", "my-server.corp.net"):
|
||||
assert _is_accepted_host(host, "0.0.0.0")
|
||||
assert _is_accepted_host(host + ":9119", "0.0.0.0")
|
||||
|
||||
def test_explicit_non_loopback_bind_requires_exact_match(self):
|
||||
"""If the operator bound to a specific non-loopback hostname,
|
||||
the Host header must match exactly."""
|
||||
from hermes_cli.web_server import _is_accepted_host
|
||||
|
||||
assert _is_accepted_host("my-server.corp.net", "my-server.corp.net")
|
||||
assert _is_accepted_host("my-server.corp.net:9119", "my-server.corp.net")
|
||||
# Different host — reject
|
||||
assert not _is_accepted_host("evil.example", "my-server.corp.net")
|
||||
# Loopback — reject (we bound to a specific non-loopback name)
|
||||
assert not _is_accepted_host("localhost", "my-server.corp.net")
|
||||
|
||||
def test_case_insensitive_comparison(self):
|
||||
"""Host headers are case-insensitive per RFC — accept variations."""
|
||||
from hermes_cli.web_server import _is_accepted_host
|
||||
|
||||
assert _is_accepted_host("LOCALHOST", "127.0.0.1")
|
||||
assert _is_accepted_host("LocalHost:9119", "127.0.0.1")
|
||||
|
||||
|
||||
class TestHostHeaderMiddleware:
|
||||
"""End-to-end test via the FastAPI app — verify the middleware
|
||||
rejects bad Host headers with 400."""
|
||||
|
||||
def test_rebinding_request_rejected(self):
|
||||
from fastapi.testclient import TestClient
|
||||
from hermes_cli.web_server import app
|
||||
|
||||
# Simulate start_server having set the bound_host
|
||||
app.state.bound_host = "127.0.0.1"
|
||||
try:
|
||||
client = TestClient(app)
|
||||
# The TestClient sends Host: testserver by default — which is
|
||||
# NOT a loopback alias, so the middleware must reject it.
|
||||
resp = client.get(
|
||||
"/api/status",
|
||||
headers={"Host": "evil.example"},
|
||||
)
|
||||
assert resp.status_code == 400
|
||||
assert "Invalid Host header" in resp.json()["detail"]
|
||||
finally:
|
||||
# Clean up so other tests don't inherit the bound_host
|
||||
if hasattr(app.state, "bound_host"):
|
||||
del app.state.bound_host
|
||||
|
||||
def test_legit_loopback_request_accepted(self):
|
||||
from fastapi.testclient import TestClient
|
||||
from hermes_cli.web_server import app
|
||||
|
||||
app.state.bound_host = "127.0.0.1"
|
||||
try:
|
||||
client = TestClient(app)
|
||||
# /api/status is in _PUBLIC_API_PATHS — passes auth — so the
|
||||
# only thing that can reject is the host header middleware
|
||||
resp = client.get(
|
||||
"/api/status",
|
||||
headers={"Host": "localhost:9119"},
|
||||
)
|
||||
# Either 200 (endpoint served) or some other non-400 —
|
||||
# just not the host-rejection 400
|
||||
assert resp.status_code != 400 or (
|
||||
"Invalid Host header" not in resp.json().get("detail", "")
|
||||
)
|
||||
finally:
|
||||
if hasattr(app.state, "bound_host"):
|
||||
del app.state.bound_host
|
||||
|
||||
def test_no_bound_host_skips_validation(self):
|
||||
"""If app.state.bound_host isn't set (e.g. running under test
|
||||
infra without calling start_server), middleware must pass through
|
||||
rather than crash."""
|
||||
from fastapi.testclient import TestClient
|
||||
from hermes_cli.web_server import app
|
||||
|
||||
# Make sure bound_host isn't set
|
||||
if hasattr(app.state, "bound_host"):
|
||||
del app.state.bound_host
|
||||
|
||||
client = TestClient(app)
|
||||
resp = client.get("/api/status")
|
||||
# Should get through to the status endpoint, not a 400
|
||||
assert resp.status_code != 400
|
||||
|
|
@ -136,13 +136,15 @@ class TestXiaomiModelCatalog:
|
|||
assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi"
|
||||
|
||||
def test_static_model_list_fallback(self):
|
||||
"""Static _PROVIDER_MODELS fallback must exist for model picker."""
|
||||
"""Static _PROVIDER_MODELS fallback must exist for model picker.
|
||||
|
||||
We only assert the provider key is present — the specific model
|
||||
names are data that changes with upstream releases and doesn't
|
||||
belong in tests.
|
||||
"""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "xiaomi" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["xiaomi"]
|
||||
assert "mimo-v2-pro" in models
|
||||
assert "mimo-v2-omni" in models
|
||||
assert "mimo-v2-flash" in models
|
||||
assert len(_PROVIDER_MODELS["xiaomi"]) >= 1
|
||||
|
||||
def test_list_agentic_models_mock(self, monkeypatch):
|
||||
"""When models.dev returns Xiaomi data, list_agentic_models should return models."""
|
||||
|
|
|
|||
|
|
@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider:
|
|||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
|
||||
class TestQwenAlibabaFamily:
|
||||
"""Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire.
|
||||
|
||||
Upstream pi-mono #3392 / #3393 documented that these providers serve
|
||||
zero cache hits without Anthropic-style markers. Regression reported
|
||||
by community user (Qwen3.6 on opencode-go burning through
|
||||
subscription with no cache). Envelope layout, not native, because the
|
||||
wire format is OpenAI chat.completions.
|
||||
"""
|
||||
|
||||
def test_qwen_on_opencode_go_caches_with_envelope_layout(self):
|
||||
agent = _make_agent(
|
||||
provider="opencode-go",
|
||||
base_url="https://opencode.ai/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3.6-plus",
|
||||
)
|
||||
should, native = agent._anthropic_prompt_cache_policy()
|
||||
assert should is True, "Qwen on opencode-go must cache"
|
||||
assert native is False, "opencode-go is OpenAI-wire; envelope layout"
|
||||
|
||||
def test_qwen35_plus_on_opencode_go(self):
|
||||
agent = _make_agent(
|
||||
provider="opencode-go",
|
||||
base_url="https://opencode.ai/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3.5-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen_on_opencode_zen_caches(self):
|
||||
agent = _make_agent(
|
||||
provider="opencode",
|
||||
base_url="https://opencode.ai/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3-coder-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen_on_direct_alibaba_caches(self):
|
||||
agent = _make_agent(
|
||||
provider="alibaba",
|
||||
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3-coder",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_non_qwen_on_opencode_go_does_not_cache(self):
|
||||
# GLM / Kimi on opencode-go don't need markers (they have automatic
|
||||
# server-side caching or none at all).
|
||||
agent = _make_agent(
|
||||
provider="opencode-go",
|
||||
base_url="https://opencode.ai/v1",
|
||||
api_mode="chat_completions",
|
||||
model="glm-5",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
def test_kimi_on_opencode_go_does_not_cache(self):
|
||||
agent = _make_agent(
|
||||
provider="opencode-go",
|
||||
base_url="https://opencode.ai/v1",
|
||||
api_mode="chat_completions",
|
||||
model="kimi-k2.5",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
def test_qwen_on_openrouter_not_affected(self):
|
||||
# Qwen via OpenRouter falls through — OpenRouter has its own
|
||||
# upstream caching arrangement for Qwen (provider-dependent).
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3-coder",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
|
||||
class TestExplicitOverrides:
|
||||
"""Policy accepts keyword overrides for switch_model / fallback activation."""
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch):
|
|||
assert _get_proxy_from_env() == "http://real-proxy:8080"
|
||||
|
||||
|
||||
def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
|
||||
assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
@patch("run_agent.OpenAI")
|
||||
def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch):
|
||||
"""With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool.
|
||||
|
|
|
|||
|
|
@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase):
|
|||
agent._active_children = []
|
||||
agent._active_children_lock = threading.Lock()
|
||||
agent.quiet_mode = True
|
||||
# Provider/model/base_url are read by stale-timeout resolution paths;
|
||||
# the specific values don't matter for interrupt tests.
|
||||
agent.provider = "openrouter"
|
||||
agent.model = "test/model"
|
||||
agent._base_url = "http://localhost:1234"
|
||||
return agent
|
||||
|
||||
def test_parent_interrupt_sets_child_flag(self):
|
||||
|
|
|
|||
|
|
@ -952,6 +952,84 @@ class TestBuildApiKwargs:
|
|||
|
||||
assert "temperature" not in kwargs
|
||||
|
||||
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
|
||||
top-level params, matching Kimi CLI's default behavior."""
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["max_tokens"] == 32000
|
||||
assert kwargs["reasoning_effort"] == "medium"
|
||||
|
||||
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
|
||||
"""reasoning_effort should reflect reasoning_config.effort when set."""
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
agent.reasoning_config = {"enabled": True, "effort": "high"}
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["reasoning_effort"] == "high"
|
||||
|
||||
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
|
||||
"""Kimi endpoint should send extra_body.thinking={"type":"enabled"}
|
||||
to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_kimi_coding_endpoint_disables_thinking(self, agent):
|
||||
"""When reasoning_config.enabled=False, thinking should be disabled
|
||||
and reasoning_effort should be omitted entirely — mirroring Kimi
|
||||
CLI's with_thinking("off") which maps to reasoning_effort=None."""
|
||||
agent.base_url = "https://api.kimi.com/coding/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-for-coding"
|
||||
agent.reasoning_config = {"enabled": False}
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "disabled"}
|
||||
assert "reasoning_effort" not in kwargs
|
||||
|
||||
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""api.moonshot.ai should get the same Kimi-compatible params."""
|
||||
agent.base_url = "https://api.moonshot.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-k2.5"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["max_tokens"] == 32000
|
||||
assert kwargs["reasoning_effort"] == "medium"
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
|
||||
"""api.moonshot.cn (China endpoint) should get the same params."""
|
||||
agent.base_url = "https://api.moonshot.cn/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.model = "kimi-k2.5"
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
|
||||
assert kwargs["max_tokens"] == 32000
|
||||
assert kwargs["reasoning_effort"] == "medium"
|
||||
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_provider_preferences_injected(self, agent):
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.providers_allowed = ["Anthropic"]
|
||||
|
|
|
|||
203
tests/test_account_usage.py
Normal file
203
tests/test_account_usage.py
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
from datetime import datetime, timezone
|
||||
|
||||
from agent.account_usage import (
|
||||
AccountUsageSnapshot,
|
||||
AccountUsageWindow,
|
||||
fetch_account_usage,
|
||||
render_account_usage_lines,
|
||||
)
|
||||
|
||||
|
||||
class _Response:
|
||||
def __init__(self, payload, status_code=200):
|
||||
self._payload = payload
|
||||
self.status_code = status_code
|
||||
|
||||
def raise_for_status(self):
|
||||
if self.status_code >= 400:
|
||||
raise RuntimeError(f"HTTP {self.status_code}")
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
|
||||
class _Client:
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def get(self, url, headers=None):
|
||||
return _Response(self._payload)
|
||||
|
||||
|
||||
class _RoutingClient:
|
||||
def __init__(self, payloads):
|
||||
self._payloads = payloads
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def get(self, url, headers=None):
|
||||
return _Response(self._payloads[url])
|
||||
|
||||
|
||||
def test_fetch_account_usage_codex(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage.resolve_codex_runtime_credentials",
|
||||
lambda refresh_if_expiring=True: {
|
||||
"provider": "openai-codex",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
"api_key": "access-token",
|
||||
},
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage._read_codex_tokens",
|
||||
lambda: {"tokens": {"account_id": "acct_123"}},
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage.httpx.Client",
|
||||
lambda timeout=15.0: _Client(
|
||||
{
|
||||
"plan_type": "pro",
|
||||
"rate_limit": {
|
||||
"primary_window": {
|
||||
"used_percent": 15,
|
||||
"reset_at": 1_900_000_000,
|
||||
"limit_window_seconds": 18000,
|
||||
},
|
||||
"secondary_window": {
|
||||
"used_percent": 40,
|
||||
"reset_at": 1_900_500_000,
|
||||
"limit_window_seconds": 604800,
|
||||
},
|
||||
},
|
||||
"credits": {"has_credits": True, "balance": 12.5},
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
snapshot = fetch_account_usage("openai-codex")
|
||||
|
||||
assert snapshot is not None
|
||||
assert snapshot.plan == "Pro"
|
||||
assert len(snapshot.windows) == 2
|
||||
assert snapshot.windows[0].label == "Session"
|
||||
assert snapshot.windows[0].used_percent == 15.0
|
||||
assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc)
|
||||
assert "Credits balance: $12.50" in snapshot.details
|
||||
|
||||
|
||||
def test_render_account_usage_lines_includes_reset_and_provider():
|
||||
snapshot = AccountUsageSnapshot(
|
||||
provider="openai-codex",
|
||||
source="usage_api",
|
||||
fetched_at=datetime.now(timezone.utc),
|
||||
plan="Pro",
|
||||
windows=(
|
||||
AccountUsageWindow(
|
||||
label="Session",
|
||||
used_percent=25,
|
||||
reset_at=datetime.now(timezone.utc),
|
||||
),
|
||||
),
|
||||
details=("Credits balance: $9.99",),
|
||||
)
|
||||
lines = render_account_usage_lines(snapshot)
|
||||
|
||||
assert lines[0] == "📈 Account limits"
|
||||
assert "openai-codex (Pro)" in lines[1]
|
||||
assert "Session: 75% remaining (25% used)" in lines[2]
|
||||
assert "Credits balance: $9.99" in lines[3]
|
||||
|
||||
|
||||
def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage.resolve_runtime_provider",
|
||||
lambda requested, explicit_base_url=None, explicit_api_key=None: {
|
||||
"provider": "openrouter",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_key": "sk-test",
|
||||
},
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage.httpx.Client",
|
||||
lambda timeout=10.0: _RoutingClient(
|
||||
{
|
||||
"https://openrouter.ai/api/v1/credits": {
|
||||
"data": {"total_credits": 300.0, "total_usage": 10.92}
|
||||
},
|
||||
"https://openrouter.ai/api/v1/key": {
|
||||
"data": {
|
||||
"limit": 100.0,
|
||||
"limit_remaining": 70.0,
|
||||
"limit_reset": "monthly",
|
||||
"usage": 12.5,
|
||||
"usage_daily": 0.5,
|
||||
"usage_weekly": 2.0,
|
||||
"usage_monthly": 8.0,
|
||||
"rate_limit": {"requests": -1, "interval": "10s"},
|
||||
}
|
||||
},
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
snapshot = fetch_account_usage("openrouter")
|
||||
|
||||
assert snapshot is not None
|
||||
assert snapshot.windows == (
|
||||
AccountUsageWindow(
|
||||
label="API key quota",
|
||||
used_percent=30.0,
|
||||
detail="$70.00 of $100.00 remaining • resets monthly",
|
||||
),
|
||||
)
|
||||
assert "Credits balance: $289.08" in snapshot.details
|
||||
assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details
|
||||
assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot))
|
||||
|
||||
|
||||
def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage.resolve_runtime_provider",
|
||||
lambda requested, explicit_base_url=None, explicit_api_key=None: {
|
||||
"provider": "openrouter",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_key": "sk-test",
|
||||
},
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"agent.account_usage.httpx.Client",
|
||||
lambda timeout=10.0: _RoutingClient(
|
||||
{
|
||||
"https://openrouter.ai/api/v1/credits": {
|
||||
"data": {"total_credits": 100.0, "total_usage": 25.5}
|
||||
},
|
||||
"https://openrouter.ai/api/v1/key": {
|
||||
"data": {
|
||||
"limit": None,
|
||||
"limit_remaining": None,
|
||||
"usage": 25.5,
|
||||
"usage_daily": 1.25,
|
||||
"usage_weekly": 4.5,
|
||||
"usage_monthly": 18.0,
|
||||
}
|
||||
},
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
snapshot = fetch_account_usage("openrouter")
|
||||
|
||||
assert snapshot is not None
|
||||
assert snapshot.windows == ()
|
||||
assert "Credits balance: $74.50" in snapshot.details
|
||||
assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details
|
||||
|
|
@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases:
|
|||
|
||||
def test_trailing_dot_on_domain_stripped(self):
|
||||
assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True
|
||||
|
||||
|
||||
class TestOllamaUrlHostCheck:
|
||||
"""GHSA-76xc-57q6-vm5m — ollama.com was using a raw substring match for
|
||||
credential selection (same bug class as GHSA-xf8p-v2cg-h7h5 for OpenRouter).
|
||||
These tests lock in that the base_url_host_matches fix correctly rejects
|
||||
the same attack vectors for Ollama.
|
||||
"""
|
||||
|
||||
def test_ollama_com_path_injection_rejected(self):
|
||||
"""http://evil.test/ollama.com/v1 — ollama.com appears in the path,
|
||||
not the host. Must not be treated as Ollama Cloud."""
|
||||
assert base_url_host_matches(
|
||||
"http://127.0.0.1:9000/ollama.com/v1", "ollama.com"
|
||||
) is False
|
||||
|
||||
def test_ollama_com_subdomain_lookalike_rejected(self):
|
||||
"""ollama.com.attacker.test is a separate host, not ollama.com."""
|
||||
assert base_url_host_matches(
|
||||
"http://ollama.com.attacker.test:9000/v1", "ollama.com"
|
||||
) is False
|
||||
|
||||
def test_ollama_com_localtest_me_rejected(self):
|
||||
"""ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me
|
||||
but its true hostname is localtest.me, not ollama.com."""
|
||||
assert base_url_host_matches(
|
||||
"http://ollama.com.localtest.me:9000/v1", "ollama.com"
|
||||
) is False
|
||||
|
||||
def test_ollama_ai_is_not_ollama_com(self):
|
||||
"""Different TLD. ollama.ai is not ollama.com."""
|
||||
assert base_url_host_matches(
|
||||
"https://ollama.ai/v1", "ollama.com"
|
||||
) is False
|
||||
|
||||
def test_localhost_ollama_port_is_not_ollama_com(self):
|
||||
"""http://localhost:11434/v1 is a local Ollama install, but its
|
||||
hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret)
|
||||
must not be sent."""
|
||||
assert base_url_host_matches(
|
||||
"http://localhost:11434/v1", "ollama.com"
|
||||
) is False
|
||||
|
||||
def test_genuine_ollama_com_matches(self):
|
||||
assert base_url_host_matches(
|
||||
"https://ollama.com/api/generate", "ollama.com"
|
||||
) is True
|
||||
|
||||
def test_ollama_com_subdomain_matches(self):
|
||||
assert base_url_host_matches(
|
||||
"https://api.ollama.com/v1", "ollama.com"
|
||||
) is True
|
||||
|
|
|
|||
|
|
@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch):
|
|||
|
||||
def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path):
|
||||
"""End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results."""
|
||||
import yaml
|
||||
|
||||
hermes_home = Path(os.environ["HERMES_HOME"])
|
||||
plugins_dir = hermes_home / "plugins"
|
||||
plugin_dir = plugins_dir / "transform_result_canon"
|
||||
|
|
@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat
|
|||
'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n',
|
||||
encoding="utf-8",
|
||||
)
|
||||
# Plugins are opt-in — must be listed in plugins.enabled to load.
|
||||
cfg_path = hermes_home / "config.yaml"
|
||||
cfg_path.write_text(
|
||||
yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Force a fresh plugin manager so the new config is picked up.
|
||||
plugins_mod._plugin_manager = plugins_mod.PluginManager()
|
||||
plugins_mod.discover_plugins()
|
||||
|
||||
out = _run_handle_function_call(
|
||||
|
|
|
|||
|
|
@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults:
|
|||
|
||||
browser_cfg = DEFAULT_CONFIG["browser"]
|
||||
assert browser_cfg["camofox"]["managed_persistence"] is False
|
||||
|
||||
def test_config_version_matches_current_schema(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# The current schema version is tracked globally; unrelated default
|
||||
# options may bump it after browser defaults are added.
|
||||
assert DEFAULT_CONFIG["_config_version"] == 20
|
||||
|
|
|
|||
|
|
@ -172,28 +172,60 @@ class TestTerminalIntegration:
|
|||
assert blocked_var not in result
|
||||
assert "PATH" in result
|
||||
|
||||
def test_passthrough_allows_blocklisted_var(self):
|
||||
from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
|
||||
def test_passthrough_cannot_override_provider_blocklist(self):
|
||||
"""GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept
|
||||
Hermes provider credentials — that was the bypass where a skill
|
||||
could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and
|
||||
defeat the execute_code sandbox scrubbing."""
|
||||
from tools.environments.local import (
|
||||
_sanitize_subprocess_env,
|
||||
_HERMES_PROVIDER_ENV_BLOCKLIST,
|
||||
)
|
||||
|
||||
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
|
||||
# Attempt to register — must be silently refused (logged warning).
|
||||
register_env_passthrough([blocked_var])
|
||||
|
||||
# is_env_passthrough must NOT report it as allowed
|
||||
assert not is_env_passthrough(blocked_var)
|
||||
|
||||
# Sanitizer still strips the var from subprocess env
|
||||
env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
|
||||
result = _sanitize_subprocess_env(env)
|
||||
assert blocked_var in result
|
||||
assert result[blocked_var] == "secret_value"
|
||||
assert blocked_var not in result
|
||||
assert "PATH" in result
|
||||
|
||||
def test_make_run_env_blocklist_override_rejected(self):
    """_make_run_env must NOT expose a blocklisted var to subprocess env
    even after a skill attempts to register it via passthrough."""
    import os
    from tools.environments.local import (
        _HERMES_PROVIDER_ENV_BLOCKLIST,
        _make_run_env,
    )

    protected = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
    os.environ[protected] = "secret_value"
    try:
        # Baseline: no passthrough registered — the var must stay blocked.
        assert protected not in _make_run_env({})

        # Skill tries to register it — registration must be refused, so
        # the var stays blocked afterwards too.
        register_env_passthrough([protected])
        assert protected not in _make_run_env({})
    finally:
        os.environ.pop(protected, None)
|
||||
def test_non_hermes_api_key_still_registerable(self):
    """Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
    Hermes provider credentials and must still pass through — skills
    that legitimately wrap third-party APIs must keep working."""
    # TENOR_API_KEY is a real example — used by the gif-search skill;
    # the second entry is an arbitrary skill-specific variable.
    for var in ("TENOR_API_KEY", "MY_SKILL_CUSTOM_CONFIG"):
        register_env_passthrough([var])
        assert is_env_passthrough(var)
|
||||
|
|
|
|||
|
|
@ -230,3 +230,102 @@ class TestEscapeDriftGuard:
|
|||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None
|
||||
assert count == 1
|
||||
|
||||
|
||||
class TestFindClosestLines:
    """Behavioral checks for tools.fuzzy_match.find_closest_lines."""

    def setup_method(self):
        from tools.fuzzy_match import find_closest_lines
        self.find_closest_lines = find_closest_lines

    def test_finds_similar_line(self):
        source = "def foo():\n    pass\ndef bar():\n    return 1\n"
        hint = self.find_closest_lines("def baz():", source)
        assert "def foo" in hint or "def bar" in hint

    def test_returns_empty_for_no_match(self):
        hint = self.find_closest_lines(
            "xyzzy_no_match_possible_!!!", "completely different content here"
        )
        assert hint == ""

    def test_returns_empty_for_empty_inputs(self):
        assert self.find_closest_lines("", "some content") == ""
        assert self.find_closest_lines("old string", "") == ""

    def test_includes_context_lines(self):
        source = "line1\nline2\ndef target():\n    pass\nline5\n"
        assert "target" in self.find_closest_lines("def target():", source)

    def test_includes_line_numbers(self):
        # Output is expected in "N| content" form.
        hint = self.find_closest_lines(
            "def foo():", "line1\nline2\ndef foo():\n    pass\n"
        )
        assert "|" in hint
|
||||
|
||||
|
||||
class TestFormatNoMatchHint:
    """Gating tests for format_no_match_hint — the shared helper that decides
    whether a 'Did you mean?' snippet should be appended to an error.
    """

    def setup_method(self):
        from tools.fuzzy_match import format_no_match_hint
        self.fmt = format_no_match_hint

    def test_fires_on_could_not_find_with_match(self):
        """Classic no-match: similar content exists → hint fires."""
        source = "def foo():\n    pass\ndef bar():\n    pass\n"
        hint = self.fmt(
            "Could not find a match for old_string in the file",
            0, "def baz():", source,
        )
        assert "Did you mean" in hint
        assert "foo" in hint or "bar" in hint

    def test_silent_on_ambiguous_match_error(self):
        """'Found N matches' is not a missing-match failure — no hint."""
        hint = self.fmt(
            "Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.",
            0, "aaa", "aaa bbb aaa\n",
        )
        assert hint == ""

    def test_silent_on_escape_drift_error(self):
        """Escape-drift errors are intentional blocks — hint would mislead."""
        hint = self.fmt(
            "Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...",
            0, "x = \\'1\\'", "x = 1\n",
        )
        assert hint == ""

    def test_silent_on_identical_strings(self):
        """old_string == new_string — hint irrelevant."""
        hint = self.fmt(
            "old_string and new_string are identical",
            0, "foo", "foo bar\n",
        )
        assert hint == ""

    def test_silent_when_match_count_nonzero(self):
        """If match succeeded, we shouldn't be in the error path — defense in depth."""
        hint = self.fmt(
            "Could not find a match for old_string in the file",
            1, "foo", "foo bar\n",
        )
        assert hint == ""

    def test_silent_on_none_error(self):
        """No error at all — no hint."""
        assert self.fmt(None, 0, "foo", "bar\n") == ""

    def test_silent_when_no_similar_content(self):
        """Even for a valid no-match error, skip hint when nothing similar exists."""
        hint = self.fmt(
            "Could not find a match for old_string in the file",
            0, "totally_unique_xyzzy_qux", "abc\nxyz\n",
        )
        assert hint == ""
|
||||
|
||||
|
|
|
|||
39
tests/tools/test_image_generation_env.py
Normal file
39
tests/tools/test_image_generation_env.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""FAL_KEY env var normalization (whitespace-only treated as unset)."""
|
||||
|
||||
|
||||
def test_fal_key_whitespace_is_unset(monkeypatch):
    # A whitespace-only FAL_KEY must NOT register as configured; the managed
    # gateway fallback is disabled so only the env var is under test.
    monkeypatch.setenv("FAL_KEY", " ")
    from tools import image_generation_tool

    no_gateway = lambda: None
    monkeypatch.setattr(image_generation_tool, "_resolve_managed_fal_gateway", no_gateway)

    key_configured = image_generation_tool.check_fal_api_key()
    assert key_configured is False
|
||||
|
||||
|
||||
def test_fal_key_valid(monkeypatch):
    # A real-looking key must register as configured even with the managed
    # gateway fallback disabled.
    monkeypatch.setenv("FAL_KEY", "sk-test")
    from tools import image_generation_tool

    no_gateway = lambda: None
    monkeypatch.setattr(image_generation_tool, "_resolve_managed_fal_gateway", no_gateway)

    key_configured = image_generation_tool.check_fal_api_key()
    assert key_configured is True
|
||||
|
||||
|
||||
def test_fal_key_empty_is_unset(monkeypatch):
    # The empty string is the other "set but meaningless" shape — must be
    # treated exactly like an unset FAL_KEY.
    monkeypatch.setenv("FAL_KEY", "")
    from tools import image_generation_tool

    no_gateway = lambda: None
    monkeypatch.setattr(image_generation_tool, "_resolve_managed_fal_gateway", no_gateway)

    key_configured = image_generation_tool.check_fal_api_key()
    assert key_configured is False
|
||||
162
tests/tools/test_local_shell_init.py
Normal file
162
tests/tools/test_local_shell_init.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc.
|
||||
|
||||
A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that
|
||||
register themselves there (nvm, asdf, pyenv) stay invisible to the
|
||||
environment snapshot built by ``LocalEnvironment.init_session``. These
|
||||
tests verify the config-driven prelude that fixes that.
|
||||
"""
|
||||
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.environments.local import (
|
||||
LocalEnvironment,
|
||||
_prepend_shell_init,
|
||||
_read_terminal_shell_init_config,
|
||||
_resolve_shell_init_files,
|
||||
)
|
||||
|
||||
|
||||
class TestResolveShellInitFiles:
    """_resolve_shell_init_files(): config → ordered list of existing rc files."""

    # Patch target for the config reader, shared by every test below.
    _CFG = "tools.environments.local._read_terminal_shell_init_config"

    def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch):
        rc = tmp_path / ".bashrc"
        rc.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))

        # Default config: auto_source_bashrc on, no explicit list.
        with patch(self._CFG, return_value=([], True)):
            files = _resolve_shell_init_files()

        assert files == [str(rc)]

    def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch):
        # No bashrc written.
        monkeypatch.setenv("HOME", str(tmp_path))

        with patch(self._CFG, return_value=([], True)):
            files = _resolve_shell_init_files()

        assert files == []

    def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch):
        (tmp_path / ".bashrc").write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))

        with patch(self._CFG, return_value=([], False)):
            files = _resolve_shell_init_files()

        assert files == []

    def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch):
        rc = tmp_path / ".bashrc"
        rc.write_text('export FROM_BASHRC=1\n')
        extra = tmp_path / "custom.sh"
        extra.write_text('export FROM_CUSTOM=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))

        # auto_source_bashrc stays True but the explicit list takes precedence.
        with patch(self._CFG, return_value=([str(extra)], True)):
            files = _resolve_shell_init_files()

        assert files == [str(extra)]
        assert str(rc) not in files

    def test_expands_home_and_env_vars(self, tmp_path, monkeypatch):
        target = tmp_path / "rc" / "custom.sh"
        target.parent.mkdir()
        target.write_text('export A=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        monkeypatch.setenv("CUSTOM_RC_DIR", str(tmp_path / "rc"))

        with patch(self._CFG, return_value=(["~/rc/custom.sh"], False)):
            via_tilde = _resolve_shell_init_files()

        with patch(self._CFG, return_value=(["${CUSTOM_RC_DIR}/custom.sh"], False)):
            via_env = _resolve_shell_init_files()

        assert via_tilde == [str(target)]
        assert via_env == [str(target)]

    def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HOME", str(tmp_path))
        missing = str(tmp_path / "does-not-exist.sh")
        with patch(self._CFG, return_value=([missing], False)):
            files = _resolve_shell_init_files()

        assert files == []
|
||||
|
||||
|
||||
class TestPrependShellInit:
    """_prepend_shell_init(): wrap a command with guarded rc-sourcing lines."""

    def test_empty_list_returns_command_unchanged(self):
        assert _prepend_shell_init("echo hi", []) == "echo hi"

    def test_prepends_guarded_source_lines(self):
        wrapped = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"])
        assert "echo hi" in wrapped
        # Each file is sourced through a guarded [ -r … ] && . '…' || true
        # pattern so a missing/broken rc can't abort the bootstrap.
        for fragment in ("/tmp/a.sh", "/tmp/b.sh", "|| true", "set +e"):
            assert fragment in wrapped

    def test_escapes_single_quotes(self):
        wrapped = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"])
        # The path must survive as the shell receives it; the embedded single
        # quote is escaped as '\'' rather than breaking the outer quoting.
        assert "o'\\''malley" in wrapped
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"),
    reason="Requires bash; CI sandbox may strip it.",
)
class TestSnapshotEndToEnd:
    """Spin up a real LocalEnvironment and confirm the snapshot sources
    extra init files."""

    def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch):
        rc_file = tmp_path / "custom-init.sh"
        rc_file.write_text(
            'export HERMES_SHELL_INIT_PROBE="probe-ok"\n'
            'export PATH="/opt/shell-init-probe/bin:$PATH"\n'
        )

        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(rc_file)], False),
        ):
            env = LocalEnvironment(cwd=str(tmp_path), timeout=15)
            try:
                probe = env.execute(
                    'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"'
                )
            finally:
                # Always tear the environment down, even if execute raises.
                env.cleanup()

        text = probe.get("output", "")
        assert "PROBE=probe-ok" in text
        assert "/opt/shell-init-probe/bin" in text
|
||||
252
tests/tools/test_mcp_circuit_breaker.py
Normal file
252
tests/tools/test_mcp_circuit_breaker.py
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
"""Tests for MCP tool-handler circuit-breaker recovery.
|
||||
|
||||
The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit
|
||||
calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD``
|
||||
consecutive times, then *transition back to a usable state* once the
|
||||
server has had time to recover (or an explicit reconnect succeeds).
|
||||
|
||||
The original implementation only had two states — closed and open — with
|
||||
no mechanism to transition back to closed, so a tripped breaker stayed
|
||||
tripped for the lifetime of the process. These tests lock in the
|
||||
half-open / cooldown / reconnect-resets-breaker behavior that fixes
|
||||
that.
|
||||
"""
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
pytest.importorskip("mcp.client.auth.oauth2")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _install_stub_server(mcp_tool_module, name: str, call_tool_impl):
|
||||
"""Install a fake MCP server in the module's registry.
|
||||
|
||||
``call_tool_impl`` is an async function stored at ``session.call_tool``
|
||||
(it's what the tool handler invokes).
|
||||
"""
|
||||
server = MagicMock()
|
||||
server.name = name
|
||||
session = MagicMock()
|
||||
session.call_tool = call_tool_impl
|
||||
server.session = session
|
||||
server._reconnect_event = MagicMock()
|
||||
server._ready = MagicMock()
|
||||
server._ready.is_set.return_value = True
|
||||
|
||||
mcp_tool_module._servers[name] = server
|
||||
mcp_tool_module._server_error_counts.pop(name, None)
|
||||
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
|
||||
mcp_tool_module._server_breaker_opened_at.pop(name, None)
|
||||
return server
|
||||
|
||||
|
||||
def _cleanup(mcp_tool_module, name: str) -> None:
|
||||
mcp_tool_module._servers.pop(name, None)
|
||||
mcp_tool_module._server_error_counts.pop(name, None)
|
||||
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
|
||||
mcp_tool_module._server_breaker_opened_at.pop(name, None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path):
    """After a tripped breaker's cooldown elapses, the *next* call must
    actually execute against the session (half-open probe). When the
    probe succeeds, the breaker resets to fully closed.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    call_count = {"n": 0}

    async def _call_tool_success(*a, **kw):
        # Shape of a successful MCP tool result: not an error, a single
        # text content block, no structured content.
        call_count["n"] += 1
        result = MagicMock()
        result.isError = False
        block = MagicMock()
        block.text = "ok"
        result.content = [block]
        result.structuredContent = None
        return result

    _install_stub_server(mcp_tool, "srv", _call_tool_success)
    mcp_tool._ensure_mcp_loop()

    try:
        # Trip the breaker by setting the count at/above threshold and
        # stamping the open-time to "now".
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        fake_now = [1000.0]

        def _fake_monotonic():
            return fake_now[0]

        monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
        # The breaker-open timestamp dict is introduced by the fix; on
        # a pre-fix build it won't exist, which will cause the test to
        # fail at the .get() inside the gate (correct — the fix is
        # required for this state to be tracked at all).
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)

        handler = _make_tool_handler("srv", "tool1", 10.0)

        # Before cooldown: must short-circuit (no session call).
        result = handler({})
        parsed = json.loads(result)
        assert "error" in parsed, parsed
        assert "unreachable" in parsed["error"].lower()
        assert call_count["n"] == 0, (
            "breaker should short-circuit before cooldown elapses"
        )

        # Advance past cooldown → next call is a half-open probe that
        # actually hits the session.
        fake_now[0] += cooldown + 1.0

        result = handler({})
        parsed = json.loads(result)
        assert parsed.get("result") == "ok", parsed
        assert call_count["n"] == 1, "half-open probe should invoke session"

        # On probe success the breaker must close (count reset to 0).
        assert mcp_tool._server_error_counts.get("srv", 0) == 0
    finally:
        _cleanup(mcp_tool, "srv")
|
||||
|
||||
|
||||
def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path):
    """If the half-open probe fails, the breaker must re-arm the
    cooldown (not let every subsequent call through).
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    call_count = {"n": 0}

    async def _call_tool_fails(*a, **kw):
        # The server never recovers in this scenario.
        call_count["n"] += 1
        raise RuntimeError("still broken")

    _install_stub_server(mcp_tool, "srv", _call_tool_fails)
    mcp_tool._ensure_mcp_loop()

    try:
        # Trip the breaker and stamp the open-time to a controllable clock.
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        fake_now = [1000.0]

        def _fake_monotonic():
            return fake_now[0]

        monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)

        handler = _make_tool_handler("srv", "tool1", 10.0)

        # Advance past cooldown, run probe, expect failure.
        fake_now[0] += cooldown + 1.0
        result = handler({})
        parsed = json.loads(result)
        assert "error" in parsed
        assert call_count["n"] == 1, "probe should invoke session once"

        # The probe failure must have re-armed the cooldown — another
        # immediate call should short-circuit, not invoke session again.
        result = handler({})
        parsed = json.loads(result)
        assert "unreachable" in parsed.get("error", "").lower()
        assert call_count["n"] == 1, (
            "breaker should re-open and block further calls after probe failure"
        )
    finally:
        _cleanup(mcp_tool, "srv")
|
||||
|
||||
|
||||
def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path):
    """When the auth-recovery path successfully reconnects the server,
    the breaker should be cleared so subsequent calls aren't gated on a
    stale failure count — even if the post-reconnect retry itself fails.

    This locks in the fix-#2 contract: a successful reconnect is
    sufficient evidence that the server is viable again. Under the old
    implementation, reset only happened on retry *success*, so a
    reconnect+retry-failure left the counter pinned above threshold
    forever.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools import mcp_tool
    from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests
    from mcp.client.auth import OAuthFlowError

    reset_manager_for_tests()

    async def _call_tool_unused(*a, **kw):  # pragma: no cover
        raise AssertionError("session.call_tool should not be reached in this test")

    _install_stub_server(mcp_tool, "srv", _call_tool_unused)
    mcp_tool._ensure_mcp_loop()

    # Open the breaker well above threshold, with a recent open-time so
    # it would short-circuit everything without a reset.
    mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2
    if hasattr(mcp_tool, "_server_breaker_opened_at"):
        import time as _time
        mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic()

    # Force handle_401 to claim recovery succeeded.
    mgr = get_manager()

    async def _h401(name, token=None):
        return True

    monkeypatch.setattr(mgr, "handle_401", _h401)

    try:
        # Retry fails *after* the successful reconnect. Under the old
        # implementation this bumps an already-tripped counter even
        # higher. Under fix #2 the reset happens on successful
        # reconnect, and the post-retry bump only raises the fresh
        # count to 1 — still below threshold.
        def _retry_call():
            raise OAuthFlowError("still failing post-reconnect")

        result = mcp_tool._handle_auth_error_and_retry(
            "srv",
            OAuthFlowError("initial"),
            _retry_call,
            "tools/call test",
        )
        # The call as a whole still surfaces needs_reauth because the
        # retry itself didn't succeed, but the breaker state must
        # reflect the successful reconnect.
        assert result is not None
        parsed = json.loads(result)
        assert parsed.get("needs_reauth") is True, parsed

        # Post-reconnect count was reset to 0, then the failing retry
        # bumped it to exactly 1 — well below threshold.
        count = mcp_tool._server_error_counts.get("srv", 0)
        assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, (
            f"successful reconnect must reset the breaker below threshold; "
            f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}"
        )
    finally:
        _cleanup(mcp_tool, "srv")
|
||||
|
|
@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning
|
|||
|
||||
|
||||
def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path):
|
||||
import yaml
|
||||
|
||||
hermes_home = Path(os.environ["HERMES_HOME"])
|
||||
plugins_dir = hermes_home / "plugins"
|
||||
plugin_dir = plugins_dir / "terminal_transform"
|
||||
|
|
@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp
|
|||
'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n',
|
||||
encoding="utf-8",
|
||||
)
|
||||
# Plugins are opt-in — must be listed in plugins.enabled to load.
|
||||
cfg_path = hermes_home / "config.yaml"
|
||||
cfg_path.write_text(
|
||||
yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Force a fresh plugin manager so the new config is picked up.
|
||||
plugins_mod._plugin_manager = plugins_mod.PluginManager()
|
||||
plugins_mod.discover_plugins()
|
||||
|
||||
long_output = "X" * 60000
|
||||
|
|
|
|||
198
tests/tools/test_tts_kittentts.py
Normal file
198
tests/tools/test_tts_kittentts.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
"""Tests for the KittenTTS local provider in tools/tts_tool.py."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clean_env(monkeypatch):
    # Autouse: make sure HERMES_SESSION_PLATFORM is unset for every test in
    # this module so ambient environment state can't leak into assertions.
    for key in ("HERMES_SESSION_PLATFORM",):
        monkeypatch.delenv(key, raising=False)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clear_kittentts_cache():
    """Reset the module-level model cache between tests."""
    from tools import tts_tool as _tt
    # Clear before the test (stale entries from earlier tests) …
    _tt._kittentts_model_cache.clear()
    yield
    # … and after it (entries this test created).
    _tt._kittentts_model_cache.clear()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_kittentts_module():
    """Inject a fake kittentts + soundfile module that return stub objects.

    Yields ``(fake_model, fake_cls)`` so tests can assert on how the model
    class was instantiated and how ``generate`` was called.
    """
    fake_model = MagicMock()
    # 24kHz float32 PCM at ~2s of silence
    fake_model.generate.return_value = np.zeros(48000, dtype=np.float32)
    fake_cls = MagicMock(return_value=fake_model)
    fake_kittentts = MagicMock()
    fake_kittentts.KittenTTS = fake_cls

    # Stub soundfile — the real package isn't installed in CI venv, and
    # _generate_kittentts does `import soundfile as sf` at runtime.
    fake_sf = MagicMock()
    def _fake_write(path, audio, samplerate):
        # Emulate writing a real file so downstream path checks succeed.
        import pathlib
        pathlib.Path(path).write_bytes(b"RIFF\x00\x00\x00\x00WAVEfmt fake")
    fake_sf.write = _fake_write

    # patch.dict restores sys.modules on exit, so the fakes are scoped to
    # the test that requested this fixture.
    with patch.dict(
        "sys.modules",
        {"kittentts": fake_kittentts, "soundfile": fake_sf},
    ):
        yield fake_model, fake_cls
|
||||
|
||||
|
||||
class TestGenerateKittenTts:
    """Unit tests for tools.tts_tool._generate_kittentts (local TTS path)."""

    def test_successful_wav_generation(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        fake_model, fake_cls = mock_kittentts_module
        output_path = str(tmp_path / "test.wav")
        result = _generate_kittentts("Hello world", output_path, {})

        # Returns the path it wrote, and actually produced a file.
        assert result == output_path
        assert (tmp_path / "test.wav").exists()
        fake_cls.assert_called_once()
        fake_model.generate.assert_called_once()

    def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        fake_model, _ = mock_kittentts_module
        config = {
            "kittentts": {
                "model": "KittenML/kitten-tts-mini-0.8",
                "voice": "Luna",
                "speed": 1.25,
                "clean_text": False,
            }
        }
        _generate_kittentts("Hi there", str(tmp_path / "out.wav"), config)

        # Config values must be forwarded to model.generate as keyword args.
        call_kwargs = fake_model.generate.call_args.kwargs
        assert call_kwargs["voice"] == "Luna"
        assert call_kwargs["speed"] == 1.25
        assert call_kwargs["clean_text"] is False

    def test_default_model_and_voice(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import (
            DEFAULT_KITTENTTS_MODEL,
            DEFAULT_KITTENTTS_VOICE,
            _generate_kittentts,
        )

        fake_model, fake_cls = mock_kittentts_module
        _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})

        # Empty config falls back to the module-level defaults.
        fake_cls.assert_called_once_with(DEFAULT_KITTENTTS_MODEL)
        assert fake_model.generate.call_args.kwargs["voice"] == DEFAULT_KITTENTTS_VOICE

    def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        _, fake_cls = mock_kittentts_module
        _generate_kittentts("One", str(tmp_path / "a.wav"), {})
        _generate_kittentts("Two", str(tmp_path / "b.wav"), {})

        # Same model name → class instantiated exactly once
        assert fake_cls.call_count == 1

    def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        _, fake_cls = mock_kittentts_module
        _generate_kittentts(
            "A", str(tmp_path / "a.wav"),
            {"kittentts": {"model": "KittenML/kitten-tts-nano-0.8-int8"}},
        )
        _generate_kittentts(
            "B", str(tmp_path / "b.wav"),
            {"kittentts": {"model": "KittenML/kitten-tts-mini-0.8"}},
        )

        # Distinct model names each get their own cache entry/instantiation.
        assert fake_cls.call_count == 2

    def test_non_wav_extension_triggers_ffmpeg_conversion(
        self, tmp_path, mock_kittentts_module, monkeypatch
    ):
        """Non-.wav output path causes WAV → target ffmpeg conversion."""
        from tools import tts_tool as _tt

        calls = []

        def fake_shutil_which(cmd):
            # Pretend ffmpeg (and only ffmpeg) is on PATH.
            return "/usr/bin/ffmpeg" if cmd == "ffmpeg" else None

        def fake_run(cmd, check=False, timeout=None, **kw):
            calls.append(cmd)
            # Emulate ffmpeg writing the output file
            import pathlib
            out_path = cmd[-1]
            pathlib.Path(out_path).write_bytes(b"fake-mp3-data")
            return MagicMock(returncode=0)

        monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which)
        monkeypatch.setattr(_tt.subprocess, "run", fake_run)

        output_path = str(tmp_path / "test.mp3")
        result = _tt._generate_kittentts("Hi", output_path, {})

        assert result == output_path
        assert len(calls) == 1
        assert calls[0][0] == "/usr/bin/ffmpeg"

    def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch):
        """When kittentts package is not installed, _import_kittentts raises."""
        import sys
        # sys.modules[name] = None makes `import name` raise.
        monkeypatch.setitem(sys.modules, "kittentts", None)
        from tools.tts_tool import _generate_kittentts

        with pytest.raises((ImportError, TypeError)):
            _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
|
||||
|
||||
|
||||
class TestCheckKittenttsAvailable:
    """_check_kittentts_available() mirrors importlib.util.find_spec."""

    def test_reports_available_when_package_present(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        spec_stub = MagicMock()

        def _find_spec(name):
            return spec_stub if name == "kittentts" else None

        monkeypatch.setattr(importlib.util, "find_spec", _find_spec)
        assert _check_kittentts_available() is True

    def test_reports_unavailable_when_package_missing(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        monkeypatch.setattr(importlib.util, "find_spec", lambda name: None)
        assert _check_kittentts_available() is False
|
||||
|
||||
|
||||
class TestDispatcherBranch:
    """Dispatcher-level behavior of text_to_speech_tool for the kittentts provider."""

    def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path):
        """When provider=kittentts but package missing, return JSON error with setup hint."""
        import sys
        # sys.modules[name] = None makes `import kittentts` raise.
        monkeypatch.setitem(sys.modules, "kittentts", None)
        # Point HERMES_HOME at a temp dir so we control the config it reads.
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

        from tools.tts_tool import text_to_speech_tool

        # Write a config telling it to use kittentts
        import yaml
        (tmp_path / "config.yaml").write_text(
            yaml.safe_dump({"tts": {"provider": "kittentts"}})
        )

        result = json.loads(text_to_speech_tool(text="Hello"))
        assert result["success"] is False
        assert "kittentts" in result["error"].lower()
        # Error must tell the user how to install the provider.
        assert "hermes setup tts" in result["error"].lower()
|
||||
|
|
@ -933,6 +933,58 @@ class TestEnableVoiceModeReal:
|
|||
assert cli._voice_mode is True
|
||||
|
||||
|
||||
class TestVoiceBeepConfigReal:
    """Tests the CLI voice beep toggle."""

    @patch("hermes_cli.config.load_config", return_value={"voice": {}})
    def test_beeps_enabled_by_default(self, _cfg):
        """An empty voice config leaves beeps on."""
        cli = _make_voice_cli()
        assert cli._voice_beeps_enabled() is True

    @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
    def test_beeps_can_be_disabled(self, _cfg):
        """beep_enabled: false in config turns beeps off."""
        cli = _make_voice_cli()
        assert cli._voice_beeps_enabled() is False

    # NOTE: @patch decorators apply bottom-up, so after ``self`` the mock
    # parameters arrive in reverse decorator order: _cfg (load_config),
    # _req (check_voice_requirements), mock_create, mock_beep, mock_thread,
    # _cp (_cprint).
    @patch("cli._cprint")
    @patch("cli.threading.Thread")
    @patch("tools.voice_mode.play_beep")
    @patch("tools.voice_mode.create_audio_recorder")
    @patch(
        "tools.voice_mode.check_voice_requirements",
        return_value={
            "available": True,
            "audio_available": True,
            "stt_available": True,
            "details": "OK",
            "missing_packages": [],
        },
    )
    @patch(
        "hermes_cli.config.load_config",
        return_value={
            "voice": {
                "beep_enabled": False,
                "silence_threshold": 200,
                "silence_duration": 3.0,
            }
        },
    )
    def test_start_recording_skips_beep_when_disabled(
        self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp
    ):
        """With beeps disabled, starting a recording must not play the start beep."""
        recorder = MagicMock()
        recorder.supports_silence_autostop = True
        mock_create.return_value = recorder
        mock_thread.return_value = MagicMock(start=MagicMock())

        cli = _make_voice_cli()
        cli._voice_start_recording()

        # Recording still starts; only the audible cue is suppressed.
        recorder.start.assert_called_once()
        mock_beep.assert_not_called()
class TestDisableVoiceModeReal:
|
||||
"""Tests _disable_voice_mode with real CLI instance."""
|
||||
|
||||
|
|
@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal:
|
|||
cli._voice_stop_and_transcribe()
|
||||
assert cli._pending_input.empty()
|
||||
|
||||
@patch("cli._cprint")
@patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
@patch("tools.voice_mode.play_beep")
def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp):
    """With beeps disabled, the no-speech path must not play the error beep.

    recorder.stop() returning None models "no audio captured"; the
    stop-and-transcribe flow should bail out quietly instead of beeping.
    """
    recorder = MagicMock()
    recorder.stop.return_value = None
    cli = _make_voice_cli(_voice_recording=True, _voice_recorder=recorder)
    cli._voice_stop_and_transcribe()
    mock_beep.assert_not_called()
@patch("cli._cprint")
|
||||
@patch("cli.os.unlink")
|
||||
@patch("cli.os.path.isfile", return_value=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue