Merge remote-tracking branch 'origin/main' into sid/types-and-lints

# Conflicts:
#	gateway/platforms/base.py
#	gateway/platforms/qqbot/adapter.py
#	gateway/platforms/slack.py
#	hermes_cli/main.py
#	scripts/batch_runner.py
#	tools/skills_tool.py
#	uv.lock
This commit is contained in:
alt-glitch 2026-04-21 20:28:45 +05:30
commit a9ed7cb3b4
117 changed files with 7791 additions and 611 deletions

View file

@ -0,0 +1,170 @@
"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr.
Two related ACP approval-flow issues:
- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards`
took the non-interactive auto-approve path and never consulted the
ACP-supplied callback.
- qg5c: `_approval_callback` was a module-global in terminal_tool;
overlapping ACP sessions overwrote each other's callback slot.
Both fixed together by:
1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call).
2. Storing the callback in thread-local state so concurrent executor
threads don't collide.
"""
import os
import threading
from unittest.mock import MagicMock
import pytest
class TestThreadLocalApprovalCallback:
    """GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so
    concurrent ACP sessions don't stomp on each other's handlers."""

    def test_set_and_get_in_same_thread(self):
        """Within one thread, the getter returns the exact object stored."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        cb1 = lambda cmd, desc: "once"  # noqa: E731
        set_approval_callback(cb1)
        # Identity (`is`), not equality — the slot must hold this very object.
        assert _get_approval_callback() is cb1

    def test_callback_not_visible_in_different_thread(self):
        """Thread A's callback is NOT visible to Thread B."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        cb_a = lambda cmd, desc: "thread_a"  # noqa: E731
        cb_b = lambda cmd, desc: "thread_b"  # noqa: E731
        seen_in_a = []
        seen_in_b = []

        def thread_a():
            set_approval_callback(cb_a)
            # Pause so thread B has time to set its own callback
            import time
            time.sleep(0.05)
            seen_in_a.append(_get_approval_callback())

        def thread_b():
            set_approval_callback(cb_b)
            import time
            time.sleep(0.05)
            seen_in_b.append(_get_approval_callback())

        ta = threading.Thread(target=thread_a)
        tb = threading.Thread(target=thread_b)
        ta.start()
        tb.start()
        ta.join()
        tb.join()
        # Each thread must see ONLY its own callback — not the other's
        assert seen_in_a == [cb_a]
        assert seen_in_b == [cb_b]

    def test_main_thread_callback_not_leaked_to_worker(self):
        """A callback set in the main thread does NOT leak into a
        freshly-spawned worker thread."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        cb_main = lambda cmd, desc: "main"  # noqa: E731
        set_approval_callback(cb_main)
        worker_saw = []

        def worker():
            worker_saw.append(_get_approval_callback())

        t = threading.Thread(target=worker)
        t.start()
        t.join()
        # Worker thread has no callback set — TLS is empty for it
        assert worker_saw == [None]
        # Main thread still has its callback
        assert _get_approval_callback() is cb_main

    def test_sudo_password_callback_also_thread_local(self):
        """Same protection applies to the sudo password callback."""
        from tools.terminal_tool import (
            set_sudo_password_callback,
            _get_sudo_password_callback,
        )
        cb_main = lambda: "main-password"  # noqa: E731
        set_sudo_password_callback(cb_main)
        worker_saw = []

        def worker():
            worker_saw.append(_get_sudo_password_callback())

        t = threading.Thread(target=worker)
        t.start()
        t.join()
        # A fresh worker sees no callback...
        assert worker_saw == [None]
        # ...and the main thread's slot is untouched.
        assert _get_sudo_password_callback() is cb_main
class TestAcpExecAskGate:
    """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
    that tools.approval.check_all_command_guards takes the CLI-interactive
    path (consults the registered callback via prompt_dangerous_approval)
    instead of the non-interactive auto-approve shortcut.
    (HERMES_EXEC_ASK takes the gateway-queue path which requires a
    notify_cb registered in _gateway_notify_cbs not applicable to ACP,
    which uses a direct callback shape.)"""

    def test_interactive_env_var_routes_to_callback(self, monkeypatch):
        """When HERMES_INTERACTIVE is set and an approval callback is
        registered, a dangerous command must route through the callback."""
        # Clean env: clear every mode flag so only HERMES_INTERACTIVE
        # (set further down) influences the routing decision.
        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
        from tools.approval import check_all_command_guards
        called_with = []

        def fake_cb(command, description, *, allow_permanent=True):
            # Record each consultation so the assertions below can tell
            # whether the guard actually routed through the callback.
            called_with.append((command, description))
            return "once"

        # Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert result["approved"] is True
        assert called_with == [], (
            "without HERMES_INTERACTIVE the non-interactive auto-approve "
            "path should fire without consulting the callback"
        )
        # With HERMES_INTERACTIVE: callback IS called, approval flows through it
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
        called_with.clear()
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert called_with, (
            "with HERMES_INTERACTIVE the approval path should consult the "
            "registered callback — this was the ACP bypass in "
            "GHSA-96vc-wcxf-jjff"
        )
        assert result["approved"] is True

View file

@ -73,3 +73,17 @@ class TestApprovalMapping:
result = cb("rm -rf /", "dangerous")
assert result == "deny"
    def test_approval_none_response_returns_deny(self):
        """When request_permission resolves to None, the callback should return 'deny'."""
        # NOTE(review): loop / rp / future shapes are inferred from the specs
        # on the mocks below — confirm against acp_adapter.permissions.
        loop = MagicMock(spec=asyncio.AbstractEventLoop)
        mock_rp = MagicMock(name="request_permission")
        future = MagicMock(spec=Future)
        # Simulate the permission round-trip resolving to None (no answer).
        future.result.return_value = None
        with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
            cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
            result = cb("echo hi", "demo")
        # A missing/None response must fail closed.
        assert result == "deny"

View file

@ -95,19 +95,37 @@ class TestInitialize:
class TestAuthenticate:
@pytest.mark.asyncio
async def test_authenticate_with_provider_configured(self, agent, monkeypatch):
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.has_provider",
lambda: True,
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="openrouter")
assert isinstance(resp, AuthenticateResponse)
    @pytest.mark.asyncio
    async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
        """Method-id matching ignores case: "OpenRouter" matches "openrouter"."""
        monkeypatch.setattr(
            "acp_adapter.server.detect_provider",
            lambda: "openrouter",
        )
        resp = await agent.authenticate(method_id="OpenRouter")
        # Mixed-case id still authenticates against the detected provider.
        assert isinstance(resp, AuthenticateResponse)
    @pytest.mark.asyncio
    async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
        """A method id that doesn't name the detected provider is rejected."""
        monkeypatch.setattr(
            "acp_adapter.server.detect_provider",
            lambda: "openrouter",
        )
        resp = await agent.authenticate(method_id="totally-invalid-method")
        # Rejection is signalled by returning None, not by raising.
        assert resp is None
@pytest.mark.asyncio
async def test_authenticate_without_provider(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.has_provider",
lambda: False,
"acp_adapter.server.detect_provider",
lambda: None,
)
resp = await agent.authenticate(method_id="openrouter")
assert resp is None
@ -252,6 +270,57 @@ class TestListAndFork:
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
    @pytest.mark.asyncio
    async def test_list_sessions_pagination_first_page(self, agent):
        """More sessions than one page: result is capped at the page size
        and next_cursor points at the last returned session."""
        from acp_adapter import server as acp_server
        # Five more records than fit on a single page.
        infos = [
            {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
            for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5)
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions()
            assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE
            assert resp.next_cursor == resp.sessions[-1].session_id
    @pytest.mark.asyncio
    async def test_list_sessions_pagination_no_more(self, agent):
        """Fewer sessions than a page: all returned, no continuation cursor."""
        infos = [
            {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
            for i in range(3)
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions()
            assert len(resp.sessions) == 3
            assert resp.next_cursor is None
    @pytest.mark.asyncio
    async def test_list_sessions_cursor_resumes_after_match(self, agent):
        """A cursor naming an existing session resumes AFTER that session."""
        infos = [
            {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
            {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
            {"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0},
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions(cursor="s1")
            # s1 itself is excluded; the rest fit on one page, so no cursor.
            assert [s.session_id for s in resp.sessions] == ["s2", "s3"]
            assert resp.next_cursor is None
    @pytest.mark.asyncio
    async def test_list_sessions_unknown_cursor_returns_empty(self, agent):
        """An unrecognized cursor yields an empty page rather than an error."""
        infos = [
            {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
            {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions(cursor="does-not-exist")
            assert resp.sessions == []
            assert resp.next_cursor is None
# ---------------------------------------------------------------------------
# session configuration / model routing
# ---------------------------------------------------------------------------

View file

@ -414,7 +414,11 @@ class TestRunOauthSetupToken:
token = run_oauth_setup_token()
assert token == "from-cred-file"
mock_run.assert_called_once()
# Don't assert exact call count — the contract is "credentials flow
# through", not "exactly one subprocess call". xdist cross-test
# pollution (other tests shimming subprocess via plugins) has flaked
# assert_called_once() in CI.
assert mock_run.called
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""

View file

@ -0,0 +1,238 @@
"""Regression tests: normalize_anthropic_response_v2 vs v1.
Constructs mock Anthropic responses and asserts that the v2 function
(returning NormalizedResponse) produces identical field values to the
original v1 function (returning SimpleNamespace + finish_reason).
"""
import json
import pytest
from types import SimpleNamespace
from agent.anthropic_adapter import (
normalize_anthropic_response,
normalize_anthropic_response_v2,
)
from agent.transports.types import NormalizedResponse, ToolCall
# ---------------------------------------------------------------------------
# Helpers to build mock Anthropic SDK responses
# ---------------------------------------------------------------------------
def _text_block(text: str):
return SimpleNamespace(type="text", text=text)
def _thinking_block(thinking: str, signature: str = "sig_abc"):
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
def _tool_use_block(id: str, name: str, input: dict):
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
def _response(content_blocks, stop_reason="end_turn"):
return SimpleNamespace(
content=content_blocks,
stop_reason=stop_reason,
usage=SimpleNamespace(
input_tokens=10,
output_tokens=5,
),
)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestTextOnly:
    """Text-only response — no tools, no thinking."""

    def setup_method(self):
        # Run the same fake response through both normalization paths once.
        resp = _response([_text_block("Hello world")])
        self.resp = resp
        v1_pair = normalize_anthropic_response(resp)
        self.v1_msg, self.v1_finish = v1_pair
        self.v2 = normalize_anthropic_response_v2(resp)

    def test_type(self):
        # v2 returns the typed dataclass rather than a SimpleNamespace.
        assert isinstance(self.v2, NormalizedResponse)

    def test_content_matches(self):
        assert self.v2.content == self.v1_msg.content

    def test_finish_reason_matches(self):
        assert self.v2.finish_reason == self.v1_finish

    def test_no_tool_calls(self):
        # Neither version fabricates tool calls for a plain text reply.
        assert self.v2.tool_calls is None
        assert self.v1_msg.tool_calls is None

    def test_no_reasoning(self):
        assert self.v2.reasoning is None
        assert self.v1_msg.reasoning is None
class TestWithToolCalls:
    """Response with tool calls."""

    def setup_method(self):
        # One text block plus two tool_use blocks, finishing on tool_use.
        self.resp = _response(
            [
                _text_block("I'll check that"),
                _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
                _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
            ],
            stop_reason="tool_use",
        )
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_finish_reason(self):
        # Anthropic "tool_use" maps to the OpenAI-style "tool_calls".
        assert self.v2.finish_reason == "tool_calls"
        assert self.v1_finish == "tool_calls"

    def test_tool_call_count(self):
        assert len(self.v2.tool_calls) == 2
        assert len(self.v1_msg.tool_calls) == 2

    def test_tool_call_ids_match(self):
        for i in range(2):
            assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id

    def test_tool_call_names_match(self):
        assert self.v2.tool_calls[0].name == "terminal"
        assert self.v2.tool_calls[1].name == "read_file"
        # v1 nests the name under .function; v2 flattens it onto the call.
        for i in range(2):
            assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name

    def test_tool_call_arguments_match(self):
        for i in range(2):
            assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments

    def test_content_preserved(self):
        # Text blocks survive alongside tool calls in both versions.
        assert self.v2.content == self.v1_msg.content
        assert "check that" in self.v2.content
class TestWithThinking:
    """Response with thinking blocks (Claude 3.5+ extended thinking)."""

    def setup_method(self):
        self.resp = _response([
            _thinking_block("Let me think about this carefully..."),
            _text_block("The answer is 42."),
        ])
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_reasoning_matches(self):
        assert self.v2.reasoning == self.v1_msg.reasoning
        assert "think about this" in self.v2.reasoning

    def test_reasoning_details_in_provider_data(self):
        # v1 exposes reasoning_details directly; v2 tucks them into
        # provider_data["reasoning_details"]. Both must be populated and
        # of the same length.
        v1_details = self.v1_msg.reasoning_details
        v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
        assert v1_details is not None
        assert v2_details is not None
        assert len(v2_details) == len(v1_details)

    def test_content_excludes_thinking(self):
        # Thinking text must not leak into the user-visible content.
        assert self.v2.content == "The answer is 42."
class TestMixed:
    """Response with thinking + text + tool calls."""

    def setup_method(self):
        # All three block kinds at once, finishing on tool_use.
        self.resp = _response(
            [
                _thinking_block("Planning my approach..."),
                _text_block("I'll run the command"),
                _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
            ],
            stop_reason="tool_use",
        )
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_all_fields_present(self):
        assert self.v2.content is not None
        assert self.v2.tool_calls is not None
        assert self.v2.reasoning is not None
        assert self.v2.finish_reason == "tool_calls"

    def test_content_matches(self):
        assert self.v2.content == self.v1_msg.content

    def test_reasoning_matches(self):
        assert self.v2.reasoning == self.v1_msg.reasoning

    def test_tool_call_matches(self):
        assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
        assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
class TestStopReasons:
    """Verify finish_reason mapping matches between v1 and v2."""

    @pytest.mark.parametrize("stop_reason,expected", [
        ("end_turn", "stop"),
        ("tool_use", "tool_calls"),
        ("max_tokens", "length"),
        ("stop_sequence", "stop"),
        ("refusal", "content_filter"),
        ("model_context_window_exceeded", "length"),
        # Unknown/future stop reasons fall back to plain "stop".
        ("unknown_future_reason", "stop"),
    ])
    def test_stop_reason_mapping(self, stop_reason, expected):
        resp = _response([_text_block("x")], stop_reason=stop_reason)
        v1_msg, v1_finish = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        # Chained equality: v2, v1, and the expected mapping all agree.
        assert v2.finish_reason == v1_finish == expected
class TestStripToolPrefix:
    """Verify mcp_ prefix stripping works identically."""

    @staticmethod
    def _mcp_response():
        # Shared fixture: a single mcp_-prefixed tool_use block.
        return _response(
            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
            stop_reason="tool_use",
        )

    def test_prefix_stripped(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
        # Both paths strip the "mcp_" prefix when asked to.
        assert v1_msg.tool_calls[0].function.name == "terminal"
        assert v2.tool_calls[0].name == "terminal"

    def test_prefix_kept(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
        # And both leave the name untouched when stripping is off.
        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
        assert v2.tool_calls[0].name == "mcp_terminal"
class TestEdgeCases:
    """Edge cases: empty content, no blocks, etc."""

    def test_empty_content_blocks(self):
        # With no blocks at all, both versions agree and yield content None.
        resp = _response([])
        v1_msg, v1_finish = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        assert v2.content == v1_msg.content
        assert v2.content is None

    def test_no_reasoning_details_means_none_provider_data(self):
        # provider_data stays None (not {}) when there is nothing to carry.
        resp = _response([_text_block("hi")])
        v2 = normalize_anthropic_response_v2(resp)
        assert v2.provider_data is None

    def test_v2_returns_dataclass_not_namespace(self):
        resp = _response([_text_block("hi")])
        v2 = normalize_anthropic_response_v2(resp)
        assert isinstance(v2, NormalizedResponse)
        assert not isinstance(v2, SimpleNamespace)

View file

@ -0,0 +1,146 @@
"""Focused regressions for the Copilot ACP shim safety layer."""
from __future__ import annotations
import io
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from agent.copilot_acp_client import CopilotACPClient
class _FakeProcess:
def __init__(self) -> None:
self.stdin = io.StringIO()
class CopilotACPClientSafetyTests(unittest.TestCase):
    """Safety-layer regressions for the Copilot ACP shim: no auto-approved
    permissions, blocked/redacted sensitive reads, and writes confined by
    the denylist and HERMES_WRITE_SAFE_ROOT."""

    def setUp(self) -> None:
        self.client = CopilotACPClient(acp_cwd="/tmp")

    def _dispatch(self, message: dict, *, cwd: str) -> dict:
        """Feed one server->client JSON-RPC message through the handler
        and return the decoded reply written to the fake stdin."""
        process = _FakeProcess()
        handled = self.client._handle_server_message(
            message,
            process=process,
            cwd=cwd,
            text_parts=[],
            reasoning_parts=[],
        )
        self.assertTrue(handled)
        payload = process.stdin.getvalue().strip()
        self.assertTrue(payload)
        return json.loads(payload)

    def test_request_permission_is_not_auto_allowed(self) -> None:
        """A bare permission request must come back 'cancelled', never approved."""
        response = self._dispatch(
            {
                "jsonrpc": "2.0",
                "id": 1,
                "method": "session/request_permission",
                "params": {},
            },
            cwd="/tmp",
        )
        # Double .get("outcome"): result -> outcome envelope -> outcome value.
        outcome = (((response.get("result") or {}).get("outcome") or {}).get("outcome"))
        self.assertEqual(outcome, "cancelled")

    def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
        """Reads inside the internal ~/.hermes hub cache are refused outright."""
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
            blocked.parent.mkdir(parents=True, exist_ok=True)
            blocked.write_text('{"token":"sk-test-secret-1234567890"}')
            # Point both HOME and HERMES_HOME at the temp tree so the
            # block check resolves against it.
            with patch.dict(
                os.environ,
                {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")},
                clear=False,
            ):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 2,
                        "method": "fs/read_text_file",
                        "params": {"path": str(blocked)},
                    },
                    cwd=str(home),
                )
            self.assertIn("error", response)

    def test_read_text_file_redacts_sensitive_content(self) -> None:
        """Readable files still get secret values scrubbed from the content."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            secret_file = root / "config.env"
            secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")
            response = self._dispatch(
                {
                    "jsonrpc": "2.0",
                    "id": 3,
                    "method": "fs/read_text_file",
                    "params": {"path": str(secret_file)},
                },
                cwd=str(root),
            )
            content = ((response.get("result") or {}).get("content") or "")
            # The secret value is gone but the key name survives for context.
            self.assertNotIn("abc123def456", content)
            self.assertIn("OPENAI_API_KEY=", content)

    def test_write_text_file_reuses_write_denylist(self) -> None:
        """Writes consult the shared is_write_denied() denylist check."""
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            target = home / ".ssh" / "id_rsa"
            target.parent.mkdir(parents=True, exist_ok=True)
            # NOTE(review): create=True means patch won't fail even if
            # is_write_denied isn't defined on this module — confirm the
            # attribute actually exists there.
            with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 4,
                        "method": "fs/write_text_file",
                        "params": {
                            "path": str(target),
                            "content": "fake-private-key",
                        },
                    },
                    cwd=str(home),
                )
            self.assertIn("error", response)
            # A denied write must leave no file behind.
            self.assertFalse(target.exists())

    def test_write_text_file_respects_safe_root(self) -> None:
        """HERMES_WRITE_SAFE_ROOT confines writes to the configured subtree."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            safe_root = root / "workspace"
            safe_root.mkdir()
            outside = root / "outside.txt"
            with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 5,
                        "method": "fs/write_text_file",
                        "params": {
                            "path": str(outside),
                            "content": "should-not-write",
                        },
                    },
                    cwd=str(root),
                )
            self.assertIn("error", response)
            self.assertFalse(outside.exists())
if __name__ == "__main__":
unittest.main()

View file

@ -516,13 +516,12 @@ class TestGatewayFormatting:
assert "**" in text # Markdown bold
def test_gateway_format_hides_cost(self, populated_db):
"""Gateway format omits dollar figures and internal cache details."""
engine = InsightsEngine(populated_db)
report = engine.generate(days=30)
text = engine.format_gateway(report)
assert "$" in text
assert "Top Skills" in text
assert "Est. cost" in text
assert "$" not in text
assert "cache" not in text.lower()
def test_gateway_format_shows_models(self, populated_db):

View file

@ -84,38 +84,6 @@ class TestMinimaxAuxModel:
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
class TestMinimaxModelCatalog:
"""Verify the model catalog matches official Anthropic-compat endpoint models.
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
"""
def test_catalog_includes_current_models(self):
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7" in models
assert "MiniMax-M2.5" in models
assert "MiniMax-M2.1" in models
assert "MiniMax-M2" in models
def test_catalog_excludes_m1_family(self):
"""M1 models are not available on the /anthropic endpoint."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M1" not in models
def test_catalog_excludes_highspeed(self):
"""Highspeed variants are available but not shown in default catalog
(users can still specify them manually)."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7-highspeed" not in models
assert "MiniMax-M2.5-highspeed" not in models
class TestMinimaxBetaHeaders:
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.

View file

@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed.
"""
from __future__ import annotations
import os
import pytest
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch):
_validate_proxy_env_urls() # should not raise
def test_proxy_env_normalizes_socks_alias(monkeypatch):
    """A bare ``socks://`` scheme is rewritten in place to ``socks5://``."""
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    _validate_proxy_env_urls()
    # The validator mutates os.environ, not just a return value.
    assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/"
@pytest.mark.parametrize("key", [
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
"http_proxy", "https_proxy", "all_proxy",

View file

@ -405,3 +405,191 @@ class TestPlanSkillHelpers:
assert "Add a /plan command" in msg
assert ".hermes/plans/plan.md" in msg
assert "Runtime note:" in msg
class TestSkillDirectoryHeader:
    """The activation message must expose the absolute skill directory and
    explain how to resolve relative paths, so skills with bundled scripts
    don't force the agent into a second ``skill_view()`` round-trip."""

    def test_header_contains_absolute_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "abs-dir-skill")
            scan_skill_commands()
            msg = build_skill_invocation_message("/abs-dir-skill", "go")
            assert msg is not None
            assert f"[Skill directory: {skill_dir}]" in msg
            assert "Resolve any relative paths" in msg

    def test_supporting_files_shown_with_absolute_paths(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "scripted-skill")
            (skill_dir / "scripts").mkdir()
            (skill_dir / "scripts" / "run.js").write_text("console.log('hi')")
            scan_skill_commands()
            msg = build_skill_invocation_message("/scripted-skill")
            assert msg is not None
            # The supporting-files block must emit both the relative form (so the
            # agent can call skill_view on it) and the absolute form (so it can
            # run the script directly via terminal).
            assert "scripts/run.js" in msg
            assert str(skill_dir / "scripts" / "run.js") in msg
            # NOTE(review): this asserts foo.js although the fixture created
            # run.js — presumably the message embeds a generic usage example;
            # confirm against build_skill_invocation_message.
            assert f"node {skill_dir}/scripts/foo.js" in msg
class TestTemplateVarSubstitution:
    """``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body
    are replaced before the agent sees the content."""

    def test_substitutes_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(
                tmp_path,
                "templated",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/templated")
            assert msg is not None
            assert f"node {skill_dir}/scripts/foo.js" in msg
            # The literal template token must not leak through.
            # (Only the body before the directory header is checked, since
            # the header portion may legitimately mention the variable.)
            assert "${HERMES_SKILL_DIR}" not in msg.split("[Skill directory:")[0]

    def test_substitutes_session_id_when_available(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-templated",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            # task_id supplies the value substituted for the session token.
            msg = build_skill_invocation_message(
                "/sess-templated", task_id="abc-123"
            )
            assert msg is not None
            assert "Session: abc-123" in msg

    def test_leaves_session_id_token_when_missing(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-missing",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/sess-missing", task_id=None)
            assert msg is not None
            # No session — token left intact so the author can spot it.
            assert "Session: ${HERMES_SESSION_ID}" in msg

    def test_disable_template_vars_via_config(self, tmp_path):
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": False},
            ),
        ):
            _make_skill(
                tmp_path,
                "no-sub",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/no-sub")
            assert msg is not None
            # Template token must survive when substitution is disabled.
            assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg
class TestInlineShellExpansion:
    """Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the
    content but only when the user has opted in via config."""

    def test_inline_shell_is_off_by_default(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "dyn-default-off",
                body="Today is !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-default-off")
            assert msg is not None
            # Default config has inline_shell=False — snippet must stay literal.
            assert "!`echo INLINE_RAN`" in msg
            assert "Today is INLINE_RAN." not in msg

    def test_inline_shell_runs_when_enabled(self, tmp_path):
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 5},
            ),
        ):
            _make_skill(
                tmp_path,
                "dyn-on",
                body="Marker: !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-on")
            assert msg is not None
            # Snippet replaced by its stdout; the literal form is gone.
            assert "Marker: INLINE_RAN." in msg
            assert "!`echo INLINE_RAN`" not in msg

    def test_inline_shell_runs_in_skill_directory(self, tmp_path):
        """Inline snippets get the skill dir as CWD so relative paths work."""
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 5},
            ),
        ):
            skill_dir = _make_skill(
                tmp_path,
                "dyn-cwd",
                body="Here: !`pwd`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-cwd")
            assert msg is not None
            assert f"Here: {skill_dir}" in msg

    def test_inline_shell_timeout_does_not_break_message(self, tmp_path):
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 1},
            ),
        ):
            _make_skill(
                tmp_path,
                "dyn-slow",
                body="Slow: !`sleep 5 && printf DYN_MARKER`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-slow")
            assert msg is not None
            # Timeout is surfaced as a marker instead of propagating as an error,
            # and the rest of the skill message still renders.
            assert "inline-shell timeout" in msg
            # The command's intended stdout never made it through — only the
            # timeout marker (which echoes the command text) survives.
            assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "")

View file

View file

@ -0,0 +1,220 @@
"""Tests for the transport ABC, registry, and AnthropicTransport."""
import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
from agent.transports import get_transport, register_transport, _REGISTRY
# ── ABC contract tests ──────────────────────────────────────────────────
class TestProviderTransportABC:
    """Verify the ABC contract is enforceable."""

    def test_cannot_instantiate_abc(self):
        # The abstract base itself must refuse direct instantiation.
        with pytest.raises(TypeError):
            ProviderTransport()

    def test_concrete_must_implement_all_abstract(self):
        # Implementing only api_mode is not enough — other abstract
        # members are still missing, so instantiation fails.
        class Incomplete(ProviderTransport):
            @property
            def api_mode(self):
                return "test"
        with pytest.raises(TypeError):
            Incomplete()

    def test_minimal_concrete(self):
        """A class overriding all abstract members instantiates and
        inherits the base-class defaults for the optional hooks."""
        class Minimal(ProviderTransport):
            @property
            def api_mode(self):
                return "test_minimal"
            def convert_messages(self, messages, **kw):
                return messages
            def convert_tools(self, tools):
                return tools
            def build_kwargs(self, model, messages, tools=None, **params):
                return {"model": model, "messages": messages}
            def normalize_response(self, response, **kw):
                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")
        t = Minimal()
        assert t.api_mode == "test_minimal"
        assert t.validate_response(None) is True  # default
        assert t.extract_cache_stats(None) is None  # default
        assert t.map_finish_reason("end_turn") == "end_turn"  # default passthrough
# ── Registry tests ───────────────────────────────────────────────────────
class TestTransportRegistry:
    """get_transport / register_transport lookup behavior."""

    def test_get_unregistered_returns_none(self):
        # Unknown mode yields None, not an exception.
        assert get_transport("nonexistent_mode") is None

    def test_anthropic_registered_on_import(self):
        # Importing the module has the side effect of self-registering.
        import agent.transports.anthropic  # noqa: F401
        t = get_transport("anthropic_messages")
        assert t is not None
        assert t.api_mode == "anthropic_messages"

    def test_register_and_get(self):
        class DummyTransport(ProviderTransport):
            @property
            def api_mode(self):
                return "dummy_test"
            def convert_messages(self, messages, **kw):
                return messages
            def convert_tools(self, tools):
                return tools
            def build_kwargs(self, model, messages, tools=None, **params):
                return {}
            def normalize_response(self, response, **kw):
                return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")
        register_transport("dummy_test", DummyTransport)
        t = get_transport("dummy_test")
        assert t.api_mode == "dummy_test"
        # Cleanup so the module-global registry doesn't leak into other tests.
        _REGISTRY.pop("dummy_test", None)
# ── AnthropicTransport tests ────────────────────────────────────────────
class TestAnthropicTransport:
    """Contract tests for the Anthropic Messages transport: tool-schema
    conversion, response validation, finish-reason mapping, cache-stat
    extraction, and normalization of text / tool-use / thinking responses."""

    @pytest.fixture
    def transport(self):
        # Imported for its side effect: the module registers itself in the
        # transport registry at import time.
        import agent.transports.anthropic  # noqa: F401
        return get_transport("anthropic_messages")

    def test_api_mode(self, transport):
        assert transport.api_mode == "anthropic_messages"

    def test_convert_tools_simple(self, transport):
        # OpenAI-style function tools become Anthropic's flat shape:
        # `name` at the top level and an `input_schema` key.
        tools = [{
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test",
                "parameters": {"type": "object", "properties": {}},
            }
        }]
        result = transport.convert_tools(tools)
        assert len(result) == 1
        assert result[0]["name"] == "test_tool"
        assert "input_schema" in result[0]

    def test_validate_response_none(self, transport):
        assert transport.validate_response(None) is False

    def test_validate_response_empty_content(self, transport):
        # A response with an empty content list is not usable.
        r = SimpleNamespace(content=[])
        assert transport.validate_response(r) is False

    def test_validate_response_valid(self, transport):
        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
        assert transport.validate_response(r) is True

    def test_map_finish_reason(self, transport):
        # Anthropic stop_reason values map onto canonical finish reasons;
        # anything unrecognized falls back to "stop".
        assert transport.map_finish_reason("end_turn") == "stop"
        assert transport.map_finish_reason("tool_use") == "tool_calls"
        assert transport.map_finish_reason("max_tokens") == "length"
        assert transport.map_finish_reason("stop_sequence") == "stop"
        assert transport.map_finish_reason("refusal") == "content_filter"
        assert transport.map_finish_reason("model_context_window_exceeded") == "length"
        assert transport.map_finish_reason("unknown") == "stop"

    def test_extract_cache_stats_none_usage(self, transport):
        r = SimpleNamespace(usage=None)
        assert transport.extract_cache_stats(r) is None

    def test_extract_cache_stats_with_cache(self, transport):
        usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
        r = SimpleNamespace(usage=usage)
        result = transport.extract_cache_stats(r)
        assert result == {"cached_tokens": 100, "creation_tokens": 50}

    def test_extract_cache_stats_zero(self, transport):
        # All-zero cache counters collapse to None rather than an empty dict.
        usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
        r = SimpleNamespace(usage=usage)
        assert transport.extract_cache_stats(r) is None

    def test_normalize_response_text(self, transport):
        """Test normalization of a simple text response."""
        r = SimpleNamespace(
            content=[SimpleNamespace(type="text", text="Hello world")],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello world"
        # Either representation of "no tool calls" is acceptable.
        assert nr.tool_calls is None or nr.tool_calls == []
        assert nr.finish_reason == "stop"

    def test_normalize_response_tool_calls(self, transport):
        """Test normalization of a tool-use response."""
        r = SimpleNamespace(
            content=[
                SimpleNamespace(
                    type="tool_use",
                    id="toolu_123",
                    name="terminal",
                    input={"command": "ls"},
                ),
            ],
            stop_reason="tool_use",
            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        assert nr.finish_reason == "tool_calls"
        assert len(nr.tool_calls) == 1
        tc = nr.tool_calls[0]
        assert tc.name == "terminal"
        assert tc.id == "toolu_123"
        # The dict input ends up as a JSON-style string (quoted keys).
        assert '"command"' in tc.arguments

    def test_normalize_response_thinking(self, transport):
        """Test normalization preserves thinking content."""
        r = SimpleNamespace(
            content=[
                SimpleNamespace(type="thinking", thinking="Let me think..."),
                SimpleNamespace(type="text", text="The answer is 42"),
            ],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        # Thinking blocks land in `reasoning`; text blocks in `content`.
        assert nr.content == "The answer is 42"
        assert nr.reasoning == "Let me think..."

    def test_build_kwargs_returns_dict(self, transport):
        """Test build_kwargs produces a usable kwargs dict."""
        messages = [{"role": "user", "content": "Hello"}]
        kw = transport.build_kwargs(
            model="claude-sonnet-4-6",
            messages=messages,
            max_tokens=1024,
        )
        assert isinstance(kw, dict)
        assert "model" in kw
        assert "max_tokens" in kw
        assert "messages" in kw

    def test_convert_messages_extracts_system(self, transport):
        """Test convert_messages separates system from messages."""
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi"},
        ]
        system, msgs = transport.convert_messages(messages)
        # System should be extracted out of the message list
        assert system is not None
        # Messages should only have user
        assert len(msgs) >= 1

View file

@ -0,0 +1,151 @@
"""Tests for agent/transports/types.py — dataclass construction + helpers."""
import json
import pytest
from agent.transports.types import (
NormalizedResponse,
ToolCall,
Usage,
build_tool_call,
map_finish_reason,
)
# ---------------------------------------------------------------------------
# ToolCall
# ---------------------------------------------------------------------------
class TestToolCall:
    """ToolCall is a plain data holder; verify field wiring and defaults."""

    def test_basic_construction(self):
        call = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')
        assert (call.id, call.name) == ("call_abc", "terminal")
        assert call.arguments == '{"cmd": "ls"}'
        # provider_data is optional and defaults to None.
        assert call.provider_data is None

    def test_none_id(self):
        # Some providers omit call ids; None must be representable.
        call = ToolCall(id=None, name="read_file", arguments="{}")
        assert call.id is None

    def test_provider_data(self):
        call = ToolCall(
            id="call_x",
            name="t",
            arguments="{}",
            provider_data={"call_id": "call_x", "response_item_id": "fc_x"},
        )
        assert call.provider_data["call_id"] == "call_x"
        assert call.provider_data["response_item_id"] == "fc_x"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
class TestUsage:
    """Usage counters default to zero and accept explicit values."""

    def test_defaults(self):
        usage = Usage()
        assert (usage.prompt_tokens, usage.completion_tokens) == (0, 0)
        assert (usage.total_tokens, usage.cached_tokens) == (0, 0)

    def test_explicit(self):
        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80)
        assert usage.total_tokens == 150
# ---------------------------------------------------------------------------
# NormalizedResponse
# ---------------------------------------------------------------------------
class TestNormalizedResponse:
    """NormalizedResponse carries content/tool-calls plus optional extras."""

    def test_text_only(self):
        resp = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
        assert resp.content == "hello"
        assert resp.tool_calls is None
        assert resp.finish_reason == "stop"
        # Every optional field defaults to None.
        assert resp.reasoning is None
        assert resp.usage is None
        assert resp.provider_data is None

    def test_with_tool_calls(self):
        calls = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
        resp = NormalizedResponse(content=None, tool_calls=calls, finish_reason="tool_calls")
        assert resp.finish_reason == "tool_calls"
        assert len(resp.tool_calls) == 1
        assert resp.tool_calls[0].name == "terminal"

    def test_with_reasoning(self):
        resp = NormalizedResponse(
            content="answer",
            tool_calls=None,
            finish_reason="stop",
            reasoning="I thought about it",
        )
        assert resp.reasoning == "I thought about it"

    def test_with_provider_data(self):
        resp = NormalizedResponse(
            content=None,
            tool_calls=None,
            finish_reason="stop",
            provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]},
        )
        assert resp.provider_data["reasoning_details"][0]["type"] == "thinking"
# ---------------------------------------------------------------------------
# build_tool_call
# ---------------------------------------------------------------------------
class TestBuildToolCall:
    """build_tool_call normalizes arguments and provider bookkeeping."""

    def test_dict_arguments_serialized(self):
        # Dict arguments are JSON-encoded into the canonical string form.
        call = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
        assert call.arguments == json.dumps({"cmd": "ls"})
        assert call.provider_data is None

    def test_string_arguments_passthrough(self):
        # Already-serialized arguments are left untouched.
        call = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}')
        assert call.arguments == '{"path": "/tmp"}'

    def test_provider_fields(self):
        call = build_tool_call(
            id="call_3",
            name="terminal",
            arguments="{}",
            call_id="call_3",
            response_item_id="fc_3",
        )
        assert call.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}

    def test_none_id(self):
        call = build_tool_call(id=None, name="t", arguments="{}")
        assert call.id is None
# ---------------------------------------------------------------------------
# map_finish_reason
# ---------------------------------------------------------------------------
class TestMapFinishReason:
    """map_finish_reason translates provider stop reasons via a lookup table."""

    # Anthropic-style stop_reason values and their canonical equivalents.
    ANTHROPIC_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
    }

    def test_known_reason(self):
        expectations = {
            "end_turn": "stop",
            "tool_use": "tool_calls",
            "max_tokens": "length",
            "refusal": "content_filter",
        }
        for reason, expected in expectations.items():
            assert map_finish_reason(reason, self.ANTHROPIC_MAP) == expected

    def test_unknown_reason_defaults_to_stop(self):
        # Unlisted reasons fall back to the safe default.
        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"

    def test_none_reason(self):
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"

View file

@ -0,0 +1,146 @@
"""Regression tests for classic-CLI mid-run /steer dispatch.
Background
----------
/steer sent while the agent is running used to be queued through
``self._pending_input`` alongside ordinary user input. ``process_loop``
pulls from that queue and calls ``process_command()`` but while the
agent is running, ``process_loop`` is blocked inside ``self.chat()``.
By the time the queued /steer was pulled, ``_agent_running`` had
already flipped back to False, so ``process_command()`` took the idle
fallback (``"No agent running; queued as next turn"``) and delivered
the steer as an ordinary next-turn message.
The fix dispatches /steer inline on the UI thread when the agent is
running — matching the existing pattern for /model — so the steer
reaches ``agent.steer()`` (thread-safe) without touching the queue.
These tests exercise the detector + inline dispatch without starting a
prompt_toolkit app.
"""
from __future__ import annotations
import importlib
import sys
from unittest.mock import MagicMock, patch
def _make_cli():
    """Create a HermesCLI instance with prompt_toolkit stubbed out.

    The patch ordering matters: the prompt_toolkit stubs and cleaned env
    must be in place BEFORE ``cli`` is reloaded, so that the module-level
    imports inside ``cli`` bind to the MagicMock stand-ins instead of the
    real package, and so config-affecting env vars don't leak in.
    """
    # Minimal config dict mirroring the shape cli.CLI_CONFIG expects.
    _clean_config = {
        "model": {
            "default": "anthropic/claude-opus-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "provider": "auto",
        },
        "display": {"compact": False, "tool_progress": "all"},
        "agent": {},
        "terminal": {"env_type": "local"},
    }
    # Blanked (not deleted) so patch.dict restores originals on exit.
    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
    # Every prompt_toolkit submodule that `cli` imports must be stubbed,
    # or the reload below would pull in the real package.
    prompt_toolkit_stubs = {
        "prompt_toolkit": MagicMock(),
        "prompt_toolkit.history": MagicMock(),
        "prompt_toolkit.styles": MagicMock(),
        "prompt_toolkit.patch_stdout": MagicMock(),
        "prompt_toolkit.application": MagicMock(),
        "prompt_toolkit.layout": MagicMock(),
        "prompt_toolkit.layout.processors": MagicMock(),
        "prompt_toolkit.filters": MagicMock(),
        "prompt_toolkit.layout.dimension": MagicMock(),
        "prompt_toolkit.layout.menus": MagicMock(),
        "prompt_toolkit.widgets": MagicMock(),
        "prompt_toolkit.key_binding": MagicMock(),
        "prompt_toolkit.completion": MagicMock(),
        "prompt_toolkit.formatted_text": MagicMock(),
        "prompt_toolkit.auto_suggest": MagicMock(),
    }
    with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
        "os.environ", clean_env, clear=False
    ):
        import cli as _cli_mod

        # Reload so cli's module-level lookups re-resolve against the stubs.
        _cli_mod = importlib.reload(_cli_mod)
        with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict(
            _cli_mod.__dict__, {"CLI_CONFIG": _clean_config}
        ):
            # Construct while all patches are active; they unwind after return.
            return _cli_mod.HermesCLI()
class TestSteerInlineDetector:
    """_should_handle_steer_command_inline gates the busy-path fast dispatch."""

    def test_detects_steer_when_agent_running(self):
        cli = _make_cli()
        cli._agent_running = True
        assert cli._should_handle_steer_command_inline("/steer focus on error handling") is True

    def test_ignores_steer_when_agent_idle(self):
        """Idle-path /steer should fall through to the normal process_loop
        dispatch so the queue-style fallback message is emitted."""
        cli = _make_cli()
        cli._agent_running = False
        assert cli._should_handle_steer_command_inline("/steer do something") is False

    def test_ignores_non_slash_input(self):
        cli = _make_cli()
        cli._agent_running = True
        for text in ("steer without slash", ""):
            assert cli._should_handle_steer_command_inline(text) is False

    def test_ignores_other_slash_commands(self):
        cli = _make_cli()
        cli._agent_running = True
        for command in ("/queue hello", "/stop", "/help"):
            assert cli._should_handle_steer_command_inline(command) is False

    def test_ignores_steer_with_attached_images(self):
        """Image payloads take the normal path; steer doesn't accept images."""
        cli = _make_cli()
        cli._agent_running = True
        assert cli._should_handle_steer_command_inline("/steer text", has_images=True) is False
class TestSteerBusyPathDispatch:
    """When the detector fires, process_command('/steer ...') must call
    agent.steer() directly rather than the idle-path fallback."""

    def _cli_with_mock_agent(self, running):
        # Shared arrangement: CLI with a mocked agent and an observable queue.
        cli = _make_cli()
        cli._agent_running = running
        cli.agent = MagicMock()
        cli.agent.steer = MagicMock(return_value=True)
        # Make sure the idle-path fallback would be observable if taken.
        cli._pending_input = MagicMock()
        return cli

    def test_process_command_routes_to_agent_steer(self):
        """With _agent_running=True and agent.steer present, /steer reaches
        agent.steer(payload), NOT _pending_input."""
        cli = self._cli_with_mock_agent(running=True)
        cli.process_command("/steer focus on errors")
        cli.agent.steer.assert_called_once_with("focus on errors")
        cli._pending_input.put.assert_not_called()

    def test_idle_path_queues_as_next_turn(self):
        """Control — when the agent is NOT running, /steer correctly falls
        back to next-turn queue semantics. Demonstrates why the fix was
        needed: the queue path only works when you can actually drain it."""
        cli = self._cli_with_mock_agent(running=False)
        cli.process_command("/steer would-be-next-turn")
        # Idle path does NOT call agent.steer
        cli.agent.steer.assert_not_called()
        # It puts the payload in the queue as a normal next-turn message
        cli._pending_input.put.assert_called_once_with("would-be-next-turn")
# Allow running this module directly (python <file> -v) outside a pytest run.
if __name__ == "__main__":  # pragma: no cover
    import pytest

    pytest.main([__file__, "-v"])

View file

@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_HOME_MODE",
"BROWSER_CDP_URL",
"CAMOFOX_URL",
# Platform allowlists — not credentials, but if set from any source
# (user shell, earlier leaky test, CI env), they change gateway auth
# behavior and flake button-authorization tests.
"TELEGRAM_ALLOWED_USERS",
"DISCORD_ALLOWED_USERS",
"WHATSAPP_ALLOWED_USERS",
"SLACK_ALLOWED_USERS",
"SIGNAL_ALLOWED_USERS",
"SIGNAL_GROUP_ALLOWED_USERS",
"EMAIL_ALLOWED_USERS",
"SMS_ALLOWED_USERS",
"MATTERMOST_ALLOWED_USERS",
"MATRIX_ALLOWED_USERS",
"DINGTALK_ALLOWED_USERS",
"FEISHU_ALLOWED_USERS",
"WECOM_ALLOWED_USERS",
"GATEWAY_ALLOWED_USERS",
"GATEWAY_ALLOW_ALL_USERS",
"TELEGRAM_ALLOW_ALL_USERS",
"DISCORD_ALLOW_ALL_USERS",
"WHATSAPP_ALLOW_ALL_USERS",
"SLACK_ALLOW_ALL_USERS",
"SIGNAL_ALLOW_ALL_USERS",
"EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS",
})
@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment):
return None
# ── Module-level state reset ───────────────────────────────────────────────
#
# Python modules are singletons per process, and pytest-xdist workers are
# long-lived. Module-level dicts/sets (tool registries, approval state,
# interrupt flags) and ContextVars persist across tests in the same worker,
# causing tests that pass alone to fail when run with siblings.
#
# Each entry in this fixture clears state that belongs to a specific module.
# New state buckets go here too — this is the single gate that prevents
# "works alone, flakes in CI" bugs from state leakage.
#
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
# this closes; the running example was `test_command_guards` failing 12/15
# CI runs because ``tools.approval._session_approved`` carried approvals
# from one test's session into another's.
@pytest.fixture(autouse=True)
def _reset_module_state():
    """Clear module-level mutable state and ContextVars between tests.

    Keeps state from leaking across tests on the same xdist worker. Modules
    that don't exist yet (test collection before production import) are
    skipped silently — production import later creates fresh empty state.
    Resets run BEFORE each test (everything precedes the ``yield``).
    """
    # --- tools.approval — the single biggest source of cross-test pollution ---
    try:
        from tools import approval as _approval_mod
        _approval_mod._session_approved.clear()
        _approval_mod._session_yolo.clear()
        _approval_mod._permanent_approved.clear()
        _approval_mod._pending.clear()
        _approval_mod._gateway_queues.clear()
        _approval_mod._gateway_notify_cbs.clear()
        # ContextVar: reset to empty string so get_current_session_key()
        # falls through to the env var / default path, matching a fresh
        # process.
        _approval_mod._approval_session_key.set("")
    except Exception:
        # Module not imported yet (or attrs renamed) — nothing to reset.
        pass
    # --- tools.interrupt — per-thread interrupt flag set ---
    try:
        from tools import interrupt as _interrupt_mod
        with _interrupt_mod._lock:
            _interrupt_mod._interrupted_threads.clear()
    except Exception:
        pass
    # --- gateway.session_context — ContextVars that represent
    # the active gateway session. If set in one test and not reset,
    # the next test's get_session_env() reads stale values.
    try:
        from gateway import session_context as _sc_mod
        for _cv in (
            _sc_mod._SESSION_PLATFORM,
            _sc_mod._SESSION_CHAT_ID,
            _sc_mod._SESSION_CHAT_NAME,
            _sc_mod._SESSION_THREAD_ID,
            _sc_mod._SESSION_USER_ID,
            _sc_mod._SESSION_USER_NAME,
            _sc_mod._SESSION_KEY,
            _sc_mod._CRON_AUTO_DELIVER_PLATFORM,
            _sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
            _sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
        ):
            _cv.set(_sc_mod._UNSET)
    except Exception:
        pass
    # --- tools.env_passthrough — ContextVar<set[str]> with no default ---
    # LookupError is normal if the test never set it. Setting it to an
    # empty set unconditionally normalizes the starting state.
    try:
        from tools import env_passthrough as _envp_mod
        _envp_mod._allowed_env_vars_var.set(set())
    except Exception:
        pass
    # --- tools.credential_files — ContextVar<dict> ---
    try:
        from tools import credential_files as _credf_mod
        _credf_mod._registered_files_var.set({})
    except Exception:
        pass
    # --- tools.file_tools — per-task read history + file-ops cache ---
    # _read_tracker accumulates per-task_id read history for loop detection,
    # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
    # cap is hit faster than expected and capacity-related tests flake.
    try:
        from tools import file_tools as _ft_mod
        with _ft_mod._read_tracker_lock:
            _ft_mod._read_tracker.clear()
        with _ft_mod._file_ops_lock:
            _ft_mod._file_ops_cache.clear()
    except Exception:
        pass
    yield
@pytest.fixture()
def tmp_dir(tmp_path):
"""Provide a temporary directory that is cleaned up automatically."""

View file

@ -1580,3 +1580,128 @@ class TestParallelTick:
end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0]
start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0]
assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1"
class TestDeliverResultTimeoutCancelsFuture:
    """When future.result(timeout=60) raises TimeoutError in the live
    adapter delivery path, _deliver_result must cancel the orphan
    coroutine so it cannot duplicate-send after the standalone fallback.
    """

    def test_live_adapter_timeout_cancels_future_and_falls_back(self):
        """End-to-end: the live-adapter wait "times out" (future.result() is
        overridden to raise TimeoutError), _deliver_result must cancel the
        orphan future, and the standalone fallback path must still deliver."""
        from gateway.config import Platform
        from concurrent.futures import Future

        # Live adapter whose send() coroutine never resolves within the budget
        adapter = AsyncMock()
        adapter.send.return_value = MagicMock(success=True)
        pconfig = MagicMock()
        pconfig.enabled = True
        mock_cfg = MagicMock()
        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
        loop = MagicMock()
        loop.is_running.return_value = True
        # A real concurrent.futures.Future so .cancel() has real semantics,
        # but we override .result() to raise TimeoutError exactly like the
        # 60s wait firing in production.
        captured_future = Future()
        cancel_calls = []
        original_cancel = captured_future.cancel

        def tracking_cancel():
            # Record the call, then delegate to the real cancel().
            cancel_calls.append(True)
            return original_cancel()

        captured_future.cancel = tracking_cancel
        captured_future.result = MagicMock(side_effect=TimeoutError("timed out"))

        def fake_run_coro(coro, _loop):
            # Close the coroutine to avoid "never awaited" warnings, then
            # hand back the instrumented future.
            coro.close()
            return captured_future

        job = {
            "id": "timeout-job",
            "deliver": "origin",
            "origin": {"platform": "telegram", "chat_id": "123"},
        }
        standalone_send = AsyncMock(return_value={"success": True})
        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
             patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \
             patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \
             patch("tools.send_message_tool._send_to_platform", new=standalone_send):
            result = _deliver_result(
                job,
                "Hello world",
                adapters={Platform.TELEGRAM: adapter},
                loop=loop,
            )
        # 1. The orphan future was cancelled on timeout (the bug fix)
        assert cancel_calls == [True], "future.cancel() must fire on TimeoutError"
        # 2. The standalone fallback delivered — no double send, no silent drop
        assert result is None, f"expected successful delivery, got error: {result!r}"
        standalone_send.assert_awaited_once()
class TestSendMediaTimeoutCancelsFuture:
    """Same orphan-coroutine guarantee for _send_media_via_adapter's
    future.result(timeout=30) call. If this times out mid-batch, the
    in-flight coroutine must be cancelled before the next file is tried.
    """

    def test_media_send_timeout_cancels_future_and_continues(self):
        """End-to-end: _send_media_via_adapter with a future whose .result()
        raises TimeoutError. Assert cancel() fires and the loop proceeds
        to the next file rather than hanging or crashing."""
        from concurrent.futures import Future

        adapter = MagicMock()
        adapter.send_image_file = AsyncMock()
        adapter.send_video = AsyncMock()
        # First file: future that times out. Second file: future that resolves OK.
        timeout_future = Future()
        timeout_cancel_calls = []
        original_cancel = timeout_future.cancel

        def tracking_cancel():
            # Record the call, then delegate to the real cancel().
            timeout_cancel_calls.append(True)
            return original_cancel()

        timeout_future.cancel = tracking_cancel
        timeout_future.result = MagicMock(side_effect=TimeoutError("timed out"))
        ok_future = Future()
        ok_future.set_result(MagicMock(success=True))
        # Each run_coroutine_threadsafe call consumes the next scripted future.
        futures_iter = iter([timeout_future, ok_future])

        def fake_run_coro(coro, _loop):
            coro.close()
            return next(futures_iter)

        media_files = [
            ("/tmp/slow.png", False),  # times out
            ("/tmp/fast.mp4", False),  # succeeds
        ]
        loop = MagicMock()
        job = {"id": "media-timeout"}
        with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
            # Should not raise — the except Exception clause swallows the timeout
            _send_media_via_adapter(adapter, "chat-1", media_files, None, loop, job)
        # 1. The timed-out future was cancelled (the bug fix)
        assert timeout_cancel_calls == [True], "future.cancel() must fire on TimeoutError"
        # 2. Second file still got dispatched — one timeout doesn't abort the batch
        adapter.send_video.assert_called_once()
        assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"

View file

@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
import gateway.run as gateway_run
import gateway.pairing as pairing_mod
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
# import time from whichever HERMES_HOME was set then. Per-test
# HERMES_HOME redirection in conftest doesn't retroactively move it.
# Override directly so pairing rate-limit state lives in this test's
# tmp_path (and so stale state from prior xdist workers can't leak in).
pairing_dir = tmp_path / "pairing"
pairing_dir.mkdir()
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
# Clear env vars that could let all users through (loaded by

View file

@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import Platform, StreamingConfig
from gateway.platforms.base import resolve_proxy_url
from gateway.run import GatewayRunner
from gateway.session import SessionSource
@ -133,6 +134,15 @@ class TestGetProxyUrl:
assert runner._get_proxy_url() is None
class TestResolveProxyUrl:
    def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
        """`socks://` from ALL_PROXY is normalized to `socks5://`."""
        # Start from a clean slate: drop every proxy var in both cases.
        for var in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                    "https_proxy", "http_proxy", "all_proxy"):
            monkeypatch.delenv(var, raising=False)
        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
        assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"
class TestRunAgentProxyDispatch:
"""Test that _run_agent() delegates to proxy when configured."""

View file

@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
async def stop(self):
return None
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
# get_running_pid returns 42 before we kill the old gateway, then None
# after remove_pid_file() clears the record (reflects real behavior).
_pid_state = {"alive": True}
def _mock_get_running_pid():
return 42 if _pid_state["alive"] else None
def _mock_remove_pid_file():
_pid_state["alive"] = False
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
async def stop(self):
return None
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
_pid_state = {"alive": True}
def _mock_get_running_pid():
return 42 if _pid_state["alive"] else None
def _mock_remove_pid_file():
_pid_state["alive"] = False
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)

View file

@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt:
assert "**User:** Alice" in prompt
assert "Multi-user thread" not in prompt
def test_shared_non_thread_group_prompt_hides_single_user(self):
    """Shared non-thread group sessions should avoid pinning one user."""
    gateway_config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake")},
        group_sessions_per_user=False,
    )
    group_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    session_ctx = build_session_context(group_source, gateway_config)
    prompt = build_session_context_prompt(session_ctx)
    # Multi-user framing is shown; no single sender is pinned.
    assert "Multi-user session" in prompt
    assert "[sender name]" in prompt
    assert "**User:** Alice" not in prompt
def test_dm_thread_shows_user_not_multi(self):
"""DM threads are single-user and should show User, not multi-user note."""
config = GatewayConfig(

View file

@ -0,0 +1,70 @@
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.run import GatewayRunner
from gateway.session import SessionSource
def _make_runner(config: GatewayConfig) -> GatewayRunner:
    """Build a bare GatewayRunner (bypasses __init__ and any I/O it does)."""
    runner = object.__new__(GatewayRunner)
    for attr, value in {
        "config": config,
        "adapters": {},
        "_model": "openai/gpt-4.1-mini",
        "_base_url": None,
    }.items():
        setattr(runner, attr, value)
    return runner
@pytest.mark.asyncio
async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session():
    """Shared (non-per-user) group sessions get the sender name prefixed."""
    config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake")},
        group_sessions_per_user=False,
    )
    runner = _make_runner(config)
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    prepared = await runner._prepare_inbound_message_text(
        event=MessageEvent(text="hello", source=source),
        source=source,
        history=[],
    )
    assert prepared == "[Alice] hello"
@pytest.mark.asyncio
async def test_preprocess_keeps_plain_text_for_default_group_sessions():
    """Default (per-user) group sessions keep the message text untouched."""
    config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake")},
    )
    runner = _make_runner(config)
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    prepared = await runner._prepare_inbound_message_text(
        event=MessageEvent(text="hello", source=source),
        source=source,
        history=[],
    )
    assert prepared == "hello"

View file

@ -306,7 +306,13 @@ class TestSignalSessionSource:
class TestSignalPhoneRedaction:
@pytest.fixture(autouse=True)
def _ensure_redaction_enabled(self, monkeypatch):
    """Force redaction on for every test in this class."""
    # agent.redact snapshots _REDACT_ENABLED at import time from the
    # HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late —
    # the module was already imported during test collection with
    # whatever value was in the env then. Force the flag directly.
    # See skill: xdist-cross-test-pollution Pattern 5.
    monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
    monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
def test_us_number(self):
from agent.redact import redact_sensitive_text

View file

@ -19,6 +19,30 @@ class TestGatewayPidState:
assert isinstance(payload["argv"], list)
assert payload["argv"]
def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch):
    """Regression: two concurrent --replace invocations must not both win.

    Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s
    termination-wait would both write to gateway.pid, silently overwriting
    each other and leaving multiple gateway instances alive (#11718).
    """
    import pytest

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    pid_file = tmp_path / "gateway.pid"

    # First writer wins and the record lands on disk.
    status.write_pid_file()
    assert pid_file.exists()

    # A racing second writer (one that missed the earlier guards) must hit
    # the exclusive-create guard rather than clobber the record.
    with pytest.raises(FileExistsError):
        status.write_pid_file()

    # The surviving record still belongs to the first writer.
    payload = json.loads(pid_file.read_text())
    assert payload["pid"] == os.getpid()
def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
pid_path = tmp_path / "gateway.pid"

View file

@ -71,7 +71,17 @@ def test_group_messages_can_require_direct_trigger_via_config():
assert adapter._should_process_message(_group_message("hello everyone")) is False
assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True
assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True
assert adapter._should_process_message(_group_message("/status"), is_command=True) is True
# Commands must also respect require_mention when it is enabled
assert adapter._should_process_message(_group_message("/status"), is_command=True) is False
# But commands with @mention still pass (Telegram emits a MENTION entity
# for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler
# rely on this same mechanism)
assert adapter._should_process_message(
_group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")])
) is True
# And commands still pass unconditionally when require_mention is disabled
adapter_no_mention = _make_adapter(require_mention=False)
assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True
def test_free_response_chats_bypass_mention_requirement():

View file

@ -0,0 +1,100 @@
"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required.
Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET
was not, python-telegram-bot received secret_token=None and the webhook
endpoint accepted any HTTP POST.
The fix refuses to start the adapter in webhook mode without the secret.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
import pytest
_repo = str(Path(__file__).resolve().parents[2])
if _repo not in sys.path:
sys.path.insert(0, _repo)
class TestTelegramWebhookSecretRequired:
    """Direct source-level check of the webhook-secret guard.

    The guard is embedded in TelegramAdapter.connect() and is hard to
    isolate via mocks (it requires a full python-telegram-bot
    ApplicationBuilder chain). These tests exercise it via source
    inspection: verifying the check exists, raises RuntimeError with the
    advisory link, and only fires in webhook mode. End-to-end validation
    is covered by CI + manual deployment tests.
    """

    def _get_source(self) -> str:
        # Read the adapter module's source text for regex-based inspection;
        # this avoids constructing the real Telegram application stack.
        path = Path(_repo) / "gateway" / "platforms" / "telegram.py"
        return path.read_text(encoding="utf-8")

    def test_webhook_branch_checks_secret(self):
        """The webhook-mode branch of connect() must read
        TELEGRAM_WEBHOOK_SECRET and refuse when empty."""
        src = self._get_source()
        # The guard must appear after TELEGRAM_WEBHOOK_URL is set
        assert re.search(
            r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:',
            src, re.DOTALL,
        ), (
            "TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET "
            "and raise when the secret is empty see GHSA-3vpc-7q5r-276h"
        )

    def test_guard_raises_runtime_error(self):
        """The guard raises RuntimeError (not a silent log) so operators
        see the failure at startup."""
        src = self._get_source()
        # Between the "if not webhook_secret:" line and the next blank
        # line block, we should see a RuntimeError being raised
        guard_match = re.search(
            r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(',
            src,
        )
        assert guard_match, (
            "Missing webhook secret must raise RuntimeError silent "
            "fall-through was the original GHSA-3vpc-7q5r-276h bypass"
        )

    def test_guard_message_includes_advisory_link(self):
        """The RuntimeError message should reference the advisory so
        operators can read the full context."""
        src = self._get_source()
        # NOTE(review): this scans the whole module, not just the guard's
        # message string — any mention of the advisory id satisfies it.
        assert "GHSA-3vpc-7q5r-276h" in src, (
            "Guard error message must cite the advisory for operator context"
        )

    def test_guard_message_explains_remediation(self):
        """The error should tell the operator how to fix it."""
        src = self._get_source()
        # Should mention how to generate a secret
        assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, (
            "Guard error message should show operators how to set "
            "TELEGRAM_WEBHOOK_SECRET"
        )

    def test_polling_branch_has_no_secret_guard(self):
        """Polling mode (else-branch) must NOT require the webhook secret —
        polling authenticates via the bot token, not a webhook secret."""
        src = self._get_source()
        # The guard should appear inside the `if webhook_url:` branch,
        # not the `else:` polling branch. Rough check: the raise is
        # followed (within ~60 lines) by an `else:` that starts the
        # polling branch, and there's no secret-check in that polling
        # branch.
        webhook_block = re.search(
            r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n',
            src, re.DOTALL,
        )
        # NOTE(review): group(2) is non-greedy and terminated by the first
        # newline, so polling_body captures at most one line — the negative
        # assertion below is close to vacuous. Consider capturing the full
        # else-branch; confirm against telegram.py before tightening.
        if webhook_block:
            webhook_body = webhook_block.group(1)
            polling_body = webhook_block.group(2)
            assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body
            assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body

View file

@ -175,3 +175,79 @@ class TestUsageCachedAgent:
result = await runner._handle_usage_command(event)
assert "Cost: included" in result
class TestUsageAccountSection:
    """Account-limits section appended to /usage output (PR #2486)."""

    @pytest.mark.asyncio
    async def test_usage_command_includes_account_section(self, monkeypatch):
        # Live agent carries the Codex billing identity on itself.
        agent = _make_mock_agent(provider="openai-codex")
        agent.base_url = "https://chatgpt.com/backend-api/codex"
        agent.api_key = "unused"
        runner = _make_runner(SK, cached_agent=agent)
        event = MagicMock()
        # Stub the account-usage fetch and the renderer so no network I/O
        # happens; the renderer's fixed lines are what we assert on below.
        monkeypatch.setattr(
            "gateway.run.fetch_account_usage",
            lambda provider, base_url=None, api_key=None: object(),
        )
        monkeypatch.setattr(
            "gateway.run.render_account_usage_lines",
            lambda snapshot, markdown=False: [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
                "Session: 85% remaining (15% used)",
            ],
        )
        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
                patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
            mock_cost.return_value = MagicMock(amount_usd=None, status="included")
            result = await runner._handle_usage_command(event)
        # Both the token-usage block and the rendered account section appear.
        assert "📊 **Session Token Usage**" in result
        assert "📈 **Account limits**" in result
        assert "Provider: openai-codex (Pro)" in result

    @pytest.mark.asyncio
    async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch):
        # No cached agent — the billing identity must come from the
        # persisted session DB record instead.
        runner = _make_runner(SK)
        runner._session_db = MagicMock()
        runner._session_db.get_session.return_value = {
            "billing_provider": "openai-codex",
            "billing_base_url": "https://chatgpt.com/backend-api/codex",
        }
        session_entry = MagicMock()
        session_entry.session_id = "sess-1"
        runner.session_store.get_or_create_session.return_value = session_entry
        runner.session_store.load_transcript.return_value = [
            {"role": "user", "content": "earlier"},
        ]
        calls = {}

        # Capture what the runner forwards to asyncio.to_thread so we can
        # assert fetch_account_usage received the persisted identity.
        async def _fake_to_thread(fn, *args, **kwargs):
            calls["args"] = args
            calls["kwargs"] = kwargs
            return fn(*args, **kwargs)

        monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread)
        monkeypatch.setattr(
            "gateway.run.fetch_account_usage",
            lambda provider, base_url=None, api_key=None: object(),
        )
        monkeypatch.setattr(
            "gateway.run.render_account_usage_lines",
            lambda snapshot, markdown=False: [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
            ],
        )
        event = MagicMock()
        result = await runner._handle_usage_command(event)
        # Persisted provider + base_url flowed through to the fetch call.
        assert calls["args"] == ("openai-codex",)
        assert calls["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
        assert "📊 **Session Info**" in result
        assert "📈 **Account limits**" in result

View file

@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation:
leaked = set(moonshot_models) & coding_plan_only
assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
def test_moonshot_list_contains_shared_models(self):
def test_moonshot_list_non_empty(self):
from hermes_cli.main import _PROVIDER_MODELS
moonshot_models = _PROVIDER_MODELS["moonshot"]
assert "kimi-k2.5" in moonshot_models
assert "kimi-k2-thinking" in moonshot_models
assert len(_PROVIDER_MODELS["moonshot"]) >= 1
def test_coding_plan_list_contains_plan_specific_models(self):
def test_coding_plan_list_non_empty(self):
from hermes_cli.main import _PROVIDER_MODELS
coding_models = _PROVIDER_MODELS["kimi-coding"]
assert "kimi-for-coding" in coding_models
assert "kimi-k2-thinking-turbo" in coding_models
assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1
# =============================================================================
@ -944,14 +940,12 @@ class TestHuggingFaceModels:
def test_main_provider_models_has_huggingface(self):
from hermes_cli.main import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 6, "Expected at least 6 curated HF models"
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
def test_models_py_has_huggingface(self):
from hermes_cli.models import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 6
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
def test_model_lists_match(self):
"""Model lists in main.py and models.py should be identical."""

View file

@ -115,12 +115,12 @@ class TestArceeCredentials:
class TestArceeModelCatalog:
def test_static_model_list(self):
"""Arcee has a static _PROVIDER_MODELS catalog entry. Specific model
names change with releases and don't belong in tests.
"""
from hermes_cli.models import _PROVIDER_MODELS
assert "arcee" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["arcee"]
assert "trinity-large-thinking" in models
assert "trinity-large-preview" in models
assert "trinity-mini" in models
assert len(_PROVIDER_MODELS["arcee"]) >= 1
def test_canonical_provider_entry(self):
from hermes_cli.models import CANONICAL_PROVIDERS

View file

@ -1011,3 +1011,466 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch):
# Verify the auth store was NOT modified (no auto-import happened)
after = json.loads((hermes_home / "auth.json").read_text())
assert "openai-codex" not in after.get("providers", {})
def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys):
    """`hermes auth remove xai 1` must stick even when the env var is exported
    by the shell (not written into ~/.hermes/.env). Before the PR for #13371
    the removal silently restored on next load_pool() because _seed_from_env()
    re-read os.environ. Now env:<VAR> is suppressed in auth.json.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Simulate shell export (NOT written to .env)
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
    (hermes_home / ".env").write_text("")
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "xai": [{
                    "id": "env-1",
                    "label": "XAI_API_KEY",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "env:XAI_API_KEY",
                    "access_token": "sk-xai-shell-export",
                    "base_url": "https://api.x.ai/v1",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="xai", target="1"))
    # Suppression marker written
    after = json.loads((hermes_home / "auth.json").read_text())
    assert "env:XAI_API_KEY" in after.get("suppressed_sources", {}).get("xai", [])
    # Diagnostic printed pointing at the shell
    out = capsys.readouterr().out
    assert "still set in your shell environment" in out
    assert "Cleared XAI_API_KEY from .env" not in out  # wasn't in .env
    # Fresh simulation: shell re-exports, reload pool
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
    from agent.credential_pool import load_pool

    pool = load_pool("xai")
    assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed"
def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys):
    """When the env var lives only in ~/.hermes/.env (not the shell), the
    shell-hint should NOT be printed — avoid scaring the user about a
    non-existent shell export.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Key ONLY in .env, shell must not have it
    monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False)
    (hermes_home / ".env").write_text("DEEPSEEK_API_KEY=sk-ds-only\n")
    # Mimic load_env() populating os.environ
    monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only")
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "deepseek": [{
                    "id": "env-1",
                    "label": "DEEPSEEK_API_KEY",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "env:DEEPSEEK_API_KEY",
                    "access_token": "sk-ds-only",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="deepseek", target="1"))
    out = capsys.readouterr().out
    # .env entry is cleared, and no shell warning for a key that was
    # never exported by the shell.
    assert "Cleared DEEPSEEK_API_KEY from .env" in out
    assert "still set in your shell environment" not in out
    assert (hermes_home / ".env").read_text().strip() == ""
def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch):
    """Re-adding a credential via `hermes auth add <provider>` clears any
    env:<VAR> suppression marker — a strong signal the user wants auth back.
    Matches the Codex device_code re-link behaviour.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Make sure the env var itself can't interfere with this test.
    monkeypatch.delenv("XAI_API_KEY", raising=False)
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {},
            "suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    # Marker present before the add, gone after it.
    assert is_source_suppressed("xai", "env:XAI_API_KEY") is True
    auth_add_command(SimpleNamespace(
        provider="xai", auth_type="api_key",
        api_key="sk-xai-manual", label="manual",
    ))
    assert is_source_suppressed("xai", "env:XAI_API_KEY") is False
def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch):
    """_seed_from_env() must skip env:<VAR> sources that the user suppressed
    via `hermes auth remove` — the gate that prevents shell-exported keys
    from resurrecting removed credentials.
    """
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    # The shell still exports the key, but the source is marked suppressed.
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_env

    seeded = []
    changed, active = _seed_from_env("xai", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch):
    """OpenRouter is the special-case branch in _seed_from_env; verify the
    suppression gate holds there too."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export")
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_env

    seeded = []
    changed, active = _seed_from_env("openrouter", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
# =============================================================================
# Unified credential-source stickiness — every source Hermes reads from has a
# registered RemovalStep in agent.credential_sources, and every seeding path
# gates on is_source_suppressed. Below: one test per source proving remove
# sticks across a fresh load_pool() call.
# =============================================================================
def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch):
    """nous device_code must not re-seed from auth.json while suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    # A valid-looking singleton is present, but its source is suppressed.
    nous_singleton = {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}
    store = {
        "version": 1,
        "providers": {"nous": nous_singleton},
        "suppressed_sources": {"nous": ["device_code"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("nous", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch):
    """copilot's gh_cli source must not re-seed while suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"copilot": ["gh_cli"]},
    }
    (home / "auth.json").write_text(json.dumps(store))
    # Pretend `gh auth token` resolves to a live token.
    import hermes_cli.copilot_auth as copilot_auth
    monkeypatch.setattr(copilot_auth, "resolve_copilot_token", lambda: ("ghp_fake", "gh auth token"))

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("copilot", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch):
    """qwen-oauth's qwen-cli source must not re-seed from
    ~/.qwen/oauth_creds.json while suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"qwen-oauth": ["qwen-cli"]},
    }
    (home / "auth.json").write_text(json.dumps(store))
    # Pretend the qwen CLI credentials resolve to a live token.
    import hermes_cli.auth as auth_mod
    monkeypatch.setattr(auth_mod, "resolve_qwen_runtime_credentials", lambda **kw: {
        "api_key": "tok", "source": "qwen-cli", "base_url": "https://q",
    })

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("qwen-oauth", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch):
    """anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed."""
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    import yaml
    # Config selects anthropic so the anthropic seeding path is exercised.
    (hermes_home / "config.yaml").write_text(yaml.dump({"model": {"provider": "anthropic", "model": "claude"}}))
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1,
        "providers": {},
        "suppressed_sources": {"anthropic": ["hermes_pkce"]},
    }))
    # Stub the readers so only hermes_pkce is "available"; claude_code returns None
    import agent.anthropic_adapter as aa
    monkeypatch.setattr(aa, "read_hermes_oauth_credentials", lambda: {
        "accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000,
    })
    monkeypatch.setattr(aa, "read_claude_code_credentials", lambda: None)
    from agent.credential_pool import _seed_from_singletons

    entries = []
    changed, active = _seed_from_singletons("anthropic", entries)
    # hermes_pkce suppressed, claude_code returns None → nothing should be seeded
    assert entries == []
    assert "hermes_pkce" not in active
def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch):
    """Custom provider config:<name> source must not re-seed when suppressed."""
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    import yaml
    (hermes_home / "config.yaml").write_text(yaml.dump({
        "model": {},
        "custom_providers": [
            {"name": "my", "base_url": "https://c.example.com", "api_key": "sk-custom"},
        ],
    }))
    from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key

    # The pool key is derived from the base URL, not the provider name.
    pool_key = get_custom_provider_pool_key("https://c.example.com")
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1,
        "providers": {},
        "suppressed_sources": {pool_key: ["config:my"]},
    }))
    entries = []
    changed, active = _seed_custom_pool(pool_key, entries)
    assert changed is False
    assert entries == []
    assert "config:my" not in active
def test_credential_sources_registry_has_expected_steps():
    """Sanity check — the registry contains the expected RemovalSteps.

    Guards against accidentally dropping a step during future refactors.
    If you add a new credential source, add it to the expected set below.
    """
    from agent.credential_sources import _REGISTRY

    expected = {
        "gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
        "Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
        "~/.claude/.credentials.json",
        "~/.hermes/.anthropic_oauth.json",
        "auth.json providers.nous",
        "auth.json providers.openai-codex + ~/.codex/auth.json",
        "~/.qwen/oauth_creds.json",
        "Custom provider config.yaml api_key field",
    }
    descriptions = {step.description for step in _REGISTRY}
    assert descriptions == expected, f"Registry mismatch. Got: {descriptions}"
def test_credential_sources_find_step_returns_none_for_manual():
    """Manual entries have nothing external to clean up — no step registered."""
    from agent.credential_sources import find_removal_step

    for provider in ("openrouter", "xai"):
        assert find_removal_step(provider, "manual") is None
def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch):
    """copilot env:GH_TOKEN must dispatch to the copilot-specific step, not
    the generic env-var step: the copilot step handles the duplicate-source
    problem (the same token seeded as both gh_cli and env:<VAR>), whereas
    the generic env step would only suppress one of the variants.
    """
    from agent.credential_sources import find_removal_step

    copilot_step = find_removal_step("copilot", "env:GH_TOKEN")
    assert copilot_step is not None
    desc = copilot_step.description.lower()
    assert "copilot" in desc or "gh" in desc
    # The generic step still matches every other provider's env var.
    generic_step = find_removal_step("xai", "env:XAI_API_KEY")
    assert generic_step is not None
    assert "env-seeded" in generic_step.description.lower()
def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch):
    """Removing any copilot source must suppress gh_cli + all env:* variants
    so the duplicate-seed paths don't resurrect the credential.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "copilot": [{
                    "id": "c1",
                    "label": "gh auth token",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "gh_cli",
                    "access_token": "ghp_fake",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="copilot", target="1"))
    # A single removal fans out to every alias the copilot seeder reads.
    assert is_source_suppressed("copilot", "gh_cli")
    assert is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
    assert is_source_suppressed("copilot", "env:GH_TOKEN")
    assert is_source_suppressed("copilot", "env:GITHUB_TOKEN")
def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch):
    """Re-adding a credential via `hermes auth add <provider>` clears ALL
    suppression markers for the provider, not just env:*. This matches
    the single "re-engage" semantic — the user wants auth back, period.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {},
            "suppressed_sources": {
                "copilot": ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"],
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    auth_add_command(SimpleNamespace(
        provider="copilot", auth_type="api_key",
        api_key="ghp-manual", label="m",
    ))
    # Every marker — env and non-env alike — must be gone after the add.
    assert not is_source_suppressed("copilot", "gh_cli")
    assert not is_source_suppressed("copilot", "env:GH_TOKEN")
    assert not is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch):
    """Removing a manual:device_code entry (from `hermes auth add openai-codex`)
    must suppress the canonical ``device_code`` key, not ``manual:device_code``.
    The re-seed gate in _seed_from_singletons checks ``device_code``.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}},
            "credential_pool": {
                "openai-codex": [{
                    "id": "cdx",
                    "label": "manual-codex",
                    "auth_type": "oauth",
                    "priority": 0,
                    "source": "manual:device_code",
                    "access_token": "t",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="openai-codex", target="1"))
    # The suppression key must be the canonical form the seeder checks.
    assert is_source_suppressed("openai-codex", "device_code")

View file

@ -459,7 +459,8 @@ class TestCustomProviderCompatibility:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["providers"]["openai-direct"] == {
"api": "https://api.openai.com/v1",
"api_key": "test-key",
@ -501,7 +502,8 @@ class TestCustomProviderCompatibility:
assert compatible[0]["provider_key"] == "openai-direct"
assert compatible[0]["api_mode"] == "codex_responses"
def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path):
"""URL field precedence is base_url > url > api (PR #9332)."""
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
@ -526,7 +528,7 @@ class TestCustomProviderCompatibility:
assert compatible == [
{
"name": "My Provider",
"base_url": "https://api.example.com/v1",
"base_url": "https://base.example.com/v1",
"provider_key": "my-provider",
}
]
@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["display"]["tool_progress"] == "off"
assert raw["display"]["interim_assistant_messages"] is True
@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["discord"]["auto_thread"] is True
assert raw["discord"]["channel_prompts"] == {}

View file

@ -125,18 +125,12 @@ class TestGeminiCredentials:
# ── Model Catalog ──
class TestGeminiModelCatalog:
def test_provider_models_exist(self):
def test_provider_entry_exists(self):
"""Gemini provider has a model catalog entry. Specific model names
are data that changes with Google releases and don't belong in tests.
"""
assert "gemini" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["gemini"]
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemma-4-31b-it" not in models
def test_provider_models_has_3x(self):
models = _PROVIDER_MODELS["gemini"]
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-3.1-flash-lite-preview" in models
assert len(_PROVIDER_MODELS["gemini"]) >= 1
def test_provider_label(self):
assert "gemini" in _PROVIDER_LABELS

View file

@ -457,29 +457,62 @@ class TestValidateApiNotFound:
assert "not found" in result["message"]
# -- validate — API unreachable — reject with guidance ----------------
# -- validate — API unreachable — soft-accept via catalog or warning --------
class TestValidateApiFallback:
def test_any_model_rejected_when_api_down(self):
result = _validate("anthropic/claude-opus-4.6", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
"""When /models is unreachable, the validator must accept the model (with
a warning) rather than reject it outright otherwise provider switches
fail in the gateway for any provider whose /models endpoint is down or
doesn't exist (e.g. opencode-go returns 404 HTML).
def test_unknown_model_also_rejected_when_api_down(self):
result = _validate("anthropic/claude-next-gen", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
assert "could not reach" in result["message"].lower()
Two paths:
1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch):
validate against it (recognized=True for known models,
recognized=False with 'Note:' for unknown).
2. Provider has no catalog: accept with a generic 'Note:' warning.
def test_zai_model_rejected_when_api_down(self):
In both cases ``accepted`` and ``persist`` must be True so the gateway can
write the ``_session_model_overrides`` entry.
"""
def test_known_model_accepted_via_catalog_when_api_down(self):
# Force the openrouter catalog lookup to return a deterministic list.
with patch(
"hermes_cli.models.provider_model_ids",
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
):
result = _validate("anthropic/claude-opus-4.6", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
def test_unknown_model_accepted_with_note_when_api_down(self):
with patch(
"hermes_cli.models.provider_model_ids",
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
):
result = _validate("anthropic/claude-next-gen", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
# Message flags it as unverified against the catalog.
assert "not found" in result["message"].lower() or "note" in result["message"].lower()
def test_zai_known_model_accepted_via_catalog_when_api_down(self):
# glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]).
result = _validate("glm-5", provider="zai", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
def test_unknown_provider_rejected_when_api_down(self):
result = _validate("some-model", provider="totally-unknown", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
def test_unknown_provider_soft_accepted_when_api_down(self):
# No catalog for unknown providers — soft-accept with a Note.
with patch("hermes_cli.models.provider_model_ids", return_value=[]):
result = _validate("some-model", provider="totally-unknown", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
assert "note" in result["message"].lower()
def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
with patch(

View file

@ -88,6 +88,131 @@ class TestFetchOpenRouterModels:
assert models == OPENROUTER_MODELS
    def test_filters_out_models_without_tool_support(self, monkeypatch):
        """Models whose supported_parameters omits 'tools' must not appear in
        the picker. hermes-agent is tool-calling-first — surfacing a non-tool
        model leads to immediate runtime failures when the user selects it.
        Ported from Kilo-Org/kilocode#9068.
        """
        class _Resp:
            # Minimal context-manager stand-in for the urlopen response.
            def __enter__(self):
                return self
            def __exit__(self, exc_type, exc, tb):
                return False
            def read(self):
                # opus-4.6 advertises tools → kept
                # nano-image has explicit supported_parameters that OMITS tools → dropped
                # qwen3.6-plus advertises tools → kept
                return (
                    b'{"data":['
                    b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},'
                    b'"supported_parameters":["temperature","tools","tool_choice"]},'
                    b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},'
                    b'"supported_parameters":["temperature","response_format"]},'
                    b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},'
                    b'"supported_parameters":["tools","temperature"]}'
                    b']}'
                )
        # Include the image-only id in the curated list so it has a chance to be surfaced.
        monkeypatch.setattr(
            _models_mod,
            "OPENROUTER_MODELS",
            [
                ("anthropic/claude-opus-4.6", ""),
                ("google/gemini-3-pro-image-preview", ""),
                ("qwen/qwen3.6-plus", ""),
            ],
        )
        # Drop the cached catalog so the stubbed HTTP response is actually parsed.
        monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
        with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
            models = fetch_openrouter_models(force_refresh=True)
        ids = [mid for mid, _ in models]
        assert "anthropic/claude-opus-4.6" in ids
        assert "qwen/qwen3.6-plus" in ids
        # Image-only model advertised supported_parameters WITHOUT tools → must be dropped.
        assert "google/gemini-3-pro-image-preview" not in ids
    def test_permissive_when_supported_parameters_missing(self, monkeypatch):
        """Models missing the supported_parameters field keep appearing in the picker.

        Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older
        catalog snapshots) don't populate supported_parameters. Treating missing
        as 'unknown → allow' prevents the picker from silently emptying on
        those gateways.
        """
        class _Resp:
            # Minimal context-manager stand-in for the urlopen response.
            def __enter__(self):
                return self
            def __exit__(self, exc_type, exc, tb):
                return False
            def read(self):
                # No supported_parameters field at all on either entry.
                return (
                    b'{"data":['
                    b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},'
                    b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}'
                    b']}'
                )
        # Drop the cached catalog so the stubbed response is actually parsed.
        monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
        with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
            models = fetch_openrouter_models(force_refresh=True)
        ids = [mid for mid, _ in models]
        # Both entries survive — absence of the field must not filter them out.
        assert "anthropic/claude-opus-4.6" in ids
        assert "qwen/qwen3.6-plus" in ids
class TestOpenRouterToolSupportHelper:
    """Unit tests for _openrouter_model_supports_tools (Kilo port #9068).

    Policy under test: keep a model only when supported_parameters
    mentions "tools"; treat a missing, None, malformed, or non-dict
    entry as "unknown -> allow" so non-OpenRouter gateways keep working.
    """

    @staticmethod
    def _supports(entry):
        # Imported lazily, matching the original per-test imports, so the
        # module can be collected outside the full repo.
        from hermes_cli.models import _openrouter_model_supports_tools
        return _openrouter_model_supports_tools(entry)

    def test_tools_in_supported_parameters(self):
        assert self._supports(
            {"id": "x", "supported_parameters": ["temperature", "tools"]}
        ) is True

    def test_tools_missing_from_supported_parameters(self):
        assert self._supports(
            {"id": "x", "supported_parameters": ["temperature", "response_format"]}
        ) is False

    def test_supported_parameters_absent_is_permissive(self):
        """Missing field → allow (so older / non-OR gateways still work)."""
        assert self._supports({"id": "x"}) is True

    def test_supported_parameters_none_is_permissive(self):
        assert self._supports({"id": "x", "supported_parameters": None}) is True

    def test_supported_parameters_malformed_is_permissive(self):
        """Malformed (non-list) value → allow rather than silently drop."""
        assert self._supports(
            {"id": "x", "supported_parameters": "tools,temperature"}
        ) is True

    def test_non_dict_item_is_permissive(self):
        assert self._supports(None) is True
        assert self._supports("anthropic/claude-opus-4.6") is True

    def test_empty_supported_parameters_list_drops_model(self):
        """Explicit empty list → no tools → drop."""
        assert self._supports({"id": "x", "supported_parameters": []}) is False
class TestFindOpenrouterSlug:
def test_exact_match(self):

View file

@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set():
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
# opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
# models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
# the API is unavailable, e.g. in CI).

View file

@ -0,0 +1,133 @@
"""Tests for the static-catalog fallback in validate_requested_model.
OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do
NOT expose ``/models`` (the path returns the marketing site's HTML 404). This
caused ``validate_requested_model`` to return ``accepted=False`` for every
model on those providers, which in turn made ``switch_model()`` fail and the
gateway's ``/model <name> --provider opencode-go`` command never write to
``_session_model_overrides``.
These tests cover the catalog-fallback path: when ``fetch_api_models`` returns
``None``, the validator must consult ``provider_model_ids()`` for the provider
(populated from ``_PROVIDER_MODELS``) rather than rejecting outright.
"""
from unittest.mock import patch
from hermes_cli.models import validate_requested_model
# Canned probe_api_models result for a provider whose /models endpoint is
# unreachable: no model list came back, no fallback URL was taken, and no
# alternative base URL is suggested. Shared by every test via _patched.
_UNREACHABLE_PROBE = {
    "models": None,
    "probed_url": "https://opencode.ai/zen/go/v1/models",
    "resolved_base_url": "https://opencode.ai/zen/go/v1",
    "suggested_base_url": None,
    "used_fallback": False,
}
def _patched(func):
"""Decorator: force fetch_api_models / probe_api_models to simulate an
unreachable /models endpoint, proving the catalog path is used."""
def wrapper(*args, **kwargs):
with patch("hermes_cli.models.fetch_api_models", return_value=None), \
patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE):
return func(*args, **kwargs)
wrapper.__name__ = func.__name__
return wrapper
# ---------------------------------------------------------------------------
# opencode-go: curated catalog in _PROVIDER_MODELS
# ---------------------------------------------------------------------------
@_patched
def test_opencode_go_known_model_accepted():
    """A model present in the opencode-go curated catalog must be accepted
    even when /models is unreachable."""
    outcome = validate_requested_model("kimi-k2.6", "opencode-go")
    for flag in ("accepted", "persist", "recognized"):
        assert outcome[flag] is True
    assert outcome["message"] is None
@_patched
def test_opencode_go_known_model_case_insensitive():
    """Catalog lookup is case-insensitive."""
    outcome = validate_requested_model("KIMI-K2.6", "opencode-go")
    assert outcome["accepted"] is True
    assert outcome["recognized"] is True
@_patched
def test_opencode_go_typo_auto_corrected():
    """A close typo (>= 0.9 similarity) is auto-corrected to the catalog
    entry."""
    # 'kimi-k2.55' vs 'kimi-k2.5' similarity ≈ 0.95 — inside the 0.9 cutoff.
    outcome = validate_requested_model("kimi-k2.55", "opencode-go")
    assert outcome["accepted"] is True
    assert outcome["recognized"] is True
    assert outcome.get("corrected_model") == "kimi-k2.5"
@_patched
def test_opencode_go_unknown_model_accepted_with_suggestion():
    """An unknown model with a medium-similarity match (>= 0.5 but < 0.9)
    is accepted with recognized=False plus a 'similar models' hint. Key
    invariant: the gateway MUST be able to persist this override, so both
    accepted and persist are True."""
    # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest,
    # not similar enough to auto-correct.
    outcome = validate_requested_model("kimi-k3-preview", "opencode-go")
    assert outcome["accepted"] is True
    assert outcome["persist"] is True
    assert outcome["recognized"] is False
    hint = outcome["message"]
    assert "kimi-k3-preview" in hint
    assert "curated catalog" in hint
@_patched
def test_opencode_go_totally_unknown_model_still_accepted():
    """A model with zero similarity to the catalog is still accepted (no
    suggestion line) so the user can try a model that hasn't made it into the
    curated list yet."""
    result = validate_requested_model("some-brand-new-model", "opencode-go")
    assert result["accepted"] is True
    assert result["persist"] is True
    assert result["recognized"] is False
    # No suggestion text (no close matches)
    assert "Similar models" not in result["message"]
    # The original disjunction was redundant: "opencode go" in s implies
    # "opencode" in s, so the single membership check expresses the intent.
    assert "opencode" in result["message"].lower()
# ---------------------------------------------------------------------------
# opencode-zen: same pattern as opencode-go
# ---------------------------------------------------------------------------
@_patched
def test_opencode_zen_known_model_accepted():
    """opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog."""
    outcome = validate_requested_model("kimi-k2", "opencode-zen")
    for flag in ("accepted", "recognized"):
        assert outcome[flag] is True
# ---------------------------------------------------------------------------
# Unknown provider with no catalog: soft-accept (honors the comment's intent)
# ---------------------------------------------------------------------------
@_patched
def test_provider_without_catalog_accepts_with_warning():
    """A provider with no _PROVIDER_MODELS entry and an unreachable /models
    endpoint still gets the model accepted — with a 'Note:' warning rather
    than a rejection. Matches the in-code comment: 'Accept and persist, but
    warn so typos don't silently break things.'"""
    # Made-up provider name that resolves to no catalog at all.
    outcome = validate_requested_model("some-model", "provider-that-does-not-exist")
    for flag, expected in (("accepted", True), ("persist", True), ("recognized", False)):
        assert outcome[flag] is expected
    assert "Note:" in outcome["message"]

View file

@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch):
resolved = rp.resolve_runtime_provider(requested="my-server")
assert "model" not in resolved
# ---------------------------------------------------------------------------
# GHSA-76xc-57q6-vm5m — Ollama URL substring leak
#
# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter).
# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY
# when the base_url "looks like" ollama.com. Previous implementation used
# raw substring match; a custom base_url whose PATH or look-alike host
# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint.
# Fix: use base_url_host_matches (same helper as the OpenRouter sweep).
# ---------------------------------------------------------------------------
class TestOllamaUrlSubstringLeak:
    """Call-site regression tests for the fix in _resolve_openrouter_runtime."""

    def _make_cfg(self, base_url):
        # Minimal custom-provider config; the empty api_key forces the
        # resolver to pick a credential from the environment.
        return {"base_url": base_url, "api_key": "", "provider": "custom"}

    def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch):
        """http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with
        ollama.com in PATH. Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
        # Force the custom-endpoint resolution path with no pool fallbacks,
        # so the only credential choice left is the env-var selection under test.
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "http://127.0.0.1:9000/ollama.com/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert "ol-SECRET" not in resolved["api_key"], (
            "OLLAMA_API_KEY must not be sent to an endpoint whose "
            "hostname is not ollama.com (GHSA-76xc-57q6-vm5m)"
        )
        assert resolved["api_key"] == "oa-secret"

    def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch):
        """ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY
        must not be sent."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "http://ollama.com.attacker.test:9000/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert "ol-SECRET" not in resolved["api_key"]
        assert resolved["api_key"] == "oa-secret"

    def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch):
        """https://ollama.com/v1 — legit Ollama Cloud. OLLAMA_API_KEY
        should be used."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://ollama.com/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert resolved["api_key"] == "ol-legit-key"

    def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch):
        """https://api.ollama.com/v1 — legit subdomain."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://api.ollama.com/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert resolved["api_key"] == "ol-legit-key"

View file

@ -0,0 +1,148 @@
"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation.
DNS rebinding defence: a victim browser that has the dashboard open
could be tricked into fetching from an attacker-controlled hostname
that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help —
the browser now treats the attacker origin as same-origin. Validating
the Host header at the application layer rejects the attack.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Make the repo root importable when pytest is launched from elsewhere
# (e.g. a CI working directory outside the checkout).
_repo = str(Path(__file__).resolve().parents[1])
if _repo not in sys.path:
    sys.path.insert(0, _repo)
class TestHostHeaderValidator:
    """Exercise the _is_accepted_host helper directly — cheaper and more
    thorough than spinning up the full FastAPI app."""

    def test_loopback_bind_accepts_loopback_names(self):
        from hermes_cli.web_server import _is_accepted_host
        loopback_binds = ("127.0.0.1", "localhost", "::1")
        loopback_headers = (
            "127.0.0.1", "127.0.0.1:9119",
            "localhost", "localhost:9119",
            "[::1]", "[::1]:9119",
        )
        for bound in loopback_binds:
            for host_header in loopback_headers:
                assert _is_accepted_host(host_header, bound), (
                    f"bound={bound} must accept host={host_header}"
                )

    def test_loopback_bind_rejects_attacker_hostnames(self):
        """The core rebinding defence: attacker-controlled hosts that
        TTL-flip to 127.0.0.1 must be rejected."""
        from hermes_cli.web_server import _is_accepted_host
        attacker_headers = (
            "evil.example",
            "evil.example:9119",
            "rebind.attacker.test:80",
            "localhost.attacker.test",  # subdomain trick
            "127.0.0.1.evil.test",  # lookalike IP prefix
            "",  # missing Host
        )
        for bound in ("127.0.0.1", "localhost"):
            for attacker in attacker_headers:
                assert not _is_accepted_host(attacker, bound), (
                    f"bound={bound} must reject attacker host={attacker!r}"
                )

    def test_zero_zero_bind_accepts_anything(self):
        """0.0.0.0 means the operator explicitly opted into all interfaces
        (requires --insecure). No Host-layer defence is possible — rely on
        operator network controls."""
        from hermes_cli.web_server import _is_accepted_host
        for host in ("10.0.0.5", "evil.example", "my-server.corp.net"):
            for candidate in (host, host + ":9119"):
                assert _is_accepted_host(candidate, "0.0.0.0")

    def test_explicit_non_loopback_bind_requires_exact_match(self):
        """A bind to a specific non-loopback hostname means the Host header
        must match it exactly."""
        from hermes_cli.web_server import _is_accepted_host
        bound = "my-server.corp.net"
        assert _is_accepted_host("my-server.corp.net", bound)
        assert _is_accepted_host("my-server.corp.net:9119", bound)
        # A different host is rejected.
        assert not _is_accepted_host("evil.example", bound)
        # Loopback is rejected too — we bound to a specific non-loopback name.
        assert not _is_accepted_host("localhost", bound)

    def test_case_insensitive_comparison(self):
        """Host headers are case-insensitive per RFC — accept variations."""
        from hermes_cli.web_server import _is_accepted_host
        for variant in ("LOCALHOST", "LocalHost:9119"):
            assert _is_accepted_host(variant, "127.0.0.1")
class TestHostHeaderMiddleware:
    """End-to-end test via the FastAPI app — verify the middleware
    rejects bad Host headers with 400."""

    def test_rebinding_request_rejected(self):
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app
        # Simulate start_server having set the bound_host
        app.state.bound_host = "127.0.0.1"
        try:
            client = TestClient(app)
            # The TestClient sends Host: testserver by default — which is
            # NOT a loopback alias, so the middleware must reject it.
            resp = client.get(
                "/api/status",
                headers={"Host": "evil.example"},
            )
            assert resp.status_code == 400
            assert "Invalid Host header" in resp.json()["detail"]
        finally:
            # Clean up so other tests don't inherit the bound_host
            if hasattr(app.state, "bound_host"):
                del app.state.bound_host

    def test_legit_loopback_request_accepted(self):
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app
        app.state.bound_host = "127.0.0.1"
        try:
            client = TestClient(app)
            # /api/status is in _PUBLIC_API_PATHS — passes auth — so the
            # only thing that can reject is the host header middleware
            resp = client.get(
                "/api/status",
                headers={"Host": "localhost:9119"},
            )
            # Either 200 (endpoint served) or some other non-400 —
            # just not the host-rejection 400
            assert resp.status_code != 400 or (
                "Invalid Host header" not in resp.json().get("detail", "")
            )
        finally:
            if hasattr(app.state, "bound_host"):
                del app.state.bound_host

    def test_no_bound_host_skips_validation(self):
        """If app.state.bound_host isn't set (e.g. running under test
        infra without calling start_server), middleware must pass through
        rather than crash."""
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app
        # Make sure bound_host isn't set
        if hasattr(app.state, "bound_host"):
            del app.state.bound_host
        client = TestClient(app)
        # NOTE(review): no Host override here, so TestClient's default
        # "testserver" host is exercised — confirms validation is skipped.
        resp = client.get("/api/status")
        # Should get through to the status endpoint, not a 400
        assert resp.status_code != 400

View file

@ -136,13 +136,15 @@ class TestXiaomiModelCatalog:
assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi"
def test_static_model_list_fallback(self):
"""Static _PROVIDER_MODELS fallback must exist for model picker."""
"""Static _PROVIDER_MODELS fallback must exist for model picker.
We only assert the provider key is present the specific model
names are data that changes with upstream releases and doesn't
belong in tests.
"""
from hermes_cli.models import _PROVIDER_MODELS
assert "xiaomi" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["xiaomi"]
assert "mimo-v2-pro" in models
assert "mimo-v2-omni" in models
assert "mimo-v2-flash" in models
assert len(_PROVIDER_MODELS["xiaomi"]) >= 1
def test_list_agentic_models_mock(self, monkeypatch):
"""When models.dev returns Xiaomi data, list_agentic_models should return models."""

View file

@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider:
assert agent._anthropic_prompt_cache_policy() == (False, False)
class TestQwenAlibabaFamily:
    """Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire.

    Upstream pi-mono #3392 / #3393 documented that these providers serve
    zero cache hits without Anthropic-style markers. Regression reported
    by community user (Qwen3.6 on opencode-go burning through
    subscription with no cache). Envelope layout, not native, because the
    wire format is OpenAI chat.completions.
    """

    def test_qwen_on_opencode_go_caches_with_envelope_layout(self):
        # _anthropic_prompt_cache_policy() returns (should_cache, native_layout).
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="qwen3.6-plus",
        )
        should, native = agent._anthropic_prompt_cache_policy()
        assert should is True, "Qwen on opencode-go must cache"
        assert native is False, "opencode-go is OpenAI-wire; envelope layout"

    def test_qwen35_plus_on_opencode_go(self):
        # Older Qwen generation on the same provider — same policy.
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="qwen3.5-plus",
        )
        assert agent._anthropic_prompt_cache_policy() == (True, False)

    def test_qwen_on_opencode_zen_caches(self):
        agent = _make_agent(
            provider="opencode",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="qwen3-coder-plus",
        )
        assert agent._anthropic_prompt_cache_policy() == (True, False)

    def test_qwen_on_direct_alibaba_caches(self):
        agent = _make_agent(
            provider="alibaba",
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
            api_mode="chat_completions",
            model="qwen3-coder",
        )
        assert agent._anthropic_prompt_cache_policy() == (True, False)

    def test_non_qwen_on_opencode_go_does_not_cache(self):
        # GLM / Kimi on opencode-go don't need markers (they have automatic
        # server-side caching or none at all).
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="glm-5",
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)

    def test_kimi_on_opencode_go_does_not_cache(self):
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="kimi-k2.5",
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)

    def test_qwen_on_openrouter_not_affected(self):
        # Qwen via OpenRouter falls through — OpenRouter has its own
        # upstream caching arrangement for Qwen (provider-dependent).
        agent = _make_agent(
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
            api_mode="chat_completions",
            model="qwen/qwen3-coder",
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)
class TestExplicitOverrides:
"""Policy accepts keyword overrides for switch_model / fallback activation."""

View file

@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch):
assert _get_proxy_from_env() == "http://real-proxy:8080"
def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch):
    """A bare 'socks://' scheme is an alias that must be rewritten to
    'socks5://' before use."""
    proxy_vars = (
        "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
        "https_proxy", "http_proxy", "all_proxy",
    )
    for name in proxy_vars:
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/"
@patch("run_agent.OpenAI")
def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch):
"""With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool.

View file

@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase):
agent._active_children = []
agent._active_children_lock = threading.Lock()
agent.quiet_mode = True
# Provider/model/base_url are read by stale-timeout resolution paths;
# the specific values don't matter for interrupt tests.
agent.provider = "openrouter"
agent.model = "test/model"
agent._base_url = "http://localhost:1234"
return agent
def test_parent_interrupt_sets_child_flag(self):

View file

@ -952,6 +952,84 @@ class TestBuildApiKwargs:
assert "temperature" not in kwargs
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """Kimi endpoint should send max_tokens=32000 and reasoning_effort as
    top-level params, matching Kimi CLI's default behavior."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["max_tokens"] == 32000
    assert api_kwargs["reasoning_effort"] == "medium"
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
    """reasoning_effort must reflect reasoning_config.effort when set."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    agent.reasoning_config = {"enabled": True, "effort": "high"}
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["reasoning_effort"] == "high"
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
    """Kimi endpoint should send extra_body.thinking={"type":"enabled"}
    to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_kimi_coding_endpoint_disables_thinking(self, agent):
    """With reasoning_config.enabled=False, thinking is disabled and
    reasoning_effort is omitted entirely — mirroring Kimi CLI's
    with_thinking("off"), which maps to reasoning_effort=None."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    agent.reasoning_config = {"enabled": False}
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["extra_body"]["thinking"] == {"type": "disabled"}
    assert "reasoning_effort" not in api_kwargs
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """api.moonshot.ai should get the same Kimi-compatible params."""
    agent.base_url = "https://api.moonshot.ai/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-k2.5"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["max_tokens"] == 32000
    assert api_kwargs["reasoning_effort"] == "medium"
    assert api_kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """api.moonshot.cn (China endpoint) should get the same params."""
    agent.base_url = "https://api.moonshot.cn/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-k2.5"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["max_tokens"] == 32000
    assert api_kwargs["reasoning_effort"] == "medium"
    assert api_kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_provider_preferences_injected(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.providers_allowed = ["Anthropic"]

203
tests/test_account_usage.py Normal file
View file

@ -0,0 +1,203 @@
from datetime import datetime, timezone
from agent.account_usage import (
AccountUsageSnapshot,
AccountUsageWindow,
fetch_account_usage,
render_account_usage_lines,
)
class _Response:
def __init__(self, payload, status_code=200):
self._payload = payload
self.status_code = status_code
def raise_for_status(self):
if self.status_code >= 400:
raise RuntimeError(f"HTTP {self.status_code}")
def json(self):
return self._payload
class _Client:
def __init__(self, payload):
self._payload = payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get(self, url, headers=None):
return _Response(self._payload)
class _RoutingClient:
def __init__(self, payloads):
self._payloads = payloads
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get(self, url, headers=None):
return _Response(self._payloads[url])
def test_fetch_account_usage_codex(monkeypatch):
    """openai-codex path: resolve credentials, read the cached account id,
    call the rate-limit endpoint, and map both windows plus the credits
    balance into the snapshot."""
    monkeypatch.setattr(
        "agent.account_usage.resolve_codex_runtime_credentials",
        lambda refresh_if_expiring=True: {
            "provider": "openai-codex",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "access-token",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage._read_codex_tokens",
        lambda: {"tokens": {"account_id": "acct_123"}},
    )
    # _Client returns this payload for any GET — stands in for the
    # ChatGPT backend usage endpoint.
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=15.0: _Client(
            {
                "plan_type": "pro",
                "rate_limit": {
                    "primary_window": {
                        "used_percent": 15,
                        "reset_at": 1_900_000_000,
                        "limit_window_seconds": 18000,
                    },
                    "secondary_window": {
                        "used_percent": 40,
                        "reset_at": 1_900_500_000,
                        "limit_window_seconds": 604800,
                    },
                },
                "credits": {"has_credits": True, "balance": 12.5},
            }
        ),
    )
    snapshot = fetch_account_usage("openai-codex")
    assert snapshot is not None
    assert snapshot.plan == "Pro"
    assert len(snapshot.windows) == 2
    # The primary window maps to the "Session" label and keeps its
    # percentage and UTC reset timestamp.
    assert snapshot.windows[0].label == "Session"
    assert snapshot.windows[0].used_percent == 15.0
    assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc)
    assert "Credits balance: $12.50" in snapshot.details
def test_render_account_usage_lines_includes_reset_and_provider():
    """Rendered output shows the header, the provider+plan line, each
    window's remaining/used percentages, and any detail lines."""
    window = AccountUsageWindow(
        label="Session",
        used_percent=25,
        reset_at=datetime.now(timezone.utc),
    )
    snapshot = AccountUsageSnapshot(
        provider="openai-codex",
        source="usage_api",
        fetched_at=datetime.now(timezone.utc),
        plan="Pro",
        windows=(window,),
        details=("Credits balance: $9.99",),
    )
    rendered = render_account_usage_lines(snapshot)
    assert rendered[0] == "📈 Account limits"
    assert "openai-codex (Pro)" in rendered[1]
    assert "Session: 75% remaining (25% used)" in rendered[2]
    assert "Credits balance: $9.99" in rendered[3]
def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch):
    """openrouter path: the quota window comes from /key limit_remaining,
    the balance from /credits, and the deprecated rate_limit blob
    (requests=-1) must never leak into the rendered output."""
    monkeypatch.setattr(
        "agent.account_usage.resolve_runtime_provider",
        lambda requested, explicit_base_url=None, explicit_api_key=None: {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-test",
        },
    )
    # _RoutingClient serves different payloads for the /credits and /key
    # endpoints, matching the two calls fetch_account_usage makes.
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=10.0: _RoutingClient(
            {
                "https://openrouter.ai/api/v1/credits": {
                    "data": {"total_credits": 300.0, "total_usage": 10.92}
                },
                "https://openrouter.ai/api/v1/key": {
                    "data": {
                        "limit": 100.0,
                        "limit_remaining": 70.0,
                        "limit_reset": "monthly",
                        "usage": 12.5,
                        "usage_daily": 0.5,
                        "usage_weekly": 2.0,
                        "usage_monthly": 8.0,
                        "rate_limit": {"requests": -1, "interval": "10s"},
                    }
                },
            }
        ),
    )
    snapshot = fetch_account_usage("openrouter")
    assert snapshot is not None
    # used_percent derives from (limit - limit_remaining) / limit → 30%.
    assert snapshot.windows == (
        AccountUsageWindow(
            label="API key quota",
            used_percent=30.0,
            detail="$70.00 of $100.00 remaining • resets monthly",
        ),
    )
    # Balance = total_credits - total_usage = 300.00 - 10.92.
    assert "Credits balance: $289.08" in snapshot.details
    assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details
    assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot))
def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch):
    """An unlimited key (limit / limit_remaining both None) produces no
    quota window at all — only balance and usage detail lines."""
    monkeypatch.setattr(
        "agent.account_usage.resolve_runtime_provider",
        lambda requested, explicit_base_url=None, explicit_api_key=None: {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-test",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=10.0: _RoutingClient(
            {
                "https://openrouter.ai/api/v1/credits": {
                    "data": {"total_credits": 100.0, "total_usage": 25.5}
                },
                "https://openrouter.ai/api/v1/key": {
                    "data": {
                        "limit": None,
                        "limit_remaining": None,
                        "usage": 25.5,
                        "usage_daily": 1.25,
                        "usage_weekly": 4.5,
                        "usage_monthly": 18.0,
                    }
                },
            }
        ),
    )
    snapshot = fetch_account_usage("openrouter")
    assert snapshot is not None
    assert snapshot.windows == ()
    # Balance = total_credits - total_usage = 100.00 - 25.50.
    assert "Credits balance: $74.50" in snapshot.details
    assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details

View file

@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases:
def test_trailing_dot_on_domain_stripped(self):
    # A trailing dot is the DNS fully-qualified form of the same name;
    # the matcher must normalize it away rather than fail the comparison.
    assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True
class TestOllamaUrlHostCheck:
    """GHSA-76xc-57q6-vm5m — ollama.com credential selection previously used
    a raw substring match (same bug class as GHSA-xf8p-v2cg-h7h5 for
    OpenRouter). These tests lock in that the base_url_host_matches fix
    rejects the same attack vectors for Ollama.
    """

    def test_ollama_com_path_injection_rejected(self):
        """http://evil.test/ollama.com/v1 — ollama.com appears in the path,
        not the host. Must not be treated as Ollama Cloud."""
        verdict = base_url_host_matches("http://127.0.0.1:9000/ollama.com/v1", "ollama.com")
        assert verdict is False

    def test_ollama_com_subdomain_lookalike_rejected(self):
        """ollama.com.attacker.test is a separate host, not ollama.com."""
        verdict = base_url_host_matches("http://ollama.com.attacker.test:9000/v1", "ollama.com")
        assert verdict is False

    def test_ollama_com_localtest_me_rejected(self):
        """ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me
        but its true hostname is localtest.me, not ollama.com."""
        verdict = base_url_host_matches("http://ollama.com.localtest.me:9000/v1", "ollama.com")
        assert verdict is False

    def test_ollama_ai_is_not_ollama_com(self):
        """Different TLD. ollama.ai is not ollama.com."""
        verdict = base_url_host_matches("https://ollama.ai/v1", "ollama.com")
        assert verdict is False

    def test_localhost_ollama_port_is_not_ollama_com(self):
        """http://localhost:11434/v1 is a local Ollama install, but its
        hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret)
        must not be sent."""
        verdict = base_url_host_matches("http://localhost:11434/v1", "ollama.com")
        assert verdict is False

    def test_genuine_ollama_com_matches(self):
        verdict = base_url_host_matches("https://ollama.com/api/generate", "ollama.com")
        assert verdict is True

    def test_ollama_com_subdomain_matches(self):
        verdict = base_url_host_matches("https://api.ollama.com/v1", "ollama.com")
        assert verdict is True

View file

@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch):
def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path):
"""End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results."""
import yaml
hermes_home = Path(os.environ["HERMES_HOME"])
plugins_dir = hermes_home / "plugins"
plugin_dir = plugins_dir / "transform_result_canon"
@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat
'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n',
encoding="utf-8",
)
# Plugins are opt-in — must be listed in plugins.enabled to load.
cfg_path = hermes_home / "config.yaml"
cfg_path.write_text(
yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}),
encoding="utf-8",
)
# Force a fresh plugin manager so the new config is picked up.
plugins_mod._plugin_manager = plugins_mod.PluginManager()
plugins_mod.discover_plugins()
out = _run_handle_function_call(

View file

@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults:
browser_cfg = DEFAULT_CONFIG["browser"]
assert browser_cfg["camofox"]["managed_persistence"] is False
def test_config_version_matches_current_schema(self):
from hermes_cli.config import DEFAULT_CONFIG
# The current schema version is tracked globally; unrelated default
# options may bump it after browser defaults are added.
assert DEFAULT_CONFIG["_config_version"] == 20

View file

@ -172,28 +172,60 @@ class TestTerminalIntegration:
assert blocked_var not in result
assert "PATH" in result
def test_passthrough_allows_blocklisted_var(self):
from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
def test_passthrough_cannot_override_provider_blocklist(self):
"""GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept
Hermes provider credentials that was the bypass where a skill
could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and
defeat the execute_code sandbox scrubbing."""
from tools.environments.local import (
_sanitize_subprocess_env,
_HERMES_PROVIDER_ENV_BLOCKLIST,
)
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
# Attempt to register — must be silently refused (logged warning).
register_env_passthrough([blocked_var])
# is_env_passthrough must NOT report it as allowed
assert not is_env_passthrough(blocked_var)
# Sanitizer still strips the var from subprocess env
env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
result = _sanitize_subprocess_env(env)
assert blocked_var in result
assert result[blocked_var] == "secret_value"
assert blocked_var not in result
assert "PATH" in result
def test_make_run_env_passthrough(self, monkeypatch):
from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST
def test_make_run_env_blocklist_override_rejected(self):
"""_make_run_env must NOT expose a blocklisted var to subprocess env
even after a skill attempts to register it via passthrough."""
import os
from tools.environments.local import (
_make_run_env,
_HERMES_PROVIDER_ENV_BLOCKLIST,
)
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
monkeypatch.setenv(blocked_var, "secret_value")
os.environ[blocked_var] = "secret_value"
try:
# Without passthrough — blocked
result_before = _make_run_env({})
assert blocked_var not in result_before
# Without passthrough — blocked
result_before = _make_run_env({})
assert blocked_var not in result_before
# Skill tries to register it — must be refused, so still blocked
register_env_passthrough([blocked_var])
result_after = _make_run_env({})
assert blocked_var not in result_after
finally:
os.environ.pop(blocked_var, None)
# With passthrough — allowed
register_env_passthrough([blocked_var])
result_after = _make_run_env({})
assert blocked_var in result_after
def test_non_hermes_api_key_still_registerable(self):
"""Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
Hermes provider credentials and must still pass through skills
that legitimately wrap third-party APIs must keep working."""
# TENOR_API_KEY is a real example — used by the gif-search skill
register_env_passthrough(["TENOR_API_KEY"])
assert is_env_passthrough("TENOR_API_KEY")
# Arbitrary skill-specific var
register_env_passthrough(["MY_SKILL_CUSTOM_CONFIG"])
assert is_env_passthrough("MY_SKILL_CUSTOM_CONFIG")

View file

@ -230,3 +230,102 @@ class TestEscapeDriftGuard:
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert err is None
assert count == 1
class TestFindClosestLines:
    """Behavior of tools.fuzzy_match.find_closest_lines: fuzzy lookup of the
    content most similar to a missed old_string."""

    def setup_method(self):
        # Bind the helper once per test method.
        from tools.fuzzy_match import find_closest_lines

        self.find_closest_lines = find_closest_lines

    def test_finds_similar_line(self):
        haystack = "def foo():\n pass\ndef bar():\n return 1\n"
        hint = self.find_closest_lines("def baz():", haystack)
        assert "def foo" in hint or "def bar" in hint

    def test_returns_empty_for_no_match(self):
        haystack = "completely different content here"
        hint = self.find_closest_lines("xyzzy_no_match_possible_!!!", haystack)
        assert hint == ""

    def test_returns_empty_for_empty_inputs(self):
        # Either side empty → nothing to compare against.
        assert self.find_closest_lines("", "some content") == ""
        assert self.find_closest_lines("old string", "") == ""

    def test_includes_context_lines(self):
        haystack = "line1\nline2\ndef target():\n pass\nline5\n"
        hint = self.find_closest_lines("def target():", haystack)
        assert "target" in hint

    def test_includes_line_numbers(self):
        haystack = "line1\nline2\ndef foo():\n pass\n"
        hint = self.find_closest_lines("def foo():", haystack)
        # Returned lines are rendered in "N| content" form.
        assert "|" in hint
class TestFormatNoMatchHint:
    """Gating tests for format_no_match_hint — the shared helper that decides
    whether a 'Did you mean?' snippet should be appended to an error.
    """

    def setup_method(self):
        # Per the call sites below the signature is
        # (error_message, match_count, old_string, content).
        from tools.fuzzy_match import format_no_match_hint
        self.fmt = format_no_match_hint

    def test_fires_on_could_not_find_with_match(self):
        """Classic no-match: similar content exists → hint fires."""
        content = "def foo():\n pass\ndef bar():\n pass\n"
        result = self.fmt(
            "Could not find a match for old_string in the file",
            0, "def baz():", content,
        )
        assert "Did you mean" in result
        assert "foo" in result or "bar" in result

    def test_silent_on_ambiguous_match_error(self):
        """'Found N matches' is not a missing-match failure — no hint."""
        content = "aaa bbb aaa\n"
        result = self.fmt(
            "Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.",
            0, "aaa", content,
        )
        assert result == ""

    def test_silent_on_escape_drift_error(self):
        """Escape-drift errors are intentional blocks — hint would mislead."""
        content = "x = 1\n"
        result = self.fmt(
            "Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...",
            0, "x = \\'1\\'", content,
        )
        assert result == ""

    def test_silent_on_identical_strings(self):
        """old_string == new_string — hint irrelevant."""
        result = self.fmt(
            "old_string and new_string are identical",
            0, "foo", "foo bar\n",
        )
        assert result == ""

    def test_silent_when_match_count_nonzero(self):
        """If match succeeded, we shouldn't be in the error path — defense in depth."""
        result = self.fmt(
            "Could not find a match for old_string in the file",
            1, "foo", "foo bar\n",
        )
        assert result == ""

    def test_silent_on_none_error(self):
        """No error at all — no hint."""
        result = self.fmt(None, 0, "foo", "bar\n")
        assert result == ""

    def test_silent_when_no_similar_content(self):
        """Even for a valid no-match error, skip hint when nothing similar exists."""
        result = self.fmt(
            "Could not find a match for old_string in the file",
            0, "totally_unique_xyzzy_qux", "abc\nxyz\n",
        )
        assert result == ""

View file

@ -0,0 +1,39 @@
"""FAL_KEY env var normalization (whitespace-only treated as unset)."""
def test_fal_key_whitespace_is_unset(monkeypatch):
    """A whitespace-only FAL_KEY must not register as configured; the managed
    gateway fallback is disabled so the check reflects the env var alone."""
    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )
    monkeypatch.setenv("FAL_KEY", " ")
    assert image_generation_tool.check_fal_api_key() is False
def test_fal_key_valid(monkeypatch):
    """A non-empty FAL_KEY registers as configured (gateway fallback off)."""
    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )
    monkeypatch.setenv("FAL_KEY", "sk-test")
    assert image_generation_tool.check_fal_api_key() is True
def test_fal_key_empty_is_unset(monkeypatch):
    """An empty-string FAL_KEY is treated as unset (gateway fallback off)."""
    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )
    monkeypatch.setenv("FAL_KEY", "")
    assert image_generation_tool.check_fal_api_key() is False

View file

@ -0,0 +1,162 @@
"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc.
A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that
register themselves there (nvm, asdf, pyenv) stay invisible to the
environment snapshot built by ``LocalEnvironment.init_session``. These
tests verify the config-driven prelude that fixes that.
"""
import os
from unittest.mock import patch
import pytest
from tools.environments.local import (
LocalEnvironment,
_prepend_shell_init,
_read_terminal_shell_init_config,
_resolve_shell_init_files,
)
class TestResolveShellInitFiles:
    """_resolve_shell_init_files: combines the configured explicit file list
    with the auto-source-bashrc default, returning only files that exist.
    The config reader is patched to return (explicit_list, auto_source_bashrc)."""

    def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch):
        bashrc = tmp_path / ".bashrc"
        bashrc.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        # Default config: auto_source_bashrc on, no explicit list.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([], True),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == [str(bashrc)]

    def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch):
        # No bashrc written.
        monkeypatch.setenv("HOME", str(tmp_path))
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([], True),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == []

    def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch):
        bashrc = tmp_path / ".bashrc"
        bashrc.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        # auto_source_bashrc=False: an existing ~/.bashrc must be ignored.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([], False),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == []

    def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch):
        bashrc = tmp_path / ".bashrc"
        bashrc.write_text('export FROM_BASHRC=1\n')
        custom = tmp_path / "custom.sh"
        custom.write_text('export FROM_CUSTOM=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        # auto_source_bashrc stays True but the explicit list takes precedence.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(custom)], True),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == [str(custom)]
        assert str(bashrc) not in resolved

    def test_expands_home_and_env_vars(self, tmp_path, monkeypatch):
        target = tmp_path / "rc" / "custom.sh"
        target.parent.mkdir()
        target.write_text('export A=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        monkeypatch.setenv("CUSTOM_RC_DIR", str(tmp_path / "rc"))
        # "~" expansion ...
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=(["~/rc/custom.sh"], False),
        ):
            resolved_home = _resolve_shell_init_files()
        # ... and "${VAR}" expansion both resolve to the same real file.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=(["${CUSTOM_RC_DIR}/custom.sh"], False),
        ):
            resolved_var = _resolve_shell_init_files()
        assert resolved_home == [str(target)]
        assert resolved_var == [str(target)]

    def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HOME", str(tmp_path))
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(tmp_path / "does-not-exist.sh")], False),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == []
class TestPrependShellInit:
    """_prepend_shell_init: wraps a command string with guarded `source`
    lines for each resolved init file."""

    def test_empty_list_returns_command_unchanged(self):
        # No init files → command passes through untouched.
        assert _prepend_shell_init("echo hi", []) == "echo hi"

    def test_prepends_guarded_source_lines(self):
        wrapped = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"])
        assert "echo hi" in wrapped
        # Each file is sourced through a guarded [ -r … ] && . '…' || true
        # pattern so a missing/broken rc can't abort the bootstrap.
        assert "/tmp/a.sh" in wrapped
        assert "/tmp/b.sh" in wrapped
        assert "|| true" in wrapped
        assert "set +e" in wrapped

    def test_escapes_single_quotes(self):
        wrapped = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"])
        # The path must survive as the shell receives it; embedded single
        # quote is escaped as '\'' rather than breaking the outer quoting.
        assert "o'\\''malley" in wrapped
@pytest.mark.skipif(
    os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"),
    reason="Requires bash; CI sandbox may strip it.",
)
class TestSnapshotEndToEnd:
    """Spin up a real LocalEnvironment and confirm the snapshot sources
    extra init files."""

    def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch):
        init_file = tmp_path / "custom-init.sh"
        init_file.write_text(
            'export HERMES_SHELL_INIT_PROBE="probe-ok"\n'
            'export PATH="/opt/shell-init-probe/bin:$PATH"\n'
        )
        # Config returns only our init file (auto_source_bashrc off).
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(init_file)], False),
        ):
            env = LocalEnvironment(cwd=str(tmp_path), timeout=15)
            try:
                result = env.execute(
                    'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"'
                )
            finally:
                # Always tear down the spawned shell session.
                env.cleanup()
        output = result.get("output", "")
        assert "PROBE=probe-ok" in output
        assert "/opt/shell-init-probe/bin" in output

View file

@ -0,0 +1,252 @@
"""Tests for MCP tool-handler circuit-breaker recovery.
The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit
calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD``
consecutive times, then *transition back to a usable state* once the
server has had time to recover (or an explicit reconnect succeeds).
The original implementation only had two states — closed and open — with
no mechanism to transition back to closed, so a tripped breaker stayed
tripped for the lifetime of the process. These tests lock in the
half-open / cooldown / reconnect-resets-breaker behavior that fixes
that.
"""
import json
from unittest.mock import MagicMock
import pytest
pytest.importorskip("mcp.client.auth.oauth2")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _install_stub_server(mcp_tool_module, name: str, call_tool_impl):
"""Install a fake MCP server in the module's registry.
``call_tool_impl`` is an async function stored at ``session.call_tool``
(it's what the tool handler invokes).
"""
server = MagicMock()
server.name = name
session = MagicMock()
session.call_tool = call_tool_impl
server.session = session
server._reconnect_event = MagicMock()
server._ready = MagicMock()
server._ready.is_set.return_value = True
mcp_tool_module._servers[name] = server
mcp_tool_module._server_error_counts.pop(name, None)
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
mcp_tool_module._server_breaker_opened_at.pop(name, None)
return server
def _cleanup(mcp_tool_module, name: str) -> None:
mcp_tool_module._servers.pop(name, None)
mcp_tool_module._server_error_counts.pop(name, None)
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
mcp_tool_module._server_breaker_opened_at.pop(name, None)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path):
    """After a tripped breaker's cooldown elapses, the *next* call must
    actually execute against the session (half-open probe). When the
    probe succeeds, the breaker resets to fully closed.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    # Counts how many times the (stubbed) session is actually invoked.
    call_count = {"n": 0}

    async def _call_tool_success(*a, **kw):
        call_count["n"] += 1
        result = MagicMock()
        result.isError = False
        block = MagicMock()
        block.text = "ok"
        result.content = [block]
        result.structuredContent = None
        return result

    _install_stub_server(mcp_tool, "srv", _call_tool_success)
    mcp_tool._ensure_mcp_loop()
    try:
        # Trip the breaker by setting the count at/above threshold and
        # stamping the open-time to "now".
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        # Controllable clock: mcp_tool.time.monotonic reads fake_now[0].
        fake_now = [1000.0]

        def _fake_monotonic():
            return fake_now[0]

        monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
        # The breaker-open timestamp dict is introduced by the fix; on
        # a pre-fix build it won't exist, which will cause the test to
        # fail at the .get() inside the gate (correct — the fix is
        # required for this state to be tracked at all).
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)
        # Handler returns a JSON-encoded string (parsed below).
        handler = _make_tool_handler("srv", "tool1", 10.0)
        # Before cooldown: must short-circuit (no session call).
        result = handler({})
        parsed = json.loads(result)
        assert "error" in parsed, parsed
        assert "unreachable" in parsed["error"].lower()
        assert call_count["n"] == 0, (
            "breaker should short-circuit before cooldown elapses"
        )
        # Advance past cooldown → next call is a half-open probe that
        # actually hits the session.
        fake_now[0] += cooldown + 1.0
        result = handler({})
        parsed = json.loads(result)
        assert parsed.get("result") == "ok", parsed
        assert call_count["n"] == 1, "half-open probe should invoke session"
        # On probe success the breaker must close (count reset to 0).
        assert mcp_tool._server_error_counts.get("srv", 0) == 0
    finally:
        _cleanup(mcp_tool, "srv")
def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path):
    """If the half-open probe fails, the breaker must re-arm the
    cooldown (not let every subsequent call through).
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    # Counts how many times the (stubbed) session is actually invoked.
    call_count = {"n": 0}

    async def _call_tool_fails(*a, **kw):
        call_count["n"] += 1
        raise RuntimeError("still broken")

    _install_stub_server(mcp_tool, "srv", _call_tool_fails)
    mcp_tool._ensure_mcp_loop()
    try:
        # Trip the breaker and stamp the open-time to the fake "now".
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        fake_now = [1000.0]

        def _fake_monotonic():
            return fake_now[0]

        monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)
        handler = _make_tool_handler("srv", "tool1", 10.0)
        # Advance past cooldown, run probe, expect failure.
        fake_now[0] += cooldown + 1.0
        result = handler({})
        parsed = json.loads(result)
        assert "error" in parsed
        assert call_count["n"] == 1, "probe should invoke session once"
        # The probe failure must have re-armed the cooldown — another
        # immediate call should short-circuit, not invoke session again.
        result = handler({})
        parsed = json.loads(result)
        assert "unreachable" in parsed.get("error", "").lower()
        assert call_count["n"] == 1, (
            "breaker should re-open and block further calls after probe failure"
        )
    finally:
        _cleanup(mcp_tool, "srv")
def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path):
    """When the auth-recovery path successfully reconnects the server,
    the breaker should be cleared so subsequent calls aren't gated on a
    stale failure count even if the post-reconnect retry itself fails.

    This locks in the fix-#2 contract: a successful reconnect is
    sufficient evidence that the server is viable again. Under the old
    implementation, reset only happened on retry *success*, so a
    reconnect+retry-failure left the counter pinned above threshold
    forever.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools import mcp_tool
    from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests
    from mcp.client.auth import OAuthFlowError

    reset_manager_for_tests()

    async def _call_tool_unused(*a, **kw):  # pragma: no cover
        raise AssertionError("session.call_tool should not be reached in this test")

    _install_stub_server(mcp_tool, "srv", _call_tool_unused)
    mcp_tool._ensure_mcp_loop()
    # Open the breaker well above threshold, with a recent open-time so
    # it would short-circuit everything without a reset.
    mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2
    if hasattr(mcp_tool, "_server_breaker_opened_at"):
        import time as _time
        mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic()
    # Force handle_401 to claim recovery succeeded.
    mgr = get_manager()

    async def _h401(name, token=None):
        return True

    monkeypatch.setattr(mgr, "handle_401", _h401)
    try:
        # Retry fails *after* the successful reconnect. Under the old
        # implementation this bumps an already-tripped counter even
        # higher. Under fix #2 the reset happens on successful
        # reconnect, and the post-retry bump only raises the fresh
        # count to 1 — still below threshold.
        def _retry_call():
            raise OAuthFlowError("still failing post-reconnect")

        result = mcp_tool._handle_auth_error_and_retry(
            "srv",
            OAuthFlowError("initial"),
            _retry_call,
            "tools/call test",
        )
        # The call as a whole still surfaces needs_reauth because the
        # retry itself didn't succeed, but the breaker state must
        # reflect the successful reconnect.
        assert result is not None
        parsed = json.loads(result)
        assert parsed.get("needs_reauth") is True, parsed
        # Post-reconnect count was reset to 0, then the failing retry
        # bumped it to exactly 1 — well below threshold.
        count = mcp_tool._server_error_counts.get("srv", 0)
        assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, (
            f"successful reconnect must reset the breaker below threshold; "
            f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}"
        )
    finally:
        _cleanup(mcp_tool, "srv")

View file

@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning
def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path):
import yaml
hermes_home = Path(os.environ["HERMES_HOME"])
plugins_dir = hermes_home / "plugins"
plugin_dir = plugins_dir / "terminal_transform"
@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp
'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n',
encoding="utf-8",
)
# Plugins are opt-in — must be listed in plugins.enabled to load.
cfg_path = hermes_home / "config.yaml"
cfg_path.write_text(
yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}),
encoding="utf-8",
)
# Force a fresh plugin manager so the new config is picked up.
plugins_mod._plugin_manager = plugins_mod.PluginManager()
plugins_mod.discover_plugins()
long_output = "X" * 60000

View file

@ -0,0 +1,198 @@
"""Tests for the KittenTTS local provider in tools/tts_tool.py."""
import json
from unittest.mock import MagicMock, patch
import numpy as np
import pytest
@pytest.fixture(autouse=True)
def clean_env(monkeypatch):
    """Strip session-platform env leakage so every test starts clean."""
    for leaked_key in ("HERMES_SESSION_PLATFORM",):
        monkeypatch.delenv(leaked_key, raising=False)
@pytest.fixture(autouse=True)
def clear_kittentts_cache():
    """Empty the module-level KittenTTS model cache before and after each test."""
    from tools import tts_tool as tts_module

    tts_module._kittentts_model_cache.clear()
    yield
    tts_module._kittentts_model_cache.clear()
@pytest.fixture
def mock_kittentts_module():
    """Inject a fake kittentts + soundfile module that return stub objects.

    Yields (fake model instance, fake model class) so tests can inspect
    generate()/constructor calls.
    """
    fake_model = MagicMock()
    # 24kHz float32 PCM at ~2s of silence
    fake_model.generate.return_value = np.zeros(48000, dtype=np.float32)
    fake_cls = MagicMock(return_value=fake_model)
    fake_kittentts = MagicMock()
    fake_kittentts.KittenTTS = fake_cls
    # Stub soundfile — the real package isn't installed in CI venv, and
    # _generate_kittentts does `import soundfile as sf` at runtime.
    fake_sf = MagicMock()

    def _fake_write(path, audio, samplerate):
        # Emulate writing a real file so downstream path checks succeed.
        import pathlib
        pathlib.Path(path).write_bytes(b"RIFF\x00\x00\x00\x00WAVEfmt fake")

    fake_sf.write = _fake_write
    # patch.dict restores sys.modules on exit, undoing the injection.
    with patch.dict(
        "sys.modules",
        {"kittentts": fake_kittentts, "soundfile": fake_sf},
    ):
        yield fake_model, fake_cls
class TestGenerateKittenTts:
    """_generate_kittentts: output writing, config plumbing, model caching,
    and the ffmpeg conversion path for non-WAV targets (all against the
    stubbed kittentts/soundfile fixture)."""

    def test_successful_wav_generation(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        fake_model, fake_cls = mock_kittentts_module
        output_path = str(tmp_path / "test.wav")
        result = _generate_kittentts("Hello world", output_path, {})
        # Returns the path it wrote, and the stubbed soundfile created the file.
        assert result == output_path
        assert (tmp_path / "test.wav").exists()
        fake_cls.assert_called_once()
        fake_model.generate.assert_called_once()

    def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        fake_model, _ = mock_kittentts_module
        config = {
            "kittentts": {
                "model": "KittenML/kitten-tts-mini-0.8",
                "voice": "Luna",
                "speed": 1.25,
                "clean_text": False,
            }
        }
        _generate_kittentts("Hi there", str(tmp_path / "out.wav"), config)
        # The kittentts sub-config must be forwarded as generate() kwargs.
        call_kwargs = fake_model.generate.call_args.kwargs
        assert call_kwargs["voice"] == "Luna"
        assert call_kwargs["speed"] == 1.25
        assert call_kwargs["clean_text"] is False

    def test_default_model_and_voice(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import (
            DEFAULT_KITTENTTS_MODEL,
            DEFAULT_KITTENTTS_VOICE,
            _generate_kittentts,
        )
        fake_model, fake_cls = mock_kittentts_module
        _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
        # Empty config falls back to the module-level defaults.
        fake_cls.assert_called_once_with(DEFAULT_KITTENTTS_MODEL)
        assert fake_model.generate.call_args.kwargs["voice"] == DEFAULT_KITTENTTS_VOICE

    def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        _, fake_cls = mock_kittentts_module
        _generate_kittentts("One", str(tmp_path / "a.wav"), {})
        _generate_kittentts("Two", str(tmp_path / "b.wav"), {})
        # Same model name → class instantiated exactly once
        assert fake_cls.call_count == 1

    def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        _, fake_cls = mock_kittentts_module
        _generate_kittentts(
            "A", str(tmp_path / "a.wav"),
            {"kittentts": {"model": "KittenML/kitten-tts-nano-0.8-int8"}},
        )
        _generate_kittentts(
            "B", str(tmp_path / "b.wav"),
            {"kittentts": {"model": "KittenML/kitten-tts-mini-0.8"}},
        )
        # Distinct model names each get their own cache entry.
        assert fake_cls.call_count == 2

    def test_non_wav_extension_triggers_ffmpeg_conversion(
        self, tmp_path, mock_kittentts_module, monkeypatch
    ):
        """Non-.wav output path causes WAV → target ffmpeg conversion."""
        from tools import tts_tool as _tt
        # Records every subprocess command issued.
        calls = []

        def fake_shutil_which(cmd):
            # Pretend ffmpeg is the only binary on PATH.
            return "/usr/bin/ffmpeg" if cmd == "ffmpeg" else None

        def fake_run(cmd, check=False, timeout=None, **kw):
            calls.append(cmd)
            # Emulate ffmpeg writing the output file
            import pathlib
            out_path = cmd[-1]
            pathlib.Path(out_path).write_bytes(b"fake-mp3-data")
            return MagicMock(returncode=0)

        monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which)
        monkeypatch.setattr(_tt.subprocess, "run", fake_run)
        output_path = str(tmp_path / "test.mp3")
        result = _tt._generate_kittentts("Hi", output_path, {})
        assert result == output_path
        # Exactly one subprocess call, and it was ffmpeg.
        assert len(calls) == 1
        assert calls[0][0] == "/usr/bin/ffmpeg"

    def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch):
        """When kittentts package is not installed, _import_kittentts raises."""
        import sys
        # sys.modules[name] = None makes a later `import name` fail.
        monkeypatch.setitem(sys.modules, "kittentts", None)
        from tools.tts_tool import _generate_kittentts
        with pytest.raises((ImportError, TypeError)):
            _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
class TestCheckKittenttsAvailable:
    """_check_kittentts_available reports whether the kittentts package is
    importable, as probed via importlib.util.find_spec."""

    def test_reports_available_when_package_present(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        stub_spec = MagicMock()

        def fake_find_spec(module_name):
            return stub_spec if module_name == "kittentts" else None

        monkeypatch.setattr(importlib.util, "find_spec", fake_find_spec)
        assert _check_kittentts_available() is True

    def test_reports_unavailable_when_package_missing(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        monkeypatch.setattr(importlib.util, "find_spec", lambda module_name: None)
        assert _check_kittentts_available() is False
class TestDispatcherBranch:
    def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path):
        """When provider=kittentts but package missing, return JSON error with setup hint."""
        import sys
        import yaml

        monkeypatch.setitem(sys.modules, "kittentts", None)
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        from tools.tts_tool import text_to_speech_tool

        # Point the config at the kittentts provider.
        (tmp_path / "config.yaml").write_text(
            yaml.safe_dump({"tts": {"provider": "kittentts"}})
        )
        payload = json.loads(text_to_speech_tool(text="Hello"))
        assert payload["success"] is False
        error_text = payload["error"].lower()
        assert "kittentts" in error_text
        assert "hermes setup tts" in error_text

View file

@ -933,6 +933,58 @@ class TestEnableVoiceModeReal:
assert cli._voice_mode is True
class TestVoiceBeepConfigReal:
    """Tests the CLI voice beep toggle."""

    @patch("hermes_cli.config.load_config", return_value={"voice": {}})
    def test_beeps_enabled_by_default(self, _cfg):
        # No beep_enabled key in config → beeps default on.
        cli = _make_voice_cli()
        assert cli._voice_beeps_enabled() is True

    @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
    def test_beeps_can_be_disabled(self, _cfg):
        cli = _make_voice_cli()
        assert cli._voice_beeps_enabled() is False

    # Decorators apply bottom-up, so the mock arguments arrive in reverse
    # order: (_cfg, _req, mock_create, mock_beep, mock_thread, _cp).
    @patch("cli._cprint")
    @patch("cli.threading.Thread")
    @patch("tools.voice_mode.play_beep")
    @patch("tools.voice_mode.create_audio_recorder")
    @patch(
        "tools.voice_mode.check_voice_requirements",
        return_value={
            "available": True,
            "audio_available": True,
            "stt_available": True,
            "details": "OK",
            "missing_packages": [],
        },
    )
    @patch(
        "hermes_cli.config.load_config",
        return_value={
            "voice": {
                "beep_enabled": False,
                "silence_threshold": 200,
                "silence_duration": 3.0,
            }
        },
    )
    def test_start_recording_skips_beep_when_disabled(
        self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp
    ):
        recorder = MagicMock()
        recorder.supports_silence_autostop = True
        mock_create.return_value = recorder
        mock_thread.return_value = MagicMock(start=MagicMock())
        cli = _make_voice_cli()
        cli._voice_start_recording()
        # Recording still starts; the disabled config suppresses only the beep.
        recorder.start.assert_called_once()
        mock_beep.assert_not_called()
class TestDisableVoiceModeReal:
"""Tests _disable_voice_mode with real CLI instance."""
@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal:
cli._voice_stop_and_transcribe()
assert cli._pending_input.empty()
    @patch("cli._cprint")
    @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
    @patch("tools.voice_mode.play_beep")
    def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp):
        # recorder.stop() returning None models "no speech captured".
        recorder = MagicMock()
        recorder.stop.return_value = None
        cli = _make_voice_cli(_voice_recording=True, _voice_recorder=recorder)
        cli._voice_stop_and_transcribe()
        # Even the no-speech feedback path must respect beep_enabled=False.
        mock_beep.assert_not_called()
@patch("cli._cprint")
@patch("cli.os.unlink")
@patch("cli.os.path.isfile", return_value=True)