mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
Merge remote-tracking branch 'origin/main' into sid/types-and-lints
# Conflicts: # gateway/platforms/base.py # gateway/platforms/qqbot/adapter.py # gateway/platforms/slack.py # hermes_cli/main.py # scripts/batch_runner.py # tools/skills_tool.py # uv.lock
This commit is contained in:
commit
a9ed7cb3b4
117 changed files with 7791 additions and 611 deletions
|
|
@ -414,7 +414,11 @@ class TestRunOauthSetupToken:
|
|||
token = run_oauth_setup_token()
|
||||
|
||||
assert token == "from-cred-file"
|
||||
mock_run.assert_called_once()
|
||||
# Don't assert exact call count — the contract is "credentials flow
|
||||
# through", not "exactly one subprocess call". xdist cross-test
|
||||
# pollution (other tests shimming subprocess via plugins) has flaked
|
||||
# assert_called_once() in CI.
|
||||
assert mock_run.called
|
||||
|
||||
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
|
||||
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""
|
||||
|
|
|
|||
238
tests/agent/test_anthropic_normalize_v2.py
Normal file
238
tests/agent/test_anthropic_normalize_v2.py
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
"""Regression tests: normalize_anthropic_response_v2 vs v1.
|
||||
|
||||
Constructs mock Anthropic responses and asserts that the v2 function
|
||||
(returning NormalizedResponse) produces identical field values to the
|
||||
original v1 function (returning SimpleNamespace + finish_reason).
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.anthropic_adapter import (
|
||||
normalize_anthropic_response,
|
||||
normalize_anthropic_response_v2,
|
||||
)
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers to build mock Anthropic SDK responses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _text_block(text: str):
|
||||
return SimpleNamespace(type="text", text=text)
|
||||
|
||||
|
||||
def _thinking_block(thinking: str, signature: str = "sig_abc"):
|
||||
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
|
||||
|
||||
|
||||
def _tool_use_block(id: str, name: str, input: dict):
|
||||
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
|
||||
|
||||
|
||||
def _response(content_blocks, stop_reason="end_turn"):
|
||||
return SimpleNamespace(
|
||||
content=content_blocks,
|
||||
stop_reason=stop_reason,
|
||||
usage=SimpleNamespace(
|
||||
input_tokens=10,
|
||||
output_tokens=5,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTextOnly:
    """A plain text response: no tool calls, no thinking blocks."""

    def setup_method(self):
        resp = _response([_text_block("Hello world")])
        self.resp = resp
        self.v1_msg, self.v1_finish = normalize_anthropic_response(resp)
        self.v2 = normalize_anthropic_response_v2(resp)

    def test_type(self):
        assert isinstance(self.v2, NormalizedResponse)

    def test_content_matches(self):
        assert self.v1_msg.content == self.v2.content

    def test_finish_reason_matches(self):
        assert self.v1_finish == self.v2.finish_reason

    def test_no_tool_calls(self):
        assert self.v2.tool_calls is None
        assert self.v1_msg.tool_calls is None

    def test_no_reasoning(self):
        assert self.v2.reasoning is None
        assert self.v1_msg.reasoning is None
|
||||
|
||||
|
||||
class TestWithToolCalls:
    """A response that requests two tool invocations plus lead-in text."""

    def setup_method(self):
        blocks = [
            _text_block("I'll check that"),
            _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
            _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_finish_reason(self):
        assert self.v2.finish_reason == "tool_calls"
        assert self.v1_finish == "tool_calls"

    def test_tool_call_count(self):
        assert len(self.v2.tool_calls) == 2
        assert len(self.v1_msg.tool_calls) == 2

    def test_tool_call_ids_match(self):
        for v2_call, v1_call in zip(self.v2.tool_calls, self.v1_msg.tool_calls):
            assert v2_call.id == v1_call.id

    def test_tool_call_names_match(self):
        assert [tc.name for tc in self.v2.tool_calls] == ["terminal", "read_file"]
        for v2_call, v1_call in zip(self.v2.tool_calls, self.v1_msg.tool_calls):
            assert v2_call.name == v1_call.function.name

    def test_tool_call_arguments_match(self):
        for v2_call, v1_call in zip(self.v2.tool_calls, self.v1_msg.tool_calls):
            assert v2_call.arguments == v1_call.function.arguments

    def test_content_preserved(self):
        assert self.v2.content == self.v1_msg.content
        assert "check that" in self.v2.content
|
||||
|
||||
|
||||
class TestWithThinking:
    """Responses carrying extended-thinking blocks alongside text."""

    def setup_method(self):
        self.resp = _response(
            [
                _thinking_block("Let me think about this carefully..."),
                _text_block("The answer is 42."),
            ]
        )
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_reasoning_matches(self):
        assert self.v2.reasoning == self.v1_msg.reasoning
        assert "think about this" in self.v2.reasoning

    def test_reasoning_details_in_provider_data(self):
        v1_details = self.v1_msg.reasoning_details
        provider_data = self.v2.provider_data
        v2_details = provider_data.get("reasoning_details") if provider_data else None
        assert v1_details is not None
        assert v2_details is not None
        assert len(v2_details) == len(v1_details)

    def test_content_excludes_thinking(self):
        assert self.v2.content == "The answer is 42."
|
||||
|
||||
|
||||
class TestMixed:
    """Thinking + text + tool use all present in one response."""

    def setup_method(self):
        blocks = [
            _thinking_block("Planning my approach..."),
            _text_block("I'll run the command"),
            _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_all_fields_present(self):
        v2 = self.v2
        assert v2.content is not None
        assert v2.tool_calls is not None
        assert v2.reasoning is not None
        assert v2.finish_reason == "tool_calls"

    def test_content_matches(self):
        assert self.v1_msg.content == self.v2.content

    def test_reasoning_matches(self):
        assert self.v1_msg.reasoning == self.v2.reasoning

    def test_tool_call_matches(self):
        v1_call = self.v1_msg.tool_calls[0]
        v2_call = self.v2.tool_calls[0]
        assert v2_call.id == v1_call.id
        assert v2_call.name == v1_call.function.name
|
||||
|
||||
|
||||
class TestStopReasons:
    """v1 and v2 must map Anthropic stop reasons to identical finish reasons."""

    CASES = [
        ("end_turn", "stop"),
        ("tool_use", "tool_calls"),
        ("max_tokens", "length"),
        ("stop_sequence", "stop"),
        ("refusal", "content_filter"),
        ("model_context_window_exceeded", "length"),
        # Unrecognized reasons fall back to "stop".
        ("unknown_future_reason", "stop"),
    ]

    @pytest.mark.parametrize("stop_reason,expected", CASES)
    def test_stop_reason_mapping(self, stop_reason, expected):
        resp = _response([_text_block("x")], stop_reason=stop_reason)
        _, v1_finish = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        assert v1_finish == expected
        assert v2.finish_reason == expected
|
||||
|
||||
|
||||
class TestStripToolPrefix:
    """The mcp_ tool-name prefix must be stripped (or kept) identically."""

    @staticmethod
    def _mcp_response():
        """Response with a single mcp_-prefixed tool call."""
        return _response(
            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
            stop_reason="tool_use",
        )

    def test_prefix_stripped(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
        assert v1_msg.tool_calls[0].function.name == "terminal"
        assert v2.tool_calls[0].name == "terminal"

    def test_prefix_kept(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
        assert v2.tool_calls[0].name == "mcp_terminal"
|
||||
|
||||
|
||||
class TestEdgeCases:
    """Degenerate inputs: no content blocks, no provider extras."""

    def test_empty_content_blocks(self):
        resp = _response([])
        v1_msg, _ = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        assert v2.content == v1_msg.content
        assert v2.content is None

    def test_no_reasoning_details_means_none_provider_data(self):
        v2 = normalize_anthropic_response_v2(_response([_text_block("hi")]))
        assert v2.provider_data is None

    def test_v2_returns_dataclass_not_namespace(self):
        v2 = normalize_anthropic_response_v2(_response([_text_block("hi")]))
        assert isinstance(v2, NormalizedResponse)
        assert not isinstance(v2, SimpleNamespace)
|
||||
146
tests/agent/test_copilot_acp_client.py
Normal file
146
tests/agent/test_copilot_acp_client.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Focused regressions for the Copilot ACP shim safety layer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
|
||||
|
||||
class _FakeProcess:
|
||||
def __init__(self) -> None:
|
||||
self.stdin = io.StringIO()
|
||||
|
||||
|
||||
class CopilotACPClientSafetyTests(unittest.TestCase):
    """Safety behavior of the Copilot ACP shim: permissions, reads, writes."""

    def setUp(self) -> None:
        self.client = CopilotACPClient(acp_cwd="/tmp")

    def _dispatch(self, message: dict, *, cwd: str) -> dict:
        """Feed one server message through the handler and decode its reply."""
        fake_proc = _FakeProcess()
        handled = self.client._handle_server_message(
            message,
            process=fake_proc,
            cwd=cwd,
            text_parts=[],
            reasoning_parts=[],
        )
        self.assertTrue(handled)
        raw = fake_proc.stdin.getvalue().strip()
        self.assertTrue(raw)
        return json.loads(raw)

    def test_request_permission_is_not_auto_allowed(self) -> None:
        request = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "session/request_permission",
            "params": {},
        }
        reply = self._dispatch(request, cwd="/tmp")

        # Drill into result.outcome.outcome — permission must be refused.
        result = reply.get("result") or {}
        outer = result.get("outcome") or {}
        self.assertEqual(outer.get("outcome"), "cancelled")

    def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
            blocked.parent.mkdir(parents=True, exist_ok=True)
            blocked.write_text('{"token":"sk-test-secret-1234567890"}')

            env = {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")}
            with patch.dict(os.environ, env, clear=False):
                reply = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 2,
                        "method": "fs/read_text_file",
                        "params": {"path": str(blocked)},
                    },
                    cwd=str(home),
                )

            self.assertIn("error", reply)

    def test_read_text_file_redacts_sensitive_content(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            secret_file = root / "config.env"
            secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")

            reply = self._dispatch(
                {
                    "jsonrpc": "2.0",
                    "id": 3,
                    "method": "fs/read_text_file",
                    "params": {"path": str(secret_file)},
                },
                cwd=str(root),
            )

            # The key name survives, the secret value does not.
            content = (reply.get("result") or {}).get("content") or ""
            self.assertNotIn("abc123def456", content)
            self.assertIn("OPENAI_API_KEY=", content)

    def test_write_text_file_reuses_write_denylist(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            target = home / ".ssh" / "id_rsa"
            target.parent.mkdir(parents=True, exist_ok=True)

            deny_everything = patch(
                "agent.copilot_acp_client.is_write_denied",
                return_value=True,
                create=True,
            )
            with deny_everything:
                reply = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 4,
                        "method": "fs/write_text_file",
                        "params": {
                            "path": str(target),
                            "content": "fake-private-key",
                        },
                    },
                    cwd=str(home),
                )

            self.assertIn("error", reply)
            self.assertFalse(target.exists())

    def test_write_text_file_respects_safe_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            safe_root = root / "workspace"
            safe_root.mkdir()
            outside = root / "outside.txt"

            with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
                reply = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 5,
                        "method": "fs/write_text_file",
                        "params": {
                            "path": str(outside),
                            "content": "should-not-write",
                        },
                    },
                    cwd=str(root),
                )

            self.assertIn("error", reply)
            self.assertFalse(outside.exists())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this module directly, outside a pytest/unittest runner.
    unittest.main()
|
||||
|
|
@ -516,13 +516,12 @@ class TestGatewayFormatting:
|
|||
assert "**" in text # Markdown bold
|
||||
|
||||
def test_gateway_format_hides_cost(self, populated_db):
|
||||
"""Gateway format omits dollar figures and internal cache details."""
|
||||
engine = InsightsEngine(populated_db)
|
||||
report = engine.generate(days=30)
|
||||
text = engine.format_gateway(report)
|
||||
|
||||
assert "$" in text
|
||||
assert "Top Skills" in text
|
||||
assert "Est. cost" in text
|
||||
assert "$" not in text
|
||||
assert "cache" not in text.lower()
|
||||
|
||||
def test_gateway_format_shows_models(self, populated_db):
|
||||
|
|
|
|||
|
|
@ -84,38 +84,6 @@ class TestMinimaxAuxModel:
|
|||
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
|
||||
|
||||
|
||||
class TestMinimaxModelCatalog:
|
||||
"""Verify the model catalog matches official Anthropic-compat endpoint models.
|
||||
|
||||
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
|
||||
"""
|
||||
|
||||
def test_catalog_includes_current_models(self):
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
for provider in ("minimax", "minimax-cn"):
|
||||
models = _PROVIDER_MODELS[provider]
|
||||
assert "MiniMax-M2.7" in models
|
||||
assert "MiniMax-M2.5" in models
|
||||
assert "MiniMax-M2.1" in models
|
||||
assert "MiniMax-M2" in models
|
||||
|
||||
def test_catalog_excludes_m1_family(self):
|
||||
"""M1 models are not available on the /anthropic endpoint."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
for provider in ("minimax", "minimax-cn"):
|
||||
models = _PROVIDER_MODELS[provider]
|
||||
assert "MiniMax-M1" not in models
|
||||
|
||||
def test_catalog_excludes_highspeed(self):
|
||||
"""Highspeed variants are available but not shown in default catalog
|
||||
(users can still specify them manually)."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
for provider in ("minimax", "minimax-cn"):
|
||||
models = _PROVIDER_MODELS[provider]
|
||||
assert "MiniMax-M2.7-highspeed" not in models
|
||||
assert "MiniMax-M2.5-highspeed" not in models
|
||||
|
||||
|
||||
class TestMinimaxBetaHeaders:
|
||||
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed.
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
|
||||
|
|
@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch):
|
|||
_validate_proxy_env_urls() # should not raise
|
||||
|
||||
|
||||
def test_proxy_env_normalizes_socks_alias(monkeypatch):
    """A bare ``socks://`` scheme is rewritten in place to ``socks5://``."""
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    _validate_proxy_env_urls()
    normalized = os.environ["ALL_PROXY"]
    assert normalized == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("key", [
|
||||
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
|
||||
"http_proxy", "https_proxy", "all_proxy",
|
||||
|
|
|
|||
|
|
@ -405,3 +405,191 @@ class TestPlanSkillHelpers:
|
|||
assert "Add a /plan command" in msg
|
||||
assert ".hermes/plans/plan.md" in msg
|
||||
assert "Runtime note:" in msg
|
||||
|
||||
|
||||
class TestSkillDirectoryHeader:
    """The activation message must expose the absolute skill directory and
    explain how to resolve relative paths, so skills with bundled scripts
    don't force the agent into a second ``skill_view()`` round-trip."""

    def test_header_contains_absolute_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "abs-dir-skill")
            scan_skill_commands()
            msg = build_skill_invocation_message("/abs-dir-skill", "go")

            assert msg is not None
            assert f"[Skill directory: {skill_dir}]" in msg
            assert "Resolve any relative paths" in msg

    def test_supporting_files_shown_with_absolute_paths(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "scripted-skill")
            (skill_dir / "scripts").mkdir()
            (skill_dir / "scripts" / "run.js").write_text("console.log('hi')")
            scan_skill_commands()
            msg = build_skill_invocation_message("/scripted-skill")

            assert msg is not None
            # The supporting-files block must emit both the relative form (so the
            # agent can call skill_view on it) and the absolute form (so it can
            # run the script directly via terminal).
            assert "scripts/run.js" in msg
            assert str(skill_dir / "scripts" / "run.js") in msg
            # FIX: dropped the stray `assert f"node {skill_dir}/scripts/foo.js"
            # in msg` — this skill bundles scripts/run.js, not scripts/foo.js;
            # the line was copy-pasted from the ${HERMES_SKILL_DIR} template
            # test and does not belong to this scenario.
|
||||
|
||||
|
||||
class TestTemplateVarSubstitution:
    """``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body
    are replaced before the agent sees the content."""

    def test_substitutes_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(
                tmp_path,
                "templated",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/templated")

            assert msg is not None
            assert f"node {skill_dir}/scripts/foo.js" in msg
            # The literal token must not leak into the rendered body; the
            # trailing "[Skill directory: ...]" header is excluded from the check.
            body_part = msg.split("[Skill directory:")[0]
            assert "${HERMES_SKILL_DIR}" not in body_part

    def test_substitutes_session_id_when_available(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-templated",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/sess-templated", task_id="abc-123")

            assert msg is not None
            assert "Session: abc-123" in msg

    def test_leaves_session_id_token_when_missing(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-missing",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/sess-missing", task_id=None)

            assert msg is not None
            # Without a session the token stays literal so the author can spot it.
            assert "Session: ${HERMES_SESSION_ID}" in msg

    def test_disable_template_vars_via_config(self, tmp_path):
        no_substitution = patch(
            "agent.skill_commands._load_skills_config",
            return_value={"template_vars": False},
        )
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path), no_substitution:
            _make_skill(
                tmp_path,
                "no-sub",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/no-sub")

            assert msg is not None
            # The token survives verbatim when substitution is switched off.
            assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg
|
||||
|
||||
|
||||
class TestInlineShellExpansion:
    """Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the
    content — but only when the user has opted in via config."""

    @staticmethod
    def _inline_shell_config(timeout):
        """Patch the skills config: inline shell enabled with a given timeout."""
        return patch(
            "agent.skill_commands._load_skills_config",
            return_value={
                "template_vars": True,
                "inline_shell": True,
                "inline_shell_timeout": timeout,
            },
        )

    def test_inline_shell_is_off_by_default(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "dyn-default-off",
                body="Today is !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-default-off")

            assert msg is not None
            # Default config ships inline_shell=False — the snippet stays literal.
            assert "!`echo INLINE_RAN`" in msg
            assert "Today is INLINE_RAN." not in msg

    def test_inline_shell_runs_when_enabled(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path), self._inline_shell_config(5):
            _make_skill(
                tmp_path,
                "dyn-on",
                body="Marker: !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-on")

            assert msg is not None
            assert "Marker: INLINE_RAN." in msg
            assert "!`echo INLINE_RAN`" not in msg

    def test_inline_shell_runs_in_skill_directory(self, tmp_path):
        """Inline snippets get the skill dir as CWD so relative paths work."""
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path), self._inline_shell_config(5):
            skill_dir = _make_skill(
                tmp_path,
                "dyn-cwd",
                body="Here: !`pwd`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-cwd")

            assert msg is not None
            assert f"Here: {skill_dir}" in msg

    def test_inline_shell_timeout_does_not_break_message(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path), self._inline_shell_config(1):
            _make_skill(
                tmp_path,
                "dyn-slow",
                body="Slow: !`sleep 5 && printf DYN_MARKER`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-slow")

            assert msg is not None
            # A timeout is surfaced as a marker instead of propagating as an
            # error, and the rest of the skill message still renders.
            assert "inline-shell timeout" in msg
            # The command's stdout never made it through — only the timeout
            # marker (which echoes the command text) may mention the string.
            assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "")
|
||||
|
|
|
|||
0
tests/agent/transports/__init__.py
Normal file
0
tests/agent/transports/__init__.py
Normal file
220
tests/agent/transports/test_transport.py
Normal file
220
tests/agent/transports/test_transport.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Tests for the transport ABC, registry, and AnthropicTransport."""
|
||||
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
from agent.transports import get_transport, register_transport, _REGISTRY
|
||||
|
||||
|
||||
# ── ABC contract tests ──────────────────────────────────────────────────
|
||||
|
||||
class TestProviderTransportABC:
    """The transport ABC must reject partial implementations and provide
    sane defaults for the optional hooks."""

    def test_cannot_instantiate_abc(self):
        with pytest.raises(TypeError):
            ProviderTransport()

    def test_concrete_must_implement_all_abstract(self):
        # Only api_mode is provided — instantiation must still fail.
        class Incomplete(ProviderTransport):
            @property
            def api_mode(self):
                return "test"

        with pytest.raises(TypeError):
            Incomplete()

    def test_minimal_concrete(self):
        class Minimal(ProviderTransport):
            @property
            def api_mode(self):
                return "test_minimal"

            def convert_messages(self, messages, **kw):
                return messages

            def convert_tools(self, tools):
                return tools

            def build_kwargs(self, model, messages, tools=None, **params):
                return {"model": model, "messages": messages}

            def normalize_response(self, response, **kw):
                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")

        transport = Minimal()
        assert transport.api_mode == "test_minimal"
        # Defaults inherited from the ABC:
        assert transport.validate_response(None) is True
        assert transport.extract_cache_stats(None) is None
        assert transport.map_finish_reason("end_turn") == "end_turn"  # passthrough
|
||||
|
||||
|
||||
# ── Registry tests ───────────────────────────────────────────────────────
|
||||
|
||||
class TestTransportRegistry:
    """register_transport / get_transport round-trips and registry hygiene."""

    def test_get_unregistered_returns_none(self):
        assert get_transport("nonexistent_mode") is None

    def test_anthropic_registered_on_import(self):
        import agent.transports.anthropic  # noqa: F401

        transport = get_transport("anthropic_messages")
        assert transport is not None
        assert transport.api_mode == "anthropic_messages"

    def test_register_and_get(self):
        class DummyTransport(ProviderTransport):
            @property
            def api_mode(self):
                return "dummy_test"

            def convert_messages(self, messages, **kw):
                return messages

            def convert_tools(self, tools):
                return tools

            def build_kwargs(self, model, messages, tools=None, **params):
                return {}

            def normalize_response(self, response, **kw):
                return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")

        register_transport("dummy_test", DummyTransport)
        try:
            t = get_transport("dummy_test")
            assert t.api_mode == "dummy_test"
        finally:
            # FIX: cleanup ran unconditionally before, but only on the success
            # path — a failing assertion leaked "dummy_test" into the shared
            # registry for every later test. try/finally guarantees removal.
            _REGISTRY.pop("dummy_test", None)
|
||||
|
||||
|
||||
# ── AnthropicTransport tests ────────────────────────────────────────────
|
||||
|
||||
class TestAnthropicTransport:
    """Behavior of the registered anthropic_messages transport."""

    @pytest.fixture
    def transport(self):
        # Importing the module registers the transport as a side effect.
        import agent.transports.anthropic  # noqa: F401

        return get_transport("anthropic_messages")

    def test_api_mode(self, transport):
        assert transport.api_mode == "anthropic_messages"

    def test_convert_tools_simple(self, transport):
        openai_style = [{
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test",
                "parameters": {"type": "object", "properties": {}},
            }
        }]
        converted = transport.convert_tools(openai_style)
        assert len(converted) == 1
        assert converted[0]["name"] == "test_tool"
        assert "input_schema" in converted[0]

    def test_validate_response_none(self, transport):
        assert transport.validate_response(None) is False

    def test_validate_response_empty_content(self, transport):
        empty = SimpleNamespace(content=[])
        assert transport.validate_response(empty) is False

    def test_validate_response_valid(self, transport):
        ok = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
        assert transport.validate_response(ok) is True

    def test_map_finish_reason(self, transport):
        expectations = {
            "end_turn": "stop",
            "tool_use": "tool_calls",
            "max_tokens": "length",
            "stop_sequence": "stop",
            "refusal": "content_filter",
            "model_context_window_exceeded": "length",
            "unknown": "stop",  # unrecognized reasons fall back to "stop"
        }
        for raw, mapped in expectations.items():
            assert transport.map_finish_reason(raw) == mapped

    def test_extract_cache_stats_none_usage(self, transport):
        assert transport.extract_cache_stats(SimpleNamespace(usage=None)) is None

    def test_extract_cache_stats_with_cache(self, transport):
        usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
        stats = transport.extract_cache_stats(SimpleNamespace(usage=usage))
        assert stats == {"cached_tokens": 100, "creation_tokens": 50}

    def test_extract_cache_stats_zero(self, transport):
        usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
        assert transport.extract_cache_stats(SimpleNamespace(usage=usage)) is None

    def test_normalize_response_text(self, transport):
        """A plain text response normalizes to content + mapped finish_reason."""
        resp = SimpleNamespace(
            content=[SimpleNamespace(type="text", text="Hello world")],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
            model="claude-sonnet-4-6",
        )
        normalized = transport.normalize_response(resp)
        assert isinstance(normalized, NormalizedResponse)
        assert normalized.content == "Hello world"
        assert normalized.tool_calls is None or normalized.tool_calls == []
        assert normalized.finish_reason == "stop"

    def test_normalize_response_tool_calls(self, transport):
        """A tool_use block becomes a ToolCall whose arguments contain the
        JSON-encoded input keys."""
        resp = SimpleNamespace(
            content=[
                SimpleNamespace(
                    type="tool_use",
                    id="toolu_123",
                    name="terminal",
                    input={"command": "ls"},
                ),
            ],
            stop_reason="tool_use",
            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
            model="claude-sonnet-4-6",
        )
        normalized = transport.normalize_response(resp)
        assert normalized.finish_reason == "tool_calls"
        assert len(normalized.tool_calls) == 1
        call = normalized.tool_calls[0]
        assert call.name == "terminal"
        assert call.id == "toolu_123"
        assert '"command"' in call.arguments

    def test_normalize_response_thinking(self, transport):
        """Thinking blocks land in .reasoning, text stays in .content."""
        resp = SimpleNamespace(
            content=[
                SimpleNamespace(type="thinking", thinking="Let me think..."),
                SimpleNamespace(type="text", text="The answer is 42"),
            ],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
            model="claude-sonnet-4-6",
        )
        normalized = transport.normalize_response(resp)
        assert normalized.content == "The answer is 42"
        assert normalized.reasoning == "Let me think..."

    def test_build_kwargs_returns_dict(self, transport):
        """build_kwargs produces a dict carrying the essential request fields."""
        kw = transport.build_kwargs(
            model="claude-sonnet-4-6",
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=1024,
        )
        assert isinstance(kw, dict)
        for key in ("model", "max_tokens", "messages"):
            assert key in kw

    def test_convert_messages_extracts_system(self, transport):
        """convert_messages splits the system prompt off the message list."""
        system, msgs = transport.convert_messages([
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi"},
        ])
        assert system is not None
        assert len(msgs) >= 1
|
||||
151
tests/agent/transports/test_types.py
Normal file
151
tests/agent/transports/test_types.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
"""Tests for agent/transports/types.py — dataclass construction + helpers."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from agent.transports.types import (
|
||||
NormalizedResponse,
|
||||
ToolCall,
|
||||
Usage,
|
||||
build_tool_call,
|
||||
map_finish_reason,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ToolCall
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestToolCall:
    """Construction semantics of the ToolCall dataclass."""

    def test_basic_construction(self):
        call = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')

        assert call.id == "call_abc"
        assert call.name == "terminal"
        assert call.arguments == '{"cmd": "ls"}'
        # provider_data is optional and defaults to None.
        assert call.provider_data is None

    def test_none_id(self):
        call = ToolCall(id=None, name="read_file", arguments="{}")
        assert call.id is None

    def test_provider_data(self):
        provider_blob = {"call_id": "call_x", "response_item_id": "fc_x"}
        call = ToolCall(
            id="call_x",
            name="t",
            arguments="{}",
            provider_data=provider_blob,
        )

        assert call.provider_data["call_id"] == "call_x"
        assert call.provider_data["response_item_id"] == "fc_x"
# ---------------------------------------------------------------------------
|
||||
# Usage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUsage:
    """Default and explicit construction of the Usage dataclass."""

    def test_defaults(self):
        usage = Usage()
        # Every counter starts at zero when nothing is supplied.
        for field_name in ("prompt_tokens", "completion_tokens", "total_tokens", "cached_tokens"):
            assert getattr(usage, field_name) == 0

    def test_explicit(self):
        usage = Usage(
            prompt_tokens=100,
            completion_tokens=50,
            total_tokens=150,
            cached_tokens=80,
        )
        assert usage.total_tokens == 150
# ---------------------------------------------------------------------------
|
||||
# NormalizedResponse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNormalizedResponse:
    """Field behaviour of the NormalizedResponse container."""

    def test_text_only(self):
        resp = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")

        assert resp.content == "hello"
        assert resp.tool_calls is None
        assert resp.finish_reason == "stop"
        # Optional fields default to None when not supplied.
        assert resp.reasoning is None
        assert resp.usage is None
        assert resp.provider_data is None

    def test_with_tool_calls(self):
        calls = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
        resp = NormalizedResponse(content=None, tool_calls=calls, finish_reason="tool_calls")

        assert resp.finish_reason == "tool_calls"
        assert len(resp.tool_calls) == 1
        assert resp.tool_calls[0].name == "terminal"

    def test_with_reasoning(self):
        resp = NormalizedResponse(
            content="answer",
            tool_calls=None,
            finish_reason="stop",
            reasoning="I thought about it",
        )
        assert resp.reasoning == "I thought about it"

    def test_with_provider_data(self):
        blob = {"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]}
        resp = NormalizedResponse(
            content=None,
            tool_calls=None,
            finish_reason="stop",
            provider_data=blob,
        )
        assert resp.provider_data["reasoning_details"][0]["type"] == "thinking"
||||
# ---------------------------------------------------------------------------
|
||||
# build_tool_call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildToolCall:
    """Behaviour of the build_tool_call factory helper."""

    def test_dict_arguments_serialized(self):
        call = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})

        # Dict arguments are JSON-encoded for transport.
        assert call.arguments == json.dumps({"cmd": "ls"})
        assert call.provider_data is None

    def test_string_arguments_passthrough(self):
        call = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}')
        # Pre-serialized string arguments are left untouched.
        assert call.arguments == '{"path": "/tmp"}'

    def test_provider_fields(self):
        call = build_tool_call(
            id="call_3",
            name="terminal",
            arguments="{}",
            call_id="call_3",
            response_item_id="fc_3",
        )
        assert call.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}

    def test_none_id(self):
        call = build_tool_call(id=None, name="t", arguments="{}")
        assert call.id is None
||||
# ---------------------------------------------------------------------------
|
||||
# map_finish_reason
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMapFinishReason:
    """Translation behaviour of the map_finish_reason helper."""

    # Anthropic stop_reason -> normalized finish_reason lookup used by the tests.
    ANTHROPIC_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
    }

    def test_known_reason(self):
        expectations = [
            ("end_turn", "stop"),
            ("tool_use", "tool_calls"),
            ("max_tokens", "length"),
            ("refusal", "content_filter"),
        ]
        for raw_reason, expected in expectations:
            assert map_finish_reason(raw_reason, self.ANTHROPIC_MAP) == expected

    def test_unknown_reason_defaults_to_stop(self):
        # Unmapped provider reasons fall back to the generic "stop".
        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"

    def test_none_reason(self):
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
Loading…
Add table
Add a link
Reference in a new issue