mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Smart model routing (auto-routing short/simple turns to a cheap model across providers) was opt-in and disabled by default. This removes the feature wholesale: the routing module, its config keys, docs, tests, and the orchestration scaffolding it required in cli.py / gateway/run.py / cron/scheduler.py. The /fast (Priority Processing / Anthropic fast mode) feature kept its hooks into _resolve_turn_agent_config — those still build a route dict and attach request_overrides when the model supports it; the route now just always uses the session's primary model/provider rather than running prompts through choose_cheap_model_route() first. Also removed: - DEFAULT_CONFIG['smart_model_routing'] block and matching commented-out example sections in hermes_cli/config.py and cli-config.yaml.example - _load_smart_model_routing() / self._smart_model_routing on GatewayRunner - self._smart_model_routing / self._active_agent_route_signature on HermesCLI (signature kept; just no longer initialised through the smart-routing pipeline) - route_label parameter on HermesCLI._init_agent (only set by smart routing; never read elsewhere) - 'Smart Model Routing' section in website/docs/integrations/providers.md - tip in hermes_cli/tips.py - entries in hermes_cli/dump.py + hermes_cli/web_server.py - row in skills/autonomous-ai-agents/hermes-agent/SKILL.md Tests: - Deleted tests/agent/test_smart_model_routing.py - Rewrote tests/agent/test_credential_pool_routing.py to target the simplified _resolve_turn_agent_config directly (preserves credential pool propagation + 429 rotation coverage) - Dropped 'cheap model' test from test_cli_provider_resolution.py - Dropped resolve_turn_route patches from cli + gateway test_fast_command — they now exercise the real method end-to-end - Removed _smart_model_routing stub assignments from gateway/cron test helpers Targeted suites: 74/74 in the directly affected test files; tests/agent + tests/cron + tests/cli pass except 5 failures that already exist on main (cron silent-delivery + alias quick-command).
380 lines
14 KiB
Python
380 lines
14 KiB
Python
"""Tests for the /fast CLI command and service-tier config handling."""
|
|
|
|
import unittest
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
|
|
def _import_cli():
|
|
import hermes_cli.config as config_mod
|
|
|
|
if not hasattr(config_mod, "save_env_value_secure"):
|
|
config_mod.save_env_value_secure = lambda key, value: {
|
|
"success": True,
|
|
"stored_as": key,
|
|
"validated": False,
|
|
}
|
|
|
|
import cli as cli_mod
|
|
|
|
return cli_mod
|
|
|
|
|
|
class TestParseServiceTierConfig(unittest.TestCase):
|
|
def _parse(self, raw):
|
|
cli_mod = _import_cli()
|
|
return cli_mod._parse_service_tier_config(raw)
|
|
|
|
def test_fast_maps_to_priority(self):
|
|
self.assertEqual(self._parse("fast"), "priority")
|
|
self.assertEqual(self._parse("priority"), "priority")
|
|
|
|
def test_normal_disables_service_tier(self):
|
|
self.assertIsNone(self._parse("normal"))
|
|
self.assertIsNone(self._parse("off"))
|
|
self.assertIsNone(self._parse(""))
|
|
|
|
|
|
class TestHandleFastCommand(unittest.TestCase):
|
|
def _make_cli(self, service_tier=None):
|
|
return SimpleNamespace(
|
|
service_tier=service_tier,
|
|
provider="openai-codex",
|
|
requested_provider="openai-codex",
|
|
model="gpt-5.4",
|
|
_fast_command_available=lambda: True,
|
|
agent=MagicMock(),
|
|
)
|
|
|
|
def test_no_args_shows_status(self):
|
|
cli_mod = _import_cli()
|
|
stub = self._make_cli(service_tier=None)
|
|
with (
|
|
patch.object(cli_mod, "_cprint") as mock_cprint,
|
|
patch.object(cli_mod, "save_config_value") as mock_save,
|
|
):
|
|
cli_mod.HermesCLI._handle_fast_command(stub, "/fast")
|
|
|
|
# Bare /fast shows status, does not change config
|
|
mock_save.assert_not_called()
|
|
# Should have printed the status line
|
|
printed = " ".join(str(c) for c in mock_cprint.call_args_list)
|
|
self.assertIn("normal", printed)
|
|
|
|
def test_no_args_shows_fast_when_enabled(self):
|
|
cli_mod = _import_cli()
|
|
stub = self._make_cli(service_tier="priority")
|
|
with (
|
|
patch.object(cli_mod, "_cprint") as mock_cprint,
|
|
patch.object(cli_mod, "save_config_value") as mock_save,
|
|
):
|
|
cli_mod.HermesCLI._handle_fast_command(stub, "/fast")
|
|
|
|
mock_save.assert_not_called()
|
|
printed = " ".join(str(c) for c in mock_cprint.call_args_list)
|
|
self.assertIn("fast", printed)
|
|
|
|
def test_normal_argument_clears_service_tier(self):
|
|
cli_mod = _import_cli()
|
|
stub = self._make_cli(service_tier="priority")
|
|
with (
|
|
patch.object(cli_mod, "_cprint"),
|
|
patch.object(cli_mod, "save_config_value", return_value=True) as mock_save,
|
|
):
|
|
cli_mod.HermesCLI._handle_fast_command(stub, "/fast normal")
|
|
|
|
mock_save.assert_called_once_with("agent.service_tier", "normal")
|
|
self.assertIsNone(stub.service_tier)
|
|
self.assertIsNone(stub.agent)
|
|
|
|
def test_unsupported_model_does_not_expose_fast(self):
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(
|
|
service_tier=None,
|
|
provider="openai-codex",
|
|
requested_provider="openai-codex",
|
|
model="gpt-5.3-codex",
|
|
_fast_command_available=lambda: False,
|
|
agent=MagicMock(),
|
|
)
|
|
|
|
with (
|
|
patch.object(cli_mod, "_cprint") as mock_cprint,
|
|
patch.object(cli_mod, "save_config_value") as mock_save,
|
|
):
|
|
cli_mod.HermesCLI._handle_fast_command(stub, "/fast")
|
|
|
|
mock_save.assert_not_called()
|
|
self.assertTrue(mock_cprint.called)
|
|
|
|
|
|
class TestPriorityProcessingModels(unittest.TestCase):
|
|
"""Verify the expanded Priority Processing model registry."""
|
|
|
|
def test_all_documented_models_supported(self):
|
|
from hermes_cli.models import model_supports_fast_mode
|
|
|
|
# All models from OpenAI's Priority Processing pricing table
|
|
supported = [
|
|
"gpt-5.4", "gpt-5.4-mini", "gpt-5.2",
|
|
"gpt-5.1", "gpt-5", "gpt-5-mini",
|
|
"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
|
|
"gpt-4o", "gpt-4o-mini",
|
|
"o3", "o4-mini",
|
|
]
|
|
for model in supported:
|
|
assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
|
|
|
def test_vendor_prefix_stripped(self):
|
|
from hermes_cli.models import model_supports_fast_mode
|
|
|
|
assert model_supports_fast_mode("openai/gpt-5.4") is True
|
|
assert model_supports_fast_mode("openai/gpt-4.1") is True
|
|
assert model_supports_fast_mode("openai/o3") is True
|
|
|
|
def test_non_priority_models_rejected(self):
|
|
from hermes_cli.models import model_supports_fast_mode
|
|
|
|
assert model_supports_fast_mode("gpt-5.3-codex") is False
|
|
assert model_supports_fast_mode("claude-sonnet-4") is False
|
|
assert model_supports_fast_mode("") is False
|
|
assert model_supports_fast_mode(None) is False
|
|
|
|
def test_resolve_overrides_returns_service_tier(self):
|
|
from hermes_cli.models import resolve_fast_mode_overrides
|
|
|
|
result = resolve_fast_mode_overrides("gpt-5.4")
|
|
assert result == {"service_tier": "priority"}
|
|
|
|
result = resolve_fast_mode_overrides("gpt-4.1")
|
|
assert result == {"service_tier": "priority"}
|
|
|
|
def test_resolve_overrides_none_for_unsupported(self):
|
|
from hermes_cli.models import resolve_fast_mode_overrides
|
|
|
|
assert resolve_fast_mode_overrides("gpt-5.3-codex") is None
|
|
assert resolve_fast_mode_overrides("claude-sonnet-4") is None
|
|
|
|
|
|
class TestFastModeRouting(unittest.TestCase):
|
|
def test_fast_command_exposed_for_model_even_when_provider_is_auto(self):
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(provider="auto", requested_provider="auto", model="gpt-5.4", agent=None)
|
|
|
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
|
|
|
def test_fast_command_exposed_for_non_codex_models(self):
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(provider="openai", requested_provider="openai", model="gpt-4.1", agent=None)
|
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
|
|
|
stub = SimpleNamespace(provider="openrouter", requested_provider="openrouter", model="o3", agent=None)
|
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
|
|
|
def test_turn_route_injects_overrides_without_provider_switch(self):
|
|
"""Fast mode should add request_overrides but NOT change the provider/runtime."""
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(
|
|
model="gpt-5.4",
|
|
api_key="primary-key",
|
|
base_url="https://openrouter.ai/api/v1",
|
|
provider="openrouter",
|
|
api_mode="chat_completions",
|
|
acp_command=None,
|
|
acp_args=[],
|
|
_credential_pool=None,
|
|
service_tier="priority",
|
|
)
|
|
|
|
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
|
|
|
# Provider should NOT have changed
|
|
assert route["runtime"]["provider"] == "openrouter"
|
|
assert route["runtime"]["api_mode"] == "chat_completions"
|
|
# But request_overrides should be set
|
|
assert route["request_overrides"] == {"service_tier": "priority"}
|
|
|
|
def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(
|
|
model="gpt-5.3-codex",
|
|
api_key="primary-key",
|
|
base_url="https://openrouter.ai/api/v1",
|
|
provider="openrouter",
|
|
api_mode="chat_completions",
|
|
acp_command=None,
|
|
acp_args=[],
|
|
_credential_pool=None,
|
|
service_tier="priority",
|
|
)
|
|
|
|
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
|
|
|
assert route["runtime"]["provider"] == "openrouter"
|
|
assert route.get("request_overrides") is None
|
|
|
|
|
|
class TestAnthropicFastMode(unittest.TestCase):
|
|
"""Verify Anthropic Fast Mode model support and override resolution."""
|
|
|
|
def test_anthropic_opus_supported(self):
|
|
from hermes_cli.models import model_supports_fast_mode
|
|
|
|
# Native Anthropic format (hyphens)
|
|
assert model_supports_fast_mode("claude-opus-4-6") is True
|
|
# OpenRouter format (dots)
|
|
assert model_supports_fast_mode("claude-opus-4.6") is True
|
|
# With vendor prefix
|
|
assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
|
|
assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True
|
|
|
|
def test_anthropic_non_opus_rejected(self):
|
|
from hermes_cli.models import model_supports_fast_mode
|
|
|
|
assert model_supports_fast_mode("claude-sonnet-4-6") is False
|
|
assert model_supports_fast_mode("claude-sonnet-4.6") is False
|
|
assert model_supports_fast_mode("claude-haiku-4-5") is False
|
|
assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False
|
|
|
|
def test_anthropic_variant_tags_stripped(self):
|
|
from hermes_cli.models import model_supports_fast_mode
|
|
|
|
# OpenRouter variant tags after colon should be stripped
|
|
assert model_supports_fast_mode("claude-opus-4.6:fast") is True
|
|
assert model_supports_fast_mode("claude-opus-4.6:beta") is True
|
|
|
|
def test_resolve_overrides_returns_speed_for_anthropic(self):
|
|
from hermes_cli.models import resolve_fast_mode_overrides
|
|
|
|
result = resolve_fast_mode_overrides("claude-opus-4-6")
|
|
assert result == {"speed": "fast"}
|
|
|
|
result = resolve_fast_mode_overrides("anthropic/claude-opus-4.6")
|
|
assert result == {"speed": "fast"}
|
|
|
|
def test_resolve_overrides_returns_service_tier_for_openai(self):
|
|
"""OpenAI models should still get service_tier, not speed."""
|
|
from hermes_cli.models import resolve_fast_mode_overrides
|
|
|
|
result = resolve_fast_mode_overrides("gpt-5.4")
|
|
assert result == {"service_tier": "priority"}
|
|
|
|
def test_is_anthropic_fast_model(self):
|
|
from hermes_cli.models import _is_anthropic_fast_model
|
|
|
|
assert _is_anthropic_fast_model("claude-opus-4-6") is True
|
|
assert _is_anthropic_fast_model("claude-opus-4.6") is True
|
|
assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
|
|
assert _is_anthropic_fast_model("gpt-5.4") is False
|
|
assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
|
|
|
|
def test_fast_command_exposed_for_anthropic_model(self):
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(
|
|
provider="anthropic", requested_provider="anthropic",
|
|
model="claude-opus-4-6", agent=None,
|
|
)
|
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
|
|
|
def test_fast_command_hidden_for_anthropic_sonnet(self):
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(
|
|
provider="anthropic", requested_provider="anthropic",
|
|
model="claude-sonnet-4-6", agent=None,
|
|
)
|
|
assert cli_mod.HermesCLI._fast_command_available(stub) is False
|
|
|
|
def test_turn_route_injects_speed_for_anthropic(self):
|
|
"""Anthropic models should get speed:'fast' override, not service_tier."""
|
|
cli_mod = _import_cli()
|
|
stub = SimpleNamespace(
|
|
model="claude-opus-4-6",
|
|
api_key="sk-ant-test",
|
|
base_url="https://api.anthropic.com",
|
|
provider="anthropic",
|
|
api_mode="anthropic_messages",
|
|
acp_command=None,
|
|
acp_args=[],
|
|
_credential_pool=None,
|
|
service_tier="priority",
|
|
)
|
|
|
|
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
|
|
|
assert route["runtime"]["provider"] == "anthropic"
|
|
assert route["request_overrides"] == {"speed": "fast"}
|
|
|
|
|
|
class TestAnthropicFastModeAdapter(unittest.TestCase):
|
|
"""Verify build_anthropic_kwargs handles fast_mode parameter."""
|
|
|
|
def test_fast_mode_adds_speed_and_beta(self):
|
|
from agent.anthropic_adapter import build_anthropic_kwargs, _FAST_MODE_BETA
|
|
|
|
kwargs = build_anthropic_kwargs(
|
|
model="claude-opus-4-6",
|
|
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
|
tools=None,
|
|
max_tokens=None,
|
|
reasoning_config=None,
|
|
fast_mode=True,
|
|
)
|
|
assert kwargs.get("extra_body", {}).get("speed") == "fast"
|
|
assert "speed" not in kwargs
|
|
assert "extra_headers" in kwargs
|
|
assert _FAST_MODE_BETA in kwargs["extra_headers"].get("anthropic-beta", "")
|
|
|
|
def test_fast_mode_off_no_speed(self):
|
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
|
|
|
kwargs = build_anthropic_kwargs(
|
|
model="claude-opus-4-6",
|
|
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
|
tools=None,
|
|
max_tokens=None,
|
|
reasoning_config=None,
|
|
fast_mode=False,
|
|
)
|
|
assert kwargs.get("extra_body", {}).get("speed") is None
|
|
assert "speed" not in kwargs
|
|
assert "extra_headers" not in kwargs
|
|
|
|
def test_fast_mode_skipped_for_third_party_endpoint(self):
|
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
|
|
|
kwargs = build_anthropic_kwargs(
|
|
model="claude-opus-4-6",
|
|
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
|
tools=None,
|
|
max_tokens=None,
|
|
reasoning_config=None,
|
|
fast_mode=True,
|
|
base_url="https://api.minimax.io/anthropic/v1",
|
|
)
|
|
# Third-party endpoints should NOT get speed or fast-mode beta
|
|
assert kwargs.get("extra_body", {}).get("speed") is None
|
|
assert "speed" not in kwargs
|
|
assert "extra_headers" not in kwargs
|
|
|
|
def test_fast_mode_kwargs_are_safe_for_sdk_unpacking(self):
|
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
|
|
|
kwargs = build_anthropic_kwargs(
|
|
model="claude-opus-4-6",
|
|
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
|
tools=None,
|
|
max_tokens=None,
|
|
reasoning_config=None,
|
|
fast_mode=True,
|
|
)
|
|
assert "speed" not in kwargs
|
|
assert kwargs.get("extra_body", {}).get("speed") == "fast"
|
|
|
|
|
|
class TestConfigDefault(unittest.TestCase):
|
|
def test_default_config_has_service_tier(self):
|
|
from hermes_cli.config import DEFAULT_CONFIG
|
|
|
|
agent = DEFAULT_CONFIG.get("agent", {})
|
|
self.assertIn("service_tier", agent)
|
|
self.assertEqual(agent["service_tier"], "")
|