mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Smart model routing (auto-routing short/simple turns to a cheap model across providers) was opt-in and disabled by default. This removes the feature wholesale: the routing module, its config keys, docs, tests, and the orchestration scaffolding it required in cli.py / gateway/run.py / cron/scheduler.py.

The /fast (Priority Processing / Anthropic fast mode) feature kept its hooks into _resolve_turn_agent_config — those still build a route dict and attach request_overrides when the model supports it; the route now just always uses the session's primary model/provider rather than running prompts through choose_cheap_model_route() first.

Also removed:
- DEFAULT_CONFIG['smart_model_routing'] block and matching commented-out example sections in hermes_cli/config.py and cli-config.yaml.example
- _load_smart_model_routing() / self._smart_model_routing on GatewayRunner
- self._smart_model_routing / self._active_agent_route_signature on HermesCLI (signature kept; just no longer initialised through the smart-routing pipeline)
- route_label parameter on HermesCLI._init_agent (only set by smart routing; never read elsewhere)
- 'Smart Model Routing' section in website/docs/integrations/providers.md
- tip in hermes_cli/tips.py
- entries in hermes_cli/dump.py + hermes_cli/web_server.py
- row in skills/autonomous-ai-agents/hermes-agent/SKILL.md

Tests:
- Deleted tests/agent/test_smart_model_routing.py
- Rewrote tests/agent/test_credential_pool_routing.py to target the simplified _resolve_turn_agent_config directly (preserves credential pool propagation + 429 rotation coverage)
- Dropped 'cheap model' test from test_cli_provider_resolution.py
- Dropped resolve_turn_route patches from cli + gateway test_fast_command — they now exercise the real method end-to-end
- Removed _smart_model_routing stub assignments from gateway/cron test helpers

Targeted suites: 74/74 in the directly affected test files; tests/agent + tests/cron + tests/cli pass except 5 failures that already exist on main (cron silent-delivery + alias quick-command).
178 lines
5.9 KiB
Python
178 lines
5.9 KiB
Python
"""Tests for gateway /fast support and Priority Processing routing."""
|
|
|
|
import sys
|
|
import threading
|
|
import types
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
import gateway.run as gateway_run
|
|
from gateway.config import Platform
|
|
from gateway.platforms.base import MessageEvent
|
|
from gateway.session import SessionSource
|
|
|
|
|
|
class _CapturingAgent:
|
|
last_init = None
|
|
last_run = None
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
type(self).last_init = dict(kwargs)
|
|
self.tools = []
|
|
|
|
def run_conversation(self, user_message, conversation_history=None, task_id=None, persist_user_message=None):
|
|
type(self).last_run = {
|
|
"user_message": user_message,
|
|
"conversation_history": conversation_history,
|
|
"task_id": task_id,
|
|
"persist_user_message": persist_user_message,
|
|
}
|
|
return {
|
|
"final_response": "ok",
|
|
"messages": [],
|
|
"api_calls": 1,
|
|
"completed": True,
|
|
}
|
|
|
|
|
|
def _install_fake_agent(monkeypatch):
    """Register a stub ``run_agent`` module whose ``AIAgent`` is ``_CapturingAgent``.

    The stub is placed into ``sys.modules`` through ``monkeypatch.setitem``.
    """
    module_name = "run_agent"
    stub_module = types.ModuleType(module_name)
    setattr(stub_module, "AIAgent", _CapturingAgent)
    monkeypatch.setitem(sys.modules, module_name, stub_module)
|
|
|
|
|
|
def _make_runner():
    """Build a ``GatewayRunner`` without calling its ``__init__``.

    The instance is allocated with ``object.__new__`` and then given the
    attributes listed below, in the order listed.
    """
    stub_session_store = SimpleNamespace(
        get_or_create_session=lambda source: SimpleNamespace(session_id="session-1"),
        load_transcript=lambda session_id: [],
    )

    # Attribute name -> value, kept in assignment order.
    attributes = {
        "adapters": {},
        "_ephemeral_system_prompt": "",
        "_prefill_messages": [],
        "_reasoning_config": None,
        "_service_tier": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_running_agents": {},
        "_pending_model_notes": {},
        "_session_db": None,
        "_agent_cache": {},
        "_agent_cache_lock": threading.Lock(),
        "_session_model_overrides": {},
        "hooks": SimpleNamespace(loaded_hooks=False),
        "config": SimpleNamespace(streaming=None),
        "session_store": stub_session_store,
        "_get_or_create_gateway_honcho": lambda session_key: (None, None),
        "_enrich_message_with_vision": AsyncMock(return_value="ENRICHED"),
    }

    runner = object.__new__(gateway_run.GatewayRunner)
    for attr_name, attr_value in attributes.items():
        setattr(runner, attr_name, attr_value)
    return runner
|
|
|
|
|
|
def _make_source() -> SessionSource:
    """Return a ``SessionSource`` for a Telegram DM (chat 12345, user user-1)."""
    source_fields = {
        "platform": Platform.TELEGRAM,
        "chat_id": "12345",
        "chat_type": "dm",
        "user_id": "user-1",
    }
    return SessionSource(**source_fields)
|
|
|
|
|
|
def _make_event(text: str) -> MessageEvent:
    """Wrap *text* in a ``MessageEvent`` from ``_make_source()`` with message id "m1"."""
    origin = _make_source()
    event = MessageEvent(text=text, source=origin, message_id="m1")
    return event
|
|
|
|
|
|
def test_turn_route_injects_priority_processing_without_changing_runtime():
    """With the priority tier set and model "gpt-5.4", the route keeps the
    given provider/api_mode and carries a ``service_tier`` request override."""
    gateway = _make_runner()
    gateway._service_tier = "priority"
    runtime = dict(
        api_key="***",
        base_url="https://openrouter.ai/api/v1",
        provider="openrouter",
        api_mode="chat_completions",
        command=None,
        args=[],
        credential_pool=None,
    )

    # Call through the class so the bare runner is passed explicitly as self.
    resolve = gateway_run.GatewayRunner._resolve_turn_agent_config
    route = resolve(gateway, "hi", "gpt-5.4", runtime)

    resolved_runtime = route["runtime"]
    assert resolved_runtime["provider"] == "openrouter"
    assert resolved_runtime["api_mode"] == "chat_completions"
    assert route["request_overrides"] == {"service_tier": "priority"}
|
|
|
|
|
|
def test_turn_route_skips_priority_processing_for_unsupported_models():
    """With the priority tier set but model "gpt-5.3-codex", the route carries
    no request overrides."""
    gateway = _make_runner()
    gateway._service_tier = "priority"
    runtime = dict(
        api_key="***",
        base_url="https://openrouter.ai/api/v1",
        provider="openrouter",
        api_mode="chat_completions",
        command=None,
        args=[],
        credential_pool=None,
    )

    # Call through the class so the bare runner is passed explicitly as self.
    resolve = gateway_run.GatewayRunner._resolve_turn_agent_config
    route = resolve(gateway, "hi", "gpt-5.3-codex", runtime)

    overrides = route["request_overrides"]
    assert overrides is None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_handle_fast_command_persists_config(monkeypatch, tmp_path):
    """Handling "/fast fast" sets the runner's tier to "priority" and writes
    ``agent.service_tier: fast`` to ``config.yaml`` under the patched home."""
    gateway = _make_runner()

    # Point the gateway module at the temp dir and stub its config/model lookups.
    module_patches = (
        ("_hermes_home", tmp_path),
        ("_load_gateway_config", lambda: {}),
        ("_resolve_gateway_model", lambda config=None: "gpt-5.4"),
    )
    for attr_name, replacement in module_patches:
        monkeypatch.setattr(gateway_run, attr_name, replacement)

    event = _make_event("/fast fast")
    reply = await gateway._handle_fast_command(event)

    assert "FAST" in reply
    assert gateway._service_tier == "priority"

    config_file = tmp_path / "config.yaml"
    with config_file.open(encoding="utf-8") as handle:
        persisted = yaml.safe_load(handle)
    assert persisted["agent"]["service_tier"] == "fast"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_agent_passes_priority_processing_to_gateway_agent(monkeypatch, tmp_path):
    """With ``service_tier: fast`` in config.yaml, ``_run_agent`` constructs the
    agent with ``service_tier="priority"`` and a matching request override."""
    _install_fake_agent(monkeypatch)
    gateway = _make_runner()

    config_file = tmp_path / "config.yaml"
    config_file.write_text("agent:\n service_tier: fast\n", encoding="utf-8")

    def _stub_runtime_kwargs():
        # A fresh dict on every call, as the replaced resolver is called with no arguments.
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "***",
        }

    module_patches = (
        ("_hermes_home", tmp_path),
        ("_env_path", tmp_path / ".env"),
        ("load_dotenv", lambda *args, **kwargs: None),
        ("_load_gateway_config", lambda: {}),
        ("_resolve_gateway_model", lambda config=None: "gpt-5.4"),
        ("_resolve_runtime_agent_kwargs", _stub_runtime_kwargs),
    )
    for attr_name, replacement in module_patches:
        monkeypatch.setattr(gateway_run, attr_name, replacement)

    import hermes_cli.tools_config as tools_config

    monkeypatch.setattr(tools_config, "_get_platform_tools", lambda user_config, platform_key: {"core"})

    # Clear anything captured earlier so the assertions see this run only.
    _CapturingAgent.last_init = None
    run_kwargs = dict(
        message="hi",
        context_prompt="",
        history=[],
        source=_make_source(),
        session_id="session-1",
        session_key="agent:main:telegram:dm:12345",
    )
    outcome = await gateway._run_agent(**run_kwargs)

    assert outcome["final_response"] == "ok"
    init_kwargs = _CapturingAgent.last_init
    assert init_kwargs["service_tier"] == "priority"
    assert init_kwargs["request_overrides"] == {"service_tier": "priority"}
|