mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
refactor: remove smart_model_routing feature (#12732)
Smart model routing (auto-routing short/simple turns to a cheap model across providers) was opt-in and disabled by default. This removes the feature wholesale: the routing module, its config keys, docs, tests, and the orchestration scaffolding it required in cli.py / gateway/run.py / cron/scheduler.py. The /fast (Priority Processing / Anthropic fast mode) feature kept its hooks into _resolve_turn_agent_config — those still build a route dict and attach request_overrides when the model supports it; the route now just always uses the session's primary model/provider rather than running prompts through choose_cheap_model_route() first. Also removed: - DEFAULT_CONFIG['smart_model_routing'] block and matching commented-out example sections in hermes_cli/config.py and cli-config.yaml.example - _load_smart_model_routing() / self._smart_model_routing on GatewayRunner - self._smart_model_routing / self._active_agent_route_signature on HermesCLI (signature kept; just no longer initialised through the smart-routing pipeline) - route_label parameter on HermesCLI._init_agent (only set by smart routing; never read elsewhere) - 'Smart Model Routing' section in website/docs/integrations/providers.md - tip in hermes_cli/tips.py - entries in hermes_cli/dump.py + hermes_cli/web_server.py - row in skills/autonomous-ai-agents/hermes-agent/SKILL.md Tests: - Deleted tests/agent/test_smart_model_routing.py - Rewrote tests/agent/test_credential_pool_routing.py to target the simplified _resolve_turn_agent_config directly (preserves credential pool propagation + 429 rotation coverage) - Dropped 'cheap model' test from test_cli_provider_resolution.py - Dropped resolve_turn_route patches from cli + gateway test_fast_command — they now exercise the real method end-to-end - Removed _smart_model_routing stub assignments from gateway/cron test helpers Targeted suites: 74/74 in the directly affected test files; tests/agent + tests/cron + tests/cli pass except 5 failures that already exist on main (cron silent-delivery + alias quick-command).
This commit is contained in:
parent
5f0a91f31a
commit
424e9f36b0
18 changed files with 96 additions and 664 deletions
|
|
@ -629,7 +629,6 @@ class GatewayRunner:
|
|||
self._restart_drain_timeout = self._load_restart_drain_timeout()
|
||||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
self._smart_model_routing = self._load_smart_model_routing()
|
||||
|
||||
# Wire process registry into session store for reset protection
|
||||
from tools.process_registry import process_registry
|
||||
|
|
@ -1082,11 +1081,16 @@ class GatewayRunner:
|
|||
return model, runtime_kwargs
|
||||
|
||||
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
"""Build the effective model/runtime config for a single turn.
|
||||
|
||||
Always uses the session's primary model/provider. If `/fast` is
|
||||
enabled and the model supports Priority Processing / Anthropic fast
|
||||
mode, attach `request_overrides` so the API call is marked
|
||||
accordingly.
|
||||
"""
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
primary = {
|
||||
"model": model,
|
||||
runtime = {
|
||||
"api_key": runtime_kwargs.get("api_key"),
|
||||
"base_url": runtime_kwargs.get("base_url"),
|
||||
"provider": runtime_kwargs.get("provider"),
|
||||
|
|
@ -1095,7 +1099,18 @@ class GatewayRunner:
|
|||
"args": list(runtime_kwargs.get("args") or []),
|
||||
"credential_pool": runtime_kwargs.get("credential_pool"),
|
||||
}
|
||||
route = resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
|
||||
route = {
|
||||
"model": model,
|
||||
"runtime": runtime,
|
||||
"signature": (
|
||||
model,
|
||||
runtime["provider"],
|
||||
runtime["base_url"],
|
||||
runtime["api_mode"],
|
||||
runtime["command"],
|
||||
tuple(runtime["args"]),
|
||||
),
|
||||
}
|
||||
|
||||
service_tier = getattr(self, "_service_tier", None)
|
||||
if not service_tier:
|
||||
|
|
@ -1103,7 +1118,7 @@ class GatewayRunner:
|
|||
return route
|
||||
|
||||
try:
|
||||
overrides = resolve_fast_mode_overrides(route.get("model"))
|
||||
overrides = resolve_fast_mode_overrides(route["model"])
|
||||
except Exception:
|
||||
overrides = None
|
||||
route["request_overrides"] = overrides
|
||||
|
|
@ -1461,20 +1476,6 @@ class GatewayRunner:
|
|||
pass
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _load_smart_model_routing() -> dict:
|
||||
"""Load optional smart cheap-vs-strong model routing config."""
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return cfg.get("smart_model_routing", {}) or {}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
def _snapshot_running_agents(self) -> Dict[str, Any]:
|
||||
return {
|
||||
session_key: agent
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue