refactor: remove smart_model_routing feature (#12732)

Smart model routing (auto-routing short/simple turns to a cheap model
across providers) was opt-in and disabled by default.  This removes the
feature wholesale: the routing module, its config keys, docs, tests, and
the orchestration scaffolding it required in cli.py / gateway/run.py /
cron/scheduler.py.

The /fast (Priority Processing / Anthropic fast mode) feature kept its
hooks into _resolve_turn_agent_config — those still build a route dict
and attach request_overrides when the model supports it; the route now
just always uses the session's primary model/provider rather than
running prompts through choose_cheap_model_route() first.

Also removed:
- DEFAULT_CONFIG['smart_model_routing'] block and matching commented-out
  example sections in hermes_cli/config.py and cli-config.yaml.example
- _load_smart_model_routing() / self._smart_model_routing on GatewayRunner
- self._smart_model_routing / self._active_agent_route_signature on
  HermesCLI (signature kept; just no longer initialised through the
  smart-routing pipeline)
- route_label parameter on HermesCLI._init_agent (only set by smart
  routing; never read elsewhere)
- 'Smart Model Routing' section in website/docs/integrations/providers.md
- tip in hermes_cli/tips.py
- entries in hermes_cli/dump.py + hermes_cli/web_server.py
- row in skills/autonomous-ai-agents/hermes-agent/SKILL.md

Tests:
- Deleted tests/agent/test_smart_model_routing.py
- Rewrote tests/agent/test_credential_pool_routing.py to target the
  simplified _resolve_turn_agent_config directly (preserves credential
  pool propagation + 429 rotation coverage)
- Dropped 'cheap model' test from test_cli_provider_resolution.py
- Dropped resolve_turn_route patches from cli + gateway test_fast_command
  — they now exercise the real method end-to-end
- Removed _smart_model_routing stub assignments from gateway/cron test
  helpers

Targeted suites: 74/74 in the directly affected test files;
tests/agent + tests/cron + tests/cli pass except 5 failures that
already exist on main (cron silent-delivery + alias quick-command).
This commit is contained in:
Teknium 2026-04-19 18:12:55 -07:00 committed by GitHub
parent 5f0a91f31a
commit 424e9f36b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 96 additions and 664 deletions

View file

@ -629,7 +629,6 @@ class GatewayRunner:
self._restart_drain_timeout = self._load_restart_drain_timeout()
self._provider_routing = self._load_provider_routing()
self._fallback_model = self._load_fallback_model()
self._smart_model_routing = self._load_smart_model_routing()
# Wire process registry into session store for reset protection
from tools.process_registry import process_registry
@ -1082,11 +1081,16 @@ class GatewayRunner:
return model, runtime_kwargs
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
from agent.smart_model_routing import resolve_turn_route
"""Build the effective model/runtime config for a single turn.
Always uses the session's primary model/provider. If `/fast` is
enabled and the model supports Priority Processing / Anthropic fast
mode, attach `request_overrides` so the API call is marked
accordingly.
"""
from hermes_cli.models import resolve_fast_mode_overrides
primary = {
"model": model,
runtime = {
"api_key": runtime_kwargs.get("api_key"),
"base_url": runtime_kwargs.get("base_url"),
"provider": runtime_kwargs.get("provider"),
@ -1095,7 +1099,18 @@ class GatewayRunner:
"args": list(runtime_kwargs.get("args") or []),
"credential_pool": runtime_kwargs.get("credential_pool"),
}
route = resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
route = {
"model": model,
"runtime": runtime,
"signature": (
model,
runtime["provider"],
runtime["base_url"],
runtime["api_mode"],
runtime["command"],
tuple(runtime["args"]),
),
}
service_tier = getattr(self, "_service_tier", None)
if not service_tier:
@ -1103,7 +1118,7 @@ class GatewayRunner:
return route
try:
overrides = resolve_fast_mode_overrides(route.get("model"))
overrides = resolve_fast_mode_overrides(route["model"])
except Exception:
overrides = None
route["request_overrides"] = overrides
@ -1461,20 +1476,6 @@ class GatewayRunner:
pass
return None
@staticmethod
def _load_smart_model_routing() -> dict:
"""Load optional smart cheap-vs-strong model routing config."""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path, encoding="utf-8") as _f:
cfg = _y.safe_load(_f) or {}
return cfg.get("smart_model_routing", {}) or {}
except Exception:
pass
return {}
def _snapshot_running_agents(self) -> Dict[str, Any]:
return {
session_key: agent