mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
refactor: remove smart_model_routing feature (#12732)
Smart model routing (auto-routing short/simple turns to a cheap model across providers) was opt-in and disabled by default. This removes the feature wholesale: the routing module, its config keys, docs, tests, and the orchestration scaffolding it required in cli.py / gateway/run.py / cron/scheduler.py. The /fast (Priority Processing / Anthropic fast mode) feature kept its hooks into _resolve_turn_agent_config — those still build a route dict and attach request_overrides when the model supports it; the route now just always uses the session's primary model/provider rather than running prompts through choose_cheap_model_route() first. Also removed: - DEFAULT_CONFIG['smart_model_routing'] block and matching commented-out example sections in hermes_cli/config.py and cli-config.yaml.example - _load_smart_model_routing() / self._smart_model_routing on GatewayRunner - self._smart_model_routing / self._active_agent_route_signature on HermesCLI (signature kept; just no longer initialised through the smart-routing pipeline) - route_label parameter on HermesCLI._init_agent (only set by smart routing; never read elsewhere) - 'Smart Model Routing' section in website/docs/integrations/providers.md - tip in hermes_cli/tips.py - entries in hermes_cli/dump.py + hermes_cli/web_server.py - row in skills/autonomous-ai-agents/hermes-agent/SKILL.md Tests: - Deleted tests/agent/test_smart_model_routing.py - Rewrote tests/agent/test_credential_pool_routing.py to target the simplified _resolve_turn_agent_config directly (preserves credential pool propagation + 429 rotation coverage) - Dropped 'cheap model' test from test_cli_provider_resolution.py - Dropped resolve_turn_route patches from cli + gateway test_fast_command — they now exercise the real method end-to-end - Removed _smart_model_routing stub assignments from gateway/cron test helpers Targeted suites: 74/74 in the directly affected test files; tests/agent + tests/cron + tests/cli pass except 5 failures that already exist on main (cron silent-delivery + alias quick-command).
This commit is contained in:
parent
5f0a91f31a
commit
424e9f36b0
18 changed files with 96 additions and 664 deletions
|
|
@ -1,195 +0,0 @@
|
|||
"""Helpers for optional cheap-vs-strong model routing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from utils import is_truthy_value
|
||||
|
||||
_COMPLEX_KEYWORDS = {
|
||||
"debug",
|
||||
"debugging",
|
||||
"implement",
|
||||
"implementation",
|
||||
"refactor",
|
||||
"patch",
|
||||
"traceback",
|
||||
"stacktrace",
|
||||
"exception",
|
||||
"error",
|
||||
"analyze",
|
||||
"analysis",
|
||||
"investigate",
|
||||
"architecture",
|
||||
"design",
|
||||
"compare",
|
||||
"benchmark",
|
||||
"optimize",
|
||||
"optimise",
|
||||
"review",
|
||||
"terminal",
|
||||
"shell",
|
||||
"tool",
|
||||
"tools",
|
||||
"pytest",
|
||||
"test",
|
||||
"tests",
|
||||
"plan",
|
||||
"planning",
|
||||
"delegate",
|
||||
"subagent",
|
||||
"cron",
|
||||
"docker",
|
||||
"kubernetes",
|
||||
}
|
||||
|
||||
_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
|
||||
|
||||
|
||||
def _coerce_bool(value: Any, default: bool = False) -> bool:
|
||||
return is_truthy_value(value, default=default)
|
||||
|
||||
|
||||
def _coerce_int(value: Any, default: int) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
|
||||
"""Return the configured cheap-model route when a message looks simple.
|
||||
|
||||
Conservative by design: if the message has signs of code/tool/debugging/
|
||||
long-form work, keep the primary model.
|
||||
"""
|
||||
cfg = routing_config or {}
|
||||
if not _coerce_bool(cfg.get("enabled"), False):
|
||||
return None
|
||||
|
||||
cheap_model = cfg.get("cheap_model") or {}
|
||||
if not isinstance(cheap_model, dict):
|
||||
return None
|
||||
provider = str(cheap_model.get("provider") or "").strip().lower()
|
||||
model = str(cheap_model.get("model") or "").strip()
|
||||
if not provider or not model:
|
||||
return None
|
||||
|
||||
text = (user_message or "").strip()
|
||||
if not text:
|
||||
return None
|
||||
|
||||
max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
|
||||
max_words = _coerce_int(cfg.get("max_simple_words"), 28)
|
||||
|
||||
if len(text) > max_chars:
|
||||
return None
|
||||
if len(text.split()) > max_words:
|
||||
return None
|
||||
if text.count("\n") > 1:
|
||||
return None
|
||||
if "```" in text or "`" in text:
|
||||
return None
|
||||
if _URL_RE.search(text):
|
||||
return None
|
||||
|
||||
lowered = text.lower()
|
||||
words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
|
||||
if words & _COMPLEX_KEYWORDS:
|
||||
return None
|
||||
|
||||
route = dict(cheap_model)
|
||||
route["provider"] = provider
|
||||
route["model"] = model
|
||||
route["routing_reason"] = "simple_turn"
|
||||
return route
|
||||
|
||||
|
||||
def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Resolve the effective model/runtime for one turn.
|
||||
|
||||
Returns a dict with model/runtime/signature/label fields.
|
||||
"""
|
||||
route = choose_cheap_model_route(user_message, routing_config)
|
||||
if not route:
|
||||
return {
|
||||
"model": primary.get("model"),
|
||||
"runtime": {
|
||||
"api_key": primary.get("api_key"),
|
||||
"base_url": primary.get("base_url"),
|
||||
"provider": primary.get("provider"),
|
||||
"api_mode": primary.get("api_mode"),
|
||||
"command": primary.get("command"),
|
||||
"args": list(primary.get("args") or []),
|
||||
"credential_pool": primary.get("credential_pool"),
|
||||
},
|
||||
"label": None,
|
||||
"signature": (
|
||||
primary.get("model"),
|
||||
primary.get("provider"),
|
||||
primary.get("base_url"),
|
||||
primary.get("api_mode"),
|
||||
primary.get("command"),
|
||||
tuple(primary.get("args") or ()),
|
||||
),
|
||||
}
|
||||
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
explicit_api_key = None
|
||||
api_key_env = str(route.get("api_key_env") or "").strip()
|
||||
if api_key_env:
|
||||
explicit_api_key = os.getenv(api_key_env) or None
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=route.get("provider"),
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=route.get("base_url"),
|
||||
)
|
||||
except Exception:
|
||||
return {
|
||||
"model": primary.get("model"),
|
||||
"runtime": {
|
||||
"api_key": primary.get("api_key"),
|
||||
"base_url": primary.get("base_url"),
|
||||
"provider": primary.get("provider"),
|
||||
"api_mode": primary.get("api_mode"),
|
||||
"command": primary.get("command"),
|
||||
"args": list(primary.get("args") or []),
|
||||
"credential_pool": primary.get("credential_pool"),
|
||||
},
|
||||
"label": None,
|
||||
"signature": (
|
||||
primary.get("model"),
|
||||
primary.get("provider"),
|
||||
primary.get("base_url"),
|
||||
primary.get("api_mode"),
|
||||
primary.get("command"),
|
||||
tuple(primary.get("args") or ()),
|
||||
),
|
||||
}
|
||||
|
||||
return {
|
||||
"model": route.get("model"),
|
||||
"runtime": {
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
"credential_pool": runtime.get("credential_pool"),
|
||||
},
|
||||
"label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
|
||||
"signature": (
|
||||
route.get("model"),
|
||||
runtime.get("provider"),
|
||||
runtime.get("base_url"),
|
||||
runtime.get("api_mode"),
|
||||
runtime.get("command"),
|
||||
tuple(runtime.get("args") or ()),
|
||||
),
|
||||
}
|
||||
|
|
@ -114,20 +114,6 @@ model:
|
|||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# Smart Model Routing (optional)
|
||||
# =============================================================================
|
||||
# Use a cheaper model for short/simple turns while keeping your main model for
|
||||
# more complex requests. Disabled by default.
|
||||
#
|
||||
# smart_model_routing:
|
||||
# enabled: true
|
||||
# max_simple_chars: 160
|
||||
# max_simple_words: 28
|
||||
# cheap_model:
|
||||
# provider: openrouter
|
||||
# model: google/gemini-2.5-flash
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
|
|
|
|||
61
cli.py
61
cli.py
|
|
@ -310,12 +310,6 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"enabled": True, # Auto-compress when approaching context limit
|
||||
"threshold": 0.50, # Compress at 50% of model's context limit
|
||||
},
|
||||
"smart_model_routing": {
|
||||
"enabled": False,
|
||||
"max_simple_chars": 160,
|
||||
"max_simple_words": 28,
|
||||
"cheap_model": {},
|
||||
},
|
||||
"agent": {
|
||||
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
|
||||
"verbose": False,
|
||||
|
|
@ -1857,8 +1851,9 @@ class HermesCLI:
|
|||
fb = [fb] if fb.get("provider") and fb.get("model") else []
|
||||
self._fallback_model = fb
|
||||
|
||||
# Optional cheap-vs-strong routing for simple turns
|
||||
self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {}
|
||||
# Signature of the currently-initialised agent's runtime. Used to
|
||||
# rebuild the agent when provider / model / base_url changes across
|
||||
# turns (e.g. after /model or credential rotation).
|
||||
self._active_agent_route_signature = None
|
||||
|
||||
# Agent will be initialized on first use
|
||||
|
|
@ -2883,24 +2878,36 @@ class HermesCLI:
|
|||
return True
|
||||
|
||||
def _resolve_turn_agent_config(self, user_message: str) -> dict:
|
||||
"""Resolve model/runtime overrides for a single user turn."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
"""Build the effective model/runtime config for a single user turn.
|
||||
|
||||
Always uses the session's primary model/provider. If the user has
|
||||
toggled `/fast` on and the current model supports Priority
|
||||
Processing / Anthropic fast mode, attach `request_overrides` so the
|
||||
API call is marked accordingly.
|
||||
"""
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
route = resolve_turn_route(
|
||||
user_message,
|
||||
self._smart_model_routing,
|
||||
{
|
||||
"model": self.model,
|
||||
"api_key": self.api_key,
|
||||
"base_url": self.base_url,
|
||||
"provider": self.provider,
|
||||
"api_mode": self.api_mode,
|
||||
"command": self.acp_command,
|
||||
"args": list(self.acp_args or []),
|
||||
"credential_pool": getattr(self, "_credential_pool", None),
|
||||
},
|
||||
)
|
||||
runtime = {
|
||||
"api_key": self.api_key,
|
||||
"base_url": self.base_url,
|
||||
"provider": self.provider,
|
||||
"api_mode": self.api_mode,
|
||||
"command": self.acp_command,
|
||||
"args": list(self.acp_args or []),
|
||||
"credential_pool": getattr(self, "_credential_pool", None),
|
||||
}
|
||||
route = {
|
||||
"model": self.model,
|
||||
"runtime": runtime,
|
||||
"signature": (
|
||||
self.model,
|
||||
runtime["provider"],
|
||||
runtime["base_url"],
|
||||
runtime["api_mode"],
|
||||
runtime["command"],
|
||||
tuple(runtime["args"]),
|
||||
),
|
||||
}
|
||||
|
||||
service_tier = getattr(self, "service_tier", None)
|
||||
if not service_tier:
|
||||
|
|
@ -2908,13 +2915,13 @@ class HermesCLI:
|
|||
return route
|
||||
|
||||
try:
|
||||
overrides = resolve_fast_mode_overrides(route.get("model"))
|
||||
overrides = resolve_fast_mode_overrides(route["model"])
|
||||
except Exception:
|
||||
overrides = None
|
||||
route["request_overrides"] = overrides
|
||||
return route
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, request_overrides: dict | None = None) -> bool:
|
||||
"""
|
||||
Initialize the agent on first use.
|
||||
When resuming a session, restores conversation history from SQLite.
|
||||
|
|
@ -7911,7 +7918,6 @@ class HermesCLI:
|
|||
if not self._init_agent(
|
||||
model_override=turn_route["model"],
|
||||
runtime_override=turn_route["runtime"],
|
||||
route_label=turn_route["label"],
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
):
|
||||
return None
|
||||
|
|
@ -10535,7 +10541,6 @@ def main(
|
|||
if cli._init_agent(
|
||||
model_override=turn_route["model"],
|
||||
runtime_override=turn_route["runtime"],
|
||||
route_label=turn_route["label"],
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
):
|
||||
cli.agent.quiet_mode = True
|
||||
|
|
|
|||
|
|
@ -826,7 +826,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
|
||||
# Provider routing
|
||||
pr = _cfg.get("provider_routing", {})
|
||||
smart_routing = _cfg.get("smart_model_routing", {}) or {}
|
||||
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
|
|
@ -843,24 +842,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
turn_route = resolve_turn_route(
|
||||
prompt,
|
||||
smart_routing,
|
||||
{
|
||||
"model": model,
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
},
|
||||
)
|
||||
|
||||
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
|
||||
credential_pool = None
|
||||
runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
|
||||
runtime_provider = str(runtime.get("provider") or "").strip().lower()
|
||||
if runtime_provider:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
|
|
@ -877,13 +861,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
|
||||
|
||||
agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
api_key=turn_route["runtime"].get("api_key"),
|
||||
base_url=turn_route["runtime"].get("base_url"),
|
||||
provider=turn_route["runtime"].get("provider"),
|
||||
api_mode=turn_route["runtime"].get("api_mode"),
|
||||
acp_command=turn_route["runtime"].get("command"),
|
||||
acp_args=turn_route["runtime"].get("args"),
|
||||
model=model,
|
||||
api_key=runtime.get("api_key"),
|
||||
base_url=runtime.get("base_url"),
|
||||
provider=runtime.get("provider"),
|
||||
api_mode=runtime.get("api_mode"),
|
||||
acp_command=runtime.get("command"),
|
||||
acp_args=runtime.get("args"),
|
||||
max_iterations=max_iterations,
|
||||
reasoning_config=reasoning_config,
|
||||
prefill_messages=prefill_messages,
|
||||
|
|
|
|||
|
|
@ -629,7 +629,6 @@ class GatewayRunner:
|
|||
self._restart_drain_timeout = self._load_restart_drain_timeout()
|
||||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
self._smart_model_routing = self._load_smart_model_routing()
|
||||
|
||||
# Wire process registry into session store for reset protection
|
||||
from tools.process_registry import process_registry
|
||||
|
|
@ -1082,11 +1081,16 @@ class GatewayRunner:
|
|||
return model, runtime_kwargs
|
||||
|
||||
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
"""Build the effective model/runtime config for a single turn.
|
||||
|
||||
Always uses the session's primary model/provider. If `/fast` is
|
||||
enabled and the model supports Priority Processing / Anthropic fast
|
||||
mode, attach `request_overrides` so the API call is marked
|
||||
accordingly.
|
||||
"""
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
primary = {
|
||||
"model": model,
|
||||
runtime = {
|
||||
"api_key": runtime_kwargs.get("api_key"),
|
||||
"base_url": runtime_kwargs.get("base_url"),
|
||||
"provider": runtime_kwargs.get("provider"),
|
||||
|
|
@ -1095,7 +1099,18 @@ class GatewayRunner:
|
|||
"args": list(runtime_kwargs.get("args") or []),
|
||||
"credential_pool": runtime_kwargs.get("credential_pool"),
|
||||
}
|
||||
route = resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
|
||||
route = {
|
||||
"model": model,
|
||||
"runtime": runtime,
|
||||
"signature": (
|
||||
model,
|
||||
runtime["provider"],
|
||||
runtime["base_url"],
|
||||
runtime["api_mode"],
|
||||
runtime["command"],
|
||||
tuple(runtime["args"]),
|
||||
),
|
||||
}
|
||||
|
||||
service_tier = getattr(self, "_service_tier", None)
|
||||
if not service_tier:
|
||||
|
|
@ -1103,7 +1118,7 @@ class GatewayRunner:
|
|||
return route
|
||||
|
||||
try:
|
||||
overrides = resolve_fast_mode_overrides(route.get("model"))
|
||||
overrides = resolve_fast_mode_overrides(route["model"])
|
||||
except Exception:
|
||||
overrides = None
|
||||
route["request_overrides"] = overrides
|
||||
|
|
@ -1461,20 +1476,6 @@ class GatewayRunner:
|
|||
pass
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _load_smart_model_routing() -> dict:
|
||||
"""Load optional smart cheap-vs-strong model routing config."""
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return cfg.get("smart_model_routing", {}) or {}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
def _snapshot_running_agents(self) -> Dict[str, Any]:
|
||||
return {
|
||||
session_key: agent
|
||||
|
|
|
|||
|
|
@ -474,13 +474,6 @@ DEFAULT_CONFIG = {
|
|||
},
|
||||
},
|
||||
|
||||
"smart_model_routing": {
|
||||
"enabled": False,
|
||||
"max_simple_chars": 160,
|
||||
"max_simple_words": 28,
|
||||
"cheap_model": {},
|
||||
},
|
||||
|
||||
# Auxiliary model config — provider:model for each side task.
|
||||
# Format: provider is the provider name, model is the model slug.
|
||||
# "auto" for provider = auto-detect best available provider.
|
||||
|
|
@ -2878,19 +2871,6 @@ _FALLBACK_COMMENT = """
|
|||
# fallback_model:
|
||||
# provider: openrouter
|
||||
# model: anthropic/claude-sonnet-4
|
||||
#
|
||||
# ── Smart Model Routing ────────────────────────────────────────────────
|
||||
# Optional cheap-vs-strong routing for simple turns.
|
||||
# Keeps the primary model for complex work, but can route short/simple
|
||||
# messages to a cheaper model across providers.
|
||||
#
|
||||
# smart_model_routing:
|
||||
# enabled: true
|
||||
# max_simple_chars: 160
|
||||
# max_simple_words: 28
|
||||
# cheap_model:
|
||||
# provider: openrouter
|
||||
# model: google/gemini-2.5-flash
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -2922,19 +2902,6 @@ _COMMENTED_SECTIONS = """
|
|||
# fallback_model:
|
||||
# provider: openrouter
|
||||
# model: anthropic/claude-sonnet-4
|
||||
#
|
||||
# ── Smart Model Routing ────────────────────────────────────────────────
|
||||
# Optional cheap-vs-strong routing for simple turns.
|
||||
# Keeps the primary model for complex work, but can route short/simple
|
||||
# messages to a cheaper model across providers.
|
||||
#
|
||||
# smart_model_routing:
|
||||
# enabled: true
|
||||
# max_simple_chars: 160
|
||||
# max_simple_words: 28
|
||||
# cheap_model:
|
||||
# provider: openrouter
|
||||
# model: google/gemini-2.5-flash
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -160,7 +160,6 @@ def _config_overrides(config: dict) -> dict[str, str]:
|
|||
("display", "streaming"),
|
||||
("display", "skin"),
|
||||
("display", "show_reasoning"),
|
||||
("smart_model_routing", "enabled"),
|
||||
("privacy", "redact_pii"),
|
||||
("tts", "provider"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -323,7 +323,6 @@ TIPS = [
|
|||
"GPT-5 and Codex use 'developer' role instead of 'system' in the message format.",
|
||||
"Per-task auxiliary overrides: auxiliary.vision.provider, auxiliary.compression.model, etc. in config.yaml.",
|
||||
"The auxiliary client treats 'main' as a provider alias — resolves to your actual primary provider + model.",
|
||||
"Smart routing can auto-route simple queries to a cheaper model — set smart_model_routing.enabled: true.",
|
||||
"hermes claw migrate --dry-run previews OpenClaw migration without writing anything.",
|
||||
"File paths pasted with quotes or escaped spaces are handled automatically — no manual cleanup needed.",
|
||||
"Slash commands never trigger the large-paste collapse — /command with big arguments works correctly.",
|
||||
|
|
|
|||
|
|
@ -232,7 +232,6 @@ _CATEGORY_MERGE: Dict[str, str] = {
|
|||
"checkpoints": "agent",
|
||||
"approvals": "security",
|
||||
"human_delay": "display",
|
||||
"smart_model_routing": "agent",
|
||||
"dashboard": "display",
|
||||
"code_execution": "agent",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -338,7 +338,6 @@ Edit with `hermes config edit` or `hermes config set section.key value`.
|
|||
| `memory` | `memory_enabled`, `user_profile_enabled`, `provider` |
|
||||
| `security` | `tirith_enabled`, `website_blocklist` |
|
||||
| `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` |
|
||||
| `smart_model_routing` | `enabled`, `cheap_model` |
|
||||
| `checkpoints` | `enabled`, `max_snapshots` (50) |
|
||||
|
||||
Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/configuration
|
||||
|
|
|
|||
|
|
@ -1,129 +1,25 @@
|
|||
"""Tests for credential pool preservation through smart routing and 429 recovery.
|
||||
"""Tests for credential pool preservation through turn config and 429 recovery.
|
||||
|
||||
Covers:
|
||||
1. credential_pool flows through resolve_turn_route (no-route and fallback paths)
|
||||
2. CLI _resolve_turn_agent_config passes credential_pool to primary dict
|
||||
3. Gateway _resolve_turn_agent_config passes credential_pool to primary dict
|
||||
4. Eager fallback deferred when credential pool has credentials
|
||||
5. Eager fallback fires when no credential pool exists
|
||||
6. Full 429 rotation cycle: retry-same → rotate → exhaust → fallback
|
||||
1. CLI _resolve_turn_agent_config passes credential_pool to runtime dict
|
||||
2. Gateway _resolve_turn_agent_config passes credential_pool to runtime dict
|
||||
3. Eager fallback deferred when credential pool has credentials
|
||||
4. Eager fallback fires when no credential pool exists
|
||||
5. Full 429 rotation cycle: retry-same → rotate → exhaust → fallback
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch, PropertyMock
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. smart_model_routing: credential_pool preserved in no-route path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSmartRoutingPoolPreservation:
|
||||
def test_no_route_preserves_credential_pool(self):
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
|
||||
fake_pool = MagicMock(name="CredentialPool")
|
||||
primary = {
|
||||
"model": "gpt-5.4",
|
||||
"api_key": "sk-test",
|
||||
"base_url": None,
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": fake_pool,
|
||||
}
|
||||
# routing disabled
|
||||
result = resolve_turn_route("hello", None, primary)
|
||||
assert result["runtime"]["credential_pool"] is fake_pool
|
||||
|
||||
def test_no_route_none_pool(self):
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
|
||||
primary = {
|
||||
"model": "gpt-5.4",
|
||||
"api_key": "sk-test",
|
||||
"base_url": None,
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"command": None,
|
||||
"args": [],
|
||||
}
|
||||
result = resolve_turn_route("hello", None, primary)
|
||||
assert result["runtime"]["credential_pool"] is None
|
||||
|
||||
def test_routing_disabled_preserves_pool(self):
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
|
||||
fake_pool = MagicMock(name="CredentialPool")
|
||||
primary = {
|
||||
"model": "gpt-5.4",
|
||||
"api_key": "sk-test",
|
||||
"base_url": None,
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": fake_pool,
|
||||
}
|
||||
# routing explicitly disabled
|
||||
result = resolve_turn_route("hello", {"enabled": False}, primary)
|
||||
assert result["runtime"]["credential_pool"] is fake_pool
|
||||
|
||||
def test_route_fallback_on_resolve_error_preserves_pool(self, monkeypatch):
|
||||
"""When smart routing picks a cheap model but resolve_runtime_provider
|
||||
fails, the fallback to primary must still include credential_pool."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
|
||||
fake_pool = MagicMock(name="CredentialPool")
|
||||
primary = {
|
||||
"model": "gpt-5.4",
|
||||
"api_key": "sk-test",
|
||||
"base_url": None,
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": fake_pool,
|
||||
}
|
||||
routing_config = {
|
||||
"enabled": True,
|
||||
"cheap_model": "openai/gpt-4.1-mini",
|
||||
"cheap_provider": "openrouter",
|
||||
"max_tokens": 200,
|
||||
"patterns": ["^(hi|hello|hey)"],
|
||||
}
|
||||
# Force resolve_runtime_provider to fail so it falls back to primary
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
MagicMock(side_effect=RuntimeError("no credentials")),
|
||||
)
|
||||
result = resolve_turn_route("hi", routing_config, primary)
|
||||
assert result["runtime"]["credential_pool"] is fake_pool
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2 & 3. CLI and Gateway _resolve_turn_agent_config include credential_pool
|
||||
# 1. CLI _resolve_turn_agent_config includes credential_pool
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCliTurnRoutePool:
|
||||
def test_resolve_turn_includes_pool(self, monkeypatch, tmp_path):
|
||||
"""CLI's _resolve_turn_agent_config must pass credential_pool to primary."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
captured = {}
|
||||
|
||||
def spy_resolve(user_message, routing_config, primary):
|
||||
captured["primary"] = primary
|
||||
return resolve_turn_route(user_message, routing_config, primary)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"agent.smart_model_routing.resolve_turn_route", spy_resolve
|
||||
)
|
||||
|
||||
# Build a minimal HermesCLI-like object with the method
|
||||
def test_resolve_turn_includes_pool(self):
|
||||
"""CLI's _resolve_turn_agent_config must pass credential_pool in runtime."""
|
||||
fake_pool = MagicMock(name="FakePool")
|
||||
shell = SimpleNamespace(
|
||||
model="gpt-5.4",
|
||||
api_key="sk-test",
|
||||
|
|
@ -132,58 +28,46 @@ class TestCliTurnRoutePool:
|
|||
api_mode="codex_responses",
|
||||
acp_command=None,
|
||||
acp_args=[],
|
||||
_credential_pool=MagicMock(name="FakePool"),
|
||||
_smart_model_routing={"enabled": False},
|
||||
_credential_pool=fake_pool,
|
||||
service_tier=None,
|
||||
)
|
||||
|
||||
# Import and bind the real method
|
||||
from cli import HermesCLI
|
||||
bound = HermesCLI._resolve_turn_agent_config.__get__(shell)
|
||||
bound("test message")
|
||||
route = bound("test message")
|
||||
|
||||
assert "credential_pool" in captured["primary"]
|
||||
assert captured["primary"]["credential_pool"] is shell._credential_pool
|
||||
assert route["runtime"]["credential_pool"] is fake_pool
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Gateway _resolve_turn_agent_config includes credential_pool
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGatewayTurnRoutePool:
|
||||
def test_resolve_turn_includes_pool(self, monkeypatch):
|
||||
def test_resolve_turn_includes_pool(self):
|
||||
"""Gateway's _resolve_turn_agent_config must pass credential_pool."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
captured = {}
|
||||
|
||||
def spy_resolve(user_message, routing_config, primary):
|
||||
captured["primary"] = primary
|
||||
return resolve_turn_route(user_message, routing_config, primary)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"agent.smart_model_routing.resolve_turn_route", spy_resolve
|
||||
)
|
||||
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = SimpleNamespace(
|
||||
_smart_model_routing={"enabled": False},
|
||||
)
|
||||
|
||||
fake_pool = MagicMock(name="FakePool")
|
||||
runner = SimpleNamespace(_service_tier=None)
|
||||
runtime_kwargs = {
|
||||
"api_key": "sk-test",
|
||||
"api_key": "***",
|
||||
"base_url": None,
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": MagicMock(name="FakePool"),
|
||||
"credential_pool": fake_pool,
|
||||
}
|
||||
|
||||
bound = GatewayRunner._resolve_turn_agent_config.__get__(runner)
|
||||
bound("test message", "gpt-5.4", runtime_kwargs)
|
||||
route = bound("test message", "gpt-5.4", runtime_kwargs)
|
||||
|
||||
assert "credential_pool" in captured["primary"]
|
||||
assert captured["primary"]["credential_pool"] is runtime_kwargs["credential_pool"]
|
||||
assert route["runtime"]["credential_pool"] is fake_pool
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4 & 5. Eager fallback deferred/fires based on credential pool
|
||||
# 3 & 4. Eager fallback deferred/fires based on credential pool
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEagerFallbackWithPool:
|
||||
|
|
@ -251,7 +135,7 @@ class TestEagerFallbackWithPool:
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 6. Full 429 rotation cycle via _recover_with_credential_pool
|
||||
# 5. Full 429 rotation cycle via _recover_with_credential_pool
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPoolRotationCycle:
|
||||
|
|
|
|||
|
|
@ -1,61 +0,0 @@
|
|||
from agent.smart_model_routing import choose_cheap_model_route
|
||||
|
||||
|
||||
_BASE_CONFIG = {
|
||||
"enabled": True,
|
||||
"cheap_model": {
|
||||
"provider": "openrouter",
|
||||
"model": "google/gemini-2.5-flash",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_returns_none_when_disabled():
|
||||
cfg = {**_BASE_CONFIG, "enabled": False}
|
||||
assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None
|
||||
|
||||
|
||||
def test_routes_short_simple_prompt():
|
||||
result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG)
|
||||
assert result is not None
|
||||
assert result["provider"] == "openrouter"
|
||||
assert result["model"] == "google/gemini-2.5-flash"
|
||||
assert result["routing_reason"] == "simple_turn"
|
||||
|
||||
|
||||
def test_skips_long_prompt():
|
||||
prompt = "please summarize this carefully " * 20
|
||||
assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
|
||||
|
||||
|
||||
def test_skips_code_like_prompt():
|
||||
prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```"
|
||||
assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
|
||||
|
||||
|
||||
def test_skips_tool_heavy_prompt_keywords():
|
||||
prompt = "implement a patch for this docker error"
|
||||
assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
|
||||
|
||||
|
||||
def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch):
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")),
|
||||
)
|
||||
result = resolve_turn_route(
|
||||
"what time is it in tokyo?",
|
||||
_BASE_CONFIG,
|
||||
{
|
||||
"model": "anthropic/claude-sonnet-4",
|
||||
"provider": "openrouter",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_mode": "chat_completions",
|
||||
"api_key": "sk-primary",
|
||||
},
|
||||
)
|
||||
assert result["model"] == "anthropic/claude-sonnet-4"
|
||||
assert result["runtime"]["provider"] == "openrouter"
|
||||
assert result["label"] is None
|
||||
|
|
@ -207,48 +207,11 @@ def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch):
|
|||
shell.api_mode = "chat_completions"
|
||||
shell.base_url = "https://openrouter.ai/api/v1"
|
||||
shell.api_key = "sk-primary"
|
||||
shell._smart_model_routing = {"enabled": False}
|
||||
|
||||
result = shell._resolve_turn_agent_config("what time is it in tokyo?")
|
||||
|
||||
assert result["model"] == "gpt-5"
|
||||
assert result["runtime"]["provider"] == "openrouter"
|
||||
assert result["label"] is None
|
||||
|
||||
|
||||
def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch):
|
||||
cli = _import_cli()
|
||||
|
||||
def _runtime_resolve(**kwargs):
|
||||
assert kwargs["requested"] == "zai"
|
||||
return {
|
||||
"provider": "zai",
|
||||
"api_mode": "chat_completions",
|
||||
"base_url": "https://open.z.ai/api/v1",
|
||||
"api_key": "cheap-key",
|
||||
"source": "env/config",
|
||||
}
|
||||
|
||||
monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
|
||||
|
||||
shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1)
|
||||
shell.provider = "openrouter"
|
||||
shell.api_mode = "chat_completions"
|
||||
shell.base_url = "https://openrouter.ai/api/v1"
|
||||
shell.api_key = "primary-key"
|
||||
shell._smart_model_routing = {
|
||||
"enabled": True,
|
||||
"cheap_model": {"provider": "zai", "model": "glm-5-air"},
|
||||
"max_simple_chars": 160,
|
||||
"max_simple_words": 28,
|
||||
}
|
||||
|
||||
result = shell._resolve_turn_agent_config("what time is it in tokyo?")
|
||||
|
||||
assert result["model"] == "glm-5-air"
|
||||
assert result["runtime"]["provider"] == "zai"
|
||||
assert result["runtime"]["api_key"] == "cheap-key"
|
||||
assert result["label"] is not None
|
||||
|
||||
|
||||
def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch):
|
||||
|
|
|
|||
|
|
@ -183,27 +183,10 @@ class TestFastModeRouting(unittest.TestCase):
|
|||
acp_command=None,
|
||||
acp_args=[],
|
||||
_credential_pool=None,
|
||||
_smart_model_routing={},
|
||||
service_tier="priority",
|
||||
)
|
||||
|
||||
original_runtime = {
|
||||
"api_key": "***",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
}
|
||||
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "gpt-5.4",
|
||||
"runtime": dict(original_runtime),
|
||||
"label": None,
|
||||
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||
}):
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
|
||||
# Provider should NOT have changed
|
||||
assert route["runtime"]["provider"] == "openrouter"
|
||||
|
|
@ -222,26 +205,10 @@ class TestFastModeRouting(unittest.TestCase):
|
|||
acp_command=None,
|
||||
acp_args=[],
|
||||
_credential_pool=None,
|
||||
_smart_model_routing={},
|
||||
service_tier="priority",
|
||||
)
|
||||
|
||||
primary_route = {
|
||||
"model": "gpt-5.3-codex",
|
||||
"runtime": {
|
||||
"api_key": "***",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
},
|
||||
"label": None,
|
||||
"signature": ("gpt-5.3-codex", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||
}
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value=primary_route):
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
|
||||
assert route["runtime"]["provider"] == "openrouter"
|
||||
assert route.get("request_overrides") is None
|
||||
|
|
@ -329,27 +296,10 @@ class TestAnthropicFastMode(unittest.TestCase):
|
|||
acp_command=None,
|
||||
acp_args=[],
|
||||
_credential_pool=None,
|
||||
_smart_model_routing={},
|
||||
service_tier="priority",
|
||||
)
|
||||
|
||||
original_runtime = {
|
||||
"api_key": "***",
|
||||
"base_url": "https://api.anthropic.com",
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
}
|
||||
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "claude-opus-4-6",
|
||||
"runtime": dict(original_runtime),
|
||||
"label": None,
|
||||
"signature": ("claude-opus-4-6", "anthropic", "https://api.anthropic.com", "anthropic_messages", None, ()),
|
||||
}):
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
|
||||
assert route["runtime"]["provider"] == "anthropic"
|
||||
assert route["request_overrides"] == {"speed": "fast"}
|
||||
|
|
|
|||
|
|
@ -152,7 +152,6 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
|
|||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._running_agents = {}
|
||||
runner._smart_model_routing = {}
|
||||
from unittest.mock import MagicMock, AsyncMock
|
||||
runner.hooks = MagicMock()
|
||||
runner.hooks.emit = AsyncMock()
|
||||
|
|
|
|||
|
|
@ -75,7 +75,6 @@ def _make_runner():
|
|||
runner._service_tier = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._smart_model_routing = {}
|
||||
runner._running_agents = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._session_db = None
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import sys
|
|||
import threading
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
|
@ -53,7 +53,6 @@ def _make_runner():
|
|||
runner._service_tier = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._smart_model_routing = {}
|
||||
runner._running_agents = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._session_db = None
|
||||
|
|
@ -97,13 +96,7 @@ def test_turn_route_injects_priority_processing_without_changing_runtime():
|
|||
"credential_pool": None,
|
||||
}
|
||||
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "gpt-5.4",
|
||||
"runtime": dict(runtime_kwargs),
|
||||
"label": None,
|
||||
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||
}):
|
||||
route = gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", "gpt-5.4", runtime_kwargs)
|
||||
route = gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", "gpt-5.4", runtime_kwargs)
|
||||
|
||||
assert route["runtime"]["provider"] == "openrouter"
|
||||
assert route["runtime"]["api_mode"] == "chat_completions"
|
||||
|
|
@ -123,13 +116,7 @@ def test_turn_route_skips_priority_processing_for_unsupported_models():
|
|||
"credential_pool": None,
|
||||
}
|
||||
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "gpt-5.3-codex",
|
||||
"runtime": dict(runtime_kwargs),
|
||||
"label": None,
|
||||
"signature": ("gpt-5.3-codex", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||
}):
|
||||
route = gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", "gpt-5.3-codex", runtime_kwargs)
|
||||
route = gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", "gpt-5.3-codex", runtime_kwargs)
|
||||
|
||||
assert route["request_overrides"] is None
|
||||
|
||||
|
|
|
|||
|
|
@ -1165,39 +1165,6 @@ Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-a
|
|||
Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
|
||||
:::
|
||||
|
||||
## Smart Model Routing
|
||||
|
||||
Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model.
|
||||
|
||||
```yaml
|
||||
smart_model_routing:
|
||||
enabled: true
|
||||
max_simple_chars: 160
|
||||
max_simple_words: 28
|
||||
cheap_model:
|
||||
provider: openrouter
|
||||
model: google/gemini-2.5-flash
|
||||
# base_url: http://localhost:8000/v1 # optional custom endpoint
|
||||
# key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key
|
||||
```
|
||||
|
||||
How it works:
|
||||
- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model`
|
||||
- If the turn looks complex, Hermes stays on your primary model/provider
|
||||
- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically
|
||||
|
||||
This is intentionally conservative. It is meant for quick, low-stakes turns like:
|
||||
- short factual questions
|
||||
- quick rewrites
|
||||
- lightweight summaries
|
||||
|
||||
It will avoid routing prompts that look like:
|
||||
- coding/debugging work
|
||||
- tool-heavy requests
|
||||
- long or multi-line analysis asks
|
||||
|
||||
Use this when you want lower latency or cost without fully changing your default model.
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue