feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.
This commit is contained in:
teknium1 2026-03-08 20:22:33 -07:00
parent 4d7d9d9715
commit 161436cfdd
6 changed files with 410 additions and 0 deletions

5
cli.py
View file

@ -1118,6 +1118,10 @@ class HermesCLI:
self._provider_require_params = pr.get("require_parameters", False)
self._provider_data_collection = pr.get("data_collection")
# Fallback model config — tried when primary provider fails after retries
fb = CLI_CONFIG.get("fallback_model") or {}
self._fallback_model = fb if fb.get("provider") and fb.get("model") else None
# Agent will be initialized on first use
self.agent: Optional[AIAgent] = None
self._app = None # prompt_toolkit Application (set in run())
@ -1349,6 +1353,7 @@ class HermesCLI:
session_db=self._session_db,
clarify_callback=self._clarify_callback,
honcho_session_key=self.session_id,
fallback_model=self._fallback_model,
)
# Apply any pending title now that the session exists in the DB
if self._pending_title and self._session_db:

View file

@ -194,6 +194,7 @@ class GatewayRunner:
self._ephemeral_system_prompt = self._load_ephemeral_system_prompt()
self._reasoning_config = self._load_reasoning_config()
self._provider_routing = self._load_provider_routing()
self._fallback_model = self._load_fallback_model()
# Wire process registry into session store for reset protection
from tools.process_registry import process_registry
@ -393,6 +394,26 @@ class GatewayRunner:
pass
return {}
@staticmethod
def _load_fallback_model() -> dict | None:
"""Load fallback model config from config.yaml.
Returns a dict with 'provider' and 'model' keys, or None if
not configured / both fields empty.
"""
try:
import yaml as _y
cfg_path = _hermes_home / "config.yaml"
if cfg_path.exists():
with open(cfg_path) as _f:
cfg = _y.safe_load(_f) or {}
fb = cfg.get("fallback_model", {}) or {}
if fb.get("provider") and fb.get("model"):
return fb
except Exception:
pass
return None
async def start(self) -> bool:
"""
Start the gateway and all configured platform adapters.
@ -2623,6 +2644,7 @@ class GatewayRunner:
platform=platform_key,
honcho_session_key=session_key,
session_db=self._session_db,
fallback_model=self._fallback_model,
)
# Store agent reference for interrupt support

View file

@ -103,6 +103,18 @@ DEFAULT_CONFIG = {
},
},
# Fallback model — used when the primary model/provider fails after retries.
# When the primary hits rate limits (429), overload (529), or service errors (503),
# Hermes will automatically switch to this model for the remainder of the session.
# Set to None / omit to disable fallback.
"fallback_model": {
"provider": "", # e.g. "openrouter", "openai", "nous", "deepseek", "together", "groq"
"model": "", # e.g. "anthropic/claude-sonnet-4", "gpt-4.1", "deepseek-chat"
# Optional overrides (usually auto-resolved from provider):
# "base_url": "", # custom endpoint URL
# "api_key_env": "", # env var name for API key (e.g. "MY_CUSTOM_KEY")
},
"display": {
"compact": False,
"personality": "kawaii",

View file

@ -183,6 +183,7 @@ class AIAgent:
session_db=None,
honcho_session_key: str = None,
iteration_budget: "IterationBudget" = None,
fallback_model: Dict[str, Any] = None,
):
"""
Initialize the AI Agent.
@ -406,6 +407,17 @@ class AIAgent:
except Exception as e:
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
# Provider fallback — a single backup model/provider tried when the
# primary is exhausted (rate-limit, overload, connection failure).
# Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
self._fallback_activated = False
if self._fallback_model:
fb_p = self._fallback_model.get("provider", "")
fb_m = self._fallback_model.get("model", "")
if fb_p and fb_m and not self.quiet_mode:
print(f"🔄 Fallback model: {fb_m} ({fb_p})")
# Get available tools with filtering
self.tools = get_tool_definitions(
enabled_toolsets=enabled_toolsets,
@ -2146,6 +2158,103 @@ class AIAgent:
raise result["error"]
return result["response"]
# ── Provider fallback ──────────────────────────────────────────────────
# Maps provider id → (default_base_url, [env_var_names])
_FALLBACK_PROVIDERS = {
"openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
"openai": ("https://api.openai.com/v1", ["OPENAI_API_KEY"]),
"nous": ("https://inference-api.nousresearch.com/v1", ["NOUS_API_KEY"]),
"deepseek": ("https://api.deepseek.com/v1", ["DEEPSEEK_API_KEY"]),
"together": ("https://api.together.xyz/v1", ["TOGETHER_API_KEY"]),
"groq": ("https://api.groq.com/openai/v1", ["GROQ_API_KEY"]),
"fireworks": ("https://api.fireworks.ai/inference/v1", ["FIREWORKS_API_KEY"]),
"mistral": ("https://api.mistral.ai/v1", ["MISTRAL_API_KEY"]),
"gemini": ("https://generativelanguage.googleapis.com/v1beta/openai", ["GEMINI_API_KEY", "GOOGLE_API_KEY"]),
}
def _try_activate_fallback(self) -> bool:
"""Switch to the configured fallback model/provider.
Called when the primary model is failing after retries. Swaps the
OpenAI client, model slug, and provider in-place so the retry loop
can continue with the new backend. One-shot: returns False if
already activated or not configured.
"""
if self._fallback_activated or not self._fallback_model:
return False
fb = self._fallback_model
fb_provider = (fb.get("provider") or "").strip().lower()
fb_model = (fb.get("model") or "").strip()
if not fb_provider or not fb_model:
return False
# Resolve API key
fb_key = (fb.get("api_key") or "").strip()
if not fb_key:
key_env = (fb.get("api_key_env") or "").strip()
if key_env:
fb_key = os.getenv(key_env, "")
elif fb_provider in self._FALLBACK_PROVIDERS:
for env_var in self._FALLBACK_PROVIDERS[fb_provider][1]:
fb_key = os.getenv(env_var, "")
if fb_key:
break
if not fb_key:
logging.warning(
"Fallback model configured but no API key found for provider '%s'",
fb_provider,
)
return False
# Resolve base URL
fb_base_url = (fb.get("base_url") or "").strip()
if not fb_base_url and fb_provider in self._FALLBACK_PROVIDERS:
fb_base_url = self._FALLBACK_PROVIDERS[fb_provider][0]
if not fb_base_url:
fb_base_url = OPENROUTER_BASE_URL
# Build new client
try:
client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
if "openrouter" in fb_base_url.lower():
client_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
self.client = OpenAI(**client_kwargs)
self._client_kwargs = client_kwargs
old_model = self.model
self.model = fb_model
self.provider = fb_provider
self.base_url = fb_base_url
self.api_mode = "chat_completions"
self._fallback_activated = True
# Re-evaluate prompt caching for the new provider/model
self._use_prompt_caching = (
"openrouter" in fb_base_url.lower()
and "claude" in fb_model.lower()
)
print(
f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
f"{fb_model} via {fb_provider}"
)
logging.info(
"Fallback activated: %s%s (%s)",
old_model, fb_model, fb_provider,
)
return True
except Exception as e:
logging.error("Failed to activate fallback model: %s", e)
return False
# ── End provider fallback ──────────────────────────────────────────────
def _build_api_kwargs(self, api_messages: list) -> dict:
"""Build the keyword arguments dict for the active API mode."""
if self.api_mode == "codex_responses":
@ -3252,6 +3361,10 @@ class AIAgent:
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
if retry_count >= max_retries:
# Try fallback before giving up
if self._try_activate_fallback():
retry_count = 0
continue
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
self._persist_session(messages, conversation_history)
@ -3576,6 +3689,11 @@ class AIAgent:
])) and not is_context_length_error
if is_client_error:
# Try fallback before aborting — a different provider
# may not have the same issue (rate limit, auth, etc.)
if self._try_activate_fallback():
retry_count = 0
continue
self._dump_api_request_debug(
api_kwargs, reason="non_retryable_client_error", error=api_error,
)
@ -3593,6 +3711,10 @@ class AIAgent:
}
if retry_count >= max_retries:
# Try fallback before giving up entirely
if self._try_activate_fallback():
retry_count = 0
continue
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")

View file

@ -149,6 +149,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
runner._prefill_messages = []
runner._reasoning_config = None
runner._provider_routing = {}
runner._fallback_model = None
runner._running_agents = {}
from unittest.mock import MagicMock, AsyncMock
runner.hooks = MagicMock()

View file

@ -0,0 +1,248 @@
"""Tests for the provider fallback model feature.
Verifies that AIAgent can switch to a configured fallback model/provider
when the primary fails after retries.
"""
import os
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from run_agent import AIAgent
def _make_tool_defs(*names: str) -> list:
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
def _make_agent(fallback_model=None):
"""Create a minimal AIAgent with optional fallback config."""
with (
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI"),
):
agent = AIAgent(
api_key="test-key-primary",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
fallback_model=fallback_model,
)
agent.client = MagicMock()
return agent
# =============================================================================
# _try_activate_fallback()
# =============================================================================
class TestTryActivateFallback:
def test_returns_false_when_not_configured(self):
agent = _make_agent(fallback_model=None)
assert agent._try_activate_fallback() is False
assert agent._fallback_activated is False
def test_returns_false_for_empty_config(self):
agent = _make_agent(fallback_model={"provider": "", "model": ""})
assert agent._try_activate_fallback() is False
def test_returns_false_for_missing_provider(self):
agent = _make_agent(fallback_model={"model": "gpt-4.1"})
assert agent._try_activate_fallback() is False
def test_returns_false_for_missing_model(self):
agent = _make_agent(fallback_model={"provider": "openai"})
assert agent._try_activate_fallback() is False
def test_activates_openrouter_fallback(self):
agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
)
with (
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-fallback-key"}),
patch("run_agent.OpenAI") as mock_openai,
):
result = agent._try_activate_fallback()
assert result is True
assert agent._fallback_activated is True
assert agent.model == "anthropic/claude-sonnet-4"
assert agent.provider == "openrouter"
assert agent.api_mode == "chat_completions"
mock_openai.assert_called_once()
call_kwargs = mock_openai.call_args[1]
assert call_kwargs["api_key"] == "sk-or-fallback-key"
assert "openrouter" in call_kwargs["base_url"].lower()
# OpenRouter should get attribution headers
assert "default_headers" in call_kwargs
def test_activates_openai_fallback(self):
agent = _make_agent(
fallback_model={"provider": "openai", "model": "gpt-4.1"},
)
with (
patch.dict("os.environ", {"OPENAI_API_KEY": "sk-openai-key"}),
patch("run_agent.OpenAI") as mock_openai,
):
result = agent._try_activate_fallback()
assert result is True
assert agent.model == "gpt-4.1"
assert agent.provider == "openai"
call_kwargs = mock_openai.call_args[1]
assert call_kwargs["api_key"] == "sk-openai-key"
assert "openai.com" in call_kwargs["base_url"]
def test_activates_deepseek_fallback(self):
agent = _make_agent(
fallback_model={"provider": "deepseek", "model": "deepseek-chat"},
)
with (
patch.dict("os.environ", {"DEEPSEEK_API_KEY": "sk-ds-key"}),
patch("run_agent.OpenAI"),
):
assert agent._try_activate_fallback() is True
assert agent.model == "deepseek-chat"
assert agent.provider == "deepseek"
def test_only_fires_once(self):
agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
)
with (
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
patch("run_agent.OpenAI"),
):
assert agent._try_activate_fallback() is True
# Second attempt should return False
assert agent._try_activate_fallback() is False
def test_returns_false_when_no_api_key(self):
"""Fallback should fail gracefully when the API key env var is unset."""
agent = _make_agent(
fallback_model={"provider": "deepseek", "model": "deepseek-chat"},
)
# Ensure DEEPSEEK_API_KEY is not in the environment
env = {k: v for k, v in os.environ.items() if k != "DEEPSEEK_API_KEY"}
with patch.dict("os.environ", env, clear=True):
assert agent._try_activate_fallback() is False
assert agent._fallback_activated is False
def test_custom_base_url(self):
"""Custom base_url in config should override the provider default."""
agent = _make_agent(
fallback_model={
"provider": "custom",
"model": "my-model",
"base_url": "http://localhost:8080/v1",
"api_key_env": "MY_CUSTOM_KEY",
},
)
with (
patch.dict("os.environ", {"MY_CUSTOM_KEY": "custom-secret"}),
patch("run_agent.OpenAI") as mock_openai,
):
assert agent._try_activate_fallback() is True
call_kwargs = mock_openai.call_args[1]
assert call_kwargs["base_url"] == "http://localhost:8080/v1"
assert call_kwargs["api_key"] == "custom-secret"
def test_prompt_caching_enabled_for_claude_on_openrouter(self):
agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
)
with (
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
patch("run_agent.OpenAI"),
):
agent._try_activate_fallback()
assert agent._use_prompt_caching is True
def test_prompt_caching_disabled_for_non_claude(self):
agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
)
with (
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
patch("run_agent.OpenAI"),
):
agent._try_activate_fallback()
assert agent._use_prompt_caching is False
def test_prompt_caching_disabled_for_non_openrouter(self):
agent = _make_agent(
fallback_model={"provider": "openai", "model": "gpt-4.1"},
)
with (
patch.dict("os.environ", {"OPENAI_API_KEY": "sk-oai-key"}),
patch("run_agent.OpenAI"),
):
agent._try_activate_fallback()
assert agent._use_prompt_caching is False
# =============================================================================
# Fallback config init
# =============================================================================
class TestFallbackInit:
def test_fallback_stored_when_configured(self):
agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
)
assert agent._fallback_model is not None
assert agent._fallback_model["provider"] == "openrouter"
assert agent._fallback_activated is False
def test_fallback_none_when_not_configured(self):
agent = _make_agent(fallback_model=None)
assert agent._fallback_model is None
assert agent._fallback_activated is False
def test_fallback_none_for_non_dict(self):
agent = _make_agent(fallback_model="not-a-dict")
assert agent._fallback_model is None
# =============================================================================
# Provider credential resolution
# =============================================================================
class TestProviderCredentials:
"""Verify that each known provider resolves its API key correctly."""
@pytest.mark.parametrize("provider,env_var,base_url_fragment", [
("openrouter", "OPENROUTER_API_KEY", "openrouter"),
("openai", "OPENAI_API_KEY", "openai.com"),
("deepseek", "DEEPSEEK_API_KEY", "deepseek.com"),
("together", "TOGETHER_API_KEY", "together.xyz"),
("groq", "GROQ_API_KEY", "groq.com"),
("fireworks", "FIREWORKS_API_KEY", "fireworks.ai"),
("mistral", "MISTRAL_API_KEY", "mistral.ai"),
("gemini", "GEMINI_API_KEY", "googleapis.com"),
("nous", "NOUS_API_KEY", "nousresearch.com"),
])
def test_provider_resolves(self, provider, env_var, base_url_fragment):
agent = _make_agent(
fallback_model={"provider": provider, "model": "test-model"},
)
with (
patch.dict("os.environ", {env_var: "test-key-123"}),
patch("run_agent.OpenAI") as mock_openai,
):
result = agent._try_activate_fallback()
assert result is True, f"Failed to activate fallback for {provider}"
call_kwargs = mock_openai.call_args[1]
assert call_kwargs["api_key"] == "test-key-123"
assert base_url_fragment in call_kwargs["base_url"].lower()