@staticmethod
def _load_fallback_model() -> dict | None:
    """Load the fallback model config from config.yaml.

    Reads the ``fallback_model`` mapping from ``config.yaml`` under
    ``_hermes_home``.

    Returns:
        The ``fallback_model`` dict when BOTH its ``provider`` and ``model``
        entries are non-empty; otherwise None (file missing, unreadable or
        malformed, section absent, or either field empty).
    """
    try:
        import yaml as _y

        cfg_path = _hermes_home / "config.yaml"
        if not cfg_path.exists():
            return None
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(cfg_path, encoding="utf-8") as _f:
            cfg = _y.safe_load(_f) or {}
    except Exception as exc:
        # Loading stays best-effort (a bad config must not stop the gateway),
        # but leave a trace so a silently disabled fallback can be diagnosed.
        import logging

        logging.getLogger(__name__).warning(
            "Could not load fallback_model from config.yaml: %s", exc
        )
        return None

    # A YAML document may legally be a list or scalar; only mappings apply.
    fb = cfg.get("fallback_model") if isinstance(cfg, dict) else None
    if isinstance(fb, dict) and fb.get("provider") and fb.get("model"):
        return fb
    return None
# ── Provider fallback ──────────────────────────────────────────────────

# Maps provider id → (default_base_url, [env_var_names]).
# Env var names are tried in order; the first non-empty value wins.
_FALLBACK_PROVIDERS = {
    "openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
    "openai": ("https://api.openai.com/v1", ["OPENAI_API_KEY"]),
    "nous": ("https://inference-api.nousresearch.com/v1", ["NOUS_API_KEY"]),
    "deepseek": ("https://api.deepseek.com/v1", ["DEEPSEEK_API_KEY"]),
    "together": ("https://api.together.xyz/v1", ["TOGETHER_API_KEY"]),
    "groq": ("https://api.groq.com/openai/v1", ["GROQ_API_KEY"]),
    "fireworks": ("https://api.fireworks.ai/inference/v1", ["FIREWORKS_API_KEY"]),
    "mistral": ("https://api.mistral.ai/v1", ["MISTRAL_API_KEY"]),
    "gemini": (
        "https://generativelanguage.googleapis.com/v1beta/openai",
        ["GEMINI_API_KEY", "GOOGLE_API_KEY"],
    ),
}

def _try_activate_fallback(self) -> bool:
    """Switch this agent to the configured fallback model/provider.

    Called when the primary model is failing after retries. Replaces
    ``self.client``, ``self.model``, ``self.provider``, ``self.base_url``
    and ``self.api_mode`` in place so the caller's retry loop can continue
    against the new backend.

    API key resolution order: ``api_key`` in the config, then the env var
    named by ``api_key_env``, then the provider's default env vars from
    ``_FALLBACK_PROVIDERS``. Base URL resolution order: ``base_url`` in
    the config, then the provider default, then the OpenRouter endpoint.

    Returns:
        True if the switch happened on this call. False if no fallback is
        configured, it was already activated (one-shot), provider/model or
        an API key is missing, or the client could not be constructed.
        Whenever False is returned, no agent state has been modified.
    """
    if self._fallback_activated or not self._fallback_model:
        return False

    fb = self._fallback_model

    def _cfg(key: str) -> str:
        # Config values come from YAML and may be non-string scalars
        # (e.g. a bare number); coerce so .strip() cannot raise here,
        # in the middle of the caller's error-handling path.
        value = fb.get(key)
        return str(value).strip() if value else ""

    fb_provider = _cfg("provider").lower()
    fb_model = _cfg("model")
    if not fb_provider or not fb_model:
        return False

    known = self._FALLBACK_PROVIDERS.get(fb_provider)

    # Resolve API key
    fb_key = _cfg("api_key")
    if not fb_key:
        key_env = _cfg("api_key_env")
        if key_env:
            # An explicit env var name overrides the provider defaults.
            env_names = [key_env]
        elif known:
            env_names = known[1]
        else:
            env_names = []
        for env_var in env_names:
            # Strip so a whitespace-only value counts as unset and the
            # next candidate env var is still consulted.
            fb_key = os.getenv(env_var, "").strip()
            if fb_key:
                break
    if not fb_key:
        logging.warning(
            "Fallback model configured but no API key found for provider '%s'",
            fb_provider,
        )
        return False

    # Resolve base URL
    fb_base_url = _cfg("base_url")
    if not fb_base_url and known:
        fb_base_url = known[0]
    if not fb_base_url:
        logging.warning(
            "Fallback provider '%s' has no known endpoint and no base_url; "
            "defaulting to OpenRouter",
            fb_provider,
        )
        fb_base_url = OPENROUTER_BASE_URL

    client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
    if "openrouter" in fb_base_url.lower():
        client_kwargs["default_headers"] = {
            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
            "X-OpenRouter-Title": "Hermes Agent",
            "X-OpenRouter-Categories": "productivity,cli-agent",
        }

    # Only client construction is guarded: if it fails, nothing on the
    # agent has been touched yet, so returning False is truthful.
    try:
        new_client = OpenAI(**client_kwargs)
    except Exception as e:
        logging.error("Failed to activate fallback model: %s", e)
        return False

    old_model = self.model
    self.client = new_client
    self._client_kwargs = client_kwargs
    self.model = fb_model
    self.provider = fb_provider
    self.base_url = fb_base_url
    self.api_mode = "chat_completions"
    self._fallback_activated = True

    # Re-evaluate prompt caching for the new provider/model
    self._use_prompt_caching = (
        "openrouter" in fb_base_url.lower()
        and "claude" in fb_model.lower()
    )

    print(
        f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
        f"{fb_model} via {fb_provider}"
    )
    logging.info(
        "Fallback activated: %s → %s (%s)",
        old_model, fb_model, fb_provider,
    )
    return True

# ── End provider fallback ──────────────────────────────────────────────
"""Tests for the provider fallback model feature.

Exercises AIAgent's ability to swap to a configured fallback
model/provider once the primary has failed after retries.
"""

import os
from types import SimpleNamespace
from unittest.mock import MagicMock, patch

import pytest

from run_agent import AIAgent


def _make_tool_defs(*names: str) -> list:
    """Build one minimal function-tool schema per given name."""
    schemas = []
    for tool_name in names:
        schemas.append(
            {
                "type": "function",
                "function": {
                    "name": tool_name,
                    "description": f"{tool_name} tool",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        )
    return schemas


def _make_agent(fallback_model=None):
    """Construct a lightweight AIAgent, optionally with a fallback config.

    Tool discovery, toolset checks and the OpenAI client class are patched
    for the duration of construction; the client is then replaced by a mock.
    """
    tool_defs = _make_tool_defs("web_search")
    with patch("run_agent.get_tool_definitions", return_value=tool_defs):
        with patch("run_agent.check_toolset_requirements", return_value={}):
            with patch("run_agent.OpenAI"):
                agent = AIAgent(
                    api_key="test-key-primary",
                    quiet_mode=True,
                    skip_context_files=True,
                    skip_memory=True,
                    fallback_model=fallback_model,
                )
                agent.client = MagicMock()
    return agent


def _activate(agent, env):
    """Run ``_try_activate_fallback`` with *env* merged into os.environ.

    Returns ``(result, mocked OpenAI class)``.
    """
    with patch.dict("os.environ", env):
        with patch("run_agent.OpenAI") as openai_cls:
            outcome = agent._try_activate_fallback()
    return outcome, openai_cls


# =============================================================================
# _try_activate_fallback()
# =============================================================================

class TestTryActivateFallback:
    """Behavior of the one-shot fallback switch."""

    def test_returns_false_when_not_configured(self):
        agent = _make_agent(fallback_model=None)
        activated = agent._try_activate_fallback()
        assert activated is False
        assert agent._fallback_activated is False

    def test_returns_false_for_empty_config(self):
        empty_cfg = {"provider": "", "model": ""}
        assert _make_agent(fallback_model=empty_cfg)._try_activate_fallback() is False

    def test_returns_false_for_missing_provider(self):
        model_only = {"model": "gpt-4.1"}
        assert _make_agent(fallback_model=model_only)._try_activate_fallback() is False

    def test_returns_false_for_missing_model(self):
        provider_only = {"provider": "openai"}
        assert _make_agent(fallback_model=provider_only)._try_activate_fallback() is False

    def test_activates_openrouter_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        result, mock_openai = _activate(agent, {"OPENROUTER_API_KEY": "sk-or-fallback-key"})

        assert result is True
        assert agent._fallback_activated is True
        assert agent.model == "anthropic/claude-sonnet-4"
        assert agent.provider == "openrouter"
        assert agent.api_mode == "chat_completions"

        mock_openai.assert_called_once()
        passed = mock_openai.call_args[1]
        assert passed["api_key"] == "sk-or-fallback-key"
        assert "openrouter" in passed["base_url"].lower()
        # Attribution headers are expected for OpenRouter endpoints
        assert "default_headers" in passed

    def test_activates_openai_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "openai", "model": "gpt-4.1"},
        )
        result, mock_openai = _activate(agent, {"OPENAI_API_KEY": "sk-openai-key"})

        assert result is True
        assert agent.model == "gpt-4.1"
        assert agent.provider == "openai"
        passed = mock_openai.call_args[1]
        assert passed["api_key"] == "sk-openai-key"
        assert "openai.com" in passed["base_url"]

    def test_activates_deepseek_fallback(self):
        agent = _make_agent(
            fallback_model={"provider": "deepseek", "model": "deepseek-chat"},
        )
        result, _ = _activate(agent, {"DEEPSEEK_API_KEY": "sk-ds-key"})

        assert result is True
        assert agent.model == "deepseek-chat"
        assert agent.provider == "deepseek"

    def test_only_fires_once(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        with patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}):
            with patch("run_agent.OpenAI"):
                first = agent._try_activate_fallback()
                # A repeat call must be refused
                second = agent._try_activate_fallback()
        assert first is True
        assert second is False

    def test_returns_false_when_no_api_key(self):
        """With the API key env var unset, activation fails without side effects."""
        agent = _make_agent(
            fallback_model={"provider": "deepseek", "model": "deepseek-chat"},
        )
        # Rebuild the environment without DEEPSEEK_API_KEY
        scrubbed = dict(os.environ)
        scrubbed.pop("DEEPSEEK_API_KEY", None)
        with patch.dict("os.environ", scrubbed, clear=True):
            assert agent._try_activate_fallback() is False
            assert agent._fallback_activated is False

    def test_custom_base_url(self):
        """An explicit base_url in the config wins over the provider default."""
        custom_cfg = {
            "provider": "custom",
            "model": "my-model",
            "base_url": "http://localhost:8080/v1",
            "api_key_env": "MY_CUSTOM_KEY",
        }
        agent = _make_agent(fallback_model=custom_cfg)
        result, mock_openai = _activate(agent, {"MY_CUSTOM_KEY": "custom-secret"})

        assert result is True
        passed = mock_openai.call_args[1]
        assert passed["base_url"] == "http://localhost:8080/v1"
        assert passed["api_key"] == "custom-secret"

    def test_prompt_caching_enabled_for_claude_on_openrouter(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        _activate(agent, {"OPENROUTER_API_KEY": "sk-or-key"})
        assert agent._use_prompt_caching is True

    def test_prompt_caching_disabled_for_non_claude(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
        )
        _activate(agent, {"OPENROUTER_API_KEY": "sk-or-key"})
        assert agent._use_prompt_caching is False

    def test_prompt_caching_disabled_for_non_openrouter(self):
        agent = _make_agent(
            fallback_model={"provider": "openai", "model": "gpt-4.1"},
        )
        _activate(agent, {"OPENAI_API_KEY": "sk-oai-key"})
        assert agent._use_prompt_caching is False


# =============================================================================
# Fallback config init
# =============================================================================

class TestFallbackInit:
    """How the constructor stores the fallback configuration."""

    def test_fallback_stored_when_configured(self):
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        stored = agent._fallback_model
        assert stored is not None
        assert stored["provider"] == "openrouter"
        assert agent._fallback_activated is False

    def test_fallback_none_when_not_configured(self):
        agent = _make_agent(fallback_model=None)
        assert agent._fallback_model is None
        assert agent._fallback_activated is False

    def test_fallback_none_for_non_dict(self):
        agent = _make_agent(fallback_model="not-a-dict")
        assert agent._fallback_model is None


# =============================================================================
# Provider credential resolution
# =============================================================================

class TestProviderCredentials:
    """Each known provider picks up its API key and default endpoint."""

    @pytest.mark.parametrize("provider,env_var,base_url_fragment", [
        ("openrouter", "OPENROUTER_API_KEY", "openrouter"),
        ("openai", "OPENAI_API_KEY", "openai.com"),
        ("deepseek", "DEEPSEEK_API_KEY", "deepseek.com"),
        ("together", "TOGETHER_API_KEY", "together.xyz"),
        ("groq", "GROQ_API_KEY", "groq.com"),
        ("fireworks", "FIREWORKS_API_KEY", "fireworks.ai"),
        ("mistral", "MISTRAL_API_KEY", "mistral.ai"),
        ("gemini", "GEMINI_API_KEY", "googleapis.com"),
        ("nous", "NOUS_API_KEY", "nousresearch.com"),
    ])
    def test_provider_resolves(self, provider, env_var, base_url_fragment):
        agent = _make_agent(
            fallback_model={"provider": provider, "model": "test-model"},
        )
        result, mock_openai = _activate(agent, {env_var: "test-key-123"})

        assert result is True, f"Failed to activate fallback for {provider}"
        passed = mock_openai.call_args[1]
        assert passed["api_key"] == "test-key-123"
        assert base_url_fragment in passed["base_url"].lower()