From b82c83d32088143e9a46df9a79dee1a5a8ff58b0 Mon Sep 17 00:00:00 2001 From: herbalizer404 <8180647+herbalizer404@users.noreply.github.com> Date: Thu, 25 Jun 2026 12:50:50 -0700 Subject: [PATCH] fix(auxiliary): honor fallback chain when compression provider auth is unavailable When an explicit aux provider cannot build a client before any request is sent (missing raw env key, exhausted/unavailable OAuth or credential-pool auth, resolver returning (None, None)), call_llm raised a misleading "no API key was found" error and bypassed the configured fallback_chain entirely. A provider authenticated through Hermes auth / the credential pool (e.g. ollama-cloud) whose pool entry is exhausted hit this path, so compression failed instead of routing to the configured fallback. Adds _try_configured_fallback_for_unavailable_client() and wires it into both sync and async call_llm before the raise, and into the startup compression feasibility check. Salvaged from #51835 by @herbalizer404. --- agent/auxiliary_client.py | 64 +++++++++++++--- agent/conversation_compression.py | 18 ++++- tests/agent/test_auxiliary_client.py | 75 +++++++++++++++++++ .../run_agent/test_compression_feasibility.py | 33 ++++++++ 4 files changed, 174 insertions(+), 16 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index d20f1f948d5..57e79f36e07 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -3205,6 +3205,28 @@ def _try_configured_fallback_chain( return None, None, "" +def _try_configured_fallback_for_unavailable_client( + task: Optional[str], + failed_provider: str, +) -> Tuple[Optional[Any], Optional[str], str]: + """Try task fallback_chain when an explicit aux provider cannot build. + + This covers the "no client" case before any request is sent: missing + raw env key, unavailable OAuth/pool credentials, or provider resolver + returning ``(None, None)``. It deliberately stops at the configured + per-task fallback chain; the main-agent model remains the last-resort + runtime fallback for request-time capacity errors. + """ + explicit = (failed_provider or "").strip().lower() + if not task or not explicit or explicit in {"auto"}: + return None, None, "" + return _try_configured_fallback_chain( + task, + explicit, + reason="provider unavailable", + ) + + def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]: """Resolve inline or env-backed API key from a fallback-chain entry.""" explicit = str(entry.get("api_key") or "").strip() @@ -5346,21 +5368,30 @@ def call_llm( ) if client is None: # When the user explicitly chose a non-OpenRouter provider but no - # credentials were found, fail fast instead of silently routing - # through OpenRouter (which causes confusing 404s). + # credentials were found, honor the task fallback_chain before + # raising. Missing raw env keys are recoverable for auxiliary + # tasks because fallback entries may use OAuth / credential-pool + # auth (for example openai-codex). _explicit = (resolved_provider or "").strip().lower() if _explicit and _explicit not in {"auto", "openrouter", "custom"}: - raise RuntimeError( - f"Provider '{_explicit}' is set in config.yaml but no API key " - f"was found. Set the {_explicit.upper()}_API_KEY environment " - f"variable, or switch to a different provider with `hermes model`." + fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client( + task, _explicit, ) + if fb_client is not None: + client, final_model = fb_client, fb_model + resolved_provider = fb_label or resolved_provider + else: + raise RuntimeError( + f"Provider '{_explicit}' is set in config.yaml but no API key " + f"was found. Set the {_explicit.upper()}_API_KEY environment " + f"variable, or switch to a different provider with `hermes model`." + ) # For auto/custom with no credentials, try the full auto chain # rather than hardcoding OpenRouter (which may be depleted). # Pass model=None so each provider uses its own default — # resolved_model may be an OpenRouter-format slug that doesn't # work on other providers. - if not resolved_base_url: + if client is None and not resolved_base_url: logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain", task or "call", resolved_provider) client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task) @@ -5858,12 +5889,21 @@ async def async_call_llm( if client is None: _explicit = (resolved_provider or "").strip().lower() if _explicit and _explicit not in {"auto", "openrouter", "custom"}: - raise RuntimeError( - f"Provider '{_explicit}' is set in config.yaml but no API key " - f"was found. Set the {_explicit.upper()}_API_KEY environment " - f"variable, or switch to a different provider with `hermes model`." + fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client( + task, _explicit, ) - if not resolved_base_url: + if fb_client is not None: + client, final_model = _to_async_client( + fb_client, fb_model or "", is_vision=(task == "vision") + ) + resolved_provider = fb_label or resolved_provider + else: + raise RuntimeError( + f"Provider '{_explicit}' is set in config.yaml but no API key " + f"was found. Set the {_explicit.upper()}_API_KEY environment " + f"variable, or switch to a different provider with `hermes model`." + ) + if client is None and not resolved_base_url: logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain", task or "call", resolved_provider) client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task) diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py index ba67f036954..70d997631bc 100644 --- a/agent/conversation_compression.py +++ b/agent/conversation_compression.py @@ -90,6 +90,7 @@ def check_compression_model_feasibility(agent: Any) -> None: try: from agent.auxiliary_client import ( _resolve_task_provider_model, + _try_configured_fallback_for_unavailable_client, get_text_auxiliary_client, ) from agent.model_metadata import ( @@ -97,10 +98,6 @@ def check_compression_model_feasibility(agent: Any) -> None: get_model_context_length, ) - client, aux_model = get_text_auxiliary_client( - "compression", - main_runtime=agent._current_main_runtime(), - ) # Best-effort aux provider label for the warning message. The # configured provider may be "auto", in which case we fall back # to the client's base_url hostname so the user can still tell @@ -109,6 +106,19 @@ def check_compression_model_feasibility(agent: Any) -> None: _aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression") except Exception: _aux_cfg_provider = "" + client, aux_model = get_text_auxiliary_client( + "compression", + main_runtime=agent._current_main_runtime(), + ) + if client is None or not aux_model: + fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client( + "compression", + _aux_cfg_provider, + ) + if fb_client is not None and fb_model: + client, aux_model = fb_client, fb_model + if "(" in fb_label and fb_label.endswith(")"): + _aux_cfg_provider = fb_label.rsplit("(", 1)[1][:-1] if client is None or not aux_model: if _aux_cfg_provider and _aux_cfg_provider != "auto": msg = ( diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index aa75b993495..d790920b1ee 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1949,6 +1949,81 @@ class TestAuxiliaryFallbackLayering: "all fallbacks exhausted" in r.message for r in caplog.records ), f"Expected exhaustion warning, got: {[r.message for r in caplog.records]}" + def test_explicit_provider_no_client_uses_configured_chain_before_error(self, monkeypatch): + """Missing primary credentials should still honor auxiliary fallback_chain.""" + chain_client = MagicMock() + chain_client.chat.completions.create.return_value = MagicMock(choices=[ + MagicMock(message=MagicMock(content="from configured chain")) + ]) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(None, None)), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("ollama-cloud", "deepseek-v4-flash:cloud", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain", + return_value=(chain_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")) as mock_chain: + result = call_llm( + task="compression", + messages=[{"role": "user", "content": "hello"}], + ) + + assert chain_client.chat.completions.create.called + assert result.choices[0].message.content == "from configured chain" + mock_chain.assert_called_once_with( + "compression", + "ollama-cloud", + reason="provider unavailable", + ) + + def test_explicit_provider_no_client_without_chain_keeps_clear_error(self, monkeypatch): + """No fallback configured: keep the existing actionable missing-key error.""" + with patch("agent.auxiliary_client._get_cached_client", + return_value=(None, None)), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("ollama-cloud", "deepseek-v4-flash:cloud", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain", + return_value=(None, None, "")) as mock_chain: + with pytest.raises(RuntimeError, match="Provider 'ollama-cloud'.*no API key"): + call_llm( + task="compression", + messages=[{"role": "user", "content": "hello"}], + ) + + mock_chain.assert_called_once_with( + "compression", + "ollama-cloud", + reason="provider unavailable", + ) + + def test_fallback_entry_openai_codex_uses_oauth_pool_without_inline_key(self): + """Configured Codex fallback resolves through Hermes auth / credential pool.""" + from agent.auxiliary_client import _resolve_fallback_entry + + pool_entry = MagicMock() + pool_entry.id = "codex-pool-1" + pool_entry.runtime_api_key = "codex-oauth-token" + pool_entry.access_token = "codex-oauth-token" + pool_entry.runtime_base_url = "https://chatgpt.com/backend-api/codex" + + real_client = MagicMock() + real_client.api_key = "codex-oauth-token" + real_client.base_url = "https://chatgpt.com/backend-api/codex" + + with patch("agent.auxiliary_client._select_pool_entry", + return_value=(True, pool_entry)), \ + patch("agent.auxiliary_client._read_codex_access_token", + side_effect=AssertionError("should use pool token")), \ + patch("agent.auxiliary_client.OpenAI", return_value=real_client) as mock_openai: + client, model = _resolve_fallback_entry({ + "provider": "openai-codex", + "model": "gpt-5.4-mini", + }) + + assert client is not None + assert model == "gpt-5.4-mini" + mock_openai.assert_called_once() + assert mock_openai.call_args.kwargs["api_key"] == "codex-oauth-token" + class TestTryMainAgentModelFallback: """_try_main_agent_model_fallback resolves the user's main provider+model as a safety net.""" diff --git a/tests/run_agent/test_compression_feasibility.py b/tests/run_agent/test_compression_feasibility.py index 3be0f0235a3..222ebb2c0bb 100644 --- a/tests/run_agent/test_compression_feasibility.py +++ b/tests/run_agent/test_compression_feasibility.py @@ -306,6 +306,39 @@ def test_warns_when_no_auxiliary_provider(mock_get_client): assert agent._compression_warning is not None +def test_no_unavailable_warning_when_configured_fallback_chain_resolves(): + """Primary compression provider can be down if configured fallback works.""" + agent = _make_agent(main_context=200_000, threshold_percent=0.50) + fallback_client = MagicMock() + fallback_client.base_url = "https://chatgpt.com/backend-api/codex" + fallback_client.api_key = "codex-oauth-token" + + messages = [] + agent._emit_status = lambda msg: messages.append(msg) + + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("ollama-cloud", "deepseek-v4-flash:cloud", None, None, None), + ), patch( + "agent.auxiliary_client.get_text_auxiliary_client", + return_value=(None, None), + ), patch( + "agent.auxiliary_client._try_configured_fallback_for_unavailable_client", + return_value=(fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)"), + ) as mock_fallback, patch( + "agent.model_metadata.get_model_context_length", + return_value=200_000, + ) as mock_ctx_len: + agent._check_compression_model_feasibility() + + assert messages == [] + assert agent._compression_warning is None + mock_fallback.assert_called_once_with("compression", "ollama-cloud") + mock_ctx_len.assert_called_once() + assert mock_ctx_len.call_args.args == ("gpt-5.4-mini",) + assert mock_ctx_len.call_args.kwargs["provider"] == "openai-codex" + + def test_skips_check_when_compression_disabled(): """No check performed when compression is disabled.""" agent = _make_agent(compression_enabled=False)