mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(auxiliary): honor fallback chain when compression provider auth is unavailable
When an explicit aux provider cannot build a client before any request is sent (missing raw env key, exhausted/unavailable OAuth or credential-pool auth, resolver returning (None, None)), call_llm raised a misleading "no API key was found" error and bypassed the configured fallback_chain entirely. A provider authenticated through Hermes auth / the credential pool (e.g. ollama-cloud) whose pool entry is exhausted hit this path, so compression failed instead of routing to the configured fallback. Adds _try_configured_fallback_for_unavailable_client() and wires it into both sync and async call_llm before the raise, and into the startup compression feasibility check. Salvaged from #51835 by @herbalizer404.
This commit is contained in:
parent
751adfa6b9
commit
b82c83d320
4 changed files with 174 additions and 16 deletions
|
|
@ -3205,6 +3205,28 @@ def _try_configured_fallback_chain(
|
|||
return None, None, ""
|
||||
|
||||
|
||||
def _try_configured_fallback_for_unavailable_client(
    task: Optional[str],
    failed_provider: str,
) -> Tuple[Optional[Any], Optional[str], str]:
    """Consult the task's fallback_chain when an explicit aux provider yields no client.

    This is the pre-request "no client" path: a missing raw env key,
    unavailable OAuth/pool credentials, or a provider resolver that
    returned ``(None, None)``.  Only the configured per-task fallback
    chain is consulted here; the main-agent model remains the
    last-resort runtime fallback for request-time capacity errors.

    Args:
        task: Auxiliary task name.  A falsy value skips the chain.
        failed_provider: Provider that could not build a client.  It is
            stripped and lower-cased before use; an empty value or
            ``"auto"`` skips the chain.

    Returns:
        Whatever ``_try_configured_fallback_chain`` returns for the
        normalized provider, or ``(None, None, "")`` when the chain is
        skipped.
    """
    provider_name = (failed_provider or "").strip().lower()
    # Only a concrete, explicitly named provider qualifies for the chain.
    has_explicit_provider = bool(provider_name) and provider_name != "auto"
    if task and has_explicit_provider:
        return _try_configured_fallback_chain(
            task,
            provider_name,
            reason="provider unavailable",
        )
    return None, None, ""
|
||||
|
||||
|
||||
def _fallback_entry_api_key(entry: Dict[str, Any]) -> Optional[str]:
|
||||
"""Resolve inline or env-backed API key from a fallback-chain entry."""
|
||||
explicit = str(entry.get("api_key") or "").strip()
|
||||
|
|
@ -5346,21 +5368,30 @@ def call_llm(
|
|||
)
|
||||
if client is None:
|
||||
# When the user explicitly chose a non-OpenRouter provider but no
|
||||
# credentials were found, fail fast instead of silently routing
|
||||
# through OpenRouter (which causes confusing 404s).
|
||||
# credentials were found, honor the task fallback_chain before
|
||||
# raising. Missing raw env keys are recoverable for auxiliary
|
||||
# tasks because fallback entries may use OAuth / credential-pool
|
||||
# auth (for example openai-codex).
|
||||
_explicit = (resolved_provider or "").strip().lower()
|
||||
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
|
||||
task, _explicit,
|
||||
)
|
||||
if fb_client is not None:
|
||||
client, final_model = fb_client, fb_model
|
||||
resolved_provider = fb_label or resolved_provider
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
# For auto/custom with no credentials, try the full auto chain
|
||||
# rather than hardcoding OpenRouter (which may be depleted).
|
||||
# Pass model=None so each provider uses its own default —
|
||||
# resolved_model may be an OpenRouter-format slug that doesn't
|
||||
# work on other providers.
|
||||
if not resolved_base_url:
|
||||
if client is None and not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", main_runtime=main_runtime, task=task)
|
||||
|
|
@ -5858,12 +5889,21 @@ async def async_call_llm(
|
|||
if client is None:
|
||||
_explicit = (resolved_provider or "").strip().lower()
|
||||
if _explicit and _explicit not in {"auto", "openrouter", "custom"}:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
|
||||
task, _explicit,
|
||||
)
|
||||
if not resolved_base_url:
|
||||
if fb_client is not None:
|
||||
client, final_model = _to_async_client(
|
||||
fb_client, fb_model or "", is_vision=(task == "vision")
|
||||
)
|
||||
resolved_provider = fb_label or resolved_provider
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
if client is None and not resolved_base_url:
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True, main_runtime=main_runtime, task=task)
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
|||
try:
|
||||
from agent.auxiliary_client import (
|
||||
_resolve_task_provider_model,
|
||||
_try_configured_fallback_for_unavailable_client,
|
||||
get_text_auxiliary_client,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
|
|
@ -97,10 +98,6 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
|||
get_model_context_length,
|
||||
)
|
||||
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
# Best-effort aux provider label for the warning message. The
|
||||
# configured provider may be "auto", in which case we fall back
|
||||
# to the client's base_url hostname so the user can still tell
|
||||
|
|
@ -109,6 +106,19 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
|||
_aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
|
||||
except Exception:
|
||||
_aux_cfg_provider = ""
|
||||
client, aux_model = get_text_auxiliary_client(
|
||||
"compression",
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
if client is None or not aux_model:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_for_unavailable_client(
|
||||
"compression",
|
||||
_aux_cfg_provider,
|
||||
)
|
||||
if fb_client is not None and fb_model:
|
||||
client, aux_model = fb_client, fb_model
|
||||
if "(" in fb_label and fb_label.endswith(")"):
|
||||
_aux_cfg_provider = fb_label.rsplit("(", 1)[1][:-1]
|
||||
if client is None or not aux_model:
|
||||
if _aux_cfg_provider and _aux_cfg_provider != "auto":
|
||||
msg = (
|
||||
|
|
|
|||
|
|
@ -1949,6 +1949,81 @@ class TestAuxiliaryFallbackLayering:
|
|||
"all fallbacks exhausted" in r.message for r in caplog.records
|
||||
), f"Expected exhaustion warning, got: {[r.message for r in caplog.records]}"
|
||||
|
||||
def test_explicit_provider_no_client_uses_configured_chain_before_error(self, monkeypatch):
    """An explicit provider with no client must route through the fallback_chain."""
    # Fallback client whose completion carries a recognisable payload.
    fallback_reply = MagicMock(message=MagicMock(content="from configured chain"))
    chain_client = MagicMock()
    chain_client.chat.completions.create.return_value = MagicMock(choices=[fallback_reply])

    primary_resolution = ("ollama-cloud", "deepseek-v4-flash:cloud", None, None, None)
    chain_resolution = (chain_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(None, None),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=primary_resolution,
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=chain_resolution,
    ) as mock_chain:
        result = call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hello"}],
        )

    # The request must have been served by the fallback client.
    assert chain_client.chat.completions.create.called
    assert result.choices[0].message.content == "from configured chain"
    mock_chain.assert_called_once_with(
        "compression",
        "ollama-cloud",
        reason="provider unavailable",
    )
|
||||
|
||||
def test_explicit_provider_no_client_without_chain_keeps_clear_error(self, monkeypatch):
    """When the chain resolves nothing, the actionable missing-key error is still raised."""
    primary_resolution = ("ollama-cloud", "deepseek-v4-flash:cloud", None, None, None)
    expected_error = "Provider 'ollama-cloud'.*no API key"

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(None, None),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=primary_resolution,
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=(None, None, ""),
    ) as mock_chain:
        with pytest.raises(RuntimeError, match=expected_error):
            call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hello"}],
            )

    # The chain is still consulted exactly once before the error surfaces.
    mock_chain.assert_called_once_with(
        "compression",
        "ollama-cloud",
        reason="provider unavailable",
    )
|
||||
|
||||
def test_fallback_entry_openai_codex_uses_oauth_pool_without_inline_key(self):
    """A Codex fallback entry with no inline key takes its token from the credential pool."""
    from agent.auxiliary_client import _resolve_fallback_entry

    pool_token = "codex-oauth-token"
    codex_base_url = "https://chatgpt.com/backend-api/codex"

    pool_entry = MagicMock()
    pool_entry.id = "codex-pool-1"
    pool_entry.runtime_api_key = pool_token
    pool_entry.access_token = pool_token
    pool_entry.runtime_base_url = codex_base_url

    real_client = MagicMock()
    real_client.api_key = pool_token
    real_client.base_url = codex_base_url

    fallback_entry = {
        "provider": "openai-codex",
        "model": "gpt-5.4-mini",
    }

    with patch(
        "agent.auxiliary_client._select_pool_entry",
        return_value=(True, pool_entry),
    ), patch(
        # Reading the token file directly would bypass the pool; fail loudly if it happens.
        "agent.auxiliary_client._read_codex_access_token",
        side_effect=AssertionError("should use pool token"),
    ), patch(
        "agent.auxiliary_client.OpenAI",
        return_value=real_client,
    ) as mock_openai:
        client, model = _resolve_fallback_entry(fallback_entry)

    assert client is not None
    assert model == "gpt-5.4-mini"
    mock_openai.assert_called_once()
    assert mock_openai.call_args.kwargs["api_key"] == "codex-oauth-token"
|
||||
|
||||
|
||||
class TestTryMainAgentModelFallback:
|
||||
"""_try_main_agent_model_fallback resolves the user's main provider+model as a safety net."""
|
||||
|
|
|
|||
|
|
@ -306,6 +306,39 @@ def test_warns_when_no_auxiliary_provider(mock_get_client):
|
|||
assert agent._compression_warning is not None
|
||||
|
||||
|
||||
def test_no_unavailable_warning_when_configured_fallback_chain_resolves():
    """No warning is emitted when the configured fallback covers an unavailable primary."""
    agent = _make_agent(main_context=200_000, threshold_percent=0.50)

    fallback_client = MagicMock()
    fallback_client.base_url = "https://chatgpt.com/backend-api/codex"
    fallback_client.api_key = "codex-oauth-token"

    # Capture every status message the feasibility check emits.
    messages = []

    def _record_status(msg):
        return messages.append(msg)

    agent._emit_status = _record_status

    primary_resolution = ("ollama-cloud", "deepseek-v4-flash:cloud", None, None, None)
    fallback_resolution = (fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")

    with patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=primary_resolution,
    ), patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(None, None),
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_for_unavailable_client",
        return_value=fallback_resolution,
    ) as mock_fallback, patch(
        "agent.model_metadata.get_model_context_length",
        return_value=200_000,
    ) as mock_ctx_len:
        agent._check_compression_model_feasibility()

    assert messages == []
    assert agent._compression_warning is None
    mock_fallback.assert_called_once_with("compression", "ollama-cloud")
    # The context-length lookup must target the fallback model and provider.
    mock_ctx_len.assert_called_once()
    assert mock_ctx_len.call_args.args == ("gpt-5.4-mini",)
    assert mock_ctx_len.call_args.kwargs["provider"] == "openai-codex"
|
||||
|
||||
|
||||
def test_skips_check_when_compression_disabled():
|
||||
"""No check performed when compression is disabled."""
|
||||
agent = _make_agent(compression_enabled=False)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue