fix(fallback): skip chain entries matching current provider/model/base_url (#22780)

_try_activate_fallback() walked the chain by index without comparing
the candidate entry against the currently-failing backend. So a
misconfigured chain that listed the same provider+model as the primary,
or two custom_providers entries pointing at the same shim URL, would
loop the same failure 3x for the same backend.

After the fix, advance() skips:
  - entries where (provider, model) match the current agent's
  - entries with a base_url + model matching the current backend
    (catches two custom_providers names pointing at the same shim)

Recursing through self._try_activate_fallback() continues to the next
chain entry; if everything matches, returns False and the caller
moves on without retrying the same broken path.

3 regression tests covering same-provider-same-model skip, same-base_url-
same-model skip, and the all-self-matching-returns-False exhaustion path.

Closes #22548 (the Hermes-side portion). The 120s timeout itself in
the downstream claude-cli shim is a deployment concern documented in
that issue's wherewolf87 comment.
This commit is contained in:
Teknium 2026-05-09 12:48:19 -07:00 committed by GitHub
parent 70bc52e408
commit e7c0d6ee53
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 111 additions and 0 deletions

View file

@ -8042,6 +8042,32 @@ class AIAgent:
if not fb_provider or not fb_model:
return self._try_activate_fallback() # skip invalid, try next
# Skip entries that resolve to the current (provider, model) — falling
# back to the same backend that just failed loops the failure. Compare
# base_url too so two distinct custom_providers entries pointing at the
# same shim/proxy URL also dedup. See issue #22548.
current_provider = (getattr(self, "provider", "") or "").strip().lower()
current_model = (getattr(self, "model", "") or "").strip()
current_base_url = str(getattr(self, "base_url", "") or "").rstrip("/").lower()
fb_base_url_for_dedup = (fb.get("base_url") or "").strip().rstrip("/").lower()
if fb_provider == current_provider and fb_model == current_model:
logging.warning(
"Fallback skip: chain entry %s/%s matches current provider/model",
fb_provider, fb_model,
)
return self._try_activate_fallback()
if (
fb_base_url_for_dedup
and current_base_url
and fb_base_url_for_dedup == current_base_url
and fb_model == current_model
):
logging.warning(
"Fallback skip: chain entry base_url %s matches current backend",
fb_base_url_for_dedup,
)
return self._try_activate_fallback()
# Use centralized router for client construction.
# raw_codex=True because the main agent needs direct responses.stream()
# access for Codex providers.

View file

@ -220,3 +220,88 @@ class TestPoolRotationRoom:
def test_many_credentials_available_returns_true(self):
assert _pool_may_recover_from_rate_limit(_pool(10)) is True
# ── Skip-self dedup (#22548) ───────────────────────────────────────────────
class TestFallbackChainDedup:
"""A fallback chain entry that resolves to the current provider/model
(or the same custom-provider base_url) must be skipped, not retried.
Otherwise a misconfigured chain or two custom_providers entries pointing
at the same shim loop the same failure. See issue #22548."""
def test_skips_entry_matching_current_provider_and_model(self):
"""Chain has [same-as-current, real-fallback]; activate must skip
the first and use the second."""
fbs = [
# First entry == current state. Should be skipped.
{"provider": "openrouter", "model": "z-ai/glm-4.7"},
# Second entry: real fallback.
{"provider": "zai", "model": "glm-4.7"},
]
agent = _make_agent(fallback_model=fbs)
agent.provider = "openrouter"
agent.model = "z-ai/glm-4.7"
agent.base_url = "https://openrouter.ai/api/v1"
# Stub out resolve_provider_client so we can assert which entry was
# actually used — return a MagicMock client tagged with the provider.
called = []
def _resolve(provider, model=None, raw_codex=False, **kwargs):
called.append((provider, model))
return _mock_client(), model
with patch("agent.auxiliary_client.resolve_provider_client", side_effect=_resolve):
with patch("hermes_cli.model_normalize.normalize_model_for_provider", side_effect=lambda m, p: m):
ok = agent._try_activate_fallback()
assert ok is True
# The first entry was skipped — only the second reached resolve.
assert called == [("zai", "glm-4.7")], (
f"expected fallback to skip same-state entry, got call order: {called}"
)
def test_skips_entry_matching_current_base_url_and_model(self):
"""Two custom_providers entries pointing at the same shim URL
with the same model should dedup even if their provider names differ."""
fbs = [
# Different provider name but same shim URL + model — same backend.
{"provider": "claude-cli-alt", "model": "claude-opus-4.7",
"base_url": "http://127.0.0.1:7891/v1"},
# Real different fallback.
{"provider": "openrouter", "model": "anthropic/claude-opus-4.7"},
]
agent = _make_agent(fallback_model=fbs)
agent.provider = "claude-cli"
agent.model = "claude-opus-4.7"
agent.base_url = "http://127.0.0.1:7891/v1"
called = []
def _resolve(provider, model=None, raw_codex=False, **kwargs):
called.append((provider, model))
return _mock_client(), model
with patch("agent.auxiliary_client.resolve_provider_client", side_effect=_resolve):
with patch("hermes_cli.model_normalize.normalize_model_for_provider", side_effect=lambda m, p: m):
ok = agent._try_activate_fallback()
assert ok is True
# Same shim/base_url+model entry skipped, second one used.
assert called == [("openrouter", "anthropic/claude-opus-4.7")], (
f"expected base_url-aware dedup, got call order: {called}"
)
def test_returns_false_when_only_self_matching_entries(self):
"""A chain with only self-matching entries exhausts to False."""
fbs = [
{"provider": "openrouter", "model": "z-ai/glm-4.7"},
]
agent = _make_agent(fallback_model=fbs)
agent.provider = "openrouter"
agent.model = "z-ai/glm-4.7"
agent.base_url = "https://openrouter.ai/api/v1"
with patch("agent.auxiliary_client.resolve_provider_client") as mock_resolve:
ok = agent._try_activate_fallback()
assert ok is False
mock_resolve.assert_not_called()