fix(fallback): attach credential pool after provider switch

When automatic fallback activates a provider that differs from the
primary, try_activate_fallback() cleared the primary's pool (to avoid
cross-provider base_url contamination, #33163) but never loaded the
fallback provider's own pool. The fallback then ran with no pool, so
rate_limit/billing/auth recovery couldn't rotate its credentials.

After clearing a mismatched pool, load_pool(fb_provider) and attach it
when it has credentials, so provider-specific rotation continues to
work on the fallback target.
This commit is contained in:
herbalizer404 2026-06-27 04:07:59 -07:00 committed by Teknium
parent 635841d210
commit 3fe16e3cd5
2 changed files with 76 additions and 13 deletions

View file

@ -1210,14 +1210,16 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
agent._transport_cache.clear()
agent._fallback_activated = True
# Clear the credential pool when the fallback provider doesn't match
# the pool's provider. The pool was seeded for the primary provider;
# leaving it attached means downstream recovery (rate_limit / billing /
# auth) calls ``_swap_credential`` with a primary entry which overwrites
# the agent's ``base_url`` back to the primary's endpoint — every
# fallback request then 404s against the wrong host. See #33163.
# Rebind the credential pool to the fallback provider when the provider
# changes. Keeping the primary pool attached would make downstream
# recovery (rate_limit / billing / auth) mutate the wrong credential
# set and can overwrite the fallback's base_url back to the primary
# endpoint. See #33163.
#
# When the fallback shares the pool's provider (e.g. both openrouter
# entries with different routing) the pool is preserved.
# entries with different routing) the pool is preserved. When the
# providers differ, load the fallback provider's own pool if one exists
# so provider-specific rotation continues to work after the switch.
_existing_pool = getattr(agent, "_credential_pool", None)
if _existing_pool is not None:
_pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower()
@ -1228,6 +1230,22 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
fb_provider, fb_model, _pool_provider,
)
agent._credential_pool = None
if getattr(agent, "_credential_pool", None) is None:
try:
from agent.credential_pool import load_pool
fallback_pool = load_pool(fb_provider)
if fallback_pool and fallback_pool.has_credentials():
agent._credential_pool = fallback_pool
logger.info(
"Fallback to %s/%s: attached fallback credential pool",
fb_provider, fb_model,
)
except Exception as exc:
logger.debug(
"Fallback to %s/%s: could not attach credential pool: %s",
fb_provider, fb_model, exc,
)
# Honor per-provider / per-model request_timeout_seconds for the
# fallback target (same knob the primary client uses). None = use

View file

@ -11,8 +11,8 @@ _swap_credential continue operating on the PRIMARY's credential pool during
fallback calls, contaminating primary state with fallback-provider errors.
"""
import sys
from unittest.mock import MagicMock
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
@ -81,10 +81,8 @@ class TestFallbackCredentialIsolation:
def test_fallback_clears_primary_pool(self):
"""When switching from openai-codex to openrouter, the codex pool is cleared."""
# Import the real method
sys.path.insert(0, "/mnt/g/knowledge/project/hermes-agent")
# We test the isolation logic directly, not the full _try_activate_fallback
# which has many dependencies. Instead we verify the pool-clearing guard.
# We test the isolation logic directly here as a minimal guard; the
# integration-style test below calls the real fallback activator.
agent = _make_agent(provider="openai-codex", base_url="https://chatgpt.com/backend-api/codex")
agent._fallback_activated = True
@ -122,6 +120,53 @@ class TestFallbackCredentialIsolation:
"Pool should be preserved when fallback provider matches pool provider"
)
def test_fallback_attaches_matching_pool_after_clear(self):
"""Provider-switch fallback should attach the fallback provider's pool."""
from agent.chat_completion_helpers import try_activate_fallback
agent = _make_agent(
provider="ollama-cloud",
model="glm-5.2",
base_url="https://ollama.com/v1",
api_mode="chat_completions",
)
agent._fallback_chain = [{"provider": "openai-codex", "model": "gpt-5.5"}]
agent._credential_pool = _make_pool("ollama-cloud")
agent._buffer_status = MagicMock()
agent._is_azure_openai_url.return_value = False
agent._is_direct_openai_url.return_value = False
agent._provider_model_requires_responses_api.return_value = False
agent._anthropic_prompt_cache_policy.return_value = (False, False)
agent._ensure_lmstudio_runtime_loaded = MagicMock()
agent._replace_primary_openai_client = MagicMock()
agent.context_compressor = None
fallback_client = SimpleNamespace(
api_key="codex-key",
base_url="https://chatgpt.com/backend-api/codex",
_custom_headers={},
)
fallback_pool = _make_pool("openai-codex")
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(fallback_client, "gpt-5.5"),
) as resolve_provider_client, patch(
"agent.credential_pool.load_pool",
return_value=fallback_pool,
) as load_pool:
assert try_activate_fallback(agent) is True
resolve_provider_client.assert_called_once()
load_pool.assert_called_once_with("openai-codex")
assert agent.provider == "openai-codex"
assert agent.model == "gpt-5.5"
assert agent.base_url == "https://chatgpt.com/backend-api/codex"
assert agent.api_mode == "codex_responses"
assert agent._credential_pool is fallback_pool
assert agent._credential_pool.provider == "openai-codex"
assert agent._transport_cache == {}
# ── Test: _recover_with_credential_pool rejects mismatched pool ──────