diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index 9a5bcba0924..5c9c8cd771d 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -1210,14 +1210,16 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool agent._transport_cache.clear() agent._fallback_activated = True - # Clear the credential pool when the fallback provider doesn't match - # the pool's provider. The pool was seeded for the primary provider; - # leaving it attached means downstream recovery (rate_limit / billing / - # auth) calls ``_swap_credential`` with a primary entry which overwrites - # the agent's ``base_url`` back to the primary's endpoint — every - # fallback request then 404s against the wrong host. See #33163. + # Rebind the credential pool to the fallback provider when the provider + # changes. Keeping the primary pool attached would make downstream + # recovery (rate_limit / billing / auth) mutate the wrong credential + # set and can overwrite the fallback's base_url back to the primary + # endpoint. See #33163. + # # When the fallback shares the pool's provider (e.g. both openrouter - # entries with different routing) the pool is preserved. + # entries with different routing) the pool is preserved. When the + # providers differ, load the fallback provider's own pool if one exists + # so provider-specific rotation continues to work after the switch. _existing_pool = getattr(agent, "_credential_pool", None) if _existing_pool is not None: _pool_provider = (getattr(_existing_pool, "provider", "") or "").strip().lower() @@ -1228,6 +1230,22 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool fb_provider, fb_model, _pool_provider, ) agent._credential_pool = None + if getattr(agent, "_credential_pool", None) is None: + try: + from agent.credential_pool import load_pool + + fallback_pool = load_pool(fb_provider) + if fallback_pool and fallback_pool.has_credentials(): + agent._credential_pool = fallback_pool + logger.info( + "Fallback to %s/%s: attached fallback credential pool", + fb_provider, fb_model, + ) + except Exception as exc: + logger.debug( + "Fallback to %s/%s: could not attach credential pool: %s", + fb_provider, fb_model, exc, + ) # Honor per-provider / per-model request_timeout_seconds for the # fallback target (same knob the primary client uses). None = use diff --git a/tests/run_agent/test_fallback_credential_isolation.py b/tests/run_agent/test_fallback_credential_isolation.py index 54e352b3b88..0427772be98 100644 --- a/tests/run_agent/test_fallback_credential_isolation.py +++ b/tests/run_agent/test_fallback_credential_isolation.py @@ -11,8 +11,8 @@ _swap_credential continue operating on the PRIMARY's credential pool during fallback calls, contaminating primary state with fallback-provider errors. """ -import sys -from unittest.mock import MagicMock +from types import SimpleNamespace +from unittest.mock import MagicMock, patch @@ -81,10 +81,8 @@ class TestFallbackCredentialIsolation: def test_fallback_clears_primary_pool(self): """When switching from openai-codex to openrouter, the codex pool is cleared.""" - # Import the real method - sys.path.insert(0, "/mnt/g/knowledge/project/hermes-agent") - # We test the isolation logic directly, not the full _try_activate_fallback - # which has many dependencies. Instead we verify the pool-clearing guard. + # We test the isolation logic directly here as a minimal guard; the + # integration-style test below calls the real fallback activator. agent = _make_agent(provider="openai-codex", base_url="https://chatgpt.com/backend-api/codex") agent._fallback_activated = True @@ -122,6 +120,53 @@ class TestFallbackCredentialIsolation: "Pool should be preserved when fallback provider matches pool provider" ) + def test_fallback_attaches_matching_pool_after_clear(self): + """Provider-switch fallback should attach the fallback provider's pool.""" + from agent.chat_completion_helpers import try_activate_fallback + + agent = _make_agent( + provider="ollama-cloud", + model="glm-5.2", + base_url="https://ollama.com/v1", + api_mode="chat_completions", + ) + agent._fallback_chain = [{"provider": "openai-codex", "model": "gpt-5.5"}] + agent._credential_pool = _make_pool("ollama-cloud") + agent._buffer_status = MagicMock() + agent._is_azure_openai_url.return_value = False + agent._is_direct_openai_url.return_value = False + agent._provider_model_requires_responses_api.return_value = False + agent._anthropic_prompt_cache_policy.return_value = (False, False) + agent._ensure_lmstudio_runtime_loaded = MagicMock() + agent._replace_primary_openai_client = MagicMock() + agent.context_compressor = None + + fallback_client = SimpleNamespace( + api_key="codex-key", + base_url="https://chatgpt.com/backend-api/codex", + _custom_headers={}, + ) + fallback_pool = _make_pool("openai-codex") + + with patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(fallback_client, "gpt-5.5"), + ) as resolve_provider_client, patch( + "agent.credential_pool.load_pool", + return_value=fallback_pool, + ) as load_pool: + assert try_activate_fallback(agent) is True + + resolve_provider_client.assert_called_once() + load_pool.assert_called_once_with("openai-codex") + assert agent.provider == "openai-codex" + assert agent.model == "gpt-5.5" + assert agent.base_url == "https://chatgpt.com/backend-api/codex" + assert agent.api_mode == "codex_responses" + assert agent._credential_pool is fallback_pool + assert agent._credential_pool.provider == "openai-codex" + assert agent._transport_cache == {} + # ── Test: _recover_with_credential_pool rejects mismatched pool ──────