def _is_transient_transport_error(exc: Exception) -> bool:
    """Tell whether *exc* qualifies for a single retry on the same provider.

    This is the "retry the same target once" gate, kept deliberately narrow
    and separate from ``_is_payment_error`` / ``_is_auth_error`` /
    ``_is_rate_limit_error``. An exception qualifies when either:

    * ``_is_connection_error(exc)`` is true (that detector is reused here so
      the two checks cannot drift apart), or
    * it carries an integer HTTP status of 408 or anything in the 5xx range.

    The status is read from ``exc.status_code`` first; when that is missing
    or falsy, ``exc.response.status_code`` is consulted instead.

    Args:
        exc: The exception raised by the provider call.

    Returns:
        True if the failure looks like a transient transport problem,
        False otherwise.
    """
    if not _is_connection_error(exc):
        code = getattr(exc, "status_code", None)
        if not code:
            # Fall back to the status carried by an attached response object.
            response = getattr(exc, "response", None)
            code = getattr(response, "status_code", None)
        if not isinstance(code, int):
            return False
        return code == 408 or 500 <= code < 600
    return True
This is the unified home for the transient retry that + # every auxiliary task (compression, memory flush, title-gen, + # session-search, vision) shares. (PR #16587) + try: + return _validate_llm_response( + client.chat.completions.create(**kwargs), task) + except Exception as transient_err: + if not _is_transient_transport_error(transient_err): + raise + logger.info( + "Auxiliary %s: transient transport error; retrying once on " + "the same provider before fallback: %s", + task or "call", transient_err, + ) + return _validate_llm_response( + client.chat.completions.create(**kwargs), task) except Exception as first_err: if "temperature" in kwargs and _is_unsupported_temperature_error(first_err): retry_kwargs = dict(kwargs) @@ -5614,8 +5653,22 @@ async def async_call_llm( kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"]) try: - return _validate_llm_response( - await client.chat.completions.create(**kwargs), task) + # Retry ONCE on the same provider for a transient transport blip + # before the except-chain escalates to fallback — see call_llm() + # for the rationale. 
class TestTransientTransportRetry:
    """Same-provider retry behaviour of ``call_llm`` on transient errors.

    ``call_llm`` retries ONCE on the same provider for a transient transport
    blip before escalating to the fallback chain.

    Salvaged from PR #16587 (@ARegalado1). The original fixed only the
    context-compression caller; the retry now lives in ``call_llm`` so every
    auxiliary task (compression, memory flush, title-gen, session-search,
    vision) gets the same same-target retry, and the gate reuses the
    canonical ``_is_connection_error`` detector.
    """

    _PRIMARY_URL = "https://openrouter.ai/api/v1"

    @staticmethod
    def _client_at(base_url):
        """Build a mock client that reports *base_url* as its endpoint."""
        stub = MagicMock()
        stub.base_url = base_url
        return stub

    @staticmethod
    def _compress():
        """Invoke ``call_llm`` with the minimal compression request."""
        return call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hi"}],
        )

    def _patches(self, client):
        """Return the three patches that route ``call_llm`` to *client*.

        In order: provider/model resolution, cached-client lookup, and a
        pass-through replacement for response validation.
        """
        resolve_patch = patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openrouter", "some-model", None, None, None),
        )
        client_patch = patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "some-model"),
        )
        validate_patch = patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        )
        return resolve_patch, client_patch, validate_patch

    def test_retries_streaming_close_once_same_provider(self):
        stub = self._client_at(self._PRIMARY_URL)
        create = stub.chat.completions.create
        create.side_effect = [
            Exception(
                "peer closed connection without sending complete message body "
                "(incomplete chunked read)"
            ),
            {"ok": True},
        ]
        resolve_patch, client_patch, validate_patch = self._patches(stub)
        with resolve_patch, client_patch, validate_patch:
            outcome = self._compress()
        assert outcome == {"ok": True}
        # Both attempts hit the same client, so no provider fallback occurred.
        assert create.call_count == 2

    def test_retries_5xx_once_same_provider(self):
        class _Err503(Exception):
            status_code = 503

        stub = self._client_at(self._PRIMARY_URL)
        create = stub.chat.completions.create
        create.side_effect = [_Err503("upstream"), {"ok": True}]
        resolve_patch, client_patch, validate_patch = self._patches(stub)
        with resolve_patch, client_patch, validate_patch:
            outcome = self._compress()
        assert outcome == {"ok": True}
        assert create.call_count == 2

    def test_does_not_retry_non_transient_400(self):
        class _Err400(Exception):
            status_code = 400

        stub = self._client_at(self._PRIMARY_URL)
        create = stub.chat.completions.create
        create.side_effect = _Err400("bad request")
        resolve_patch, client_patch, validate_patch = self._patches(stub)
        with resolve_patch, client_patch, validate_patch, pytest.raises(_Err400):
            self._compress()
        # A non-transient error gets exactly one attempt: no same-target retry.
        assert create.call_count == 1

    def test_second_transient_failure_escalates_to_fallback(self):
        """Two transient failures in a row exhaust the same-target retry and
        fall through to the existing connection-error provider fallback."""
        primary = self._client_at(self._PRIMARY_URL)
        primary.chat.completions.create.side_effect = Exception(
            "peer closed connection without sending complete message body"
        )

        fb_client = self._client_at("https://api.openai.com/v1")
        fb_client.chat.completions.create.return_value = {"fallback": True}

        chain_patch = patch(
            "agent.auxiliary_client._try_configured_fallback_chain",
            return_value=(None, None, ""),
        )
        main_model_patch = patch(
            "agent.auxiliary_client._try_main_agent_model_fallback",
            return_value=(fb_client, "fb-model", "openai"),
        )
        resolve_patch, client_patch, validate_patch = self._patches(primary)
        with resolve_patch, client_patch, validate_patch:
            with chain_patch, main_model_patch:
                outcome = self._compress()
        assert outcome == {"fallback": True}
        # The primary is tried twice (initial call plus the same-target
        # retry) before the fallback client is used once.
        assert primary.chat.completions.create.call_count == 2
        assert fb_client.chat.completions.create.call_count == 1