mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(auxiliary): retry transient transport error once before fallback (#16587)
Some checks failed
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
Tests / test (1) (push) Waiting to run
Tests / test (2) (push) Waiting to run
Tests / test (3) (push) Waiting to run
Tests / test (4) (push) Waiting to run
Tests / test (5) (push) Waiting to run
Tests / test (6) (push) Waiting to run
Tests / save-durations (push) Blocked by required conditions
Tests / e2e (push) Waiting to run
Nix Lockfile Fix / auto-fix-main (push) Has been cancelled
Nix Lockfile Fix / fix (push) Has been cancelled
Some checks failed
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
Tests / test (1) (push) Waiting to run
Tests / test (2) (push) Waiting to run
Tests / test (3) (push) Waiting to run
Tests / test (4) (push) Waiting to run
Tests / test (5) (push) Waiting to run
Tests / test (6) (push) Waiting to run
Tests / save-durations (push) Blocked by required conditions
Tests / e2e (push) Waiting to run
Nix Lockfile Fix / auto-fix-main (push) Has been cancelled
Nix Lockfile Fix / fix (push) Has been cancelled
A one-off transient transport failure (streaming-close / incomplete chunked read / 5xx / 408) on an auxiliary LLM call escalated straight to provider/model fallback (or, for context compression, dropped the summary and entered cooldown), even when an immediate retry on the same provider would have succeeded. Add a single same-target retry at the top of call_llm() and async_call_llm() — before the existing except-chain — gated on a new _is_transient_transport_error() that reuses the canonical _is_connection_error() detector plus a 5xx/408 status check. A second failure (or any non-transient error: auth, other 4xx, malformed payload) falls through to first_err and the existing fallback handling unchanged. This lives in call_llm so every auxiliary task (compression, memory flush, title generation, session search, vision) shares one transient-retry surface, rather than each caller re-implementing it. The context compressor needs no change — it calls call_llm and inherits the retry; its existing fallback-to-main path (#18458) now composes naturally (retry the aux model once, then fall back to main only if the retry also fails). Co-authored-by: ARegalado1 <alberto.regalado@ymail.com>
This commit is contained in:
parent
4107076128
commit
02a4d66951
3 changed files with 160 additions and 4 deletions
|
|
@ -2476,6 +2476,25 @@ def _is_connection_error(exc: Exception) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _is_transient_transport_error(exc: Exception) -> bool:
|
||||
"""Return True for a one-off transport blip worth retrying ONCE on the
|
||||
same provider before any provider/model fallback.
|
||||
|
||||
Covers connection/streaming-close errors (via the canonical
|
||||
``_is_connection_error`` detector, shared so the two cannot drift) plus a
|
||||
pure 5xx/408 HTTP status. Deliberately narrow: this is the "retry the
|
||||
same target once" gate, distinct from ``_is_payment_error`` /
|
||||
``_is_auth_error`` / ``_is_rate_limit_error`` which the except-chain
|
||||
handles by switching provider, refreshing creds, or rotating the pool.
|
||||
"""
|
||||
if _is_connection_error(exc):
|
||||
return True
|
||||
status = getattr(exc, "status_code", None) or getattr(
|
||||
getattr(exc, "response", None), "status_code", None
|
||||
)
|
||||
return isinstance(status, int) and (status == 408 or 500 <= status < 600)
|
||||
|
||||
|
||||
def _is_auth_error(exc: Exception) -> bool:
|
||||
"""Detect auth failures that should trigger provider-specific refresh."""
|
||||
status = getattr(exc, "status_code", None)
|
||||
|
|
@ -5147,8 +5166,28 @@ def call_llm(
|
|||
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
|
||||
# then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
# Retry ONCE on the same provider for a one-off transient transport
|
||||
# blip (streaming-close / incomplete chunked read / 5xx / 408) before
|
||||
# the except-chain below escalates to provider/model fallback. A
|
||||
# single dropped connection shouldn't abandon an otherwise-healthy
|
||||
# provider. A second failure (or any non-transient error) falls
|
||||
# through to ``first_err`` and the existing fallback handling
|
||||
# unchanged. This is the unified home for the transient retry that
|
||||
# every auxiliary task (compression, memory flush, title-gen,
|
||||
# session-search, vision) shares. (PR #16587)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as transient_err:
|
||||
if not _is_transient_transport_error(transient_err):
|
||||
raise
|
||||
logger.info(
|
||||
"Auxiliary %s: transient transport error; retrying once on "
|
||||
"the same provider before fallback: %s",
|
||||
task or "call", transient_err,
|
||||
)
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
|
|
@ -5614,8 +5653,22 @@ async def async_call_llm(
|
|||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
# Retry ONCE on the same provider for a transient transport blip
|
||||
# before the except-chain escalates to fallback — see call_llm()
|
||||
# for the rationale. (PR #16587)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as transient_err:
|
||||
if not _is_transient_transport_error(transient_err):
|
||||
raise
|
||||
logger.info(
|
||||
"Auxiliary %s (async): transient transport error; retrying "
|
||||
"once on the same provider before fallback: %s",
|
||||
task or "call", transient_err,
|
||||
)
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
|
||||
# Auto-extracted from noreply emails + manual overrides
|
||||
AUTHOR_MAP = {
|
||||
"alberto.regalado@ymail.com": "ARegalado1",
|
||||
"alchemistchaos@protonmail.com": "AlchemistChaos", # co-author only
|
||||
"gilad@smiti.ai": "giladbau",
|
||||
"yusufalweshdemir@gmail.com": "Dusk1e",
|
||||
|
|
|
|||
|
|
@ -1794,6 +1794,108 @@ def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTransientTransportRetry:
|
||||
"""call_llm retries ONCE on the same provider for a transient transport
|
||||
blip before escalating to the fallback chain.
|
||||
|
||||
Salvaged from PR #16587 (@ARegalado1). The original fixed only the
|
||||
context-compression caller; this lives in call_llm so every auxiliary
|
||||
task (compression, memory flush, title-gen, session-search, vision)
|
||||
gets the same same-target retry, and the gate reuses the canonical
|
||||
_is_connection_error detector.
|
||||
"""
|
||||
|
||||
def _patches(self, client):
|
||||
return (
|
||||
patch(
|
||||
"agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openrouter", "some-model", None, None, None),
|
||||
),
|
||||
patch(
|
||||
"agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "some-model"),
|
||||
),
|
||||
patch(
|
||||
"agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp,
|
||||
),
|
||||
)
|
||||
|
||||
def test_retries_streaming_close_once_same_provider(self):
|
||||
client = MagicMock()
|
||||
client.base_url = "https://openrouter.ai/api/v1"
|
||||
client.chat.completions.create.side_effect = [
|
||||
Exception(
|
||||
"peer closed connection without sending complete message body "
|
||||
"(incomplete chunked read)"
|
||||
),
|
||||
{"ok": True},
|
||||
]
|
||||
p1, p2, p3 = self._patches(client)
|
||||
with p1, p2, p3:
|
||||
result = call_llm(task="compression", messages=[{"role": "user", "content": "hi"}])
|
||||
assert result == {"ok": True}
|
||||
# Same client called twice — no provider fallback needed.
|
||||
assert client.chat.completions.create.call_count == 2
|
||||
|
||||
def test_retries_5xx_once_same_provider(self):
|
||||
class _Err503(Exception):
|
||||
status_code = 503
|
||||
|
||||
client = MagicMock()
|
||||
client.base_url = "https://openrouter.ai/api/v1"
|
||||
client.chat.completions.create.side_effect = [_Err503("upstream"), {"ok": True}]
|
||||
p1, p2, p3 = self._patches(client)
|
||||
with p1, p2, p3:
|
||||
result = call_llm(task="compression", messages=[{"role": "user", "content": "hi"}])
|
||||
assert result == {"ok": True}
|
||||
assert client.chat.completions.create.call_count == 2
|
||||
|
||||
def test_does_not_retry_non_transient_400(self):
|
||||
class _Err400(Exception):
|
||||
status_code = 400
|
||||
|
||||
client = MagicMock()
|
||||
client.base_url = "https://openrouter.ai/api/v1"
|
||||
client.chat.completions.create.side_effect = _Err400("bad request")
|
||||
p1, p2, p3 = self._patches(client)
|
||||
with p1, p2, p3, pytest.raises(_Err400):
|
||||
call_llm(task="compression", messages=[{"role": "user", "content": "hi"}])
|
||||
# Non-transient: single attempt, no same-target retry.
|
||||
assert client.chat.completions.create.call_count == 1
|
||||
|
||||
def test_second_transient_failure_escalates_to_fallback(self):
|
||||
"""Two transient failures in a row exhaust the same-target retry and
|
||||
fall through to the existing connection-error provider fallback."""
|
||||
primary = MagicMock()
|
||||
primary.base_url = "https://openrouter.ai/api/v1"
|
||||
primary.chat.completions.create.side_effect = Exception(
|
||||
"peer closed connection without sending complete message body"
|
||||
)
|
||||
|
||||
fb_client = MagicMock()
|
||||
fb_client.base_url = "https://api.openai.com/v1"
|
||||
fb_client.chat.completions.create.return_value = {"fallback": True}
|
||||
|
||||
p1, p2, p3 = self._patches(primary)
|
||||
with (
|
||||
p1, p2, p3,
|
||||
patch(
|
||||
"agent.auxiliary_client._try_configured_fallback_chain",
|
||||
return_value=(None, None, ""),
|
||||
),
|
||||
patch(
|
||||
"agent.auxiliary_client._try_main_agent_model_fallback",
|
||||
return_value=(fb_client, "fb-model", "openai"),
|
||||
),
|
||||
):
|
||||
result = call_llm(task="compression", messages=[{"role": "user", "content": "hi"}])
|
||||
assert result == {"fallback": True}
|
||||
# Primary tried twice (initial + same-target retry), then fallback.
|
||||
assert primary.chat.completions.create.call_count == 2
|
||||
assert fb_client.chat.completions.create.call_count == 1
|
||||
|
||||
|
||||
class TestIsConnectionError:
|
||||
"""Tests for _is_connection_error detection."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue