mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(agent): route content-filter stream stalls to fallback chain (#32421)
When a provider's output-layer safety filter (MiniMax "output new_sensitive (1027)", Azure content_filter, etc.) kills a streaming response after deltas were already sent, interruptible_streaming_api_call swallows the raw error into a finish_reason=length partial-stream stub. The conversation loop then burned 3 continuation retries against the SAME primary — re-hitting the content-deterministic filter every time — and gave up with "Response remained truncated after 3 continuation attempts", never consulting fallback_providers.

Builds on @595650661's classifier change (cherry-picked) so error_classifier recognizes the filter; then:

- chat_completion_helpers: run the swallowed error through error_classifier at the stub-creation point and stamp _content_filter_terminated on the stub (single source of truth — no parallel pattern list).
- conversation_loop: read the tag and activate the fallback chain BEFORE burning any continuation retries; roll partial content back to the last clean turn and re-issue against the new provider (restart_with_rebuilt_messages).

Plain network stalls are unaffected (only content_policy_blocked is tagged).

Credits #32479 (@sweetcornna) and #33845 (@Tranquil-Flow), which fixed the same issue via the stub-tag and loop-escalation approaches respectively.

Live E2E confirmed: before, _try_activate_fallback was called 0 times; after, fallback fires on the first stub and the fallback provider completes the turn.
This commit is contained in:
parent
b8e2268628
commit
578e3989d4
6 changed files with 286 additions and 1 deletions
|
|
@ -1699,6 +1699,56 @@ def run_conversation(
|
|||
|
||||
if agent.api_mode in {"chat_completions", "bedrock_converse", "anthropic_messages"}:
|
||||
assistant_message = _trunc_msg
|
||||
# ── Content-filter stream stall → fallback (#32421) ──
|
||||
# When the provider's output-layer safety filter (e.g.
|
||||
# MiniMax "output new_sensitive (1027)", Azure
|
||||
# content_filter) kills the stream mid-delivery, the
|
||||
# raw error was classified at the swallow point and the
|
||||
# stub tagged ``_content_filter_terminated``. This
|
||||
# filter is content-deterministic — continuation
|
||||
# retries against the SAME primary just re-hit it and
|
||||
# burn paid attempts (the loop used to give up with
|
||||
# "Response remained truncated after 3 continuation
|
||||
# attempts" and never consult the fallback chain).
|
||||
# Escalate to the configured fallback BEFORE retrying.
|
||||
_cf_terminated = getattr(
|
||||
response, "_content_filter_terminated", False
|
||||
)
|
||||
if (
|
||||
_cf_terminated
|
||||
and agent._fallback_index < len(agent._fallback_chain)
|
||||
):
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}🛡️ Content filter terminated "
|
||||
f"stream — activating fallback provider...",
|
||||
force=True,
|
||||
)
|
||||
agent._emit_status(
|
||||
"Content filter terminated stream; switching to fallback..."
|
||||
)
|
||||
if agent._try_activate_fallback():
|
||||
# Roll the partial content (if any was already
|
||||
# appended in a prior continuation pass) back to
|
||||
# the last clean turn so the fallback provider
|
||||
# gets a coherent continuation point.
|
||||
if truncated_response_parts:
|
||||
messages = agent._get_messages_up_to_last_assistant(messages)
|
||||
agent._session_messages = messages
|
||||
length_continue_retries = 0
|
||||
truncated_response_parts = []
|
||||
retry_count = 0
|
||||
compression_attempts = 0
|
||||
_retry.primary_recovery_attempted = False
|
||||
_retry.restart_with_rebuilt_messages = True
|
||||
break
|
||||
# No fallback available — fall through to normal
|
||||
# continuation (best-effort, may loop).
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚠️ No fallback provider "
|
||||
f"configured — retrying with same provider "
|
||||
f"(may re-hit filter)...",
|
||||
force=True,
|
||||
)
|
||||
if assistant_message is not None and not _trunc_has_tool_calls:
|
||||
length_continue_retries += 1
|
||||
interim_msg = agent._build_assistant_message(assistant_message, finish_reason)
|
||||
|
|
@ -3781,6 +3831,17 @@ def run_conversation(
|
|||
_retry.restart_with_compressed_messages = False
|
||||
continue
|
||||
|
||||
if _retry.restart_with_rebuilt_messages:
|
||||
# A content-filter stream stall (#32421) was escalated to the
|
||||
# fallback chain and the partial content rolled back. Re-issue
|
||||
# the API call against the now-active fallback provider. Refund
|
||||
# the budget/count for the stalled attempt so the fallback gets a
|
||||
# fair turn.
|
||||
api_call_count -= 1
|
||||
agent.iteration_budget.refund()
|
||||
_retry.restart_with_rebuilt_messages = False
|
||||
continue
|
||||
|
||||
if _retry.restart_with_length_continuation:
|
||||
# Progressively boost the output token budget on each retry.
|
||||
# Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue