fix(auxiliary): fall back on invalid provider responses

This commit is contained in:
helix4u 2026-06-25 22:52:43 -06:00 committed by kshitij
parent fbfccbb3ee
commit 063fe4f6ef
2 changed files with 202 additions and 0 deletions

View file

@ -2783,6 +2783,25 @@ def _is_model_incompatible_error(exc: Exception) -> bool:
))
def _is_invalid_aux_response_error(exc: Exception) -> bool:
"""Detect provider responses that authenticated but cannot serve aux shape.
Some OpenAI-compatible routes return HTTP 200 with an empty/malformed
ChatCompletion instead of a normal provider error. That is still a
provider/model capability failure for auxiliary tasks: downstream callers
need ``choices[0].message`` and should be able to continue through the
same fallback path as explicit model-incompatibility errors.
"""
if not isinstance(exc, RuntimeError):
return False
msg = str(exc).lower()
return (
"auxiliary " in msg
and "llm returned invalid response" in msg
and "choices[0].message" in msg
)
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@ -5474,6 +5493,9 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
if not choices or not hasattr(choices[0], "message"):
raise AttributeError("missing choices[0].message")
except (AttributeError, TypeError, IndexError) as exc:
recovered = _recover_aux_response_message(response)
if recovered is not None:
return recovered
response_type = type(response).__name__
response_preview = str(response)[:120]
raise RuntimeError(
@ -5485,6 +5507,64 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
return response
def _recover_aux_response_message(response: Any) -> Optional[Any]:
    """Rebuild a chat-completions shaped response from Responses-style text.

    Auxiliary callers read ``choices[0].message``. Some compatible endpoints
    place the text elsewhere (for example in ``output_text`` or in ``output``
    items), so that text is salvaged before the response is declared
    malformed.

    Returns ``None`` when no text can be extracted. Otherwise returns either
    the original ``response`` with a synthetic ``choices`` list attached, or,
    if the attribute cannot be assigned, a fresh ``SimpleNamespace`` carrying
    ``id``, ``model``, ``object``, ``choices`` and ``usage``.
    """
    recovered_text = _extract_aux_response_text(response)
    if not recovered_text:
        return None

    synthetic_message = SimpleNamespace(content=recovered_text)
    synthetic_choice = SimpleNamespace(
        message=synthetic_message,
        finish_reason=getattr(response, "finish_reason", None) or "stop",
    )

    try:
        # Prefer patching the provider object in place so its other fields
        # stay available to the caller.
        response.choices = [synthetic_choice]
    except Exception:
        # The response rejects attribute assignment; build a stand-in that
        # copies the fields readable via attribute access.
        return SimpleNamespace(
            id=getattr(response, "id", ""),
            model=getattr(response, "model", ""),
            object=getattr(response, "object", "chat.completion"),
            choices=[synthetic_choice],
            usage=getattr(response, "usage", None),
        )
    return response
def _extract_aux_response_text(response: Any) -> str:
    """Extract plain text from a Responses-style payload.

    Lookup order:

    1. A non-blank top-level ``output_text`` string is returned stripped.
    2. Otherwise ``output`` must be a list; each item whose ``type`` is
       missing/falsy or equal to ``"message"`` contributes its ``content``:
       a bare string is taken as-is, and an iterable of parts contributes the
       ``text`` of every part whose ``type`` is ``"output_text"``, ``"text"``
       or absent.

    Collected pieces are stripped and joined with newlines.

    Returns:
        The recovered text, or ``""`` when nothing usable is found.

    This helper is used while recovering from an already-malformed response,
    so unexpected ``content`` shapes are skipped instead of raising.
    """
    output_text = _obj_get(response, "output_text")
    if isinstance(output_text, str) and output_text.strip():
        return output_text.strip()

    output = _obj_get(response, "output")
    if not isinstance(output, list):
        return ""

    parts: List[str] = []
    for item in output:
        item_type = _obj_get(item, "type")
        # Untyped items are treated like messages; other typed items are skipped.
        if item_type and item_type != "message":
            continue

        content = _obj_get(item, "content")
        if isinstance(content, str):
            # Iterating a string would walk it character by character and
            # drop the text entirely, so take it whole.
            if content.strip():
                parts.append(content.strip())
            continue

        try:
            content_parts = iter(content or [])
        except TypeError:
            # Non-iterable content: nothing recoverable in this item.
            continue

        for part in content_parts:
            # Tuple membership compares by equality, so an unhashable
            # ``type`` value cannot raise here.
            if _obj_get(part, "type") not in ("output_text", "text", None):
                continue
            text = _obj_get(part, "text")
            if isinstance(text, str) and text.strip():
                parts.append(text.strip())
    return "\n".join(parts).strip()
def _obj_get(obj: Any, key: str, default: Any = None) -> Any:
value = getattr(obj, key, default)
if value is default and isinstance(obj, dict):
value = obj.get(key, default)
return value
def call_llm(
task: str = None,
*,
@ -5887,6 +5967,7 @@ def call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Respect explicit provider choice for transient errors (auth, request
# validation, etc.) but allow fallback when the provider clearly cannot
@ -5909,6 +5990,7 @@ def call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
if should_fallback and (is_auto or is_capacity_error):
if _is_payment_error(first_err):
@ -5924,6 +6006,8 @@ def call_llm(
reason = "rate limit"
elif _is_model_incompatible_error(first_err):
reason = "model incompatible with route"
elif _is_invalid_aux_response_error(first_err):
reason = "invalid provider response"
else:
reason = "connection error"
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
@ -6363,6 +6447,7 @@ async def async_call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
# Capacity errors (payment/quota/connection/rate-limit) bypass the
# explicit-provider gate — the provider cannot serve the request
@ -6377,6 +6462,7 @@ async def async_call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
or _is_model_incompatible_error(first_err)
or _is_invalid_aux_response_error(first_err)
)
if should_fallback and (is_auto or is_capacity_error):
if _is_payment_error(first_err):
@ -6388,6 +6474,8 @@ async def async_call_llm(
reason = "rate limit"
elif _is_model_incompatible_error(first_err):
reason = "model incompatible with route"
elif _is_invalid_aux_response_error(first_err):
reason = "invalid provider response"
else:
reason = "connection error"
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",

View file

@ -1851,6 +1851,120 @@ class TestAuxiliaryFallbackLayering:
exc.status_code = 402
return exc
def test_empty_choices_with_output_text_is_recovered_before_fallback(self, monkeypatch):
    """A top-level ``output_text`` is recovered without invoking the fallback chain."""
    model_name = "minimaxai/minimax-m3"

    # Chat-completions call answers with no choices but Responses-style text.
    responses_style_reply = SimpleNamespace(
        choices=[],
        output_text="recovered title",
        model=model_name,
    )
    primary_client = MagicMock()
    primary_client.chat.completions.create.return_value = responses_style_reply

    cached_client_patch = patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    )
    resolver_patch = patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    )
    chain_patch = patch("agent.auxiliary_client._try_configured_fallback_chain")

    with cached_client_patch, resolver_patch, chain_patch as mock_chain:
        result = call_llm(
            task="title_generation",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "recovered title"
    mock_chain.assert_not_called()
def test_empty_choices_with_output_items_is_recovered_before_fallback(self, monkeypatch):
    """``output`` message items are normalized into ``choices[0].message`` for aux callers."""
    model_name = "minimaxai/minimax-m3"

    # One attribute-style part and one dict-style part in the same message.
    message_item = SimpleNamespace(
        type="message",
        content=[
            SimpleNamespace(type="output_text", text="part one"),
            {"type": "text", "text": "part two"},
        ],
    )
    primary_client = MagicMock()
    primary_client.chat.completions.create.return_value = SimpleNamespace(
        choices=[],
        output=[message_item],
        model=model_name,
    )

    cached_client_patch = patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    )
    resolver_patch = patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    )
    chain_patch = patch("agent.auxiliary_client._try_configured_fallback_chain")

    with cached_client_patch, resolver_patch, chain_patch as mock_chain:
        result = call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "part one\npart two"
    mock_chain.assert_not_called()
def test_invalid_empty_choices_response_triggers_fallback(self, monkeypatch):
    """An empty-``choices`` reply with no recoverable text routes to the fallback chain."""
    model_name = "minimaxai/minimax-m3"

    # Primary provider: a reply with an empty choices list.
    primary_client = MagicMock()
    primary_client.chat.completions.create.return_value = MagicMock(choices=[])

    # Fallback provider: a well-formed chat completion.
    fallback_message = MagicMock(content="from fallback chain")
    fallback_reply = MagicMock(choices=[MagicMock(message=fallback_message)])
    fallback_client = MagicMock()
    fallback_client.chat.completions.create.return_value = fallback_reply

    cached_client_patch = patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    )
    resolver_patch = patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    )
    chain_patch = patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=(fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)"),
    )
    main_model_patch = patch("agent.auxiliary_client._try_main_agent_model_fallback")

    with cached_client_patch, resolver_patch, \
            chain_patch as mock_chain, main_model_patch as mock_main:
        result = call_llm(
            task="title_generation",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "from fallback chain"
    mock_chain.assert_called_once_with(
        "title_generation",
        "nvidia",
        reason="invalid provider response",
    )
    mock_main.assert_not_called()
@pytest.mark.asyncio
async def test_async_invalid_empty_choices_response_triggers_fallback(self, monkeypatch):
    """The async entry point takes the same malformed-response fallback path."""
    model_name = "minimaxai/minimax-m3"

    # Primary provider: awaited create() yields a reply with no choices.
    primary_client = MagicMock()
    primary_client.chat.completions.create = AsyncMock(
        return_value=MagicMock(choices=[])
    )

    # The chain hands back a sync client; the async wrapper produces the reply.
    fallback_client = MagicMock()
    async_fallback_reply = MagicMock(
        choices=[MagicMock(message=MagicMock(content="from async fallback"))]
    )
    async_fallback_client = MagicMock()
    async_fallback_client.chat.completions.create = AsyncMock(
        return_value=async_fallback_reply
    )

    cached_client_patch = patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    )
    resolver_patch = patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    )
    chain_patch = patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=(fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)"),
    )
    to_async_patch = patch(
        "agent.auxiliary_client._to_async_client",
        return_value=(async_fallback_client, "gpt-5.4-mini"),
    )

    with cached_client_patch, resolver_patch, \
            chain_patch as mock_chain, to_async_patch:
        result = await async_call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "from async fallback"
    mock_chain.assert_called_once_with(
        "compression",
        "nvidia",
        reason="invalid provider response",
    )
def test_auto_provider_uses_task_then_main_chain_before_builtin_chain(self, monkeypatch):
"""Auto aux call failures try per-task then top-level fallback before built-ins."""
primary_client = MagicMock()