diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index df0ccbe0350..a4fe065b6d3 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -2783,6 +2783,25 @@ def _is_model_incompatible_error(exc: Exception) -> bool: )) +def _is_invalid_aux_response_error(exc: Exception) -> bool: + """Detect provider responses that authenticated but cannot serve aux shape. + + Some OpenAI-compatible routes return HTTP 200 with an empty/malformed + ChatCompletion instead of a normal provider error. That is still a + provider/model capability failure for auxiliary tasks: downstream callers + need ``choices[0].message`` and should be able to continue through the + same fallback path as explicit model-incompatibility errors. + """ + if not isinstance(exc, RuntimeError): + return False + msg = str(exc).lower() + return ( + "auxiliary " in msg + and "llm returned invalid response" in msg + and "choices[0].message" in msg + ) + + def _evict_cached_clients(provider: str) -> None: """Drop cached auxiliary clients for a provider so fresh creds are used.""" normalized = _normalize_aux_provider(provider) @@ -5474,6 +5493,9 @@ def _validate_llm_response(response: Any, task: str = None) -> Any: if not choices or not hasattr(choices[0], "message"): raise AttributeError("missing choices[0].message") except (AttributeError, TypeError, IndexError) as exc: + recovered = _recover_aux_response_message(response) + if recovered is not None: + return recovered response_type = type(response).__name__ response_preview = str(response)[:120] raise RuntimeError( @@ -5485,6 +5507,64 @@ def _validate_llm_response(response: Any, task: str = None) -> Any: return response +def _recover_aux_response_message(response: Any) -> Optional[Any]: + """Synthesize chat-completions shape from Responses-style text fields. + + Auxiliary callers consume ``choices[0].message``. Some compatible + endpoints return text outside ``choices`` (for example ``output_text`` or + ``output`` items). Preserve that response before declaring it malformed. + """ + text = _extract_aux_response_text(response) + if not text: + return None + + choice = SimpleNamespace( + message=SimpleNamespace(content=text), + finish_reason=getattr(response, "finish_reason", None) or "stop", + ) + try: + response.choices = [choice] + return response + except Exception: + return SimpleNamespace( + id=getattr(response, "id", ""), + model=getattr(response, "model", ""), + object=getattr(response, "object", "chat.completion"), + choices=[choice], + usage=getattr(response, "usage", None), + ) + + +def _extract_aux_response_text(response: Any) -> str: + output_text = _obj_get(response, "output_text") + if isinstance(output_text, str) and output_text.strip(): + return output_text.strip() + + output = _obj_get(response, "output") + if not isinstance(output, list): + return "" + + parts: List[str] = [] + for item in output: + item_type = _obj_get(item, "type") + if item_type and item_type != "message": + continue + for part in (_obj_get(item, "content") or []): + part_type = _obj_get(part, "type") + if part_type in {"output_text", "text", None}: + text = _obj_get(part, "text") + if isinstance(text, str) and text.strip(): + parts.append(text.strip()) + return "\n".join(parts).strip() + + +def _obj_get(obj: Any, key: str, default: Any = None) -> Any: + value = getattr(obj, key, default) + if value is default and isinstance(obj, dict): + value = obj.get(key, default) + return value + + def call_llm( task: str = None, *, @@ -5887,6 +5967,7 @@ def call_llm( or _is_connection_error(first_err) or _is_rate_limit_error(first_err) or _is_model_incompatible_error(first_err) + or _is_invalid_aux_response_error(first_err) ) # Respect explicit provider choice for transient errors (auth, request # validation, etc.) but allow fallback when the provider clearly cannot @@ -5909,6 +5990,7 @@ def call_llm( or _is_connection_error(first_err) or _is_rate_limit_error(first_err) or _is_model_incompatible_error(first_err) + or _is_invalid_aux_response_error(first_err) ) if should_fallback and (is_auto or is_capacity_error): if _is_payment_error(first_err): @@ -5924,6 +6006,8 @@ def call_llm( reason = "rate limit" elif _is_model_incompatible_error(first_err): reason = "model incompatible with route" + elif _is_invalid_aux_response_error(first_err): + reason = "invalid provider response" else: reason = "connection error" logger.info("Auxiliary %s: %s on %s (%s), trying fallback", @@ -6363,6 +6447,7 @@ async def async_call_llm( or _is_connection_error(first_err) or _is_rate_limit_error(first_err) or _is_model_incompatible_error(first_err) + or _is_invalid_aux_response_error(first_err) ) # Capacity errors (payment/quota/connection/rate-limit) bypass the # explicit-provider gate — the provider cannot serve the request @@ -6377,6 +6462,7 @@ async def async_call_llm( or _is_connection_error(first_err) or _is_rate_limit_error(first_err) or _is_model_incompatible_error(first_err) + or _is_invalid_aux_response_error(first_err) ) if should_fallback and (is_auto or is_capacity_error): if _is_payment_error(first_err): @@ -6388,6 +6474,8 @@ async def async_call_llm( reason = "rate limit" elif _is_model_incompatible_error(first_err): reason = "model incompatible with route" + elif _is_invalid_aux_response_error(first_err): + reason = "invalid provider response" else: reason = "connection error" logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback", diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 527c3bf0b2c..7637b06d9f1 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1851,6 +1851,120 @@ class TestAuxiliaryFallbackLayering: exc.status_code = 402 return exc + def test_empty_choices_with_output_text_is_recovered_before_fallback(self, monkeypatch): + """Responses-style output_text should be used before provider fallback.""" + primary_client = MagicMock() + primary_client.chat.completions.create.return_value = SimpleNamespace( + choices=[], + output_text="recovered title", + model="minimaxai/minimax-m3", + ) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(primary_client, "minimaxai/minimax-m3")), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("nvidia", "minimaxai/minimax-m3", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain") as mock_chain: + result = call_llm( + task="title_generation", + messages=[{"role": "user", "content": "hello"}], + ) + + assert result.choices[0].message.content == "recovered title" + mock_chain.assert_not_called() + + def test_empty_choices_with_output_items_is_recovered_before_fallback(self, monkeypatch): + """Responses-style output message items should be normalized for aux callers.""" + primary_client = MagicMock() + primary_client.chat.completions.create.return_value = SimpleNamespace( + choices=[], + output=[ + SimpleNamespace( + type="message", + content=[ + SimpleNamespace(type="output_text", text="part one"), + {"type": "text", "text": "part two"}, + ], + ) + ], + model="minimaxai/minimax-m3", + ) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(primary_client, "minimaxai/minimax-m3")), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("nvidia", "minimaxai/minimax-m3", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain") as mock_chain: + result = call_llm( + task="compression", + messages=[{"role": "user", "content": "hello"}], + ) + + assert result.choices[0].message.content == "part one\npart two" + mock_chain.assert_not_called() + + def test_invalid_empty_choices_response_triggers_fallback(self, monkeypatch): + """HTTP-200 malformed chat completions should not abort aux fallback.""" + primary_client = MagicMock() + primary_client.chat.completions.create.return_value = MagicMock(choices=[]) + + fallback_client = MagicMock() + fallback_client.chat.completions.create.return_value = MagicMock(choices=[ + MagicMock(message=MagicMock(content="from fallback chain")) + ]) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(primary_client, "minimaxai/minimax-m3")), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("nvidia", "minimaxai/minimax-m3", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain", + return_value=(fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")) as mock_chain, \ + patch("agent.auxiliary_client._try_main_agent_model_fallback") as mock_main: + result = call_llm( + task="title_generation", + messages=[{"role": "user", "content": "hello"}], + ) + + assert result.choices[0].message.content == "from fallback chain" + mock_chain.assert_called_once_with( + "title_generation", + "nvidia", + reason="invalid provider response", + ) + mock_main.assert_not_called() + + @pytest.mark.asyncio + async def test_async_invalid_empty_choices_response_triggers_fallback(self, monkeypatch): + """Async aux calls use the same malformed-response fallback path.""" + primary_client = MagicMock() + primary_client.chat.completions.create = AsyncMock(return_value=MagicMock(choices=[])) + + fallback_client = MagicMock() + async_fallback_client = MagicMock() + async_fallback_client.chat.completions.create = AsyncMock(return_value=MagicMock(choices=[ + MagicMock(message=MagicMock(content="from async fallback")) + ])) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(primary_client, "minimaxai/minimax-m3")), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("nvidia", "minimaxai/minimax-m3", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain", + return_value=(fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")) as mock_chain, \ + patch("agent.auxiliary_client._to_async_client", + return_value=(async_fallback_client, "gpt-5.4-mini")): + result = await async_call_llm( + task="compression", + messages=[{"role": "user", "content": "hello"}], + ) + + assert result.choices[0].message.content == "from async fallback" + mock_chain.assert_called_once_with( + "compression", + "nvidia", + reason="invalid provider response", + ) + def test_auto_provider_uses_task_then_main_chain_before_builtin_chain(self, monkeypatch): """Auto aux call failures try per-task then top-level fallback before built-ins.""" primary_client = MagicMock()