mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-26 11:12:03 +00:00
fix(auxiliary): fallback on invalid provider responses
This commit is contained in:
parent
fbfccbb3ee
commit
063fe4f6ef
2 changed files with 202 additions and 0 deletions
|
|
@ -2783,6 +2783,25 @@ def _is_model_incompatible_error(exc: Exception) -> bool:
|
|||
))
|
||||
|
||||
|
||||
def _is_invalid_aux_response_error(exc: Exception) -> bool:
|
||||
"""Detect provider responses that authenticated but cannot serve aux shape.
|
||||
|
||||
Some OpenAI-compatible routes return HTTP 200 with an empty/malformed
|
||||
ChatCompletion instead of a normal provider error. That is still a
|
||||
provider/model capability failure for auxiliary tasks: downstream callers
|
||||
need ``choices[0].message`` and should be able to continue through the
|
||||
same fallback path as explicit model-incompatibility errors.
|
||||
"""
|
||||
if not isinstance(exc, RuntimeError):
|
||||
return False
|
||||
msg = str(exc).lower()
|
||||
return (
|
||||
"auxiliary " in msg
|
||||
and "llm returned invalid response" in msg
|
||||
and "choices[0].message" in msg
|
||||
)
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
|
|
@ -5474,6 +5493,9 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
|
|||
if not choices or not hasattr(choices[0], "message"):
|
||||
raise AttributeError("missing choices[0].message")
|
||||
except (AttributeError, TypeError, IndexError) as exc:
|
||||
recovered = _recover_aux_response_message(response)
|
||||
if recovered is not None:
|
||||
return recovered
|
||||
response_type = type(response).__name__
|
||||
response_preview = str(response)[:120]
|
||||
raise RuntimeError(
|
||||
|
|
@ -5485,6 +5507,64 @@ def _validate_llm_response(response: Any, task: str = None) -> Any:
|
|||
return response
|
||||
|
||||
|
||||
def _recover_aux_response_message(response: Any) -> Optional[Any]:
    """Synthesize chat-completions shape from Responses-style text fields.

    Auxiliary callers consume ``choices[0].message``.  Some compatible
    endpoints return text outside ``choices`` (for example ``output_text`` or
    ``output`` items).  Preserve that response before declaring it malformed.

    Args:
        response: Provider response; either an attribute-style object or a
            dict (both are read through ``_obj_get``).

    Returns:
        ``None`` when no text could be extracted.  Otherwise either the same
        ``response`` object with ``choices`` replaced by a single synthesized
        choice, or -- when ``choices`` cannot be assigned on it -- a new
        ``SimpleNamespace`` carrying ``id``, ``model``, ``object``,
        ``choices`` and ``usage``.
    """
    text = _extract_aux_response_text(response)
    if not text:
        return None

    choice = SimpleNamespace(
        message=SimpleNamespace(content=text),
        # Read via _obj_get so dict-shaped responses keep their finish_reason.
        finish_reason=_obj_get(response, "finish_reason") or "stop",
    )
    try:
        response.choices = [choice]
    except Exception:
        # Deliberate best effort: dicts and objects that reject attribute
        # assignment fall back to a fresh namespace.  Metadata is read via
        # _obj_get so dict responses do not lose id/model/usage.
        return SimpleNamespace(
            id=_obj_get(response, "id", ""),
            model=_obj_get(response, "model", ""),
            object=_obj_get(response, "object", "chat.completion"),
            choices=[choice],
            usage=_obj_get(response, "usage", None),
        )
    return response
|
||||
|
||||
|
||||
def _extract_aux_response_text(response: Any) -> str:
    """Extract assistant text from a Responses-style payload.

    Lookup order:

    1. A non-blank string ``output_text`` field on the response.
    2. Otherwise, every text part found in the ``output`` list.  Items whose
       ``type`` is set to something other than ``"message"`` are skipped.
       An item's ``content`` may be a plain string or a sequence of parts;
       parts typed ``"output_text"``, ``"text"`` or untyped contribute their
       ``text`` field.

    Args:
        response: Provider response; object or dict (read via ``_obj_get``).

    Returns:
        The stripped text, with multiple parts joined by newlines, or ``""``
        when nothing usable is found.
    """
    output_text = _obj_get(response, "output_text")
    if isinstance(output_text, str) and output_text.strip():
        return output_text.strip()

    output = _obj_get(response, "output")
    if not isinstance(output, list):
        return ""

    parts: List[str] = []
    for item in output:
        item_type = _obj_get(item, "type")
        if item_type and item_type != "message":
            continue

        content = _obj_get(item, "content")
        if isinstance(content, str):
            # A plain-string content would otherwise be iterated character by
            # character below and its text silently dropped.
            if content.strip():
                parts.append(content.strip())
            continue

        for part in (content or []):
            part_type = _obj_get(part, "type")
            if part_type in {"output_text", "text", None}:
                text = _obj_get(part, "text")
                if isinstance(text, str) and text.strip():
                    parts.append(text.strip())
    return "\n".join(parts).strip()
|
||||
|
||||
|
||||
def _obj_get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
value = getattr(obj, key, default)
|
||||
if value is default and isinstance(obj, dict):
|
||||
value = obj.get(key, default)
|
||||
return value
|
||||
|
||||
|
||||
def call_llm(
|
||||
task: str = None,
|
||||
*,
|
||||
|
|
@ -5887,6 +5967,7 @@ def call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
# Respect explicit provider choice for transient errors (auth, request
|
||||
# validation, etc.) but allow fallback when the provider clearly cannot
|
||||
|
|
@ -5909,6 +5990,7 @@ def call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
|
|
@ -5924,6 +6006,8 @@ def call_llm(
|
|||
reason = "rate limit"
|
||||
elif _is_model_incompatible_error(first_err):
|
||||
reason = "model incompatible with route"
|
||||
elif _is_invalid_aux_response_error(first_err):
|
||||
reason = "invalid provider response"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
|
|
@ -6363,6 +6447,7 @@ async def async_call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
# Capacity errors (payment/quota/connection/rate-limit) bypass the
|
||||
# explicit-provider gate — the provider cannot serve the request
|
||||
|
|
@ -6377,6 +6462,7 @@ async def async_call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
or _is_model_incompatible_error(first_err)
|
||||
or _is_invalid_aux_response_error(first_err)
|
||||
)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
|
|
@ -6388,6 +6474,8 @@ async def async_call_llm(
|
|||
reason = "rate limit"
|
||||
elif _is_model_incompatible_error(first_err):
|
||||
reason = "model incompatible with route"
|
||||
elif _is_invalid_aux_response_error(first_err):
|
||||
reason = "invalid provider response"
|
||||
else:
|
||||
reason = "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
|
|
|
|||
|
|
@ -1851,6 +1851,120 @@ class TestAuxiliaryFallbackLayering:
|
|||
exc.status_code = 402
|
||||
return exc
|
||||
|
||||
def test_empty_choices_with_output_text_is_recovered_before_fallback(self, monkeypatch):
    """A response carrying only ``output_text`` is normalized, not failed over."""
    model_name = "minimaxai/minimax-m3"
    provider_response = SimpleNamespace(
        choices=[],
        output_text="recovered title",
        model=model_name,
    )
    primary_client = MagicMock()
    primary_client.chat.completions.create.return_value = provider_response

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain"
    ) as mock_chain:
        result = call_llm(
            task="title_generation",
            messages=[{"role": "user", "content": "hello"}],
        )

    # The recovered text is exposed in chat-completions shape ...
    assert result.choices[0].message.content == "recovered title"
    # ... and the configured fallback chain was never consulted.
    mock_chain.assert_not_called()
|
||||
|
||||
def test_empty_choices_with_output_items_is_recovered_before_fallback(self, monkeypatch):
    """Text parts inside ``output`` message items are joined into one message."""
    model_name = "minimaxai/minimax-m3"
    # Mix an attribute-style part with a dict part to cover both access paths.
    message_item = SimpleNamespace(
        type="message",
        content=[
            SimpleNamespace(type="output_text", text="part one"),
            {"type": "text", "text": "part two"},
        ],
    )
    primary_client = MagicMock()
    primary_client.chat.completions.create.return_value = SimpleNamespace(
        choices=[],
        output=[message_item],
        model=model_name,
    )

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain"
    ) as mock_chain:
        result = call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "part one\npart two"
    mock_chain.assert_not_called()
|
||||
|
||||
def test_invalid_empty_choices_response_triggers_fallback(self, monkeypatch):
    """An HTTP-200 response with empty ``choices`` routes to the fallback chain."""
    model_name = "minimaxai/minimax-m3"

    primary_client = MagicMock()
    primary_client.chat.completions.create.return_value = MagicMock(choices=[])

    fallback_message = MagicMock(content="from fallback chain")
    fallback_client = MagicMock()
    fallback_client.chat.completions.create.return_value = MagicMock(
        choices=[MagicMock(message=fallback_message)]
    )
    chain_result = (fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=chain_result,
    ) as mock_chain, patch(
        "agent.auxiliary_client._try_main_agent_model_fallback"
    ) as mock_main:
        result = call_llm(
            task="title_generation",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "from fallback chain"
    mock_chain.assert_called_once_with(
        "title_generation",
        "nvidia",
        reason="invalid provider response",
    )
    # The configured chain answered, so the main-agent fallback stays unused.
    mock_main.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_invalid_empty_choices_response_triggers_fallback(self, monkeypatch):
    """The async entry point takes the same malformed-response fallback path."""
    model_name = "minimaxai/minimax-m3"

    primary_client = MagicMock()
    primary_client.chat.completions.create = AsyncMock(
        return_value=MagicMock(choices=[])
    )

    fallback_client = MagicMock()
    fallback_message = MagicMock(content="from async fallback")
    async_fallback_client = MagicMock()
    async_fallback_client.chat.completions.create = AsyncMock(
        return_value=MagicMock(choices=[MagicMock(message=fallback_message)])
    )
    chain_result = (fallback_client, "gpt-5.4-mini", "fallback_chain[0](openai-codex)")

    with patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary_client, model_name),
    ), patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("nvidia", model_name, None, None, None),
    ), patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=chain_result,
    ) as mock_chain, patch(
        "agent.auxiliary_client._to_async_client",
        return_value=(async_fallback_client, "gpt-5.4-mini"),
    ):
        result = await async_call_llm(
            task="compression",
            messages=[{"role": "user", "content": "hello"}],
        )

    assert result.choices[0].message.content == "from async fallback"
    mock_chain.assert_called_once_with(
        "compression",
        "nvidia",
        reason="invalid provider response",
    )
|
||||
|
||||
def test_auto_provider_uses_task_then_main_chain_before_builtin_chain(self, monkeypatch):
|
||||
"""Auto aux call failures try per-task then top-level fallback before built-ins."""
|
||||
primary_client = MagicMock()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue