fix(agent): recover from providers rejecting list-type tool content (#27344) (#30259)

Some providers (Xiaomi MiMo, some Alibaba endpoints, a long tail of
OpenAI-compatible servers) follow the OpenAI spec strictly and require
tool message `content` to be a string — they reject our list-type
content (text + image_url parts) with HTTP 400 'text is not set' /
'tool message content must be a string'.

Instead of an allowlist of known-good providers (maintenance burden,
guaranteed to miss aggregators like OpenRouter where the underlying
model determines support, not the aggregator name), this lands a
reactive recovery:

1. New `FailoverReason.multimodal_tool_content_unsupported` with a
   small pattern list covering the common 400 wordings.
2. `AIAgent._try_strip_image_parts_from_tool_messages` walks the API
   message list, downgrades any `role:tool` message whose content is
   list-with-image to a plain text summary (preserves text parts) in
   place, AND records the active (provider, model) in a session-scoped
   `_no_list_tool_content_models` set.
3. `_tool_result_content_for_active_model` short-circuits to a text
   summary when (provider, model) is in the cache — so after the first
   400 + retry, subsequent screenshots in the same session skip the
   round trip entirely.
4. Retry hook in `agent.conversation_loop` mirrors the existing
   `image_too_large` recovery: detect the reason, run the helper,
   retry once, fall through to the normal error path if no list-type
   tool content was actually present.

The cache is transient (per-session) by design: the next session retries
in case the provider has added support, and there is no persistent state
to maintain.

Fixes #27344. Closes #27351 (allowlist approach superseded by reactive
recovery).
This commit is contained in:
Teknium 2026-05-21 23:40:16 -07:00 committed by GitHub
parent 372e9a18cd
commit c769be344a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 490 additions and 0 deletions

View file

@ -56,6 +56,7 @@ class TestFailoverReason:
"overloaded", "server_error", "timeout",
"context_overflow", "payload_too_large", "image_too_large",
"model_not_found", "format_error",
"multimodal_tool_content_unsupported",
"provider_policy_blocked",
"thinking_signature", "long_context_tier",
"oauth_long_context_beta_forbidden",
@ -1256,3 +1257,66 @@ class TestRateLimitErrorWithoutStatusCode:
e.status_code = None
result = classify_api_error(e, provider="copilot", model="gpt-4o")
assert result.reason != FailoverReason.rate_limit
# ── Test: multimodal_tool_content_unsupported pattern ───────────────────


class TestMultimodalToolContentUnsupported:
    """Classification of "tool content must be a string" rejections.

    Issue #27344: a provider that refuses list-type tool message content
    has to be reported as ``multimodal_tool_content_unsupported`` so that
    the retry loop can downgrade screenshots to text and try again.
    """

    def test_xiaomi_mimo_text_is_not_set_pattern(self):
        """Wording copied from the Xiaomi MiMo 400 in the bug report."""
        raw_message = "Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect', 'param': 'text is not set', 'type': ''}}"
        err = MockAPIError(raw_message, status_code=400)

        outcome = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")

        assert outcome.reason == FailoverReason.multimodal_tool_content_unsupported
        # This reason is meant to drive a retry, so it must be retryable.
        assert outcome.retryable is True

    def test_generic_tool_message_must_be_string(self):
        """A plain 'must be a string' 400 is recognised."""
        err = MockAPIError("tool message content must be a string", status_code=400)

        outcome = classify_api_error(err, provider="custom", model="some-model")

        assert outcome.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_expected_string_got_list(self):
        """A schema-validation style 'expected string, got list' 400 is recognised."""
        err = MockAPIError(
            "Schema validation failed: expected string, got list",
            status_code=400,
        )

        outcome = classify_api_error(err, provider="custom", model="some-model")

        assert outcome.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_multimodal_tool_content_takes_priority_over_context_overflow(self):
        """Tool-content classification beats a context-length phrase.

        A single 400 may carry BOTH 'text is not set' and a length-shaped
        phrase. Downgrading tool content is cheaper than compression, so
        the tool-content reason has to win.
        """
        combined_message = "text is not set; context length exceeded"
        err = MockAPIError(combined_message, status_code=400)

        outcome = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")

        assert outcome.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_no_status_code_path_also_classifies(self):
        """The message-only classifier branch recognises the pattern too.

        Covers errors that arrive without a status code because the
        transport layer swallowed it.
        """
        err = MockTransportError("tool_call.content must be string")

        outcome = classify_api_error(err, provider="alibaba", model="qwen3.5-plus")

        assert outcome.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_unrelated_400_is_not_misclassified(self):
        """An ordinary 400 must not trip the tool-content patterns."""
        err = MockAPIError("bad request: missing field 'model'", status_code=400)

        outcome = classify_api_error(
            err,
            provider="openrouter",
            model="anthropic/claude-sonnet-4",
        )

        assert outcome.reason != FailoverReason.multimodal_tool_content_unsupported