"""Tests for reactive multimodal-tool-content recovery. Covers the full chain for providers that reject list-type content in ``role: "tool"`` messages (Xiaomi MiMo's 400 "text is not set", etc.): 1. agent/error_classifier.py: 400 with the right wording classifies as ``FailoverReason.multimodal_tool_content_unsupported``. 2. run_agent._try_strip_image_parts_from_tool_messages downgrades tool messages whose ``content`` is a list-with-image to a string text summary, in-place, and records the active (provider, model) in ``self._no_list_tool_content_models`` so future tool results in this session preemptively downgrade. 3. run_agent._tool_result_content_for_active_model short-circuits to a text summary when the (provider, model) is in the cache, even though ``_model_supports_vision`` returns True — avoiding a wasted round trip on every subsequent screenshot in the session. The end-to-end retry loop wiring (`conversation_loop.py`) is exercised by the classifier signal + helper-mutation tests; the integration only adds a trivial flag-and-continue around the existing pattern used for ``image_too_large`` recovery. See: https://github.com/NousResearch/hermes-agent/issues/27344 """ from __future__ import annotations import pytest from agent.error_classifier import FailoverReason, classify_api_error class _FakeApiError(Exception): """Stand-in for an openai.BadRequestError with status_code + body.""" def __init__(self, status_code: int, message: str, body: dict | None = None): super().__init__(message) self.status_code = status_code self.body = body or {"error": {"message": message}} self.response = None def _make_agent(provider: str = "xiaomi", model: str = "mimo-v2.5"): """Build a bare AIAgent for method-level testing, no provider setup.""" from run_agent import AIAgent agent = object.__new__(AIAgent) agent.provider = provider agent.model = model return agent # ─── Strip helper ──────────────────────────────────────────────────────────── class TestStripImagePartsHelper: def test_no_messages_returns_false(self): agent = _make_agent() assert agent._try_strip_image_parts_from_tool_messages([]) is False assert agent._try_strip_image_parts_from_tool_messages(None) is False def test_no_tool_messages_returns_false(self): agent = _make_agent() msgs = [ {"role": "user", "content": "plain text"}, {"role": "assistant", "content": "ack"}, ] assert agent._try_strip_image_parts_from_tool_messages(msgs) is False def test_tool_message_with_string_content_unchanged(self): agent = _make_agent() msgs = [ {"role": "tool", "tool_call_id": "x", "content": "plain string result"}, ] assert agent._try_strip_image_parts_from_tool_messages(msgs) is False assert msgs[0]["content"] == "plain string result" def test_tool_message_list_without_image_unchanged(self): """List content with only text parts is left alone — caller surfaces the original error if this turns out to also be rejected.""" agent = _make_agent() msgs = [ {"role": "tool", "tool_call_id": "x", "content": [ {"type": "text", "text": "hello"}, ]}, ] assert agent._try_strip_image_parts_from_tool_messages(msgs) is False def test_tool_message_list_with_image_downgrades(self): agent = _make_agent() msgs = [ {"role": "tool", "tool_call_id": "x", "content": [ {"type": "text", "text": "AX summary: 5 buttons visible"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR..."}}, ]}, ] assert agent._try_strip_image_parts_from_tool_messages(msgs) is True # Image stripped; text preserved as a string. assert isinstance(msgs[0]["content"], str) assert "AX summary" in msgs[0]["content"] assert "image_url" not in msgs[0]["content"] assert "iVBOR" not in msgs[0]["content"] def test_tool_message_image_only_gets_placeholder(self): """If the list had nothing but image parts, leave a placeholder so the assistant message has something to reference.""" agent = _make_agent() msgs = [ {"role": "tool", "tool_call_id": "x", "content": [ {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR..."}}, ]}, ] assert agent._try_strip_image_parts_from_tool_messages(msgs) is True assert isinstance(msgs[0]["content"], str) assert "image content removed" in msgs[0]["content"] def test_records_provider_model_in_session_cache(self): agent = _make_agent(provider="xiaomi", model="mimo-v2.5") msgs = [ {"role": "tool", "tool_call_id": "x", "content": [ {"type": "text", "text": "summary"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}}, ]}, ] agent._try_strip_image_parts_from_tool_messages(msgs) assert ("xiaomi", "mimo-v2.5") in agent._no_list_tool_content_models def test_only_tool_messages_get_downgraded(self): """User / assistant messages with list-type content are out of scope — they're handled by the existing image-routing path.""" agent = _make_agent() msgs = [ {"role": "user", "content": [ {"type": "text", "text": "describe"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}}, ]}, {"role": "tool", "tool_call_id": "x", "content": [ {"type": "text", "text": "summary"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,Y"}}, ]}, ] agent._try_strip_image_parts_from_tool_messages(msgs) # User message untouched. assert isinstance(msgs[0]["content"], list) assert any(p.get("type") == "image_url" for p in msgs[0]["content"]) # Tool message downgraded. assert isinstance(msgs[1]["content"], str) assert "summary" in msgs[1]["content"] def test_skips_recording_when_no_model_id(self): """Don't poison the cache with empty keys when provider/model is unset (e.g. lazy-initialised mid-handshake).""" agent = _make_agent(provider="", model="") msgs = [ {"role": "tool", "tool_call_id": "x", "content": [ {"type": "text", "text": "summary"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}}, ]}, ] agent._try_strip_image_parts_from_tool_messages(msgs) assert agent._no_list_tool_content_models == set() # ─── Short-circuit on cached models ────────────────────────────────────────── class TestToolResultContentShortCircuit: """Once the session has learned that (provider, model) rejects list content, ``_tool_result_content_for_active_model`` returns a text summary even though ``_model_supports_vision`` reports True. """ def _multimodal_result(self, png_b64: str = "iVBORw0KGgoAAAA"): return { "_multimodal": True, "content": [ {"type": "text", "text": "capture mode=som 800x600 app=Safari"}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{png_b64}"}}, ], "text_summary": "capture mode=som 800x600 app=Safari", "meta": {"mode": "som", "width": 800, "height": 600, "elements": 5, "png_bytes": 1024}, } def test_returns_list_when_cache_empty_and_vision_supported(self, monkeypatch): agent = _make_agent(provider="xiaomi", model="mimo-v2.5") agent._no_list_tool_content_models = set() # explicit empty monkeypatch.setattr(agent, "_model_supports_vision", lambda: True) out = agent._tool_result_content_for_active_model( "computer_use", self._multimodal_result() ) # Native multimodal path: returns the content parts list. assert isinstance(out, list) assert any(p.get("type") == "image_url" for p in out) def test_returns_text_summary_when_model_in_cache(self, monkeypatch): agent = _make_agent(provider="xiaomi", model="mimo-v2.5") agent._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")} monkeypatch.setattr(agent, "_model_supports_vision", lambda: True) out = agent._tool_result_content_for_active_model( "computer_use", self._multimodal_result() ) # Short-circuit: a plain string summary, no image_url present. assert isinstance(out, str) assert "data:image" not in out assert "image_url" not in out def test_cache_miss_on_different_model(self, monkeypatch): """Cache is per (provider, model). A cached entry for mimo-v2.5 must NOT affect a session running on a different model. """ agent = _make_agent(provider="xiaomi", model="mimo-v2.5-pro") agent._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")} monkeypatch.setattr(agent, "_model_supports_vision", lambda: True) out = agent._tool_result_content_for_active_model( "computer_use", self._multimodal_result() ) assert isinstance(out, list) def test_missing_cache_attribute_falls_through(self, monkeypatch): """Tests that build agents via ``object.__new__`` without calling ``__init__`` must not crash — the cache attribute may be absent. """ agent = _make_agent() # Deliberately do not assign _no_list_tool_content_models. monkeypatch.setattr(agent, "_model_supports_vision", lambda: True) out = agent._tool_result_content_for_active_model( "computer_use", self._multimodal_result() ) assert isinstance(out, list) # ─── Classifier ────────────────────────────────────────────────────────────── class TestRecoveryEndToEndClassification: """Lock in that the patterns used by the recovery path classify to the right ``FailoverReason``. (The recovery hook in ``agent.conversation_loop`` consumes this reason directly.) """ def test_xiaomi_mimo_classifies(self): err = _FakeApiError( status_code=400, message=( "Error code: 400 - {'error': {'code': '400', 'message': " "'Param Incorrect', 'param': 'text is not set', 'type': ''}}" ), ) result = classify_api_error(err, provider="xiaomi", model="mimo-v2.5") assert result.reason == FailoverReason.multimodal_tool_content_unsupported assert result.retryable is True def test_alibaba_variant_classifies(self): err = _FakeApiError( status_code=400, message="tool_call.content must be string", ) result = classify_api_error(err, provider="alibaba", model="qwen3.5-plus") assert result.reason == FailoverReason.multimodal_tool_content_unsupported