hermes-agent/tests/run_agent/test_multimodal_tool_content_recovery.py
kshitijk4poor 66827f8947 chore: prune unused imports and duplicate import redefinitions
Remove unused imports (F401) and duplicate/shadowed import
redefinitions (F811) across the codebase using ruff's safe
autofixes. No behavioral changes -- imports only.

- ~1400 safe autofixes applied across 644 files (net -1072 lines)
- __init__.py re-exports preserved (excluded from F401 removal so
  public re-export surfaces stay intact)
- Re-exports that are imported or monkeypatched by tests but look
  unused in their defining module are kept with explicit # noqa:
  F401 (gateway/run.py load_dotenv; run_agent re-exports from
  agent.message_sanitization, agent.context_compressor,
  agent.retry_utils, agent.prompt_builder, agent.process_bootstrap,
  agent.codex_responses_adapter)
- Unsafe F841 (unused-variable) fixes deliberately skipped -- those
  can change behavior when the RHS has side effects
- ruff lints remain disabled in pyproject.toml (only PLW1514 is
  selected); this is a one-time cleanup, not a config change

Verification:
- python -m compileall: clean
- pytest --collect-only: all 27161 tests collect (zero import errors)
- core entry points import clean (run_agent, model_tools, cli,
  toolsets, hermes_state, batch_runner, gateway)
- static scan: every name any test imports directly from an edited
  module still resolves
2026-05-28 22:26:25 -07:00

259 lines
11 KiB
Python

"""Tests for reactive multimodal-tool-content recovery.
Covers the full chain for providers that reject list-type content in
``role: "tool"`` messages (Xiaomi MiMo's 400 "text is not set", etc.):
1. agent/error_classifier.py: 400 with the right wording classifies as
``FailoverReason.multimodal_tool_content_unsupported``.
2. run_agent._try_strip_image_parts_from_tool_messages downgrades tool
messages whose ``content`` is a list-with-image to a string text
summary, in-place, and records the active (provider, model) in
``self._no_list_tool_content_models`` so future tool results in this
session preemptively downgrade.
3. run_agent._tool_result_content_for_active_model short-circuits to a
text summary when the (provider, model) is in the cache, even though
``_model_supports_vision`` returns True — avoiding a wasted round
trip on every subsequent screenshot in the session.
The end-to-end retry loop wiring (``conversation_loop.py``) is exercised by
the classifier signal + helper-mutation tests; the integration only adds
a trivial flag-and-continue around the existing pattern used for
``image_too_large`` recovery.
See: https://github.com/NousResearch/hermes-agent/issues/27344
"""
from __future__ import annotations
from agent.error_classifier import FailoverReason, classify_api_error
class _FakeApiError(Exception):
"""Stand-in for an openai.BadRequestError with status_code + body."""
def __init__(self, status_code: int, message: str, body: dict | None = None):
super().__init__(message)
self.status_code = status_code
self.body = body or {"error": {"message": message}}
self.response = None
def _make_agent(provider: str = "xiaomi", model: str = "mimo-v2.5"):
    """Return an ``AIAgent`` shell suitable for method-level tests.

    The instance is allocated with ``object.__new__`` so ``__init__`` is
    never run (no provider setup); only ``provider`` and ``model`` are
    assigned on it.
    """
    # Imported lazily, inside the helper, exactly as the tests expect.
    from run_agent import AIAgent

    stub = object.__new__(AIAgent)
    stub.provider, stub.model = provider, model
    return stub
# ─── Strip helper ────────────────────────────────────────────────────────────
class TestStripImagePartsHelper:
    """Tests for ``AIAgent._try_strip_image_parts_from_tool_messages``."""

    # ── message-building helpers (not collected by pytest) ──────────────

    @staticmethod
    def _text(text):
        """Build a ``text`` content part."""
        return {"type": "text", "text": text}

    @staticmethod
    def _image(url):
        """Build an ``image_url`` content part pointing at *url*."""
        return {"type": "image_url", "image_url": {"url": url}}

    @staticmethod
    def _tool(content):
        """Build a ``role: "tool"`` message carrying *content*."""
        return {"role": "tool", "tool_call_id": "x", "content": content}

    # ── tests ───────────────────────────────────────────────────────────

    def test_no_messages_returns_false(self):
        strip = _make_agent()._try_strip_image_parts_from_tool_messages
        # Both an empty list and ``None`` must report "nothing changed".
        for empty in ([], None):
            assert strip(empty) is False

    def test_no_tool_messages_returns_false(self):
        history = [
            {"role": "user", "content": "plain text"},
            {"role": "assistant", "content": "ack"},
        ]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is False

    def test_tool_message_with_string_content_unchanged(self):
        history = [self._tool("plain string result")]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is False
        assert history[0]["content"] == "plain string result"

    def test_tool_message_list_without_image_unchanged(self):
        """A text-only list is not rewritten; the caller is expected to
        surface the original error if the provider rejects it as well."""
        history = [self._tool([self._text("hello")])]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is False

    def test_tool_message_list_with_image_downgrades(self):
        history = [
            self._tool([
                self._text("AX summary: 5 buttons visible"),
                self._image("data:image/png;base64,iVBOR..."),
            ]),
        ]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is True
        # The text survives as a plain string; no trace of the image remains.
        flattened = history[0]["content"]
        assert isinstance(flattened, str)
        assert "AX summary" in flattened
        for leaked in ("image_url", "iVBOR"):
            assert leaked not in flattened

    def test_tool_message_image_only_gets_placeholder(self):
        """With nothing but image parts in the list, a placeholder string
        is left behind so the assistant message has something to refer to."""
        history = [self._tool([self._image("data:image/png;base64,iVBOR...")])]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is True
        flattened = history[0]["content"]
        assert isinstance(flattened, str)
        assert "image content removed" in flattened

    def test_records_provider_model_in_session_cache(self):
        subject = _make_agent(provider="xiaomi", model="mimo-v2.5")
        history = [
            self._tool([
                self._text("summary"),
                self._image("data:image/png;base64,X"),
            ]),
        ]
        subject._try_strip_image_parts_from_tool_messages(history)
        assert ("xiaomi", "mimo-v2.5") in subject._no_list_tool_content_models

    def test_only_tool_messages_get_downgraded(self):
        """List-type content on user / assistant messages is out of scope
        here — the existing image-routing path deals with those."""
        history = [
            {
                "role": "user",
                "content": [
                    self._text("describe"),
                    self._image("data:image/png;base64,X"),
                ],
            },
            self._tool([
                self._text("summary"),
                self._image("data:image/png;base64,Y"),
            ]),
        ]
        _make_agent()._try_strip_image_parts_from_tool_messages(history)

        # The user message keeps its list content, image part included.
        user_content = history[0]["content"]
        assert isinstance(user_content, list)
        part_types = {part.get("type") for part in user_content}
        assert "image_url" in part_types

        # The tool message has been flattened to text.
        tool_content = history[1]["content"]
        assert isinstance(tool_content, str)
        assert "summary" in tool_content

    def test_skips_recording_when_no_model_id(self):
        """An unset provider/model (e.g. lazy-initialised mid-handshake)
        must not add an empty key to the cache."""
        subject = _make_agent(provider="", model="")
        history = [
            self._tool([
                self._text("summary"),
                self._image("data:image/png;base64,X"),
            ]),
        ]
        subject._try_strip_image_parts_from_tool_messages(history)
        assert subject._no_list_tool_content_models == set()
# ─── Short-circuit on cached models ──────────────────────────────────────────
class TestToolResultContentShortCircuit:
    """Tests for the cached short-circuit in
    ``_tool_result_content_for_active_model``.

    When the active (provider, model) pair is present in
    ``_no_list_tool_content_models``, the method is expected to hand back
    a text summary even though ``_model_supports_vision`` reports True.
    """

    def _multimodal_result(self, png_b64: str = "iVBORw0KGgoAAAA"):
        """Build a multimodal tool result embedding *png_b64* as a data URL."""
        summary = "capture mode=som 800x600 app=Safari"
        screenshot = {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{png_b64}"},
        }
        return {
            "_multimodal": True,
            "content": [{"type": "text", "text": summary}, screenshot],
            "text_summary": summary,
            "meta": {
                "mode": "som",
                "width": 800,
                "height": 600,
                "elements": 5,
                "png_bytes": 1024,
            },
        }

    def _render(self, subject, monkeypatch):
        """Force vision support on *subject* and render a computer_use result."""
        monkeypatch.setattr(subject, "_model_supports_vision", lambda: True)
        return subject._tool_result_content_for_active_model(
            "computer_use", self._multimodal_result()
        )

    def test_returns_list_when_cache_empty_and_vision_supported(self, monkeypatch):
        subject = _make_agent(provider="xiaomi", model="mimo-v2.5")
        subject._no_list_tool_content_models = set()  # explicit empty
        rendered = self._render(subject, monkeypatch)
        # Native multimodal path: the content parts list comes back.
        assert isinstance(rendered, list)
        assert any(part.get("type") == "image_url" for part in rendered)

    def test_returns_text_summary_when_model_in_cache(self, monkeypatch):
        subject = _make_agent(provider="xiaomi", model="mimo-v2.5")
        subject._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
        rendered = self._render(subject, monkeypatch)
        # Short-circuit: plain string summary with no image payload in it.
        assert isinstance(rendered, str)
        for forbidden in ("data:image", "image_url"):
            assert forbidden not in rendered

    def test_cache_miss_on_different_model(self, monkeypatch):
        """The cache is keyed per (provider, model): an entry for
        mimo-v2.5 must NOT influence a session on a different model.
        """
        subject = _make_agent(provider="xiaomi", model="mimo-v2.5-pro")
        subject._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
        rendered = self._render(subject, monkeypatch)
        assert isinstance(rendered, list)

    def test_missing_cache_attribute_falls_through(self, monkeypatch):
        """Agents built via ``object.__new__`` (no ``__init__``) may lack
        the cache attribute entirely; that must not crash.
        """
        # _no_list_tool_content_models is intentionally left unassigned.
        subject = _make_agent()
        rendered = self._render(subject, monkeypatch)
        assert isinstance(rendered, list)
# ─── Classifier ──────────────────────────────────────────────────────────────
class TestRecoveryEndToEndClassification:
    """Pin the error wordings the recovery path relies on to the expected
    ``FailoverReason``. (The recovery hook in ``agent.conversation_loop``
    consumes this reason directly.)
    """

    @staticmethod
    def _classify(message, *, provider, model):
        """Classify a fake HTTP 400 carrying *message* for the given target."""
        failure = _FakeApiError(status_code=400, message=message)
        return classify_api_error(failure, provider=provider, model=model)

    def test_xiaomi_mimo_classifies(self):
        wording = (
            "Error code: 400 - {'error': {'code': '400', 'message': "
            "'Param Incorrect', 'param': 'text is not set', 'type': ''}}"
        )
        verdict = self._classify(wording, provider="xiaomi", model="mimo-v2.5")
        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
        assert verdict.retryable is True

    def test_alibaba_variant_classifies(self):
        verdict = self._classify(
            "tool_call.content must be string",
            provider="alibaba",
            model="qwen3.5-plus",
        )
        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported