test: extend test coverage to native image routing

This commit is contained in:
tillfalko 2026-05-21 17:32:40 +02:00 committed by Teknium
parent f8b8dffccf
commit 2402ec5e7b
2 changed files with 201 additions and 27 deletions

View file

@ -250,6 +250,131 @@ class TestBrowserVisionConfig:
assert mock_llm.call_args.kwargs["temperature"] == 0.1
assert mock_llm.call_args.kwargs["timeout"] == 120.0
def test_browser_vision_native_fast_path_returns_multimodal(self, tmp_path):
    """Check that ``browser_vision`` returns a multimodal envelope natively.

    The patched config sets ``model.supports_vision`` and the runtime main is
    ``("brand-new-provider", "llava-v1.6")``.  The call must return a dict
    flagged ``_multimodal`` that carries the screenshot path and the
    annotations, without looking up a vision model or calling ``call_llm``.
    """
    from agent.auxiliary_client import clear_runtime_main, set_runtime_main
    from tools.browser_tool import browser_vision

    screenshot_root, png_path = self._setup_screenshot(tmp_path)
    expected_annotations = [{"id": 1, "label": "Search box"}]

    # Canned values returned by the patched collaborators below.
    browser_reply = {
        "success": True,
        "data": {
            "path": str(png_path),
            "annotations": expected_annotations,
        },
    }
    patched_config = {"model": {"supports_vision": True}}

    set_runtime_main("brand-new-provider", "llava-v1.6")
    try:
        with (
            patch("hermes_constants.get_hermes_dir", return_value=screenshot_root),
            patch("tools.browser_tool._cleanup_old_screenshots"),
            patch("tools.browser_tool._run_browser_command", return_value=browser_reply),
            patch("hermes_cli.config.load_config", return_value=patched_config),
            patch("tools.browser_tool._get_vision_model") as vision_model_lookup,
            patch("tools.browser_tool.call_llm") as aux_llm,
        ):
            envelope = browser_vision(
                "what is on the page?", annotate=True, task_id="test"
            )
    finally:
        # Undo the runtime override set above, even if the call raised.
        clear_runtime_main()

    assert isinstance(envelope, dict)
    assert envelope["_multimodal"] is True

    meta = envelope["meta"]
    assert meta["screenshot_path"] == str(png_path)
    assert meta["annotations"] == expected_annotations

    parts = envelope["content"]
    assert any(part.get("type") == "image_url" for part in parts)
    lead_text = parts[0]["text"]
    assert "what is on the page?" in lead_text
    assert str(png_path) in lead_text
    assert "Screenshot path:" in envelope["text_summary"]

    # Neither the vision-model lookup nor the aux LLM may have been used.
    vision_model_lookup.assert_not_called()
    aux_llm.assert_not_called()
def test_browser_vision_native_mode_without_supports_vision_uses_aux_llm(self, tmp_path):
    """Check that native mode without ``supports_vision`` uses ``call_llm``.

    The patched config only sets ``agent.image_input_mode = "native"`` and the
    runtime main is ``("brand-new-provider", "opaque-model")``.  The call must
    return a JSON string carrying the mocked analysis, and ``call_llm`` must
    be invoked once with the screenshot attached as a base64 PNG data URL.
    """
    from agent.auxiliary_client import clear_runtime_main, set_runtime_main
    from tools.browser_tool import browser_vision

    screenshot_root, png_path = self._setup_screenshot(tmp_path)

    # Fake completion exposing ``choices[0].message.content``.
    first_choice = MagicMock()
    first_choice.message.content = "Fallback screenshot analysis"
    llm_reply = MagicMock()
    llm_reply.choices = [first_choice]

    browser_reply = {"success": True, "data": {"path": str(png_path)}}
    patched_config = {"agent": {"image_input_mode": "native"}}

    set_runtime_main("brand-new-provider", "opaque-model")
    try:
        with (
            patch("hermes_constants.get_hermes_dir", return_value=screenshot_root),
            patch("tools.browser_tool._cleanup_old_screenshots"),
            patch("tools.browser_tool._run_browser_command", return_value=browser_reply),
            patch("hermes_cli.config.load_config", return_value=patched_config),
            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
            patch("tools.browser_tool.call_llm", return_value=llm_reply) as aux_llm,
        ):
            raw = browser_vision("what is on the page?", task_id="test")
            payload = json.loads(raw)
    finally:
        # Undo the runtime override set above, even if the call raised.
        clear_runtime_main()

    assert payload["success"] is True
    assert payload["analysis"] == "Fallback screenshot analysis"
    assert payload["screenshot_path"] == str(png_path)

    aux_llm.assert_called_once()
    sent = aux_llm.call_args.kwargs
    assert sent["task"] == "vision"
    assert sent["model"] == "test-model"

    # The second content part of the first message is the screenshot.
    image_part = sent["messages"][0]["content"][1]
    assert image_part["type"] == "image_url"
    assert image_part["image_url"]["url"].startswith(
        "data:image/png;base64,"
    )
def test_browser_vision_text_mode_blocks_native_fast_path(self, tmp_path):
    """Check that ``image_input_mode = "text"`` still routes to ``call_llm``.

    The patched config sets both ``agent.image_input_mode = "text"`` and
    ``model.supports_vision = True``.  The call must return a JSON string
    carrying the mocked analysis, with ``call_llm`` invoked exactly once.
    """
    from agent.auxiliary_client import clear_runtime_main, set_runtime_main
    from tools.browser_tool import browser_vision

    screenshot_root, png_path = self._setup_screenshot(tmp_path)

    # Fake completion exposing ``choices[0].message.content``.
    first_choice = MagicMock()
    first_choice.message.content = "Text-mode screenshot analysis"
    llm_reply = MagicMock()
    llm_reply.choices = [first_choice]

    browser_reply = {"success": True, "data": {"path": str(png_path)}}
    patched_config = {
        "agent": {"image_input_mode": "text"},
        "model": {"supports_vision": True},
    }

    set_runtime_main("brand-new-provider", "llava-v1.6")
    try:
        with (
            patch("hermes_constants.get_hermes_dir", return_value=screenshot_root),
            patch("tools.browser_tool._cleanup_old_screenshots"),
            patch("tools.browser_tool._run_browser_command", return_value=browser_reply),
            patch("hermes_cli.config.load_config", return_value=patched_config),
            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
            patch("tools.browser_tool.call_llm", return_value=llm_reply) as aux_llm,
        ):
            raw = browser_vision("what is on the page?", task_id="test")
            payload = json.loads(raw)
    finally:
        # Undo the runtime override set above, even if the call raised.
        clear_runtime_main()

    assert payload["success"] is True
    assert payload["analysis"] == "Text-mode screenshot analysis"
    assert payload["screenshot_path"] == str(png_path)
    aux_llm.assert_called_once()
# ── auto-recording config ────────────────────────────────────────────

View file

@ -146,32 +146,35 @@ class TestVisionAnalyzeNative:
class TestHandleVisionAnalyzeFastPath:
"""Verify the dispatcher chooses fast-path vs aux-LLM correctly."""
def test_native_mode_with_supported_transport_uses_fast_path(self, tmp_path):
    """Explicit native mode + known transport returns multimodal.

    The patched config sets ``agent.image_input_mode = "native"`` and the
    runtime main is ``("openrouter", "anthropic/claude-opus-4.6")``.  The
    handler must return a dict flagged ``_multimodal`` and must not call
    ``vision_analyze_tool``.
    """
    img = tmp_path / "x.png"
    img.write_bytes(_TINY_PNG)

    async def _aux_sentinel(*args, **kwargs):
        # Recognisable payload; seeing it means the aux path ran.
        return '{"sentinel": "aux-path"}'

    from agent.auxiliary_client import set_runtime_main, clear_runtime_main

    set_runtime_main("openrouter", "anthropic/claude-opus-4.6")
    try:
        # Use a private loop: asyncio.get_event_loop() is deprecated when no
        # loop is current and raises RuntimeError on Python 3.14+.  The
        # global loop is left untouched.
        loop = asyncio.new_event_loop()
        try:
            with (
                patch(
                    "hermes_cli.config.load_config",
                    return_value={"agent": {"image_input_mode": "native"}},
                ),
                patch(
                    "tools.vision_tools.vision_analyze_tool",
                    side_effect=_aux_sentinel,
                ) as mock_aux,
            ):
                result = loop.run_until_complete(
                    _handle_vision_analyze({"image_url": str(img), "question": "?"})
                )
        finally:
            loop.close()
    finally:
        # Undo the runtime override set above, even if the call raised.
        clear_runtime_main()

    assert isinstance(result, dict), (
        f"Expected multimodal envelope, got {type(result).__name__}: {str(result)[:200]}"
    )
    assert result.get("_multimodal") is True
    mock_aux.assert_not_called()
def test_non_vision_main_model_falls_through_to_aux(self, tmp_path, monkeypatch):
"""Non-vision main model → fast path skipped, aux LLM path attempted."""
def test_native_mode_with_unsupported_transport_falls_through(self, tmp_path):
"""Explicit native mode still respects the transport gate."""
img = tmp_path / "x.png"
img.write_bytes(_TINY_PNG)
@ -179,19 +182,27 @@ class TestHandleVisionAnalyzeFastPath:
return '{"sentinel": "aux-path"}'
from agent.auxiliary_client import set_runtime_main, clear_runtime_main
set_runtime_main("openrouter", "qwen/qwen3-coder")
set_runtime_main("brand-new-provider", "opaque-model")
try:
with patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel):
coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
result = asyncio.get_event_loop().run_until_complete(coro)
with (
patch(
"hermes_cli.config.load_config",
return_value={"agent": {"image_input_mode": "native"}},
),
patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel) as mock_aux,
):
result = asyncio.get_event_loop().run_until_complete(
_handle_vision_analyze({"image_url": str(img), "question": "?"})
)
finally:
clear_runtime_main()
assert not (isinstance(result, dict) and result.get("_multimodal") is True), \
"Fast path fired for non-vision model; should have fallen through to aux LLM"
assert isinstance(result, str)
assert json.loads(result) == {"sentinel": "aux-path"}
mock_aux.assert_called_once()
def test_fast_path_disabled_for_unsupported_provider(self, tmp_path, monkeypatch):
"""Even with vision-capable model, unknown provider → fall through."""
def test_supports_vision_bypasses_transport_gate(self, tmp_path):
"""supports_vision=True enables fast path even on unknown providers."""
img = tmp_path / "x.png"
img.write_bytes(_TINY_PNG)
@ -199,13 +210,51 @@ class TestHandleVisionAnalyzeFastPath:
return '{"sentinel": "aux-path"}'
from agent.auxiliary_client import set_runtime_main, clear_runtime_main
set_runtime_main("brand-new-provider", "anthropic/claude-opus-4.6")
set_runtime_main("brand-new-provider", "llava-v1.6")
try:
with patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel):
coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
result = asyncio.get_event_loop().run_until_complete(coro)
with patch(
"hermes_cli.config.load_config",
return_value={"model": {"supports_vision": True}},
), patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel) as mock_aux:
result = asyncio.get_event_loop().run_until_complete(
_handle_vision_analyze({"image_url": str(img), "question": "?"})
)
finally:
clear_runtime_main()
assert not (isinstance(result, dict) and result.get("_multimodal") is True), \
"Fast path fired for unknown provider; should have fallen through"
assert isinstance(result, dict), (
f"Expected multimodal envelope, got {type(result).__name__}: {str(result)[:200]}"
)
assert result.get("_multimodal") is True
mock_aux.assert_not_called()
def test_text_mode_still_blocks_fast_path_when_supports_vision_true(self, tmp_path):
    """Routing mode wins over supports_vision when text mode was chosen.

    The patched config sets both ``agent.image_input_mode = "text"`` and
    ``model.supports_vision = True``.  The handler must return the string
    produced by the patched ``vision_analyze_tool``, called exactly once.
    """
    img = tmp_path / "x.png"
    img.write_bytes(_TINY_PNG)

    async def _aux_sentinel(*args, **kwargs):
        # Recognisable payload; seeing it means the aux path ran.
        return '{"sentinel": "aux-path"}'

    from agent.auxiliary_client import set_runtime_main, clear_runtime_main

    set_runtime_main("brand-new-provider", "llava-v1.6")
    try:
        # Use a private loop: asyncio.get_event_loop() is deprecated when no
        # loop is current and raises RuntimeError on Python 3.14+.  The
        # global loop is left untouched.
        loop = asyncio.new_event_loop()
        try:
            with (
                patch(
                    "hermes_cli.config.load_config",
                    return_value={
                        "agent": {"image_input_mode": "text"},
                        "model": {"supports_vision": True},
                    },
                ),
                patch(
                    "tools.vision_tools.vision_analyze_tool",
                    side_effect=_aux_sentinel,
                ) as mock_aux,
            ):
                result = loop.run_until_complete(
                    _handle_vision_analyze({"image_url": str(img), "question": "?"})
                )
        finally:
            loop.close()
    finally:
        # Undo the runtime override set above, even if the call raised.
        clear_runtime_main()

    assert isinstance(result, str)
    assert json.loads(result) == {"sentinel": "aux-path"}
    mock_aux.assert_called_once()