mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
test: extend test coverage to native image routing
This commit is contained in:
parent
f8b8dffccf
commit
2402ec5e7b
2 changed files with 201 additions and 27 deletions
|
|
@ -250,6 +250,131 @@ class TestBrowserVisionConfig:
|
|||
assert mock_llm.call_args.kwargs["temperature"] == 0.1
|
||||
assert mock_llm.call_args.kwargs["timeout"] == 120.0
|
||||
|
||||
def test_browser_vision_native_fast_path_returns_multimodal(self, tmp_path):
    """With ``model.supports_vision`` set, browser_vision returns a multimodal dict.

    The result must carry the screenshot path and annotations in ``meta``,
    include an ``image_url`` content part, and must be produced without
    looking up a vision model or calling the auxiliary LLM.
    """
    from agent.auxiliary_client import clear_runtime_main, set_runtime_main
    from tools.browser_tool import browser_vision

    shots_dir, screenshot = self._setup_screenshot(tmp_path)
    annotations = [{"id": 1, "label": "Search box"}]

    # Canned reply for the patched browser command.
    browser_reply = {
        "success": True,
        "data": {
            "path": str(screenshot),
            "annotations": annotations,
        },
    }
    fake_config = {"model": {"supports_vision": True}}

    set_runtime_main("brand-new-provider", "llava-v1.6")
    try:
        with (
            patch("hermes_constants.get_hermes_dir", return_value=shots_dir),
            patch("tools.browser_tool._cleanup_old_screenshots"),
            patch("tools.browser_tool._run_browser_command", return_value=browser_reply),
            patch("hermes_cli.config.load_config", return_value=fake_config),
            patch("tools.browser_tool._get_vision_model") as vision_model_lookup,
            patch("tools.browser_tool.call_llm") as aux_llm,
        ):
            result = browser_vision(
                "what is on the page?", annotate=True, task_id="test"
            )
    finally:
        # Always reset the runtime override, even when the call raises.
        clear_runtime_main()

    # Envelope shape.
    assert isinstance(result, dict)
    assert result["_multimodal"] is True

    # Metadata echoes what the browser command reported.
    assert result["meta"]["screenshot_path"] == str(screenshot)
    assert result["meta"]["annotations"] == annotations

    # Content carries the image plus a text part with question and path.
    assert any(part.get("type") == "image_url" for part in result["content"])
    assert "what is on the page?" in result["content"][0]["text"]
    assert str(screenshot) in result["content"][0]["text"]
    assert "Screenshot path:" in result["text_summary"]

    # The auxiliary vision path must not have been touched.
    vision_model_lookup.assert_not_called()
    aux_llm.assert_not_called()
|
||||
|
||||
def test_browser_vision_native_mode_without_supports_vision_uses_aux_llm(self, tmp_path):
    """Native image mode alone (no ``supports_vision``) still uses the aux LLM.

    Expects a JSON string result whose analysis comes from the patched
    ``call_llm``, and checks that the call was a vision task carrying the
    screenshot as a base64 PNG data URL.
    """
    from agent.auxiliary_client import clear_runtime_main, set_runtime_main
    from tools.browser_tool import browser_vision

    shots_dir, screenshot = self._setup_screenshot(tmp_path)

    # Minimal chat-completion-shaped reply for the patched call_llm.
    choice = MagicMock()
    choice.message.content = "Fallback screenshot analysis"
    llm_reply = MagicMock()
    llm_reply.choices = [choice]

    browser_reply = {"success": True, "data": {"path": str(screenshot)}}
    fake_config = {"agent": {"image_input_mode": "native"}}

    set_runtime_main("brand-new-provider", "opaque-model")
    try:
        with (
            patch("hermes_constants.get_hermes_dir", return_value=shots_dir),
            patch("tools.browser_tool._cleanup_old_screenshots"),
            patch("tools.browser_tool._run_browser_command", return_value=browser_reply),
            patch("hermes_cli.config.load_config", return_value=fake_config),
            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
            patch("tools.browser_tool.call_llm", return_value=llm_reply) as mock_llm,
        ):
            raw = browser_vision("what is on the page?", task_id="test")
            result = json.loads(raw)
    finally:
        # Always reset the runtime override, even when the call raises.
        clear_runtime_main()

    assert result["success"] is True
    assert result["analysis"] == "Fallback screenshot analysis"
    assert result["screenshot_path"] == str(screenshot)

    mock_llm.assert_called_once()
    llm_kwargs = mock_llm.call_args.kwargs
    assert llm_kwargs["task"] == "vision"
    assert llm_kwargs["model"] == "test-model"

    # Second content part of the first message is the inlined screenshot.
    image_part = llm_kwargs["messages"][0]["content"][1]
    assert image_part["type"] == "image_url"
    assert image_part["image_url"]["url"].startswith("data:image/png;base64,")
|
||||
|
||||
def test_browser_vision_text_mode_blocks_native_fast_path(self, tmp_path):
    """``image_input_mode: text`` routes to the aux LLM despite ``supports_vision``.

    Expects a JSON string result whose analysis comes from the patched
    ``call_llm``, which must have been called exactly once.
    """
    from agent.auxiliary_client import clear_runtime_main, set_runtime_main
    from tools.browser_tool import browser_vision

    shots_dir, screenshot = self._setup_screenshot(tmp_path)

    # Minimal chat-completion-shaped reply for the patched call_llm.
    choice = MagicMock()
    choice.message.content = "Text-mode screenshot analysis"
    llm_reply = MagicMock()
    llm_reply.choices = [choice]

    browser_reply = {"success": True, "data": {"path": str(screenshot)}}
    # Both knobs set: text routing mode and a vision-capable model flag.
    fake_config = {
        "agent": {"image_input_mode": "text"},
        "model": {"supports_vision": True},
    }

    set_runtime_main("brand-new-provider", "llava-v1.6")
    try:
        with (
            patch("hermes_constants.get_hermes_dir", return_value=shots_dir),
            patch("tools.browser_tool._cleanup_old_screenshots"),
            patch("tools.browser_tool._run_browser_command", return_value=browser_reply),
            patch("hermes_cli.config.load_config", return_value=fake_config),
            patch("tools.browser_tool._get_vision_model", return_value="test-model"),
            patch("tools.browser_tool.call_llm", return_value=llm_reply) as mock_llm,
        ):
            raw = browser_vision("what is on the page?", task_id="test")
            result = json.loads(raw)
    finally:
        # Always reset the runtime override, even when the call raises.
        clear_runtime_main()

    assert result["success"] is True
    assert result["analysis"] == "Text-mode screenshot analysis"
    assert result["screenshot_path"] == str(screenshot)
    mock_llm.assert_called_once()
|
||||
|
||||
|
||||
# ── auto-recording config ────────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -146,32 +146,35 @@ class TestVisionAnalyzeNative:
|
|||
class TestHandleVisionAnalyzeFastPath:
|
||||
"""Verify the dispatcher chooses fast-path vs aux-LLM correctly."""
|
||||
|
||||
def test_vision_capable_main_model_uses_fast_path(self, tmp_path, monkeypatch):
|
||||
"""Main model supports native vision → fast path returns multimodal."""
|
||||
def test_native_mode_with_supported_transport_uses_fast_path(self, tmp_path):
|
||||
"""Explicit native mode + known transport returns multimodal."""
|
||||
img = tmp_path / "x.png"
|
||||
img.write_bytes(_TINY_PNG)
|
||||
|
||||
# Set runtime override so the handler thinks we're on opus@openrouter
|
||||
async def _aux_sentinel(*args, **kwargs):
|
||||
return '{"sentinel": "aux-path"}'
|
||||
|
||||
from agent.auxiliary_client import set_runtime_main, clear_runtime_main
|
||||
set_runtime_main("openrouter", "anthropic/claude-opus-4.6")
|
||||
try:
|
||||
# Mock decide_image_input_mode to always return "native" so the
|
||||
# fast path fires regardless of model-catalog state in CI.
|
||||
with patch(
|
||||
"agent.image_routing.decide_image_input_mode",
|
||||
return_value="native",
|
||||
):
|
||||
coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
|
||||
result = asyncio.get_event_loop().run_until_complete(coro)
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": {"image_input_mode": "native"}},
|
||||
), patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel) as mock_aux:
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
_handle_vision_analyze({"image_url": str(img), "question": "?"})
|
||||
)
|
||||
finally:
|
||||
clear_runtime_main()
|
||||
|
||||
assert isinstance(result, dict), \
|
||||
assert isinstance(result, dict), (
|
||||
f"Expected multimodal envelope, got {type(result).__name__}: {str(result)[:200]}"
|
||||
)
|
||||
assert result.get("_multimodal") is True
|
||||
mock_aux.assert_not_called()
|
||||
|
||||
def test_non_vision_main_model_falls_through_to_aux(self, tmp_path, monkeypatch):
|
||||
"""Non-vision main model → fast path skipped, aux LLM path attempted."""
|
||||
def test_native_mode_with_unsupported_transport_falls_through(self, tmp_path):
|
||||
"""Explicit native mode still respects the transport gate."""
|
||||
img = tmp_path / "x.png"
|
||||
img.write_bytes(_TINY_PNG)
|
||||
|
||||
|
|
@ -179,19 +182,27 @@ class TestHandleVisionAnalyzeFastPath:
|
|||
return '{"sentinel": "aux-path"}'
|
||||
|
||||
from agent.auxiliary_client import set_runtime_main, clear_runtime_main
|
||||
set_runtime_main("openrouter", "qwen/qwen3-coder")
|
||||
set_runtime_main("brand-new-provider", "opaque-model")
|
||||
try:
|
||||
with patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel):
|
||||
coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
|
||||
result = asyncio.get_event_loop().run_until_complete(coro)
|
||||
with (
|
||||
patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": {"image_input_mode": "native"}},
|
||||
),
|
||||
patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel) as mock_aux,
|
||||
):
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
_handle_vision_analyze({"image_url": str(img), "question": "?"})
|
||||
)
|
||||
finally:
|
||||
clear_runtime_main()
|
||||
|
||||
assert not (isinstance(result, dict) and result.get("_multimodal") is True), \
|
||||
"Fast path fired for non-vision model; should have fallen through to aux LLM"
|
||||
assert isinstance(result, str)
|
||||
assert json.loads(result) == {"sentinel": "aux-path"}
|
||||
mock_aux.assert_called_once()
|
||||
|
||||
def test_fast_path_disabled_for_unsupported_provider(self, tmp_path, monkeypatch):
|
||||
"""Even with vision-capable model, unknown provider → fall through."""
|
||||
def test_supports_vision_bypasses_transport_gate(self, tmp_path):
|
||||
"""supports_vision=True enables fast path even on unknown providers."""
|
||||
img = tmp_path / "x.png"
|
||||
img.write_bytes(_TINY_PNG)
|
||||
|
||||
|
|
@ -199,13 +210,51 @@ class TestHandleVisionAnalyzeFastPath:
|
|||
return '{"sentinel": "aux-path"}'
|
||||
|
||||
from agent.auxiliary_client import set_runtime_main, clear_runtime_main
|
||||
set_runtime_main("brand-new-provider", "anthropic/claude-opus-4.6")
|
||||
set_runtime_main("brand-new-provider", "llava-v1.6")
|
||||
try:
|
||||
with patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel):
|
||||
coro = _handle_vision_analyze({"image_url": str(img), "question": "?"})
|
||||
result = asyncio.get_event_loop().run_until_complete(coro)
|
||||
with patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"model": {"supports_vision": True}},
|
||||
), patch("tools.vision_tools.vision_analyze_tool", side_effect=_aux_sentinel) as mock_aux:
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
_handle_vision_analyze({"image_url": str(img), "question": "?"})
|
||||
)
|
||||
finally:
|
||||
clear_runtime_main()
|
||||
|
||||
assert not (isinstance(result, dict) and result.get("_multimodal") is True), \
|
||||
"Fast path fired for unknown provider; should have fallen through"
|
||||
assert isinstance(result, dict), (
|
||||
f"Expected multimodal envelope, got {type(result).__name__}: {str(result)[:200]}"
|
||||
)
|
||||
assert result.get("_multimodal") is True
|
||||
mock_aux.assert_not_called()
|
||||
|
||||
def test_text_mode_still_blocks_fast_path_when_supports_vision_true(self, tmp_path):
    """Text routing mode takes precedence over ``supports_vision=True``.

    The handler must return the patched aux tool's JSON string rather than
    a multimodal dict, and the aux tool must be called exactly once.
    """
    from agent.auxiliary_client import set_runtime_main, clear_runtime_main

    image_path = tmp_path / "x.png"
    image_path.write_bytes(_TINY_PNG)

    async def _fake_aux_tool(*args, **kwargs):
        # Recognisable payload proving the aux path produced the result.
        return '{"sentinel": "aux-path"}'

    fake_config = {
        "agent": {"image_input_mode": "text"},
        "model": {"supports_vision": True},
    }
    request = {"image_url": str(image_path), "question": "?"}

    set_runtime_main("brand-new-provider", "llava-v1.6")
    try:
        with (
            patch("hermes_cli.config.load_config", return_value=fake_config),
            patch("tools.vision_tools.vision_analyze_tool", side_effect=_fake_aux_tool) as mock_aux,
        ):
            loop = asyncio.get_event_loop()
            result = loop.run_until_complete(_handle_vision_analyze(request))
    finally:
        # Always reset the runtime override, even when the handler raises.
        clear_runtime_main()

    assert isinstance(result, str)
    assert json.loads(result) == {"sentinel": "aux-path"}
    mock_aux.assert_called_once()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue