mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
fix(computer_use): honor custom vision routing
This commit is contained in:
parent
ffe665277c
commit
591e6fb8f4
6 changed files with 207 additions and 7 deletions
|
|
@@ -2,6 +2,7 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
|
@@ -360,7 +361,9 @@ class TestCaptureResponse:
|
|||
def focus_app(self, app, raise_window=False): ...
|
||||
|
||||
cu_tool.reset_backend_for_tests()
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
|
||||
patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=False):
|
||||
out = cu_tool.handle_computer_use({"action": "capture", "mode": "vision"})
|
||||
|
||||
assert isinstance(out, dict)
|
||||
|
|
@@ -398,7 +401,9 @@ class TestCaptureResponse:
|
|||
def focus_app(self, app, raise_window=False): ...
|
||||
|
||||
cu_tool.reset_backend_for_tests()
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
|
||||
patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=False):
|
||||
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
|
||||
assert isinstance(out, dict)
|
||||
text_part = next(p for p in out["content"] if p.get("type") == "text")
|
||||
|
|
@@ -436,6 +441,7 @@ class TestCaptureResponse:
|
|||
|
||||
return FakeBackend()
|
||||
|
||||
|
||||
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
|
||||
"""Regression for #22865: an Electron-style 600-element AX tree must
|
||||
not emit the entire array verbatim into the tool result.
|
||||
|
|
@@ -582,7 +588,9 @@ class TestCaptureResponse:
|
|||
def focus_app(self, app, raise_window=False): ...
|
||||
|
||||
cu_tool.reset_backend_for_tests()
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
|
||||
patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=False):
|
||||
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
|
||||
|
||||
assert isinstance(out, dict) and out["_multimodal"] is True
|
||||
|
|
@@ -594,6 +602,32 @@ class TestCaptureResponse:
|
|||
assert "truncated to" not in out["text_summary"]
|
||||
|
||||
|
||||
class TestCuaCaptureImageDimensions:
|
||||
def test_png_dimensions_are_sniffed_from_image_bytes(self):
|
||||
from tools.computer_use.cua_backend import _image_dimensions_from_bytes
|
||||
|
||||
raw_png = base64.b64decode(
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42m"
|
||||
"NkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=",
|
||||
validate=False,
|
||||
)
|
||||
assert _image_dimensions_from_bytes(raw_png) == (1, 1)
|
||||
|
||||
def test_jpeg_dimensions_are_sniffed_from_sof_segment(self):
|
||||
from tools.computer_use.cua_backend import _image_dimensions_from_bytes
|
||||
|
||||
raw_jpeg = (
|
||||
b"\xff\xd8" +
|
||||
b"\xff\xe0\x00\x10" + (b"0" * 14)
|
||||
+ b"\xff\xc0\x00\x11\x08"
|
||||
+ b"\x01\x2c" # height: 300
|
||||
+ b"\x01\x90" # width: 400
|
||||
+ b"\x03\x01\x11\x00\x02\x11\x00\x03\x11\x00"
|
||||
+ b"\xff\xd9"
|
||||
)
|
||||
assert _image_dimensions_from_bytes(raw_jpeg) == (400, 300)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anthropic adapter: multimodal tool-result conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@@ -241,6 +241,39 @@ class TestCaptureResponseRoutedToAuxVision:
|
|||
assert observed_path["path"]
|
||||
assert not os.path.exists(observed_path["path"])
|
||||
|
||||
def test_aux_route_creates_missing_cache_dir(self, tmp_path):
|
||||
from tools.computer_use import tool as cu_tool
|
||||
|
||||
cache_dir = tmp_path / "missing" / "cache_vision"
|
||||
cap = _make_capture(mode="som")
|
||||
observed_path = {}
|
||||
|
||||
def _fake_get(*_args, **_kw):
|
||||
return cache_dir
|
||||
|
||||
def _fake_run_async(_coro):
|
||||
return _stub_aux_analysis("description goes here")
|
||||
|
||||
def _fake_vat(image_path, _prompt):
|
||||
observed_path["path"] = image_path
|
||||
assert os.path.exists(image_path)
|
||||
return "<coro>"
|
||||
|
||||
fake_vat = MagicMock(side_effect=_fake_vat)
|
||||
|
||||
with patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=True), \
|
||||
patch("hermes_constants.get_hermes_dir", _fake_get), \
|
||||
patch("model_tools._run_async", side_effect=_fake_run_async), \
|
||||
patch("tools.vision_tools.vision_analyze_tool",
|
||||
new_callable=lambda: fake_vat):
|
||||
resp = cu_tool._capture_response(cap)
|
||||
|
||||
assert isinstance(resp, str)
|
||||
assert cache_dir.is_dir()
|
||||
assert observed_path["path"]
|
||||
assert not os.path.exists(observed_path["path"])
|
||||
|
||||
def test_temp_file_cleaned_up_even_when_aux_call_raises(
|
||||
self, tmp_cache_dir,
|
||||
):
|
||||
|
|
|
|||
|
|
@@ -160,6 +160,42 @@ class TestRouteDecision:
|
|||
"some-aggregator", "some-vision-model", {}
|
||||
) is True
|
||||
|
||||
def test_user_declared_vision_support_keeps_custom_provider_native(self):
|
||||
"""Local/custom VLMs use config as their tool-result image escape hatch."""
|
||||
from tools.computer_use import vision_routing
|
||||
|
||||
cfg = {
|
||||
"model": {
|
||||
"default": "Qwen3.6-35B-A3B-local-vlm",
|
||||
"provider": "omlx",
|
||||
"supports_vision": True,
|
||||
}
|
||||
}
|
||||
with patch.object(vision_routing,
|
||||
"_provider_accepts_multimodal_tool_result",
|
||||
return_value=False):
|
||||
assert vision_routing.should_route_capture_to_aux_vision(
|
||||
"custom", "Qwen3.6-35B-A3B-local-vlm", cfg
|
||||
) is False
|
||||
|
||||
def test_user_declared_no_vision_routes_custom_provider_to_aux(self):
|
||||
"""An explicit false override should not fall through to native routing."""
|
||||
from tools.computer_use import vision_routing
|
||||
|
||||
cfg = {
|
||||
"model": {
|
||||
"default": "local-text-model",
|
||||
"provider": "omlx",
|
||||
"supports_vision": False,
|
||||
}
|
||||
}
|
||||
with patch.object(vision_routing,
|
||||
"_provider_accepts_multimodal_tool_result",
|
||||
return_value=True):
|
||||
assert vision_routing.should_route_capture_to_aux_vision(
|
||||
"custom", "local-text-model", cfg
|
||||
) is True
|
||||
|
||||
def test_unknown_provider_capabilities_fail_closed(self):
|
||||
"""When tool-result lookup returns None, route to aux (safe default)."""
|
||||
from tools.computer_use import vision_routing
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue