fix(computer_use): honor custom vision routing

This commit is contained in:
helix4u 2026-06-03 21:03:31 -06:00 committed by Teknium
parent ffe665277c
commit 591e6fb8f4
6 changed files with 207 additions and 7 deletions

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import base64
import json
import os
import sys
@ -360,7 +361,9 @@ class TestCaptureResponse:
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
patch.object(cu_tool, "_should_route_through_aux_vision",
return_value=False):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "vision"})
assert isinstance(out, dict)
@ -398,7 +401,9 @@ class TestCaptureResponse:
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
patch.object(cu_tool, "_should_route_through_aux_vision",
return_value=False):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
assert isinstance(out, dict)
text_part = next(p for p in out["content"] if p.get("type") == "text")
@ -436,6 +441,7 @@ class TestCaptureResponse:
return FakeBackend()
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
"""Regression for #22865: an Electron-style 600-element AX tree must
not emit the entire array verbatim into the tool result.
@ -582,7 +588,9 @@ class TestCaptureResponse:
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
patch.object(cu_tool, "_should_route_through_aux_vision",
return_value=False):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
assert isinstance(out, dict) and out["_multimodal"] is True
@ -594,6 +602,32 @@ class TestCaptureResponse:
assert "truncated to" not in out["text_summary"]
class TestCuaCaptureImageDimensions:
    """Exercise ``_image_dimensions_from_bytes`` against tiny in-memory images."""

    def test_png_dimensions_are_sniffed_from_image_bytes(self):
        """A base64-embedded PNG fixture is expected to report ``(1, 1)``."""
        from tools.computer_use.cua_backend import _image_dimensions_from_bytes

        encoded_png = (
            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42m"
            "NkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
        )
        png_bytes = base64.b64decode(encoded_png, validate=False)

        dimensions = _image_dimensions_from_bytes(png_bytes)
        assert dimensions == (1, 1)

    def test_jpeg_dimensions_are_sniffed_from_sof_segment(self):
        """A hand-assembled JPEG is expected to report ``(width, height)``.

        The frame header stores height before width, while the helper is
        expected to return them as ``(400, 300)``.
        """
        from tools.computer_use.cua_backend import _image_dimensions_from_bytes

        jpeg_segments = [
            b"\xff\xd8",                            # start-of-image marker
            b"\xff\xe0\x00\x10" + (b"0" * 14),      # APP0 segment, filler payload
            b"\xff\xc0\x00\x11\x08",                # SOF0 marker, length, precision
            b"\x01\x2c",                            # height: 300
            b"\x01\x90",                            # width: 400
            b"\x03\x01\x11\x00\x02\x11\x00\x03\x11\x00",  # component table
            b"\xff\xd9",                            # end-of-image marker
        ]
        jpeg_bytes = b"".join(jpeg_segments)

        dimensions = _image_dimensions_from_bytes(jpeg_bytes)
        assert dimensions == (400, 300)
# ---------------------------------------------------------------------------
# Anthropic adapter: multimodal tool-result conversion
# ---------------------------------------------------------------------------

View file

@ -241,6 +241,39 @@ class TestCaptureResponseRoutedToAuxVision:
assert observed_path["path"]
assert not os.path.exists(observed_path["path"])
def test_aux_route_creates_missing_cache_dir(self, tmp_path):
    """Aux-vision routing must work when the cache directory does not exist.

    The directory lookup is patched to return a path under ``tmp_path`` that
    has not been created. The test then checks that the response is a string,
    that the directory now exists, and that the image handed to the vision
    tool existed during the call but is gone afterwards.
    """
    from tools.computer_use import tool as cu_tool

    missing_cache = tmp_path / "missing" / "cache_vision"
    capture = _make_capture(mode="som")
    seen = {}

    def _return_missing_cache(*_args, **_kw):
        # Stands in for hermes_constants.get_hermes_dir.
        return missing_cache

    def _resolve_stub_analysis(_coro):
        # Stands in for model_tools._run_async; the coroutine is ignored.
        return _stub_aux_analysis("description goes here")

    def _record_image_path(image_path, _prompt):
        # Remember the path so it can be checked after the call, and verify
        # the file is present at the moment the vision tool is invoked.
        seen["path"] = image_path
        assert os.path.exists(image_path)
        return "<coro>"

    vision_mock = MagicMock(side_effect=_record_image_path)

    with patch.object(cu_tool, "_should_route_through_aux_vision",
                      return_value=True), \
            patch("hermes_constants.get_hermes_dir", _return_missing_cache), \
            patch("model_tools._run_async", side_effect=_resolve_stub_analysis), \
            patch("tools.vision_tools.vision_analyze_tool",
                  new_callable=lambda: vision_mock):
        result = cu_tool._capture_response(capture)

    assert isinstance(result, str)
    assert missing_cache.is_dir()
    assert seen["path"]
    assert not os.path.exists(seen["path"])
def test_temp_file_cleaned_up_even_when_aux_call_raises(
self, tmp_cache_dir,
):

View file

@ -160,6 +160,42 @@ class TestRouteDecision:
"some-aggregator", "some-vision-model", {}
) is True
def test_user_declared_vision_support_keeps_custom_provider_native(self):
    """``supports_vision: True`` in config keeps a custom provider off aux.

    The provider capability lookup is patched to return ``False``; the
    routing decision is still expected to be ``False``.
    """
    from tools.computer_use import vision_routing

    model_name = "Qwen3.6-35B-A3B-local-vlm"
    model_section = {
        "default": model_name,
        "provider": "omlx",
        "supports_vision": True,
    }
    config = {"model": model_section}

    with patch.object(vision_routing,
                      "_provider_accepts_multimodal_tool_result",
                      return_value=False):
        routed_to_aux = vision_routing.should_route_capture_to_aux_vision(
            "custom", model_name, config
        )

    assert routed_to_aux is False
def test_user_declared_no_vision_routes_custom_provider_to_aux(self):
    """``supports_vision: False`` in config sends a custom provider to aux.

    The provider capability lookup is patched to return ``True``; the
    routing decision is still expected to be ``True``.
    """
    from tools.computer_use import vision_routing

    model_name = "local-text-model"
    model_section = {
        "default": model_name,
        "provider": "omlx",
        "supports_vision": False,
    }
    config = {"model": model_section}

    with patch.object(vision_routing,
                      "_provider_accepts_multimodal_tool_result",
                      return_value=True):
        routed_to_aux = vision_routing.should_route_capture_to_aux_vision(
            "custom", model_name, config
        )

    assert routed_to_aux is True
def test_unknown_provider_capabilities_fail_closed(self):
"""When tool-result lookup returns None, route to aux (safe default)."""
from tools.computer_use import vision_routing