fix(computer_use): honor custom vision routing

This commit is contained in:
helix4u 2026-06-03 21:03:31 -06:00 committed by Teknium
parent ffe665277c
commit 591e6fb8f4
6 changed files with 207 additions and 7 deletions

View file

@ -2,6 +2,7 @@
from __future__ import annotations
import base64
import json
import os
import sys
@ -360,7 +361,9 @@ class TestCaptureResponse:
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
patch.object(cu_tool, "_should_route_through_aux_vision",
return_value=False):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "vision"})
assert isinstance(out, dict)
@ -398,7 +401,9 @@ class TestCaptureResponse:
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
patch.object(cu_tool, "_should_route_through_aux_vision",
return_value=False):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
assert isinstance(out, dict)
text_part = next(p for p in out["content"] if p.get("type") == "text")
@ -436,6 +441,7 @@ class TestCaptureResponse:
return FakeBackend()
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
"""Regression for #22865: an Electron-style 600-element AX tree must
not emit the entire array verbatim into the tool result.
@ -582,7 +588,9 @@ class TestCaptureResponse:
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
patch.object(cu_tool, "_should_route_through_aux_vision",
return_value=False):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
assert isinstance(out, dict) and out["_multimodal"] is True
@ -594,6 +602,32 @@ class TestCaptureResponse:
assert "truncated to" not in out["text_summary"]
class TestCuaCaptureImageDimensions:
def test_png_dimensions_are_sniffed_from_image_bytes(self):
from tools.computer_use.cua_backend import _image_dimensions_from_bytes
raw_png = base64.b64decode(
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42m"
"NkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=",
validate=False,
)
assert _image_dimensions_from_bytes(raw_png) == (1, 1)
def test_jpeg_dimensions_are_sniffed_from_sof_segment(self):
from tools.computer_use.cua_backend import _image_dimensions_from_bytes
raw_jpeg = (
b"\xff\xd8" +
b"\xff\xe0\x00\x10" + (b"0" * 14)
+ b"\xff\xc0\x00\x11\x08"
+ b"\x01\x2c" # height: 300
+ b"\x01\x90" # width: 400
+ b"\x03\x01\x11\x00\x02\x11\x00\x03\x11\x00"
+ b"\xff\xd9"
)
assert _image_dimensions_from_bytes(raw_jpeg) == (400, 300)
# ---------------------------------------------------------------------------
# Anthropic adapter: multimodal tool-result conversion
# ---------------------------------------------------------------------------