mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
fix(computer_use): honor custom vision routing
This commit is contained in:
parent
ffe665277c
commit
591e6fb8f4
6 changed files with 207 additions and 7 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
|
@ -360,7 +361,9 @@ class TestCaptureResponse:
|
|||
def focus_app(self, app, raise_window=False): ...
|
||||
|
||||
cu_tool.reset_backend_for_tests()
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
|
||||
patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=False):
|
||||
out = cu_tool.handle_computer_use({"action": "capture", "mode": "vision"})
|
||||
|
||||
assert isinstance(out, dict)
|
||||
|
|
@ -398,7 +401,9 @@ class TestCaptureResponse:
|
|||
def focus_app(self, app, raise_window=False): ...
|
||||
|
||||
cu_tool.reset_backend_for_tests()
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
|
||||
patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=False):
|
||||
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
|
||||
assert isinstance(out, dict)
|
||||
text_part = next(p for p in out["content"] if p.get("type") == "text")
|
||||
|
|
@ -436,6 +441,7 @@ class TestCaptureResponse:
|
|||
|
||||
return FakeBackend()
|
||||
|
||||
|
||||
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
|
||||
"""Regression for #22865: an Electron-style 600-element AX tree must
|
||||
not emit the entire array verbatim into the tool result.
|
||||
|
|
@ -582,7 +588,9 @@ class TestCaptureResponse:
|
|||
def focus_app(self, app, raise_window=False): ...
|
||||
|
||||
cu_tool.reset_backend_for_tests()
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
|
||||
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()), \
|
||||
patch.object(cu_tool, "_should_route_through_aux_vision",
|
||||
return_value=False):
|
||||
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
|
||||
|
||||
assert isinstance(out, dict) and out["_multimodal"] is True
|
||||
|
|
@ -594,6 +602,32 @@ class TestCaptureResponse:
|
|||
assert "truncated to" not in out["text_summary"]
|
||||
|
||||
|
||||
class TestCuaCaptureImageDimensions:
    """Exercise ``_image_dimensions_from_bytes`` with hand-built image headers.

    The expected tuples below are ordered ``(width, height)``, as shown by the
    JPEG case where the encoded height is 300 and the encoded width is 400.
    """

    def test_png_dimensions_are_sniffed_from_image_bytes(self):
        """A tiny base64-encoded PNG is expected to report ``(1, 1)``."""
        from tools.computer_use.cua_backend import _image_dimensions_from_bytes

        encoded_png = (
            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42m"
            "NkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
        )
        png_bytes = base64.b64decode(encoded_png, validate=False)

        assert _image_dimensions_from_bytes(png_bytes) == (1, 1)

    def test_jpeg_dimensions_are_sniffed_from_sof_segment(self):
        """A minimal JPEG byte stream is expected to report ``(400, 300)``."""
        from tools.computer_use.cua_backend import _image_dimensions_from_bytes

        # Assemble the stream piece by piece so each segment stays readable.
        jpeg_segments = [
            b"\xff\xd8",                                   # start-of-image marker
            b"\xff\xe0\x00\x10",                           # APP0 marker + length 16
            b"0" * 14,                                     # APP0 filler payload
            b"\xff\xc0\x00\x11\x08",                       # SOF0 marker, length 17, precision 8
            b"\x01\x2c",                                   # height: 300
            b"\x01\x90",                                   # width: 400
            b"\x03\x01\x11\x00\x02\x11\x00\x03\x11\x00",   # 3 component descriptors
            b"\xff\xd9",                                   # end-of-image marker
        ]
        jpeg_bytes = b"".join(jpeg_segments)

        assert _image_dimensions_from_bytes(jpeg_bytes) == (400, 300)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anthropic adapter: multimodal tool-result conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue