mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
Make the computer_use toolset platform-agnostic by driving cua-driver on macOS, Windows, and Linux. Consumes the 8 cua-driver decoupling surfaces (capability discovery, structuredContent AX tree, opaque element_token, click button enum, explicit mimeType, machine-readable manifest, structured list_windows, structured health_report), each degrading gracefully on older drivers. Adds `hermes computer-use doctor` (drives cua-driver health_report with a per-OS check matrix and an exit 0/1/2 ok/degraded/blocked contract), full typed wrappers for the previously-uncovered cua-driver tools plus a generic call_tool escape hatch, per-session agent-cursor lifecycle, platform-aware system-prompt guidance (host-deterministic, cache-safe), and honors HERMES_CUA_DRIVER_CMD end-to-end. Replaces the macOS-only skills/apple/macos-computer-use skill with a cross-platform skills/computer-use skill, and refreshes the EN + zh-Hans docs. Supersedes #44221 (Windows-enablement salvage of #30660). Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
325 lines
14 KiB
Python
325 lines
14 KiB
Python
"""Tests for ``tools.computer_use.doctor``.
|
|
|
|
The doctor module drives cua-driver's stable ``health_report`` MCP tool over
stdio JSON-RPC and renders the structured response. Most of the surface is
about parsing what cua-driver hands back, plus the exit-code contract
downstream consumers (CI / `hermes update`) rely on:

  * Exit 0 when overall == "ok"
  * Exit 1 when overall in ("degraded", "failed") — at least one check
    failed but the tool itself ran successfully
  * Exit 2 when the cua-driver binary is missing or the protocol breaks

We do NOT spin up a real cua-driver — that lives in the cua-driver
integration test suite (libs/cua-driver/rust/tests/integration/
test_health_report_mcp.py). Here we mock the subprocess and assert the
Hermes-side adapter behaves correctly against the documented response
shape.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from io import StringIO
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
|
|
# ── helpers ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _fake_proc_with_responses(*responses: dict) -> MagicMock:
|
|
"""Build a MagicMock subprocess.Popen handle that yields one JSON-RPC
|
|
response per `readline()` call, then returns "" (EOF)."""
|
|
lines = [json.dumps(r) + "\n" for r in responses] + [""]
|
|
proc = MagicMock()
|
|
proc.stdin = MagicMock()
|
|
proc.stdout = MagicMock()
|
|
proc.stdout.readline = MagicMock(side_effect=lines)
|
|
proc.stderr = MagicMock()
|
|
proc.stderr.read = MagicMock(return_value="")
|
|
proc.wait = MagicMock(return_value=0)
|
|
proc.kill = MagicMock()
|
|
return proc
|
|
|
|
|
|
def _ok_report() -> dict:
|
|
"""Minimal well-formed health_report response."""
|
|
return {
|
|
"schema_version": "1",
|
|
"platform": "darwin",
|
|
"driver_version": "0.5.8",
|
|
"overall": "ok",
|
|
"checks": [
|
|
{"name": "binary_version", "status": "pass", "message": "cua-driver 0.5.8"},
|
|
{"name": "tcc_accessibility", "status": "pass", "message": "Accessibility is granted."},
|
|
],
|
|
}
|
|
|
|
|
|
def _degraded_report() -> dict:
|
|
"""Report with one failing check — overall=degraded."""
|
|
return {
|
|
"schema_version": "1",
|
|
"platform": "darwin",
|
|
"driver_version": "0.5.8",
|
|
"overall": "degraded",
|
|
"checks": [
|
|
{"name": "binary_version", "status": "pass", "message": "cua-driver 0.5.8"},
|
|
{
|
|
"name": "bundle_identity",
|
|
"status": "fail",
|
|
"message": "Process has no CFBundleIdentifier.",
|
|
"hint": "Run inside CuaDriver.app",
|
|
"data": {"executable_path": "/tmp/cua-driver"},
|
|
},
|
|
],
|
|
}
|
|
|
|
|
|
# ── exit codes ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestDoctorExitCodes:
    """Exit-code contract of ``doctor.run_doctor``.

    ``0`` for an ``ok`` report, ``1`` for ``degraded`` / ``failed`` reports,
    and ``2`` when the driver binary cannot be found or the JSON-RPC exchange
    itself breaks.
    """

    @staticmethod
    def _exit_code_for(report: dict) -> int:
        """Run the doctor against a mocked driver and return its exit code.

        The fake process first emits an empty-result reply (id 1) and then
        ``report`` wrapped as ``structuredContent`` (id 2). ``shutil.which``,
        ``subprocess.Popen`` and ``sys.stdout`` are patched for the duration
        of the call, so nothing real is spawned and rendered output is
        swallowed.
        """
        from tools.computer_use import doctor

        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {"jsonrpc": "2.0", "id": 2, "result": {"structuredContent": report}},
        )
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO):
            return doctor.run_doctor()

    def test_ok_exits_0(self):
        assert self._exit_code_for(_ok_report()) == 0

    def test_degraded_exits_1(self):
        assert self._exit_code_for(_degraded_report()) == 1

    def test_failed_overall_exits_1(self):
        """`failed` overall (every check failed) is also exit 1, not 2 —
        the tool ran successfully; the diagnosis was bad."""
        report = _degraded_report()
        report["overall"] = "failed"
        assert self._exit_code_for(report) == 1

    def test_missing_binary_exits_2(self):
        from tools.computer_use import doctor

        # ``which`` returning None simulates the binary not being resolvable.
        with patch("shutil.which", return_value=None), \
                patch("sys.stdout", new_callable=StringIO):
            code = doctor.run_doctor()
        assert code == 2

    def test_protocol_error_exits_2(self, capsys):
        """An empty stdout response (driver crashed during handshake) is a
        protocol failure → exit 2."""
        from tools.computer_use import doctor

        # Built by hand rather than via the shared helper: ``return_value``
        # makes every readline() report EOF, however often it is called.
        proc = MagicMock()
        proc.stdin = MagicMock()
        proc.stdout = MagicMock()
        proc.stdout.readline = MagicMock(return_value="")  # EOF on initialize
        proc.stderr = MagicMock()
        proc.stderr.read = MagicMock(return_value="boom\n")
        proc.wait = MagicMock(return_value=0)
        proc.kill = MagicMock()

        # sys.stdout is deliberately not patched here so ``capsys`` can
        # capture both streams.
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc):
            code = doctor.run_doctor()
        assert code == 2
        # stderr should mention the failure
        captured = capsys.readouterr()
        assert "cua-driver" in captured.err.lower() or "health_report" in captured.err.lower()
|
|
|
|
|
|
# ── response-shape parsing ─────────────────────────────────────────────────
|
|
|
|
|
|
class TestResponseShapeParsing:
    """How ``doctor.run_doctor`` extracts the report from a ``tools/call``
    result: ``structuredContent`` first, a JSON text content item as the
    fallback, and exit 2 on a JSON-RPC error object."""

    def test_prefers_structuredContent(self):
        from tools.computer_use import doctor

        # Ship a conflicting text payload alongside structuredContent so the
        # preference order is actually observable: only the structured
        # report's platform may show up in the rendered output.
        decoy = _ok_report()
        decoy["platform"] = "decoy-text-platform"
        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {
                "jsonrpc": "2.0", "id": 2,
                "result": {
                    "structuredContent": _ok_report(),
                    "content": [
                        {"type": "text", "text": json.dumps(decoy)},
                    ],
                },
            },
        )
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO) as out:
            doctor.run_doctor()
        # Header line includes driver version + platform + overall.
        text = out.getvalue()
        assert "darwin" in text
        assert "ok" in text
        assert "decoy-text-platform" not in text

    def test_falls_back_to_text_content_when_structuredContent_absent(self):
        """Older cua-driver builds may emit health_report as a text content
        item carrying the JSON — the doctor should still parse it."""
        from tools.computer_use import doctor

        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {
                "jsonrpc": "2.0", "id": 2,
                "result": {
                    "content": [
                        {"type": "text", "text": json.dumps(_ok_report())},
                    ],
                },
            },
        )
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO) as out:
            code = doctor.run_doctor()
        assert code == 0
        assert "ok" in out.getvalue()

    def test_jsonrpc_error_response_exits_2(self, capsys):
        from tools.computer_use import doctor

        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {"jsonrpc": "2.0", "id": 2, "error": {"code": -32601, "message": "method not found"}},
        )
        # sys.stdout is left unpatched so ``capsys`` sees the stderr output.
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc):
            code = doctor.run_doctor()
        assert code == 2
        assert "method not found" in capsys.readouterr().err
|
|
|
|
|
|
# ── args / arg passthrough ─────────────────────────────────────────────────
|
|
|
|
|
|
class TestArgPassthrough:
    """``include`` / ``skip`` filters given to ``run_doctor`` must reach the
    ``arguments`` object of the ``tools/call`` request written to stdin."""

    @staticmethod
    def _tools_call_arguments(**doctor_kwargs) -> dict:
        """Run the doctor against a mocked driver and return the
        ``params.arguments`` object of the ``tools/call`` request it wrote.

        ``doctor_kwargs`` are forwarded verbatim to ``doctor.run_doctor``.
        The request is located by content (first stdin write containing
        ``"tools/call"``), not by its position among the writes.
        """
        from tools.computer_use import doctor

        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {"jsonrpc": "2.0", "id": 2, "result": {"structuredContent": _ok_report()}},
        )
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO):
            doctor.run_doctor(**doctor_kwargs)

        writes = [call.args[0] for call in proc.stdin.write.call_args_list]
        matching = [w for w in writes if "tools/call" in w]
        # Fail with a readable message instead of a bare StopIteration when
        # the request was never sent.
        assert matching, "run_doctor never wrote a tools/call request to stdin"
        return json.loads(matching[0])["params"]["arguments"]

    def test_include_passed_through_to_tools_call(self):
        arguments = self._tools_call_arguments(
            include=["binary_version", "tcc_accessibility"],
        )
        assert arguments["include"] == [
            "binary_version", "tcc_accessibility",
        ]

    def test_skip_passed_through(self):
        arguments = self._tools_call_arguments(skip=["bundle_identity"])
        assert arguments["skip"] == ["bundle_identity"]

    def test_no_filters_sends_empty_arguments(self):
        """When neither include nor skip is given, the arguments object is
        empty — not present-but-null — so the driver's default 'run every
        check' branch fires."""
        assert self._tools_call_arguments() == {}
|
|
|
|
|
|
# ── json output ────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestJsonOutput:
    """``run_doctor(json_output=True)`` must print the report as JSON."""

    def test_json_output_is_parseable_round_trip(self):
        from tools.computer_use import doctor

        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {"jsonrpc": "2.0", "id": 2, "result": {"structuredContent": _ok_report()}},
        )
        with patch("shutil.which", return_value="/fake/cua-driver"), \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO) as out:
            doctor.run_doctor(json_output=True)
        # Verify the captured text round-trips through json.loads and matches
        # the input report (the contract: --json passes the structured payload
        # through unchanged so downstream tooling can consume it directly).
        parsed = json.loads(out.getvalue())
        assert parsed == _ok_report()
|
|
|
|
|
|
# ── HERMES_CUA_DRIVER_CMD resolution ───────────────────────────────────────
|
|
|
|
|
|
class TestDriverCmdResolution:
    """Which driver command ``run_doctor`` hands to ``shutil.which``: the
    explicit ``driver_cmd`` argument first, else ``HERMES_CUA_DRIVER_CMD``."""

    def test_explicit_driver_cmd_arg_wins(self, monkeypatch):
        from tools.computer_use import doctor

        # Put a competing value in the environment so the explicit argument
        # really has something to win against.
        monkeypatch.setenv("HERMES_CUA_DRIVER_CMD", "/env/path/cua-driver")
        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {"jsonrpc": "2.0", "id": 2, "result": {"structuredContent": _ok_report()}},
        )
        with patch("shutil.which", return_value="/fake/explicit-binary") as which_mock, \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO):
            doctor.run_doctor(driver_cmd="/custom/path/cua-driver")
        # shutil.which should have been called with the explicit arg, not
        # the env-var / default resolver.
        which_mock.assert_called_with("/custom/path/cua-driver")

    def test_env_var_used_when_no_arg_given(self, monkeypatch):
        from tools.computer_use import doctor

        monkeypatch.setenv("HERMES_CUA_DRIVER_CMD", "/env/path/cua-driver")
        proc = _fake_proc_with_responses(
            {"jsonrpc": "2.0", "id": 1, "result": {}},
            {"jsonrpc": "2.0", "id": 2, "result": {"structuredContent": _ok_report()}},
        )
        with patch("shutil.which", return_value="/env/path/cua-driver") as which_mock, \
                patch("subprocess.Popen", return_value=proc), \
                patch("sys.stdout", new_callable=StringIO):
            doctor.run_doctor()
        # assert_called_with inspects only the most recent which() call; that
        # call should have carried the env-var value.
        which_mock.assert_called_with("/env/path/cua-driver")
|