feat(computer-use): make the preflight cross-platform (win/linux)

The card was macOS-only. cua-driver also runs on Windows and Linux, so
fold `cua-driver doctor` (cross-platform binary/health probes) into a
single OS-aware `ready` signal:

- macOS: ready == both TCC grants; keeps the permission rows + grant flow.
- Windows/Linux: no TCC toggles, so ready == driver health, with a
  per-OS note (SmartScreen/UIAccess on Windows; X11/XWayland on Linux).

`computer_use_status()` replaces the macOS-only `permissions_status()` and
surfaces `platform`, `ready`, `can_grant`, and the doctor `checks` (non-ok
ones render as warnings). CLI `permissions status`, the REST endpoint, and
the desktop card all key off the one payload. Grant stays macOS-only (400
elsewhere — nothing to grant).
This commit is contained in:
Brooklyn Nicholson 2026-06-22 17:48:43 -05:00
parent 0223ea5f59
commit 2dfcead683
6 changed files with 229 additions and 125 deletions

View file

@ -15,18 +15,32 @@ interface ComputerUsePanelProps {
onConfiguredChange?: () => void
}
function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) {
const tone = granted === true ? 'primary' : 'muted'
// Per-OS one-liner shown when there's no TCC grant flow (Windows/Linux), looked
// up by the status payload's `platform` string. macOS drives the permission
// rows instead, so it has no entry here — the map is typed as partial so a
// lookup for a platform without a note is `undefined` at the type level (not a
// falsely-typed `string`) and callers must supply their own fallback.
const PLATFORM_NOTE: Readonly<Partial<Record<string, string>>> = {
  linux: 'Drives your desktop via the X11/XWayland accessibility stack — no permission prompt.',
  win32: 'First run may trigger a Windows SmartScreen prompt for the cua-driver UIAccess worker — allow it.'
}
/**
 * Maps a tri-state flag to a pill tone: only an explicit `true` is
 * highlighted; `false` and `null` (unknown) both fall back to the muted tone.
 */
function tone(granted: boolean | null): 'primary' | 'muted' {
  if (granted === true) {
    return 'primary'
  }

  return 'muted'
}
/**
 * Small status glyph for a tri-state flag: a check for `true`, a cross for
 * `false`, and a warning triangle for `null` (unknown).
 */
function GrantIcon({ granted }: { granted: boolean | null }) {
  let Icon = AlertTriangle

  if (granted === true) {
    Icon = Check
  } else if (granted === false) {
    Icon = X
  }

  return <Icon className="size-3" />
}
function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) {
return (
<div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
<div className="min-w-0">
<span className="text-sm font-medium">{label}</span>
<p className="mt-0.5 text-[0.7rem] text-muted-foreground">{hint}</p>
</div>
<Pill tone={tone}>
<Icon className="size-3" />
<Pill tone={tone(granted)}>
<GrantIcon granted={granted} />
{granted === true ? 'Granted' : granted === false ? 'Not granted' : 'Unknown'}
</Pill>
</div>
@ -34,17 +48,17 @@ function PermissionRow({ granted, label, hint }: { granted: boolean | null; labe
}
/**
* Computer Use preflight card.
* Cross-platform Computer Use preflight card.
*
* Computer Use drives the Mac through cua-driver, whose Accessibility +
* Screen Recording grants attach to cua-driver's OWN TCC identity
* (`com.trycua.driver` / the installed CuaDriver.app) not the Hermes
* desktop app. So this card reflects the driver's real grant state and
* triggers a grant via `cua-driver permissions grant`, which launches
* CuaDriver via LaunchServices so the macOS dialog is attributed correctly.
* cua-driver runs on macOS, Windows, and Linux, but readiness differs: macOS
* needs two TCC grants (Accessibility + Screen Recording) that attach to
* cua-driver's own `com.trycua.driver` identity — not Hermes — and are
* requested via `cua-driver permissions grant` (dialog attributed to
* CuaDriver). Windows/Linux have no TCC toggles, so readiness is driver health
* from `cua-driver doctor`. The backend folds both into one `ready` signal.
*
* Binary install/upgrade still lives in the cua-driver provider's post-setup
* runner below this card (the generic ToolsetConfigPanel).
* Binary install/upgrade stays in the cua-driver provider's post-setup runner
* below this card (the generic ToolsetConfigPanel).
*/
export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) {
const [status, setStatus] = useState<ComputerUseStatus | null>(null)
@ -54,8 +68,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
const refresh = useCallback(async () => {
try {
const next = await getComputerUseStatus()
setStatus(next)
setStatus(await getComputerUseStatus())
} catch (err) {
notifyError(err, 'Could not read Computer Use status')
} finally {
@ -67,9 +80,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
activeRef.current = true
void refresh()
return () => {
activeRef.current = false
}
return () => void (activeRef.current = false)
}, [refresh])
const grant = useCallback(async () => {
@ -90,8 +101,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.'
})
// Poll the grant action until it exits (the driver waits for the user to
// flip the switch), then re-read the live permission state.
// The driver waits for the user to flip the switch — poll until it exits.
for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) {
await new Promise(resolve => window.setTimeout(resolve, 1500))
@ -138,7 +148,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
if (!status.platform_supported) {
return (
<p className="mt-3 px-1 text-xs text-muted-foreground">
Computer Use permissions are managed on macOS. On this platform, enable the cua-driver provider below.
Computer Use isn&apos;t supported on this platform ({status.platform}).
</p>
)
}
@ -146,22 +156,26 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
if (!status.installed) {
return (
<p className="mt-3 px-1 text-xs text-muted-foreground">
Install the cua-driver backend below to drive macOS. After installing, grant Accessibility and Screen
Recording here.
Install the cua-driver backend below to drive this machine.
{status.can_grant && ' Then grant Accessibility and Screen Recording here.'}
</p>
)
}
const allGranted = status.accessibility === true && status.screen_recording === true
const failingChecks = status.checks.filter(c => c.status !== 'ok')
return (
<div className="mt-3 grid gap-2">
<div className="flex flex-wrap items-center justify-between gap-2 px-1">
<div className="min-w-0">
<p className="text-[0.72rem] text-muted-foreground">
Grants attach to CuaDriver&apos;s own identity (com.trycua.driver), not Hermes so the dialog is
attributed to the process that drives your Mac.
</p>
{status.can_grant ? (
<p className="text-[0.72rem] text-muted-foreground">
Grants attach to CuaDriver&apos;s own identity (com.trycua.driver), not Hermes so the dialog is
attributed to the process that drives your Mac.
</p>
) : (
<p className="text-[0.72rem] text-muted-foreground">{PLATFORM_NOTE[status.platform] ?? ''}</p>
)}
{status.version && <p className="text-[0.68rem] text-muted-foreground/80">{status.version}</p>}
</div>
<Button onClick={() => void refresh()} size="sm" variant="text">
@ -170,16 +184,35 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
</Button>
</div>
<PermissionRow
granted={status.accessibility}
hint="Lets cua-driver post clicks, keystrokes, and read the accessibility tree."
label="Accessibility"
/>
<PermissionRow
granted={status.screen_recording}
hint="Lets cua-driver capture screenshots of app windows."
label="Screen Recording"
/>
{status.can_grant ? (
<>
<PermissionRow
granted={status.accessibility}
hint="Lets cua-driver post clicks, keystrokes, and read the accessibility tree."
label="Accessibility"
/>
<PermissionRow
granted={status.screen_recording}
hint="Lets cua-driver capture screenshots of app windows."
label="Screen Recording"
/>
</>
) : (
<div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
<span className="text-sm font-medium">Driver health</span>
<Pill tone={tone(status.ready)}>
<GrantIcon granted={status.ready} />
{status.ready === true ? 'Ready' : status.ready === false ? 'Not ready' : 'Unknown'}
</Pill>
</div>
)}
{failingChecks.map(c => (
<p className="px-1 text-[0.7rem] text-muted-foreground" key={c.label}>
<AlertTriangle className="mr-1 inline size-3" />
{c.label}: {c.message}
</p>
))}
{status.error && (
<p className="px-1 text-[0.7rem] text-muted-foreground">
@ -188,16 +221,18 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)
</p>
)}
{allGranted ? (
{status.ready ? (
<div className="flex items-center gap-1.5 px-1 text-xs text-muted-foreground">
<Check className="size-3.5" />
Computer Use is ready. Ask the agent to capture an app and click around.
</div>
) : (
<Button disabled={granting} onClick={() => void grant()} size="sm">
{granting ? <Loader2 className="size-3.5 animate-spin" /> : <ExternalLink className="size-3.5" />}
{granting ? 'Waiting for approval…' : 'Grant permissions'}
</Button>
status.can_grant && (
<Button disabled={granting} onClick={() => void grant()} size="sm">
{granting ? <Loader2 className="size-3.5 animate-spin" /> : <ExternalLink className="size-3.5" />}
{granting ? 'Waiting for approval…' : 'Grant permissions'}
</Button>
)
)}
</div>
)

View file

@ -60,6 +60,7 @@ export type {
AudioTranscriptionResponse,
AuxiliaryModelsResponse,
BackendUpdateCheckResponse,
ComputerUseCheck,
ComputerUsePermissionSource,
ComputerUseStatus,
ConfigFieldSchema,

View file

@ -581,11 +581,11 @@ export interface ToolsetConfig {
/** Shape of `GET /api/tools/computer-use/status`.
*
* Computer Use drives the Mac through cua-driver, whose Accessibility +
* Screen Recording grants attach to cua-driver's OWN TCC identity
* (`com.trycua.driver`), not the Hermes app. Permission booleans are
* `null` when unknown (binary missing, or no CuaDriver daemon running to
* answer for its own identity). */
* cua-driver runs on macOS, Windows, and Linux. `ready` is the single OS-aware
* readiness signal: on macOS both TCC grants (Accessibility + Screen
* Recording, which attach to cua-driver's own `com.trycua.driver` identity,
* not Hermes); elsewhere, driver health from `cua-driver doctor`. `null`
* means unknown (binary missing / probe failed). */
export interface ComputerUsePermissionSource {
attribution?: string
executable?: string
@ -594,13 +594,28 @@ export interface ComputerUsePermissionSource {
responsible_ppid?: number
}
/** One probe result from `cua-driver doctor`, as relayed in the status payload. */
export interface ComputerUseCheck {
  /** Name of the probe; shown as the prefix of the warning line. */
  label: string
  /** Probe outcome. `'ok'` is treated as passing; any other value is rendered as a warning. */
  status: string
  /** Detail text displayed after the label for non-ok probes. */
  message: string
}
export interface ComputerUseStatus {
/** macOS is the only platform with the TCC permission model cua-driver gates. */
/** `sys.platform`: "darwin" | "win32" | "linux" | ... */
platform: string
/** cua-driver has a runtime backend for this platform. */
platform_supported: boolean
/** cua-driver binary resolved on PATH. */
installed: boolean
/** e.g. "cua-driver 0.5.1", or null when unknown. */
version: string | null
/** Unified readiness — both TCC grants (macOS) or driver health (else). */
ready: boolean | null
/** Whether a permission grant flow exists (macOS-only TCC). */
can_grant: boolean
/** Cross-platform `cua-driver doctor` probes. */
checks: ComputerUseCheck[]
/** macOS TCC detail — `null` off macOS or when unknown. */
accessibility: boolean | null
screen_recording: boolean | null
screen_recording_capturable: boolean | null

View file

@ -12598,27 +12598,32 @@ def main():
sys.exit(request_permissions_grant())
if perms_action == "status":
import json as _json
from tools.computer_use.permissions import permissions_status
st = permissions_status()
from tools.computer_use.permissions import computer_use_status
st = computer_use_status()
if bool(getattr(args, "json", False)):
print(_json.dumps(st, indent=2, sort_keys=True))
else:
if not st["installed"]:
print("cua-driver: not installed")
print(" Run: hermes computer-use install")
elif not st["platform_supported"]:
print("Computer Use permissions are managed on macOS only.")
else:
def _glyph(v):
return "" if v is True else ("" if v is False else "")
print(f"cua-driver: {st.get('version') or 'installed'}")
print(f" {_glyph(st['accessibility'])} Accessibility")
print(f" {_glyph(st['screen_recording'])} Screen Recording")
if st.get("error"):
print(f"{st['error']}")
if st["accessibility"] is not True or st["screen_recording"] is not True:
print(" Grant: hermes computer-use permissions grant")
sys.exit(0 if st.get("accessibility") and st.get("screen_recording") else 1)
sys.exit(0 if st["ready"] else 1)
if not st["platform_supported"]:
print(f"Computer Use is not supported on {st['platform']}.")
sys.exit(1)
if not st["installed"]:
print("cua-driver: not installed. Run: hermes computer-use install")
sys.exit(1)
glyph = lambda v: "" if v is True else ("" if v is False else "") # noqa: E731
print(f"cua-driver: {st['version'] or 'installed'} ({st['platform']})")
if st["can_grant"]: # macOS TCC permissions
print(f" {glyph(st['accessibility'])} Accessibility")
print(f" {glyph(st['screen_recording'])} Screen Recording")
if not st["ready"]:
print(" Grant: hermes computer-use permissions grant")
else: # no TCC model — readiness is driver health
print(f" {glyph(st['ready'])} driver health (no permission toggles on {st['platform']})")
for c in st["checks"]:
if c["status"] != "ok":
print(f"{c['label']}: {c['message']}")
if st["error"]:
print(f"{st['error']}")
sys.exit(0 if st["ready"] else 1)
computer_use_perms.print_help()
return
# No subcommand → show help

View file

@ -10673,43 +10673,45 @@ async def run_toolset_post_setup(
# ---------------------------------------------------------------------------
# Computer Use (cua-driver) — install + macOS permission state
# Computer Use (cua-driver) — cross-platform readiness + macOS permission grant
#
# Computer Use drives the Mac through cua-driver, whose Accessibility +
# Screen Recording grants attach to cua-driver's OWN TCC identity
# (com.trycua.driver / the installed CuaDriver.app) — not the Hermes desktop
# app or this server. The desktop's Computer Use card reflects that state and
# triggers a grant via the same `cua-driver permissions grant` flow the CLI
# uses, so no Hermes-side entitlement is involved.
# cua-driver runs on macOS, Windows, and Linux. The desktop card reflects
# per-OS readiness: on macOS the Accessibility + Screen Recording TCC grants
# (which attach to cua-driver's OWN identity, com.trycua.driver — not Hermes,
# so no app entitlement is involved); elsewhere, driver health from
# `cua-driver doctor`. The grant flow is macOS-only (no TCC toggles to request
# on Windows/Linux).
# ---------------------------------------------------------------------------
@app.get("/api/tools/computer-use/status")
async def get_computer_use_status(profile: Optional[str] = None):
"""Report cua-driver install + macOS permission state for the desktop card.
"""Cross-platform Computer Use readiness for the desktop card.
See ``tools.computer_use.permissions.permissions_status`` for the payload
shape. Read-only and fast (shells ``cua-driver permissions status``).
See ``tools.computer_use.permissions.computer_use_status`` for the payload
shape. Read-only and fast (shells ``cua-driver doctor`` + macOS
``permissions status``).
"""
from tools.computer_use.permissions import permissions_status
from tools.computer_use.permissions import computer_use_status
with _profile_scope(profile):
return permissions_status()
return computer_use_status()
@app.post("/api/tools/computer-use/permissions/grant")
async def grant_computer_use_permissions(profile: Optional[str] = None):
"""Spawn ``hermes computer-use permissions grant`` as a background action.
``cua-driver permissions grant`` launches CuaDriver via LaunchServices so
the macOS TCC dialog is attributed to com.trycua.driver, then waits for
the user to approve. The frontend polls ``GET /api/actions/computer-use-
grant/status`` for progress and re-reads ``/status`` once it exits.
macOS-only: ``cua-driver permissions grant`` launches CuaDriver via
LaunchServices so the TCC dialog is attributed to com.trycua.driver, then
waits for approval. The frontend polls ``GET /api/actions/computer-use-
grant/status`` and re-reads ``/status`` once it exits. Windows/Linux have
no TCC toggles to grant, so this returns 400 there.
"""
if sys.platform != "darwin":
raise HTTPException(
status_code=400,
detail="Computer Use permissions are managed on macOS only.",
detail="Computer Use permission grants are a macOS concept.",
)
try:
proc = _spawn_hermes_action(

View file

@ -1,21 +1,24 @@
"""
macOS Accessibility + Screen Recording permission helpers for Computer Use.
Cross-platform Computer Use readiness + macOS permission helpers.
cua-driver 0.5+ owns the permission model. Crucially, the grants attach to
cua-driver's OWN TCC identity (``com.trycua.driver`` — the installed
``CuaDriver.app``), NOT the terminal, the Hermes CLI, or the Hermes desktop
app. So:
cua-driver runs on macOS, Windows, and Linux, but "ready to drive" means
something different on each:
* ``cua-driver permissions status --json`` reports the driver daemon's real
grant state, independent of who asks.
* ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so
the macOS dialog is attributed to ``com.trycua.driver`` the process that
actually does the work.
* macOS — explicit TCC grants (Accessibility + Screen Recording). cua-driver
reports/requests them via ``permissions status`` / ``permissions grant``.
The grants attach to cua-driver's OWN identity (``com.trycua.driver`` /
the installed ``CuaDriver.app``), NOT Hermes — so no Hermes entitlement is
involved, and ``grant`` launches CuaDriver via LaunchServices so the macOS
dialog is attributed correctly.
* Windows — no TCC toggles; the UIAccess worker (``cua-driver-uia.exe``) may
trip a SmartScreen prompt on first run. Readiness == driver health.
* Linux — assistive control via the X11/XWayland stack. Readiness == driver
health.
Because the permission lives with the cua-driver binary, the Hermes desktop
app needs no Accessibility / Screen Recording entitlements of its own. This is
a thin, testable client driven by the ``hermes computer-use permissions`` CLI
and the desktop ``/api/tools/computer-use/status`` endpoint.
The universal signal on every platform is ``cua-driver doctor --json`` (binary
integrity + platform support). ``computer_use_status`` folds that together with
the macOS permission detail into one payload for the desktop card, the
``hermes computer-use permissions`` CLI, and ``/api/tools/computer-use/status``.
"""
from __future__ import annotations
@ -25,8 +28,10 @@ import os
import shutil
import subprocess
import sys
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional
# Platforms with a cua-driver runtime backend (mirrors the toolset platform_gate).
_RUNTIME_PLATFORMS = frozenset({"darwin", "win32", "linux"})
_BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable")
@ -61,18 +66,65 @@ def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess
)
def permissions_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
"""Computer Use install + macOS permission state for the desktop card.
def _json_out(binary: str, *args: str, timeout: float) -> Any:
    """Run ``binary args`` and decode its stdout as JSON.

    Returns the decoded value, or ``None`` when stdout is empty or
    whitespace-only.

    Failures are NOT swallowed here: anything ``_run`` raises and the
    ``ValueError`` (``json.JSONDecodeError``) raised for malformed output
    propagate to the caller. Callers rely on that to tell a timeout apart from
    other failures, so wrap the call in ``try``/``except`` when a best-effort
    result is wanted.
    """
    completed = _run(binary, *args, timeout=timeout)
    text = (completed.stdout or "").strip()
    if not text:
        return None
    return json.loads(text)
``None`` permission values mean "unknown" the driver binary is missing,
the platform has no TCC model, or no CuaDriver daemon is running to answer
for its own identity yet.
def _doctor(binary: str) -> Optional[Dict[str, Any]]:
    """Run ``cua-driver doctor --json`` and normalise its report.

    Returns ``{"ok": bool, "checks": [{"label", "status", "message"}, ...]}``
    where ``checks`` is built from the report's ``probes`` array, or ``None``
    when the command fails, its output is not valid JSON, or the top-level
    value is not an object.

    This is a best-effort probe and never raises: a malformed report degrades
    to fewer (or no) checks instead of failing the whole status call.
    """
    try:
        data = _json_out(binary, "doctor", "--json", timeout=12)
    except Exception:
        # Spawn failure, timeout, or malformed JSON — report "unknown".
        return None
    if not isinstance(data, dict):
        return None

    probes = data.get("probes")
    if not isinstance(probes, list):
        # ``"probes": null`` (or any non-list) used to raise TypeError from the
        # comprehension below, outside the try block above.
        probes = []

    def _text(value: Any) -> str:
        # A JSON null should read as an empty string, not the literal "None".
        return "" if value is None else str(value)

    checks: List[Dict[str, str]] = [
        {
            "label": _text(probe.get("label")),
            "status": _text(probe.get("status")),
            "message": _text(probe.get("message")),
        }
        for probe in probes
        if isinstance(probe, dict)
    ]
    return {"ok": bool(data.get("ok")), "checks": checks}
def _mac_permissions(binary: str, out: Dict[str, Any]) -> None:
    """Merge ``cua-driver permissions status --json`` into ``out`` in place.

    Copies each key named in ``_BOOLS`` whose reported value is a real bool,
    plus ``source`` when it is a dict. On a timeout or any other failure only
    ``out["error"]`` is set and the permission fields are left untouched. A
    payload that is not a JSON object is ignored silently.
    """
    try:
        payload = _json_out(binary, "permissions", "status", "--json", timeout=10)
    except subprocess.TimeoutExpired:
        out["error"] = "cua-driver permissions status timed out"
        return
    except Exception as exc:  # spawn failure or malformed JSON
        out["error"] = f"cua-driver permissions status failed: {exc}"
        return

    if not isinstance(payload, dict):
        return

    for key in _BOOLS:
        value = payload.get(key)
        if isinstance(value, bool):
            out[key] = value

    source = payload.get("source")
    if isinstance(source, dict):
        out["source"] = source
def computer_use_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
"""Unified, OS-aware Computer Use readiness for the desktop card.
``ready`` is the single signal the UI keys off: on macOS it's both TCC
grants; elsewhere it's driver health (no TCC model). ``None`` means
unknown (binary missing / probe failed). ``can_grant`` is macOS-only.
"""
plat = sys.platform
binary = shutil.which(_driver_cmd(driver_cmd))
out: Dict[str, Any] = {
"platform_supported": sys.platform == "darwin",
"platform": plat,
"platform_supported": plat in _RUNTIME_PLATFORMS,
"installed": bool(binary),
"version": None,
"ready": None,
"can_grant": plat == "darwin",
"checks": [],
"source": None,
"error": None,
**{k: None for k in _BOOLS},
@ -85,24 +137,17 @@ def permissions_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
except Exception:
pass
# Permissions are a macOS concept; cua-driver only exposes the subcommand there.
if sys.platform != "darwin":
return out
doctor = _doctor(binary)
if doctor is not None:
out["checks"] = doctor["checks"]
try:
raw = (_run(binary, "permissions", "status", "--json", timeout=10).stdout or "").strip()
data = json.loads(raw) if raw else {}
except subprocess.TimeoutExpired:
out["error"] = "cua-driver permissions status timed out"
return out
except Exception as exc: # spawn failure or malformed JSON
out["error"] = f"cua-driver permissions status failed: {exc}"
return out
if isinstance(data, dict):
out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)})
if isinstance(data.get("source"), dict):
out["source"] = data["source"]
if plat == "darwin":
_mac_permissions(binary, out)
if out["error"] is None:
out["ready"] = out["accessibility"] is True and out["screen_recording"] is True
elif doctor is not None:
# No TCC model off macOS — readiness is driver health.
out["ready"] = doctor["ok"]
return out
@ -111,10 +156,11 @@ def request_permissions_grant(driver_cmd: Optional[str] = None) -> int:
Launches CuaDriver via LaunchServices so the TCC dialog is attributed to
``com.trycua.driver``, then waits for the grant. Returns the driver's exit
code (0 ok), 2 if the binary is missing, 64 on an unsupported platform.
code (0 ok), 2 if the binary is missing, 64 on a non-macOS platform (which
has no TCC permission model to grant).
"""
if sys.platform != "darwin":
print("Computer Use permissions are managed on macOS only.")
print("Computer Use permissions are a macOS concept; nothing to grant here.")
return 64
binary = shutil.which(_driver_cmd(driver_cmd))