From 0223ea5f590aec3697ebad6b7f533b5e5df2cc83 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson
Date: Mon, 22 Jun 2026 17:33:52 -0500
Subject: [PATCH 1/3] feat(computer-use): surface macOS permission preflight in the desktop

Computer Use already worked through the desktop backend (the cua-driver
toolset enables + installs via Settings -> Skills & Tools), but there was
no in-app way to see or grant the two macOS permissions it needs, so
"give a model my Mac" was tribal knowledge.

The grants attach to cua-driver's OWN TCC identity (com.trycua.driver /
the installed CuaDriver.app), not Hermes -- so no app entitlement is
involved. cua-driver 0.5+ exposes `permissions status/grant`, which we
wrap:

- tools/computer_use/permissions.py: thin client over the two subcommands
- hermes computer-use permissions {status,grant}: CLI parity
- GET /api/tools/computer-use/status, POST .../permissions/grant: desktop
  REST
- ComputerUsePanel: live Accessibility + Screen Recording state with a
  Grant button (dialog attributed to CuaDriver), shown in the expanded
  Computer Use toolset row. Binary install stays in the existing provider
  post-setup runner.

Follow-ups: i18n the card copy; a "Stop driver" control (cua-driver stop)
for the runaway-`serve` case.
--- .../src/app/settings/computer-use-panel.tsx | 204 ++++++++++++++++++ apps/desktop/src/app/skills/index.tsx | 4 + apps/desktop/src/hermes.ts | 18 ++ apps/desktop/src/types/hermes.ts | 30 +++ hermes_cli/main.py | 57 +++++ hermes_cli/web_server.py | 56 +++++ tools/computer_use/permissions.py | 136 ++++++++++++ 7 files changed, 505 insertions(+) create mode 100644 apps/desktop/src/app/settings/computer-use-panel.tsx create mode 100644 tools/computer_use/permissions.py diff --git a/apps/desktop/src/app/settings/computer-use-panel.tsx b/apps/desktop/src/app/settings/computer-use-panel.tsx new file mode 100644 index 00000000000..826ce80ae62 --- /dev/null +++ b/apps/desktop/src/app/settings/computer-use-panel.tsx @@ -0,0 +1,204 @@ +import { useCallback, useEffect, useRef, useState } from 'react' + +import { Button } from '@/components/ui/button' +import { getActionStatus, getComputerUseStatus, grantComputerUsePermissions } from '@/hermes' +import { AlertTriangle, Check, ExternalLink, Loader2, RefreshCw, X } from '@/lib/icons' +import { upsertDesktopActionTask } from '@/store/activity' +import { notify, notifyError } from '@/store/notifications' +import type { ComputerUseStatus } from '@/types/hermes' + +import { Pill } from './primitives' + +interface ComputerUsePanelProps { + /** Re-read the parent toolset list after a permission/install change so the + * "Configured / Needs keys" pill stays in sync. */ + onConfiguredChange?: () => void +} + +function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) { + const tone = granted === true ? 'primary' : 'muted' + const Icon = granted === true ? Check : granted === false ? X : AlertTriangle + + return ( +
+
+ {label} +

{hint}

+
+ + + {granted === true ? 'Granted' : granted === false ? 'Not granted' : 'Unknown'} + +
+ ) +} + +/** + * Computer Use preflight card. + * + * Computer Use drives the Mac through cua-driver, whose Accessibility + + * Screen Recording grants attach to cua-driver's OWN TCC identity + * (`com.trycua.driver` / the installed CuaDriver.app) — not the Hermes + * desktop app. So this card reflects the driver's real grant state and + * triggers a grant via `cua-driver permissions grant`, which launches + * CuaDriver via LaunchServices so the macOS dialog is attributed correctly. + * + * Binary install/upgrade still lives in the cua-driver provider's post-setup + * runner below this card (the generic ToolsetConfigPanel). + */ +export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) { + const [status, setStatus] = useState(null) + const [loading, setLoading] = useState(true) + const [granting, setGranting] = useState(false) + const activeRef = useRef(false) + + const refresh = useCallback(async () => { + try { + const next = await getComputerUseStatus() + setStatus(next) + } catch (err) { + notifyError(err, 'Could not read Computer Use status') + } finally { + setLoading(false) + } + }, []) + + useEffect(() => { + activeRef.current = true + void refresh() + + return () => { + activeRef.current = false + } + }, [refresh]) + + const grant = useCallback(async () => { + setGranting(true) + + try { + const started = await grantComputerUsePermissions() + + if (!started.ok) { + notifyError(new Error('spawn failed'), 'Could not request permissions') + + return + } + + notify({ + kind: 'info', + title: 'Approve in System Settings', + message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.' + }) + + // Poll the grant action until it exits (the driver waits for the user to + // flip the switch), then re-read the live permission state. 
+ for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) { + await new Promise(resolve => window.setTimeout(resolve, 1500)) + + if (!activeRef.current) { + break + } + + const polled = await getActionStatus(started.name, 200) + upsertDesktopActionTask(polled) + + if (!polled.running) { + break + } + } + + if (activeRef.current) { + await refresh() + onConfiguredChange?.() + } + } catch (err) { + if (activeRef.current) { + notifyError(err, 'Could not request permissions') + } + } finally { + if (activeRef.current) { + setGranting(false) + } + } + }, [onConfiguredChange, refresh]) + + if (loading) { + return ( +
+ + Checking Computer Use status… +
+ ) + } + + if (!status) { + return null + } + + if (!status.platform_supported) { + return ( +

+ Computer Use permissions are managed on macOS. On this platform, enable the cua-driver provider below. +

+ ) + } + + if (!status.installed) { + return ( +

+ Install the cua-driver backend below to drive macOS. After installing, grant Accessibility and Screen + Recording here. +

+ ) + } + + const allGranted = status.accessibility === true && status.screen_recording === true + + return ( +
+
+
+

+ Grants attach to CuaDriver's own identity (com.trycua.driver), not Hermes — so the dialog is + attributed to the process that drives your Mac. +

+ {status.version &&

{status.version}

} +
+ +
+ + + + + {status.error && ( +

+ + {status.error} +

+ )} + + {allGranted ? ( +
+ + Computer Use is ready. Ask the agent to capture an app and click around. +
+ ) : ( + + )} +
+ ) +} diff --git a/apps/desktop/src/app/skills/index.tsx b/apps/desktop/src/app/skills/index.tsx index 716f0181f12..90aa4a24357 100644 --- a/apps/desktop/src/app/skills/index.tsx +++ b/apps/desktop/src/app/skills/index.tsx @@ -17,6 +17,7 @@ import { useRefreshHotkey } from '../hooks/use-refresh-hotkey' import { useRouteEnumParam } from '../hooks/use-route-enum-param' import { PAGE_INSET_X } from '../layout-constants' import { PageSearchShell } from '../page-search-shell' +import { ComputerUsePanel } from '../settings/computer-use-panel' import { asText, includesQuery, prettyName, toolNames, toolsetDisplayLabel } from '../settings/helpers' import { ToolsetConfigPanel } from '../settings/toolset-config-panel' import type { SetStatusbarItemGroup } from '../shell/statusbar-controls' @@ -334,6 +335,9 @@ export function SkillsView({ setStatusbarItemGroup: _setStatusbarItemGroup, ...p ))} )} + {expanded && toolset.name === 'computer_use' && ( + + )} {expanded && } ) diff --git a/apps/desktop/src/hermes.ts b/apps/desktop/src/hermes.ts index 197e24611ab..04340b0a549 100644 --- a/apps/desktop/src/hermes.ts +++ b/apps/desktop/src/hermes.ts @@ -8,6 +8,7 @@ import type { AudioTranscriptionResponse, AuxiliaryModelsResponse, BackendUpdateCheckResponse, + ComputerUseStatus, ConfigSchemaResponse, CronJob, CronJobCreatePayload, @@ -59,6 +60,8 @@ export type { AudioTranscriptionResponse, AuxiliaryModelsResponse, BackendUpdateCheckResponse, + ComputerUsePermissionSource, + ComputerUseStatus, ConfigFieldSchema, ConfigSchemaResponse, CronJob, @@ -516,6 +519,21 @@ export function runToolsetPostSetup(name: string, key: string): Promise { + return window.hermesDesktop.api({ + ...profileScoped(), + path: '/api/tools/computer-use/status' + }) +} + +export function grantComputerUsePermissions(): Promise { + return window.hermesDesktop.api({ + ...profileScoped(), + path: '/api/tools/computer-use/permissions/grant', + method: 'POST' + }) +} + export function getMessagingPlatforms(): Promise { 
return window.hermesDesktop.api({ path: '/api/messaging/platforms' diff --git a/apps/desktop/src/types/hermes.ts b/apps/desktop/src/types/hermes.ts index b67cc3041a7..b860ea8e89d 100644 --- a/apps/desktop/src/types/hermes.ts +++ b/apps/desktop/src/types/hermes.ts @@ -579,6 +579,36 @@ export interface ToolsetConfig { active_provider: string | null } +/** Shape of `GET /api/tools/computer-use/status`. + * + * Computer Use drives the Mac through cua-driver, whose Accessibility + + * Screen Recording grants attach to cua-driver's OWN TCC identity + * (`com.trycua.driver`), not the Hermes app. Permission booleans are + * `null` when unknown (binary missing, or no CuaDriver daemon running to + * answer for its own identity). */ +export interface ComputerUsePermissionSource { + attribution?: string + executable?: string + note?: string + pid?: number + responsible_ppid?: number +} + +export interface ComputerUseStatus { + /** macOS is the only platform with the TCC permission model cua-driver gates. */ + platform_supported: boolean + /** cua-driver binary resolved on PATH. */ + installed: boolean + /** e.g. "cua-driver 0.5.1", or null when unknown. */ + version: string | null + accessibility: boolean | null + screen_recording: boolean | null + screen_recording_capturable: boolean | null + source: ComputerUsePermissionSource | null + /** Populated when the status probe itself failed. */ + error: string | null +} + export interface SessionSearchResult { /** Lineage root of the matched conversation. Stable across compression and * used as the durable pin id; falls back to session_id when absent. 
*/ diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 4b1a3f64db2..906497055c8 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12507,6 +12507,33 @@ def main(): action="store_true", help="Emit the raw structured payload as JSON (same shape as `tools/call`).", ) + computer_use_perms = computer_use_sub.add_parser( + "permissions", + help="Check or grant macOS Accessibility + Screen Recording (macOS)", + description=( + "Computer Use drives the Mac through cua-driver, whose TCC grants\n" + "attach to cua-driver's own identity (com.trycua.driver) — not the\n" + "terminal or the Hermes app. `status` reports the driver's grant\n" + "state; `grant` launches CuaDriver via LaunchServices so the macOS\n" + "permission dialog is attributed to the process that does the work." + ), + ) + computer_use_perms_sub = computer_use_perms.add_subparsers( + dest="computer_use_perms_action" + ) + computer_use_perms_status = computer_use_perms_sub.add_parser( + "status", + help="Report Accessibility + Screen Recording grant state (read-only)", + ) + computer_use_perms_status.add_argument( + "--json", + action="store_true", + help="Emit the normalized permission payload as JSON.", + ) + computer_use_perms_sub.add_parser( + "grant", + help="Request the grants (opens the dialog attributed to CuaDriver)", + ) def cmd_computer_use(args): action = getattr(args, "computer_use_action", None) @@ -12564,6 +12591,36 @@ def main(): json_output=bool(getattr(args, "json", False)), ) sys.exit(code) + if action == "permissions": + perms_action = getattr(args, "computer_use_perms_action", None) + if perms_action == "grant": + from tools.computer_use.permissions import request_permissions_grant + sys.exit(request_permissions_grant()) + if perms_action == "status": + import json as _json + from tools.computer_use.permissions import permissions_status + st = permissions_status() + if bool(getattr(args, "json", False)): + print(_json.dumps(st, indent=2, sort_keys=True)) + else: + if not 
st["installed"]: + print("cua-driver: not installed") + print(" Run: hermes computer-use install") + elif not st["platform_supported"]: + print("Computer Use permissions are managed on macOS only.") + else: + def _glyph(v): + return "✅" if v is True else ("❌" if v is False else "•") + print(f"cua-driver: {st.get('version') or 'installed'}") + print(f" {_glyph(st['accessibility'])} Accessibility") + print(f" {_glyph(st['screen_recording'])} Screen Recording") + if st.get("error"): + print(f" ⚠ {st['error']}") + if st["accessibility"] is not True or st["screen_recording"] is not True: + print(" Grant: hermes computer-use permissions grant") + sys.exit(0 if st.get("accessibility") and st.get("screen_recording") else 1) + computer_use_perms.print_help() + return # No subcommand → show help computer_use_parser.print_help() diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 997803b8f0a..5a6b764e00f 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -8349,6 +8349,7 @@ async def install_mcp_catalog_entry(body: MCPCatalogInstall, profile: Optional[s # Register the mcp-install action log so /api/actions/mcp-install/status works. _ACTION_LOG_FILES.setdefault("mcp-install", "action-mcp-install.log") +_ACTION_LOG_FILES.setdefault("computer-use-grant", "action-computer-use-grant.log") # --------------------------------------------------------------------------- @@ -10671,6 +10672,61 @@ async def run_toolset_post_setup( return {"ok": True, "pid": proc.pid, "name": "tools-post-setup", "key": body.key} +# --------------------------------------------------------------------------- +# Computer Use (cua-driver) — install + macOS permission state +# +# Computer Use drives the Mac through cua-driver, whose Accessibility + +# Screen Recording grants attach to cua-driver's OWN TCC identity +# (com.trycua.driver / the installed CuaDriver.app) — not the Hermes desktop +# app or this server. 
The desktop's Computer Use card reflects that state and +# triggers a grant via the same `cua-driver permissions grant` flow the CLI +# uses, so no Hermes-side entitlement is involved. +# --------------------------------------------------------------------------- + + +@app.get("/api/tools/computer-use/status") +async def get_computer_use_status(profile: Optional[str] = None): + """Report cua-driver install + macOS permission state for the desktop card. + + See ``tools.computer_use.permissions.permissions_status`` for the payload + shape. Read-only and fast (shells ``cua-driver permissions status``). + """ + from tools.computer_use.permissions import permissions_status + + with _profile_scope(profile): + return permissions_status() + + +@app.post("/api/tools/computer-use/permissions/grant") +async def grant_computer_use_permissions(profile: Optional[str] = None): + """Spawn ``hermes computer-use permissions grant`` as a background action. + + ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so + the macOS TCC dialog is attributed to com.trycua.driver, then waits for + the user to approve. The frontend polls ``GET /api/actions/computer-use- + grant/status`` for progress and re-reads ``/status`` once it exits. 
+ """ + if sys.platform != "darwin": + raise HTTPException( + status_code=400, + detail="Computer Use permissions are managed on macOS only.", + ) + try: + proc = _spawn_hermes_action( + _profile_cli_args(profile) + + ["computer-use", "permissions", "grant"], + "computer-use-grant", + ) + except HTTPException: + raise + except Exception as exc: + _log.exception("Failed to spawn computer-use permissions grant") + raise HTTPException( + status_code=500, detail=f"Failed to request permissions: {exc}" + ) + return {"ok": True, "pid": proc.pid, "name": "computer-use-grant"} + + # --------------------------------------------------------------------------- # Raw YAML config endpoint # --------------------------------------------------------------------------- diff --git a/tools/computer_use/permissions.py b/tools/computer_use/permissions.py new file mode 100644 index 00000000000..45a6ac2534d --- /dev/null +++ b/tools/computer_use/permissions.py @@ -0,0 +1,136 @@ +""" +macOS Accessibility + Screen Recording permission helpers for Computer Use. + +cua-driver 0.5+ owns the permission model. Crucially, the grants attach to +cua-driver's OWN TCC identity (``com.trycua.driver`` — the installed +``CuaDriver.app``), NOT the terminal, the Hermes CLI, or the Hermes desktop +app. So: + + * ``cua-driver permissions status --json`` reports the driver daemon's real + grant state, independent of who asks. + * ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so + the macOS dialog is attributed to ``com.trycua.driver`` — the process that + actually does the work. + +Because the permission lives with the cua-driver binary, the Hermes desktop +app needs no Accessibility / Screen Recording entitlements of its own. This is +a thin, testable client driven by the ``hermes computer-use permissions`` CLI +and the desktop ``/api/tools/computer-use/status`` endpoint. 
+""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import sys +from typing import Any, Dict, Optional + +_BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable") + + +def _driver_cmd(override: Optional[str]) -> str: + if override: + return override + try: + from hermes_cli.tools_config import _cua_driver_cmd + + return _cua_driver_cmd() + except Exception: + return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver" + + +def _child_env() -> Dict[str, str]: + """cua-driver child env honoring the Hermes telemetry opt-in policy.""" + try: + from tools.computer_use.cua_backend import cua_driver_child_env + + return cua_driver_child_env() + except Exception: + return dict(os.environ) + + +def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess: + return subprocess.run( + [binary, *args], + capture_output=True, + text=True, + timeout=timeout, + env=_child_env(), + ) + + +def permissions_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]: + """Computer Use install + macOS permission state for the desktop card. + + ``None`` permission values mean "unknown" — the driver binary is missing, + the platform has no TCC model, or no CuaDriver daemon is running to answer + for its own identity yet. + """ + binary = shutil.which(_driver_cmd(driver_cmd)) + out: Dict[str, Any] = { + "platform_supported": sys.platform == "darwin", + "installed": bool(binary), + "version": None, + "source": None, + "error": None, + **{k: None for k in _BOOLS}, + } + if not binary: + return out + + try: + out["version"] = (_run(binary, "--version", timeout=5).stdout or "").strip() or None + except Exception: + pass + + # Permissions are a macOS concept; cua-driver only exposes the subcommand there. 
+ if sys.platform != "darwin": + return out + + try: + raw = (_run(binary, "permissions", "status", "--json", timeout=10).stdout or "").strip() + data = json.loads(raw) if raw else {} + except subprocess.TimeoutExpired: + out["error"] = "cua-driver permissions status timed out" + return out + except Exception as exc: # spawn failure or malformed JSON + out["error"] = f"cua-driver permissions status failed: {exc}" + return out + + if isinstance(data, dict): + out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)}) + if isinstance(data.get("source"), dict): + out["source"] = data["source"] + return out + + +def request_permissions_grant(driver_cmd: Optional[str] = None) -> int: + """Run ``cua-driver permissions grant`` (macOS); stream its output. + + Launches CuaDriver via LaunchServices so the TCC dialog is attributed to + ``com.trycua.driver``, then waits for the grant. Returns the driver's exit + code (0 ok), 2 if the binary is missing, 64 on an unsupported platform. + """ + if sys.platform != "darwin": + print("Computer Use permissions are managed on macOS only.") + return 64 + + binary = shutil.which(_driver_cmd(driver_cmd)) + if not binary: + print("cua-driver: not installed. Run: hermes computer-use install") + return 2 + + print( + "Requesting Accessibility + Screen Recording for CuaDriver.\n" + "macOS will show a dialog attributed to CuaDriver (com.trycua.driver) — " + "approve it, then return here." 
+ ) + try: + return int(subprocess.run([binary, "permissions", "grant"], env=_child_env()).returncode) + except KeyboardInterrupt: # pragma: no cover - interactive + return 130 + except Exception as exc: # pragma: no cover - defensive + print(f"cua-driver permissions grant failed: {exc}", file=sys.stderr) + return 2 From 2dfcead68367c93c256a966d8314ca36fb2d679f Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 22 Jun 2026 17:48:43 -0500 Subject: [PATCH 2/3] feat(computer-use): make the preflight cross-platform (win/linux) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The card was macOS-only. cua-driver also runs on Windows and Linux, so fold `cua-driver doctor` (cross-platform binary/health probes) into a single OS-aware `ready` signal: - macOS: ready == both TCC grants; keeps the permission rows + grant flow. - Windows/Linux: no TCC toggles, so ready == driver health, with a per-OS note (SmartScreen/UIAccess on Windows; X11/XWayland on Linux). `computer_use_status()` replaces the macOS-only `permissions_status()` and surfaces `platform`, `ready`, `can_grant`, and the doctor `checks` (non-ok ones render as warnings). CLI `permissions status`, the REST endpoint, and the desktop card all key off the one payload. Grant stays macOS-only (400 elsewhere — nothing to grant). 
--- .../src/app/settings/computer-use-panel.tsx | 121 +++++++++++------ apps/desktop/src/hermes.ts | 1 + apps/desktop/src/types/hermes.ts | 27 +++- hermes_cli/main.py | 43 +++--- hermes_cli/web_server.py | 36 ++--- tools/computer_use/permissions.py | 126 ++++++++++++------ 6 files changed, 229 insertions(+), 125 deletions(-) diff --git a/apps/desktop/src/app/settings/computer-use-panel.tsx b/apps/desktop/src/app/settings/computer-use-panel.tsx index 826ce80ae62..ada5c08e3ad 100644 --- a/apps/desktop/src/app/settings/computer-use-panel.tsx +++ b/apps/desktop/src/app/settings/computer-use-panel.tsx @@ -15,18 +15,32 @@ interface ComputerUsePanelProps { onConfiguredChange?: () => void } -function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) { - const tone = granted === true ? 'primary' : 'muted' +// Per-OS one-liner shown when there's no TCC grant flow (Windows/Linux). macOS +// drives the permission rows instead, so it has no entry here. +const PLATFORM_NOTE: Record = { + linux: 'Drives your desktop via the X11/XWayland accessibility stack — no permission prompt.', + win32: 'First run may trigger a Windows SmartScreen prompt for the cua-driver UIAccess worker — allow it.' +} + +function tone(granted: boolean | null) { + return granted === true ? 'primary' : 'muted' +} + +function GrantIcon({ granted }: { granted: boolean | null }) { const Icon = granted === true ? Check : granted === false ? X : AlertTriangle + return +} + +function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) { return (
{label}

{hint}

- - + + {granted === true ? 'Granted' : granted === false ? 'Not granted' : 'Unknown'}
@@ -34,17 +48,17 @@ function PermissionRow({ granted, label, hint }: { granted: boolean | null; labe } /** - * Computer Use preflight card. + * Cross-platform Computer Use preflight card. * - * Computer Use drives the Mac through cua-driver, whose Accessibility + - * Screen Recording grants attach to cua-driver's OWN TCC identity - * (`com.trycua.driver` / the installed CuaDriver.app) — not the Hermes - * desktop app. So this card reflects the driver's real grant state and - * triggers a grant via `cua-driver permissions grant`, which launches - * CuaDriver via LaunchServices so the macOS dialog is attributed correctly. + * cua-driver runs on macOS, Windows, and Linux, but readiness differs: macOS + * needs two TCC grants (Accessibility + Screen Recording) that attach to + * cua-driver's own `com.trycua.driver` identity — not Hermes — and are + * requested via `cua-driver permissions grant` (dialog attributed to + * CuaDriver). Windows/Linux have no TCC toggles, so readiness is driver health + * from `cua-driver doctor`. The backend folds both into one `ready` signal. * - * Binary install/upgrade still lives in the cua-driver provider's post-setup - * runner below this card (the generic ToolsetConfigPanel). + * Binary install/upgrade stays in the cua-driver provider's post-setup runner + * below this card (the generic ToolsetConfigPanel). 
*/ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) { const [status, setStatus] = useState(null) @@ -54,8 +68,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) const refresh = useCallback(async () => { try { - const next = await getComputerUseStatus() - setStatus(next) + setStatus(await getComputerUseStatus()) } catch (err) { notifyError(err, 'Could not read Computer Use status') } finally { @@ -67,9 +80,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) activeRef.current = true void refresh() - return () => { - activeRef.current = false - } + return () => void (activeRef.current = false) }, [refresh]) const grant = useCallback(async () => { @@ -90,8 +101,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.' }) - // Poll the grant action until it exits (the driver waits for the user to - // flip the switch), then re-read the live permission state. + // The driver waits for the user to flip the switch — poll until it exits. for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) { await new Promise(resolve => window.setTimeout(resolve, 1500)) @@ -138,7 +148,7 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) if (!status.platform_supported) { return (

- Computer Use permissions are managed on macOS. On this platform, enable the cua-driver provider below. + Computer Use isn't supported on this platform ({status.platform}).

) } @@ -146,22 +156,26 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) if (!status.installed) { return (

- Install the cua-driver backend below to drive macOS. After installing, grant Accessibility and Screen - Recording here. + Install the cua-driver backend below to drive this machine. + {status.can_grant && ' Then grant Accessibility and Screen Recording here.'}

) } - const allGranted = status.accessibility === true && status.screen_recording === true + const failingChecks = status.checks.filter(c => c.status !== 'ok') return (
-

- Grants attach to CuaDriver's own identity (com.trycua.driver), not Hermes — so the dialog is - attributed to the process that drives your Mac. -

+ {status.can_grant ? ( +

+ Grants attach to CuaDriver's own identity (com.trycua.driver), not Hermes — so the dialog is + attributed to the process that drives your Mac. +

+ ) : ( +

{PLATFORM_NOTE[status.platform] ?? ''}

+ )} {status.version &&

{status.version}

}
- - + {status.can_grant ? ( + <> + + + + ) : ( +
+ Driver health + + + {status.ready === true ? 'Ready' : status.ready === false ? 'Not ready' : 'Unknown'} + +
+ )} + + {failingChecks.map(c => ( +

+ + {c.label}: {c.message} +

+ ))} {status.error && (

@@ -188,16 +221,18 @@ export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps)

)} - {allGranted ? ( + {status.ready ? (
Computer Use is ready. Ask the agent to capture an app and click around.
) : ( - + status.can_grant && ( + + ) )}
) diff --git a/apps/desktop/src/hermes.ts b/apps/desktop/src/hermes.ts index 04340b0a549..a7b5ae14307 100644 --- a/apps/desktop/src/hermes.ts +++ b/apps/desktop/src/hermes.ts @@ -60,6 +60,7 @@ export type { AudioTranscriptionResponse, AuxiliaryModelsResponse, BackendUpdateCheckResponse, + ComputerUseCheck, ComputerUsePermissionSource, ComputerUseStatus, ConfigFieldSchema, diff --git a/apps/desktop/src/types/hermes.ts b/apps/desktop/src/types/hermes.ts index b860ea8e89d..338ed2d3544 100644 --- a/apps/desktop/src/types/hermes.ts +++ b/apps/desktop/src/types/hermes.ts @@ -581,11 +581,11 @@ export interface ToolsetConfig { /** Shape of `GET /api/tools/computer-use/status`. * - * Computer Use drives the Mac through cua-driver, whose Accessibility + - * Screen Recording grants attach to cua-driver's OWN TCC identity - * (`com.trycua.driver`), not the Hermes app. Permission booleans are - * `null` when unknown (binary missing, or no CuaDriver daemon running to - * answer for its own identity). */ + * cua-driver runs on macOS, Windows, and Linux. `ready` is the single OS-aware + * readiness signal: on macOS both TCC grants (Accessibility + Screen + * Recording, which attach to cua-driver's own `com.trycua.driver` identity, + * not Hermes); elsewhere, driver health from `cua-driver doctor`. `null` + * means unknown (binary missing / probe failed). */ export interface ComputerUsePermissionSource { attribution?: string executable?: string @@ -594,13 +594,28 @@ export interface ComputerUsePermissionSource { responsible_ppid?: number } +export interface ComputerUseCheck { + label: string + status: string + message: string +} + export interface ComputerUseStatus { - /** macOS is the only platform with the TCC permission model cua-driver gates. */ + /** `sys.platform`: "darwin" | "win32" | "linux" | ... */ + platform: string + /** cua-driver has a runtime backend for this platform. */ platform_supported: boolean /** cua-driver binary resolved on PATH. 
*/ installed: boolean /** e.g. "cua-driver 0.5.1", or null when unknown. */ version: string | null + /** Unified readiness — both TCC grants (macOS) or driver health (else). */ + ready: boolean | null + /** Whether a permission grant flow exists (macOS-only TCC). */ + can_grant: boolean + /** Cross-platform `cua-driver doctor` probes. */ + checks: ComputerUseCheck[] + /** macOS TCC detail — `null` off macOS or when unknown. */ accessibility: boolean | null screen_recording: boolean | null screen_recording_capturable: boolean | null diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 906497055c8..9c0d53247f3 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12598,27 +12598,32 @@ def main(): sys.exit(request_permissions_grant()) if perms_action == "status": import json as _json - from tools.computer_use.permissions import permissions_status - st = permissions_status() + from tools.computer_use.permissions import computer_use_status + st = computer_use_status() if bool(getattr(args, "json", False)): print(_json.dumps(st, indent=2, sort_keys=True)) - else: - if not st["installed"]: - print("cua-driver: not installed") - print(" Run: hermes computer-use install") - elif not st["platform_supported"]: - print("Computer Use permissions are managed on macOS only.") - else: - def _glyph(v): - return "✅" if v is True else ("❌" if v is False else "•") - print(f"cua-driver: {st.get('version') or 'installed'}") - print(f" {_glyph(st['accessibility'])} Accessibility") - print(f" {_glyph(st['screen_recording'])} Screen Recording") - if st.get("error"): - print(f" ⚠ {st['error']}") - if st["accessibility"] is not True or st["screen_recording"] is not True: - print(" Grant: hermes computer-use permissions grant") - sys.exit(0 if st.get("accessibility") and st.get("screen_recording") else 1) + sys.exit(0 if st["ready"] else 1) + if not st["platform_supported"]: + print(f"Computer Use is not supported on {st['platform']}.") + sys.exit(1) + if not st["installed"]: + 
print("cua-driver: not installed. Run: hermes computer-use install") + sys.exit(1) + glyph = lambda v: "✅" if v is True else ("❌" if v is False else "•") # noqa: E731 + print(f"cua-driver: {st['version'] or 'installed'} ({st['platform']})") + if st["can_grant"]: # macOS TCC permissions + print(f" {glyph(st['accessibility'])} Accessibility") + print(f" {glyph(st['screen_recording'])} Screen Recording") + if not st["ready"]: + print(" Grant: hermes computer-use permissions grant") + else: # no TCC model — readiness is driver health + print(f" {glyph(st['ready'])} driver health (no permission toggles on {st['platform']})") + for c in st["checks"]: + if c["status"] != "ok": + print(f" ⚠ {c['label']}: {c['message']}") + if st["error"]: + print(f" ⚠ {st['error']}") + sys.exit(0 if st["ready"] else 1) computer_use_perms.print_help() return # No subcommand → show help diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 5a6b764e00f..c6a6b065589 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -10673,43 +10673,45 @@ async def run_toolset_post_setup( # --------------------------------------------------------------------------- -# Computer Use (cua-driver) — install + macOS permission state +# Computer Use (cua-driver) — cross-platform readiness + macOS permission grant # -# Computer Use drives the Mac through cua-driver, whose Accessibility + -# Screen Recording grants attach to cua-driver's OWN TCC identity -# (com.trycua.driver / the installed CuaDriver.app) — not the Hermes desktop -# app or this server. The desktop's Computer Use card reflects that state and -# triggers a grant via the same `cua-driver permissions grant` flow the CLI -# uses, so no Hermes-side entitlement is involved. +# cua-driver runs on macOS, Windows, and Linux. 
The desktop card reflects +# per-OS readiness: on macOS the Accessibility + Screen Recording TCC grants +# (which attach to cua-driver's OWN identity, com.trycua.driver — not Hermes, +# so no app entitlement is involved); elsewhere, driver health from +# `cua-driver doctor`. The grant flow is macOS-only (no TCC toggles to request +# on Windows/Linux). # --------------------------------------------------------------------------- @app.get("/api/tools/computer-use/status") async def get_computer_use_status(profile: Optional[str] = None): - """Report cua-driver install + macOS permission state for the desktop card. + """Cross-platform Computer Use readiness for the desktop card. - See ``tools.computer_use.permissions.permissions_status`` for the payload - shape. Read-only and fast (shells ``cua-driver permissions status``). + See ``tools.computer_use.permissions.computer_use_status`` for the payload + shape. Read-only and fast (shells ``cua-driver doctor`` + macOS + ``permissions status``). """ - from tools.computer_use.permissions import permissions_status + from tools.computer_use.permissions import computer_use_status with _profile_scope(profile): - return permissions_status() + return computer_use_status() @app.post("/api/tools/computer-use/permissions/grant") async def grant_computer_use_permissions(profile: Optional[str] = None): """Spawn ``hermes computer-use permissions grant`` as a background action. - ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so - the macOS TCC dialog is attributed to com.trycua.driver, then waits for - the user to approve. The frontend polls ``GET /api/actions/computer-use- - grant/status`` for progress and re-reads ``/status`` once it exits. + macOS-only: ``cua-driver permissions grant`` launches CuaDriver via + LaunchServices so the TCC dialog is attributed to com.trycua.driver, then + waits for approval. 
The frontend polls ``GET /api/actions/computer-use- + grant/status`` and re-reads ``/status`` once it exits. Windows/Linux have + no TCC toggles to grant, so this returns 400 there. """ if sys.platform != "darwin": raise HTTPException( status_code=400, - detail="Computer Use permissions are managed on macOS only.", + detail="Computer Use permission grants are a macOS concept.", ) try: proc = _spawn_hermes_action( diff --git a/tools/computer_use/permissions.py b/tools/computer_use/permissions.py index 45a6ac2534d..e72208b796e 100644 --- a/tools/computer_use/permissions.py +++ b/tools/computer_use/permissions.py @@ -1,21 +1,24 @@ """ -macOS Accessibility + Screen Recording permission helpers for Computer Use. +Cross-platform Computer Use readiness + macOS permission helpers. -cua-driver 0.5+ owns the permission model. Crucially, the grants attach to -cua-driver's OWN TCC identity (``com.trycua.driver`` — the installed -``CuaDriver.app``), NOT the terminal, the Hermes CLI, or the Hermes desktop -app. So: +cua-driver runs on macOS, Windows, and Linux, but "ready to drive" means +something different on each: - * ``cua-driver permissions status --json`` reports the driver daemon's real - grant state, independent of who asks. - * ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so - the macOS dialog is attributed to ``com.trycua.driver`` — the process that - actually does the work. + * macOS — explicit TCC grants (Accessibility + Screen Recording). cua-driver + reports/requests them via ``permissions status`` / ``permissions grant``. + The grants attach to cua-driver's OWN identity (``com.trycua.driver`` / + the installed ``CuaDriver.app``), NOT Hermes — so no Hermes entitlement is + involved, and ``grant`` launches CuaDriver via LaunchServices so the macOS + dialog is attributed correctly. + * Windows — no TCC toggles; the UIAccess worker (``cua-driver-uia.exe``) may + trip a SmartScreen prompt on first run. Readiness == driver health. 
+ * Linux — assistive control via the X11/XWayland stack. Readiness == driver + health. -Because the permission lives with the cua-driver binary, the Hermes desktop -app needs no Accessibility / Screen Recording entitlements of its own. This is -a thin, testable client driven by the ``hermes computer-use permissions`` CLI -and the desktop ``/api/tools/computer-use/status`` endpoint. +The universal signal on every platform is ``cua-driver doctor --json`` (binary +integrity + platform support). ``computer_use_status`` folds that together with +the macOS permission detail into one payload for the desktop card, the +``hermes computer-use permissions`` CLI, and ``/api/tools/computer-use/status``. """ from __future__ import annotations @@ -25,8 +28,10 @@ import os import shutil import subprocess import sys -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional +# Platforms with a cua-driver runtime backend (mirrors the toolset platform_gate). +_RUNTIME_PLATFORMS = frozenset({"darwin", "win32", "linux"}) _BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable") @@ -61,18 +66,65 @@ def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess ) -def permissions_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]: - """Computer Use install + macOS permission state for the desktop card. +def _json_out(binary: str, *args: str, timeout: float) -> Any: + """Run ``binary args`` and parse stdout as JSON; ``None`` if stdout is empty. Errors propagate.""" + raw = (_run(binary, *args, timeout=timeout).stdout or "").strip() + return json.loads(raw) if raw else None - ``None`` permission values mean "unknown" — the driver binary is missing, - the platform has no TCC model, or no CuaDriver daemon is running to answer - for its own identity yet.
+ +def _doctor(binary: str) -> Optional[Dict[str, Any]]: + """``cua-driver doctor --json`` → ``{ok, checks:[{label,status,message}]}``.""" + try: + data = _json_out(binary, "doctor", "--json", timeout=12) + except Exception: + return None + if not isinstance(data, dict): + return None + checks: List[Dict[str, str]] = [ + { + "label": str(p.get("label", "")), + "status": str(p.get("status", "")), + "message": str(p.get("message", "")), + } + for p in data.get("probes", []) + if isinstance(p, dict) + ] + return {"ok": bool(data.get("ok")), "checks": checks} + + +def _mac_permissions(binary: str, out: Dict[str, Any]) -> None: + """Fold ``cua-driver permissions status --json`` booleans into ``out``.""" + try: + data = _json_out(binary, "permissions", "status", "--json", timeout=10) + except subprocess.TimeoutExpired: + out["error"] = "cua-driver permissions status timed out" + return + except Exception as exc: # spawn failure or malformed JSON + out["error"] = f"cua-driver permissions status failed: {exc}" + return + if isinstance(data, dict): + out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)}) + if isinstance(data.get("source"), dict): + out["source"] = data["source"] + + +def computer_use_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]: + """Unified, OS-aware Computer Use readiness for the desktop card. + + ``ready`` is the single signal the UI keys off: on macOS it's both TCC + grants; elsewhere it's driver health (no TCC model). ``None`` means + unknown (binary missing / probe failed). ``can_grant`` is macOS-only. 
""" + plat = sys.platform binary = shutil.which(_driver_cmd(driver_cmd)) out: Dict[str, Any] = { - "platform_supported": sys.platform == "darwin", + "platform": plat, + "platform_supported": plat in _RUNTIME_PLATFORMS, "installed": bool(binary), "version": None, + "ready": None, + "can_grant": plat == "darwin", + "checks": [], "source": None, "error": None, **{k: None for k in _BOOLS}, @@ -85,24 +137,17 @@ def permissions_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]: except Exception: pass - # Permissions are a macOS concept; cua-driver only exposes the subcommand there. - if sys.platform != "darwin": - return out + doctor = _doctor(binary) + if doctor is not None: + out["checks"] = doctor["checks"] - try: - raw = (_run(binary, "permissions", "status", "--json", timeout=10).stdout or "").strip() - data = json.loads(raw) if raw else {} - except subprocess.TimeoutExpired: - out["error"] = "cua-driver permissions status timed out" - return out - except Exception as exc: # spawn failure or malformed JSON - out["error"] = f"cua-driver permissions status failed: {exc}" - return out - - if isinstance(data, dict): - out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)}) - if isinstance(data.get("source"), dict): - out["source"] = data["source"] + if plat == "darwin": + _mac_permissions(binary, out) + if out["error"] is None: + out["ready"] = out["accessibility"] is True and out["screen_recording"] is True + elif doctor is not None: + # No TCC model off macOS — readiness is driver health. + out["ready"] = doctor["ok"] return out @@ -111,10 +156,11 @@ def request_permissions_grant(driver_cmd: Optional[str] = None) -> int: Launches CuaDriver via LaunchServices so the TCC dialog is attributed to ``com.trycua.driver``, then waits for the grant. Returns the driver's exit - code (0 ok), 2 if the binary is missing, 64 on an unsupported platform. 
+ code (0 ok), 2 if the binary is missing, 64 on a non-macOS platform (which + has no TCC permission model to grant). """ if sys.platform != "darwin": - print("Computer Use permissions are managed on macOS only.") + print("Computer Use permissions are a macOS concept; nothing to grant here.") return 64 binary = shutil.which(_driver_cmd(driver_cmd)) From 3c1058e2e983c45856c4417e1c47d69843e778ed Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 22 Jun 2026 17:59:18 -0500 Subject: [PATCH 3/3] fix(computer-use): set stdin=DEVNULL on cua-driver subprocess calls The subprocess-stdin guard (TUI gateway fd-inheritance protection) flagged the `permissions grant` call. None of the cua-driver probes/grant read stdin, so DEVNULL is correct; apply it to the shared `_run` helper and the grant call. --- tools/computer_use/permissions.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/computer_use/permissions.py b/tools/computer_use/permissions.py index e72208b796e..ab97b60ee66 100644 --- a/tools/computer_use/permissions.py +++ b/tools/computer_use/permissions.py @@ -63,6 +63,7 @@ def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess text=True, timeout=timeout, env=_child_env(), + stdin=subprocess.DEVNULL, ) @@ -174,7 +175,13 @@ def request_permissions_grant(driver_cmd: Optional[str] = None) -> int: "approve it, then return here." ) try: - return int(subprocess.run([binary, "permissions", "grant"], env=_child_env()).returncode) + return int( + subprocess.run( + [binary, "permissions", "grant"], + env=_child_env(), + stdin=subprocess.DEVNULL, + ).returncode + ) except KeyboardInterrupt: # pragma: no cover - interactive return 130 except Exception as exc: # pragma: no cover - defensive