feat(computer-use): surface macOS permission preflight in the desktop

Computer Use already worked through the desktop backend (the cua-driver
toolset enables + installs via Settings -> Skills & Tools), but there was
no in-app way to see or grant the two macOS permissions it needs, so "give
a model my Mac" was tribal knowledge.

The grants attach to cua-driver's OWN TCC identity (com.trycua.driver /
the installed CuaDriver.app), not Hermes -- so no app entitlement is
involved. cua-driver 0.5+ exposes `permissions status/grant`, which we wrap:

- tools/computer_use/permissions.py: thin client over the two subcommands
- hermes computer-use permissions {status,grant}: CLI parity
- GET /api/tools/computer-use/status, POST .../permissions/grant: desktop REST
- ComputerUsePanel: live Accessibility + Screen Recording state with a
  Grant button (dialog attributed to CuaDriver), shown in the expanded
  Computer Use toolset row. Binary install stays in the existing provider
  post-setup runner.

Follow-ups: i18n the card copy; a "Stop driver" control (cua-driver stop)
for the runaway-`serve` case.
This commit is contained in:
Brooklyn Nicholson 2026-06-22 17:33:52 -05:00
parent c080b2dc3e
commit 0223ea5f59
7 changed files with 505 additions and 0 deletions

View file

@ -0,0 +1,204 @@
import { useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import { getActionStatus, getComputerUseStatus, grantComputerUsePermissions } from '@/hermes'
import { AlertTriangle, Check, ExternalLink, Loader2, RefreshCw, X } from '@/lib/icons'
import { upsertDesktopActionTask } from '@/store/activity'
import { notify, notifyError } from '@/store/notifications'
import type { ComputerUseStatus } from '@/types/hermes'
import { Pill } from './primitives'
/** Props accepted by the Computer Use preflight card (`ComputerUsePanel`). */
interface ComputerUsePanelProps {
/**
 * Optional callback that lets the parent re-read its toolset list after a
 * permission/install change so the "Configured / Needs keys" pill stays in
 * sync. The panel invokes it once a grant attempt has finished polling and
 * the permission status has been re-read, provided the panel is still mounted.
 */
onConfiguredChange?: () => void
}
/**
 * One row of the Computer Use card: a permission label, a one-line hint, and
 * a pill showing the tri-state grant value (`true` = granted, `false` = not
 * granted, `null` = unknown).
 */
function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) {
  const isGranted = granted === true
  const isDenied = granted === false

  // Unknown (null) falls through to the warning icon and the "Unknown" text.
  const StateIcon = isGranted ? Check : isDenied ? X : AlertTriangle

  let stateText = 'Unknown'
  if (isGranted) {
    stateText = 'Granted'
  } else if (isDenied) {
    stateText = 'Not granted'
  }

  return (
    <div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
      <div className="min-w-0">
        <span className="text-sm font-medium">{label}</span>
        <p className="mt-0.5 text-[0.7rem] text-muted-foreground">{hint}</p>
      </div>
      {/* Only a confirmed grant gets the highlighted tone. */}
      <Pill tone={isGranted ? 'primary' : 'muted'}>
        <StateIcon className="size-3" />
        {stateText}
      </Pill>
    </div>
  )
}
/**
 * Computer Use preflight card.
 *
 * Computer Use drives the Mac through cua-driver, whose Accessibility +
 * Screen Recording grants attach to cua-driver's OWN TCC identity
 * (`com.trycua.driver` / the installed CuaDriver.app) — not the Hermes
 * desktop app. So this card reflects the driver's real grant state and
 * triggers a grant via `cua-driver permissions grant`, which launches
 * CuaDriver via LaunchServices so the macOS dialog is attributed correctly.
 *
 * Binary install/upgrade still lives in the cua-driver provider's post-setup
 * runner below this card (the generic ToolsetConfigPanel).
 *
 * Rendering states, in order: a spinner while the first status read is in
 * flight; nothing if no status could be read; a short note when the platform
 * is unsupported or the driver is not installed; otherwise the two permission
 * rows plus either a "ready" line or the Grant button.
 *
 * @param onConfiguredChange Optional callback fired after a grant attempt has
 *   finished and the status has been re-read (only while still mounted).
 */
export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) {
  const [status, setStatus] = useState<ComputerUseStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [granting, setGranting] = useState(false)
  // True between mount and unmount. Every async continuation checks it so a
  // request that resolves after the row is collapsed neither touches state nor
  // raises a toast for a panel the user can no longer see.
  const activeRef = useRef(false)

  // Re-read install + permission state from the backend.
  const refresh = useCallback(async () => {
    try {
      const next = await getComputerUseStatus()

      if (activeRef.current) {
        setStatus(next)
      }
    } catch (err) {
      if (activeRef.current) {
        notifyError(err, 'Could not read Computer Use status')
      }
    } finally {
      if (activeRef.current) {
        setLoading(false)
      }
    }
  }, [])

  useEffect(() => {
    // Mark active BEFORE kicking off the first read so its result is accepted.
    activeRef.current = true
    void refresh()

    return () => {
      activeRef.current = false
    }
  }, [refresh])

  // Start the background grant action, poll it until it exits, then refresh.
  const grant = useCallback(async () => {
    setGranting(true)

    try {
      const started = await grantComputerUsePermissions()

      if (!started.ok) {
        notifyError(new Error('spawn failed'), 'Could not request permissions')

        return
      }

      notify({
        kind: 'info',
        title: 'Approve in System Settings',
        message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.'
      })

      // Poll the grant action until it exits (the driver waits for the user to
      // flip the switch), then re-read the live permission state. The loop is
      // bounded at 150 attempts with a 1.5 s delay between them.
      for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) {
        await new Promise(resolve => window.setTimeout(resolve, 1500))

        // The panel may have unmounted while we were sleeping.
        if (!activeRef.current) {
          break
        }

        const polled = await getActionStatus(started.name, 200)
        upsertDesktopActionTask(polled)

        if (!polled.running) {
          break
        }
      }

      if (activeRef.current) {
        await refresh()
        onConfiguredChange?.()
      }
    } catch (err) {
      if (activeRef.current) {
        notifyError(err, 'Could not request permissions')
      }
    } finally {
      if (activeRef.current) {
        setGranting(false)
      }
    }
  }, [onConfiguredChange, refresh])

  if (loading) {
    return (
      <div className="mt-3 flex items-center gap-2 px-1 text-xs text-muted-foreground">
        <Loader2 className="size-3.5 animate-spin" />
        Checking Computer Use status
      </div>
    )
  }

  // The status read failed (an error toast was already shown by refresh()).
  if (!status) {
    return null
  }

  if (!status.platform_supported) {
    return (
      <p className="mt-3 px-1 text-xs text-muted-foreground">
        Computer Use permissions are managed on macOS. On this platform, enable the cua-driver provider below.
      </p>
    )
  }

  if (!status.installed) {
    return (
      <p className="mt-3 px-1 text-xs text-muted-foreground">
        Install the cua-driver backend below to drive macOS. After installing, grant Accessibility and Screen
        Recording here.
      </p>
    )
  }

  // `null` (unknown) deliberately does not count as granted.
  const allGranted = status.accessibility === true && status.screen_recording === true

  return (
    <div className="mt-3 grid gap-2">
      <div className="flex flex-wrap items-center justify-between gap-2 px-1">
        <div className="min-w-0">
          <p className="text-[0.72rem] text-muted-foreground">
            Grants attach to CuaDriver&apos;s own identity (com.trycua.driver), not Hermes — so the dialog is
            attributed to the process that drives your Mac.
          </p>
          {status.version && <p className="text-[0.68rem] text-muted-foreground/80">{status.version}</p>}
        </div>
        <Button onClick={() => void refresh()} size="sm" variant="text">
          <RefreshCw className="size-3.5" />
          Recheck
        </Button>
      </div>
      <PermissionRow
        granted={status.accessibility}
        hint="Lets cua-driver post clicks, keystrokes, and read the accessibility tree."
        label="Accessibility"
      />
      <PermissionRow
        granted={status.screen_recording}
        hint="Lets cua-driver capture screenshots of app windows."
        label="Screen Recording"
      />
      {status.error && (
        <p className="px-1 text-[0.7rem] text-muted-foreground">
          <AlertTriangle className="mr-1 inline size-3" />
          {status.error}
        </p>
      )}
      {allGranted ? (
        <div className="flex items-center gap-1.5 px-1 text-xs text-muted-foreground">
          <Check className="size-3.5" />
          Computer Use is ready. Ask the agent to capture an app and click around.
        </div>
      ) : (
        <Button disabled={granting} onClick={() => void grant()} size="sm">
          {granting ? <Loader2 className="size-3.5 animate-spin" /> : <ExternalLink className="size-3.5" />}
          {granting ? 'Waiting for approval…' : 'Grant permissions'}
        </Button>
      )}
    </div>
  )
}

View file

@ -17,6 +17,7 @@ import { useRefreshHotkey } from '../hooks/use-refresh-hotkey'
import { useRouteEnumParam } from '../hooks/use-route-enum-param'
import { PAGE_INSET_X } from '../layout-constants'
import { PageSearchShell } from '../page-search-shell'
import { ComputerUsePanel } from '../settings/computer-use-panel'
import { asText, includesQuery, prettyName, toolNames, toolsetDisplayLabel } from '../settings/helpers'
import { ToolsetConfigPanel } from '../settings/toolset-config-panel'
import type { SetStatusbarItemGroup } from '../shell/statusbar-controls'
@ -334,6 +335,9 @@ export function SkillsView({ setStatusbarItemGroup: _setStatusbarItemGroup, ...p
))}
</div>
)}
{expanded && toolset.name === 'computer_use' && (
<ComputerUsePanel onConfiguredChange={refreshToolsets} />
)}
{expanded && <ToolsetConfigPanel onConfiguredChange={refreshToolsets} toolset={toolset.name} />}
</div>
)

View file

@ -8,6 +8,7 @@ import type {
AudioTranscriptionResponse,
AuxiliaryModelsResponse,
BackendUpdateCheckResponse,
ComputerUseStatus,
ConfigSchemaResponse,
CronJob,
CronJobCreatePayload,
@ -59,6 +60,8 @@ export type {
AudioTranscriptionResponse,
AuxiliaryModelsResponse,
BackendUpdateCheckResponse,
ComputerUsePermissionSource,
ComputerUseStatus,
ConfigFieldSchema,
ConfigSchemaResponse,
CronJob,
@ -516,6 +519,21 @@ export function runToolsetPostSetup(name: string, key: string): Promise<ActionRe
})
}
/**
 * Read the Computer Use status payload from `/api/tools/computer-use/status`
 * through the desktop bridge (`window.hermesDesktop.api`).
 *
 * The request options are the result of `profileScoped()` with `path` set on
 * top; no explicit `method` is passed.
 *
 * @returns A promise for the backend's {@link ComputerUseStatus} payload.
 */
export function getComputerUseStatus(): Promise<ComputerUseStatus> {
return window.hermesDesktop.api<ComputerUseStatus>({
...profileScoped(),
path: '/api/tools/computer-use/status'
})
}
/**
 * POST to `/api/tools/computer-use/permissions/grant` through the desktop
 * bridge (`window.hermesDesktop.api`) to request the macOS permission grant.
 *
 * The request options are the result of `profileScoped()` with `path` and
 * `method` set on top.
 *
 * @returns A promise for the backend's `ActionResponse`.
 */
export function grantComputerUsePermissions(): Promise<ActionResponse> {
return window.hermesDesktop.api<ActionResponse>({
...profileScoped(),
path: '/api/tools/computer-use/permissions/grant',
method: 'POST'
})
}
export function getMessagingPlatforms(): Promise<MessagingPlatformsResponse> {
return window.hermesDesktop.api<MessagingPlatformsResponse>({
path: '/api/messaging/platforms'

View file

@ -579,6 +579,36 @@ export interface ToolsetConfig {
active_provider: string | null
}
/** Optional provenance details carried in {@link ComputerUseStatus.source}.
 *
 * Every field is optional. NOTE(review): the field names suggest this
 * describes which process answered the permission probe — confirm the exact
 * semantics against the cua-driver `permissions status --json` output. */
export interface ComputerUsePermissionSource {
attribution?: string
executable?: string
note?: string
pid?: number
responsible_ppid?: number
}
/** Shape of `GET /api/tools/computer-use/status`.
 *
 * Computer Use drives the Mac through cua-driver, whose Accessibility +
 * Screen Recording grants attach to cua-driver's OWN TCC identity
 * (`com.trycua.driver`), not the Hermes app. Permission booleans are
 * `null` when unknown (binary missing, or no CuaDriver daemon running to
 * answer for its own identity). */
export interface ComputerUseStatus {
/** macOS is the only platform with the TCC permission model cua-driver gates. */
platform_supported: boolean
/** cua-driver binary resolved on PATH. */
installed: boolean
/** e.g. "cua-driver 0.5.1", or null when unknown. */
version: string | null
/** Accessibility grant state; `null` when unknown. */
accessibility: boolean | null
/** Screen Recording grant state; `null` when unknown. */
screen_recording: boolean | null
/** `null` when unknown. NOTE(review): presumably whether a capture actually
 * succeeds, as distinct from the grant flag — confirm against cua-driver. */
screen_recording_capturable: boolean | null
/** Provenance object reported alongside the permission state, or `null`. */
source: ComputerUsePermissionSource | null
/** Populated when the status probe itself failed. */
error: string | null
}
export interface SessionSearchResult {
/** Lineage root of the matched conversation. Stable across compression and
* used as the durable pin id; falls back to session_id when absent. */

View file

@ -12507,6 +12507,33 @@ def main():
action="store_true",
help="Emit the raw structured payload as JSON (same shape as `tools/call`).",
)
computer_use_perms = computer_use_sub.add_parser(
"permissions",
help="Check or grant macOS Accessibility + Screen Recording (macOS)",
description=(
"Computer Use drives the Mac through cua-driver, whose TCC grants\n"
"attach to cua-driver's own identity (com.trycua.driver) — not the\n"
"terminal or the Hermes app. `status` reports the driver's grant\n"
"state; `grant` launches CuaDriver via LaunchServices so the macOS\n"
"permission dialog is attributed to the process that does the work."
),
)
computer_use_perms_sub = computer_use_perms.add_subparsers(
dest="computer_use_perms_action"
)
computer_use_perms_status = computer_use_perms_sub.add_parser(
"status",
help="Report Accessibility + Screen Recording grant state (read-only)",
)
computer_use_perms_status.add_argument(
"--json",
action="store_true",
help="Emit the normalized permission payload as JSON.",
)
computer_use_perms_sub.add_parser(
"grant",
help="Request the grants (opens the dialog attributed to CuaDriver)",
)
def cmd_computer_use(args):
action = getattr(args, "computer_use_action", None)
@ -12564,6 +12591,36 @@ def main():
json_output=bool(getattr(args, "json", False)),
)
sys.exit(code)
if action == "permissions":
perms_action = getattr(args, "computer_use_perms_action", None)
if perms_action == "grant":
from tools.computer_use.permissions import request_permissions_grant
sys.exit(request_permissions_grant())
if perms_action == "status":
import json as _json
from tools.computer_use.permissions import permissions_status
st = permissions_status()
if bool(getattr(args, "json", False)):
print(_json.dumps(st, indent=2, sort_keys=True))
else:
if not st["installed"]:
print("cua-driver: not installed")
print(" Run: hermes computer-use install")
elif not st["platform_supported"]:
print("Computer Use permissions are managed on macOS only.")
else:
def _glyph(v):
return "" if v is True else ("" if v is False else "")
print(f"cua-driver: {st.get('version') or 'installed'}")
print(f" {_glyph(st['accessibility'])} Accessibility")
print(f" {_glyph(st['screen_recording'])} Screen Recording")
if st.get("error"):
print(f"{st['error']}")
if st["accessibility"] is not True or st["screen_recording"] is not True:
print(" Grant: hermes computer-use permissions grant")
sys.exit(0 if st.get("accessibility") and st.get("screen_recording") else 1)
computer_use_perms.print_help()
return
# No subcommand → show help
computer_use_parser.print_help()

View file

@ -8349,6 +8349,7 @@ async def install_mcp_catalog_entry(body: MCPCatalogInstall, profile: Optional[s
# Register the mcp-install action log so /api/actions/mcp-install/status works.
_ACTION_LOG_FILES.setdefault("mcp-install", "action-mcp-install.log")
# Likewise register the computer-use-grant action log: the permissions grant
# endpoint spawns its action under the name "computer-use-grant" and its
# docstring directs clients to poll /api/actions/computer-use-grant/status.
_ACTION_LOG_FILES.setdefault("computer-use-grant", "action-computer-use-grant.log")
# ---------------------------------------------------------------------------
@ -10671,6 +10672,61 @@ async def run_toolset_post_setup(
return {"ok": True, "pid": proc.pid, "name": "tools-post-setup", "key": body.key}
# ---------------------------------------------------------------------------
# Computer Use (cua-driver) — install + macOS permission state
#
# Computer Use drives the Mac through cua-driver, whose Accessibility +
# Screen Recording grants attach to cua-driver's OWN TCC identity
# (com.trycua.driver / the installed CuaDriver.app) — not the Hermes desktop
# app or this server. The desktop's Computer Use card reflects that state and
# triggers a grant via the same `cua-driver permissions grant` flow the CLI
# uses, so no Hermes-side entitlement is involved.
# ---------------------------------------------------------------------------
@app.get("/api/tools/computer-use/status")
async def get_computer_use_status(profile: Optional[str] = None):
    """Return cua-driver install + macOS permission state for the desktop card.

    The payload is produced by
    ``tools.computer_use.permissions.permissions_status`` (see that function
    for the exact shape) and is computed inside the requested profile scope.
    The probe is read-only: it shells ``cua-driver permissions status``.
    """
    # Imported lazily so the module is only loaded when the endpoint is hit.
    from tools.computer_use.permissions import permissions_status as _probe_status

    with _profile_scope(profile):
        payload = _probe_status()
        return payload
@app.post("/api/tools/computer-use/permissions/grant")
async def grant_computer_use_permissions(profile: Optional[str] = None):
    """Start ``hermes computer-use permissions grant`` as a background action.

    ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so
    the macOS TCC dialog is attributed to com.trycua.driver, then waits for
    the user to approve. The frontend polls ``GET /api/actions/computer-use-
    grant/status`` for progress and re-reads ``/status`` once it exits.

    Responds 400 on any platform other than macOS and 500 if the action
    cannot be spawned; an ``HTTPException`` raised while spawning is passed
    through unchanged.
    """
    action_name = "computer-use-grant"

    running_on_macos = sys.platform == "darwin"
    if not running_on_macos:
        raise HTTPException(
            status_code=400,
            detail="Computer Use permissions are managed on macOS only.",
        )

    try:
        # Built inside the try so a failure resolving the profile arguments is
        # reported the same way as a spawn failure.
        cli_args = _profile_cli_args(profile) + ["computer-use", "permissions", "grant"]
        proc = _spawn_hermes_action(cli_args, action_name)
    except HTTPException:
        raise
    except Exception as exc:
        _log.exception("Failed to spawn computer-use permissions grant")
        raise HTTPException(
            status_code=500, detail=f"Failed to request permissions: {exc}"
        )

    return {"ok": True, "pid": proc.pid, "name": action_name}
# ---------------------------------------------------------------------------
# Raw YAML config endpoint
# ---------------------------------------------------------------------------

View file

@ -0,0 +1,136 @@
"""
macOS Accessibility + Screen Recording permission helpers for Computer Use.
cua-driver 0.5+ owns the permission model. Crucially, the grants attach to
cua-driver's OWN TCC identity (``com.trycua.driver`` — the installed
``CuaDriver.app``), NOT the terminal, the Hermes CLI, or the Hermes desktop
app. So:
* ``cua-driver permissions status --json`` reports the driver daemon's real
grant state, independent of who asks.
* ``cua-driver permissions grant`` launches CuaDriver via LaunchServices so
the macOS dialog is attributed to ``com.trycua.driver`` — the process that
actually does the work.
Because the permission lives with the cua-driver binary, the Hermes desktop
app needs no Accessibility / Screen Recording entitlements of its own. This is
a thin, testable client driven by the ``hermes computer-use permissions`` CLI
and the desktop ``/api/tools/computer-use/status`` endpoint.
"""
from __future__ import annotations
import json
import os
import shutil
import subprocess
import sys
from typing import Any, Dict, Optional
# Permission keys reported by the status payload. Each defaults to None
# ("unknown") and is only overwritten when the driver's JSON carries a real
# bool for that key.
_BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable")
def _driver_cmd(override: Optional[str]) -> str:
if override:
return override
try:
from hermes_cli.tools_config import _cua_driver_cmd
return _cua_driver_cmd()
except Exception:
return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver"
def _child_env() -> Dict[str, str]:
"""cua-driver child env honoring the Hermes telemetry opt-in policy."""
try:
from tools.computer_use.cua_backend import cua_driver_child_env
return cua_driver_child_env()
except Exception:
return dict(os.environ)
def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess:
    """Run ``binary`` with ``args``, capturing stdout/stderr as text.

    The child receives the environment from ``_child_env()``.
    ``subprocess.TimeoutExpired`` propagates if ``timeout`` seconds elapse.
    """
    argv = [binary]
    argv.extend(args)
    child_env = _child_env()
    return subprocess.run(
        argv,
        env=child_env,
        timeout=timeout,
        text=True,
        capture_output=True,
    )
def permissions_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
    """Computer Use install + macOS permission state for the desktop card.

    ``None`` permission values mean "unknown" — the driver binary is missing,
    the platform has no TCC model, or no CuaDriver daemon is running to answer
    for its own identity yet.

    Args:
        driver_cmd: Optional command name/path overriding the default driver
            resolution.

    Returns:
        A dict with ``platform_supported``, ``installed``, ``version``,
        ``source``, ``error`` and one key per entry of ``_BOOLS``. ``error``
        is set when the status probe timed out, could not be spawned,
        produced malformed or non-object JSON, or exited non-zero without
        printing anything to stdout.
    """
    binary = shutil.which(_driver_cmd(driver_cmd))
    out: Dict[str, Any] = {
        "platform_supported": sys.platform == "darwin",
        "installed": bool(binary),
        "version": None,
        "source": None,
        "error": None,
        **{k: None for k in _BOOLS},
    }
    if not binary:
        return out

    try:
        out["version"] = (_run(binary, "--version", timeout=5).stdout or "").strip() or None
    except Exception:
        # Deliberately best-effort: the version string is informational only,
        # so a failed/slow `--version` must not block the permission probe.
        pass

    # Permissions are a macOS concept; cua-driver only exposes the subcommand there.
    if sys.platform != "darwin":
        return out

    try:
        proc = _run(binary, "permissions", "status", "--json", timeout=10)
        raw = (proc.stdout or "").strip()
        data = json.loads(raw) if raw else {}
    except subprocess.TimeoutExpired:
        out["error"] = "cua-driver permissions status timed out"
        return out
    except Exception as exc:  # spawn failure or malformed JSON
        out["error"] = f"cua-driver permissions status failed: {exc}"
        return out

    # A failing driver that printed nothing (e.g. a build without the
    # `permissions` subcommand) would otherwise look like a clean "unknown".
    # Only treat it as an error when there is no payload at all, because a
    # non-zero exit accompanied by JSON may still carry valid state.
    exit_code = getattr(proc, "returncode", 0)
    if not raw and isinstance(exit_code, int) and exit_code != 0:
        detail = (getattr(proc, "stderr", "") or "").strip()
        message = f"cua-driver permissions status exited with code {exit_code}"
        out["error"] = f"{message}: {detail}" if detail else message
        return out

    if not isinstance(data, dict):
        out["error"] = "cua-driver permissions status returned an unexpected JSON payload"
        return out

    # Only real booleans overwrite the "unknown" defaults.
    out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)})
    if isinstance(data.get("source"), dict):
        out["source"] = data["source"]
    return out
def request_permissions_grant(driver_cmd: Optional[str] = None) -> int:
    """Run ``cua-driver permissions grant`` (macOS); stream its output.

    Launches CuaDriver via LaunchServices so the TCC dialog is attributed to
    ``com.trycua.driver``, then waits for the grant.

    Args:
        driver_cmd: Optional command name/path overriding the default driver
            resolution.

    Returns:
        The driver's exit code (0 ok); 2 if the binary is missing or could not
        be run; 64 on an unsupported platform; 130 if interrupted from the
        keyboard.
    """
    if sys.platform != "darwin":
        print("Computer Use permissions are managed on macOS only.")
        return 64

    binary = shutil.which(_driver_cmd(driver_cmd))
    if not binary:
        print("cua-driver: not installed. Run: hermes computer-use install")
        return 2

    # Flush before handing stdout to the child: the driver writes directly to
    # the inherited descriptor, so when stdout is a block-buffered file or pipe
    # (e.g. a background action log) an unflushed banner would otherwise land
    # AFTER the driver's own output.
    print(
        "Requesting Accessibility + Screen Recording for CuaDriver.\n"
        "macOS will show a dialog attributed to CuaDriver (com.trycua.driver) — "
        "approve it, then return here.",
        flush=True,
    )
    try:
        return int(subprocess.run([binary, "permissions", "grant"], env=_child_env()).returncode)
    except KeyboardInterrupt:  # pragma: no cover - interactive
        return 130
    except Exception as exc:  # pragma: no cover - defensive
        print(f"cua-driver permissions grant failed: {exc}", file=sys.stderr)
        return 2