mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
Merge pull request #51072 from NousResearch/bb/desktop-computer-use
feat(computer-use): add a cross-platform readiness preflight to the desktop
This commit is contained in:
commit
6780cee679
7 changed files with 616 additions and 0 deletions
239
apps/desktop/src/app/settings/computer-use-panel.tsx
Normal file
239
apps/desktop/src/app/settings/computer-use-panel.tsx
Normal file
|
|
@ -0,0 +1,239 @@
|
|||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { getActionStatus, getComputerUseStatus, grantComputerUsePermissions } from '@/hermes'
|
||||
import { AlertTriangle, Check, ExternalLink, Loader2, RefreshCw, X } from '@/lib/icons'
|
||||
import { upsertDesktopActionTask } from '@/store/activity'
|
||||
import { notify, notifyError } from '@/store/notifications'
|
||||
import type { ComputerUseStatus } from '@/types/hermes'
|
||||
|
||||
import { Pill } from './primitives'
|
||||
|
||||
/** Props accepted by the Computer Use preflight card. */
interface ComputerUsePanelProps {
  /**
   * Called after a permission/install change so the parent can re-read its
   * toolset list and keep the "Configured / Needs keys" pill in sync.
   */
  onConfiguredChange?: () => void
}
|
||||
|
||||
// One-line blurb per platform that has no TCC grant flow (Windows/Linux),
// keyed by the backend's `platform` string. macOS renders the permission rows
// instead, so it intentionally has no entry here.
const PLATFORM_NOTE: Record<string, string> = {
  win32: 'First run may trigger a Windows SmartScreen prompt for the cua-driver UIAccess worker — allow it.',
  linux: 'Drives your desktop via the X11/XWayland accessibility stack — no permission prompt.'
}
|
||||
|
||||
/** Pill tone for a tri-state flag: only a confirmed `true` is highlighted. */
function tone(granted: boolean | null) {
  if (granted === true) {
    return 'primary'
  }

  return 'muted'
}
|
||||
|
||||
/** Status glyph: check when granted, cross when denied, warning when unknown (`null`). */
function GrantIcon({ granted }: { granted: boolean | null }) {
  if (granted === true) {
    return <Check className="size-3" />
  }

  if (granted === false) {
    return <X className="size-3" />
  }

  return <AlertTriangle className="size-3" />
}
|
||||
|
||||
/**
 * One labelled permission line: title and hint on the left, a tri-state pill
 * (Granted / Not granted / Unknown) on the right.
 */
function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) {
  let verdict = 'Unknown'

  if (granted === true) {
    verdict = 'Granted'
  } else if (granted === false) {
    verdict = 'Not granted'
  }

  return (
    <div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
      <div className="min-w-0">
        <span className="text-sm font-medium">{label}</span>
        <p className="mt-0.5 text-[0.7rem] text-muted-foreground">{hint}</p>
      </div>
      <Pill tone={tone(granted)}>
        <GrantIcon granted={granted} />
        {verdict}
      </Pill>
    </div>
  )
}
|
||||
|
||||
/**
 * Cross-platform Computer Use preflight card.
 *
 * cua-driver runs on macOS, Windows, and Linux, but readiness differs: macOS
 * needs two TCC grants (Accessibility + Screen Recording) that attach to
 * cua-driver's own `com.trycua.driver` identity — not Hermes — and are
 * requested via `cua-driver permissions grant` (dialog attributed to
 * CuaDriver). Windows/Linux have no TCC toggles, so readiness is driver health
 * from `cua-driver doctor`. The backend folds both into one `ready` signal.
 *
 * Binary install/upgrade stays in the cua-driver provider's post-setup runner
 * below this card (the generic ToolsetConfigPanel).
 *
 * Render order: loading spinner → nothing (status fetch failed) → unsupported
 * platform note → "install the driver" note → the full readiness card.
 */
export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) {
  const [status, setStatus] = useState<ComputerUseStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [granting, setGranting] = useState(false)
  // True while mounted; the long-running grant poll checks it so it stops
  // polling and touching state once the card is gone.
  const activeRef = useRef(false)

  // Re-read the status payload. Failures surface as a notification and leave
  // the previous status (if any) on screen.
  const refresh = useCallback(async () => {
    try {
      setStatus(await getComputerUseStatus())
    } catch (err) {
      notifyError(err, 'Could not read Computer Use status')
    } finally {
      setLoading(false)
    }
  }, [])

  useEffect(() => {
    activeRef.current = true
    void refresh()

    return () => void (activeRef.current = false)
  }, [refresh])

  // Kick off the backend grant action, then poll it until it exits (or the
  // card unmounts) before re-reading status and notifying the parent.
  const grant = useCallback(async () => {
    setGranting(true)

    try {
      const started = await grantComputerUsePermissions()

      if (!started.ok) {
        notifyError(new Error('spawn failed'), 'Could not request permissions')

        return
      }

      notify({
        kind: 'info',
        title: 'Approve in System Settings',
        message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.'
      })

      // The driver waits for the user to flip the switch — poll until it exits.
      // Bounded at 150 polls spaced 1.5 s apart.
      for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) {
        await new Promise(resolve => window.setTimeout(resolve, 1500))

        if (!activeRef.current) {
          break
        }

        const polled = await getActionStatus(started.name, 200)
        upsertDesktopActionTask(polled)

        if (!polled.running) {
          break
        }
      }

      if (activeRef.current) {
        await refresh()
        onConfiguredChange?.()
      }
    } catch (err) {
      if (activeRef.current) {
        notifyError(err, 'Could not request permissions')
      }
    } finally {
      if (activeRef.current) {
        setGranting(false)
      }
    }
  }, [onConfiguredChange, refresh])

  if (loading) {
    return (
      <div className="mt-3 flex items-center gap-2 px-1 text-xs text-muted-foreground">
        <Loader2 className="size-3.5 animate-spin" />
        Checking Computer Use status…
      </div>
    )
  }

  if (!status) {
    return null
  }

  if (!status.platform_supported) {
    return (
      <p className="mt-3 px-1 text-xs text-muted-foreground">
        Computer Use isn't supported on this platform ({status.platform}).
      </p>
    )
  }

  if (!status.installed) {
    return (
      <p className="mt-3 px-1 text-xs text-muted-foreground">
        Install the cua-driver backend below to drive this machine.
        {status.can_grant && ' Then grant Accessibility and Screen Recording here.'}
      </p>
    )
  }

  const failingChecks = status.checks.filter(c => c.status !== 'ok')

  return (
    <div className="mt-3 grid gap-2">
      <div className="flex flex-wrap items-center justify-between gap-2 px-1">
        <div className="min-w-0">
          {status.can_grant ? (
            <p className="text-[0.72rem] text-muted-foreground">
              Grants attach to CuaDriver's own identity (com.trycua.driver), not Hermes — so the dialog is
              attributed to the process that drives your Mac.
            </p>
          ) : (
            <p className="text-[0.72rem] text-muted-foreground">{PLATFORM_NOTE[status.platform] ?? ''}</p>
          )}
          {status.version && <p className="text-[0.68rem] text-muted-foreground/80">{status.version}</p>}
        </div>
        <Button onClick={() => void refresh()} size="sm" variant="text">
          <RefreshCw className="size-3.5" />
          Recheck
        </Button>
      </div>

      {status.can_grant ? (
        <>
          <PermissionRow
            granted={status.accessibility}
            hint="Lets cua-driver post clicks, keystrokes, and read the accessibility tree."
            label="Accessibility"
          />
          <PermissionRow
            granted={status.screen_recording}
            hint="Lets cua-driver capture screenshots of app windows."
            label="Screen Recording"
          />
        </>
      ) : (
        <div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
          <span className="text-sm font-medium">Driver health</span>
          <Pill tone={tone(status.ready)}>
            <GrantIcon granted={status.ready} />
            {status.ready === true ? 'Ready' : status.ready === false ? 'Not ready' : 'Unknown'}
          </Pill>
        </div>
      )}

      {/* Labels come from the driver and may be empty or repeated, so the key
          is qualified with the list position to stay unique among siblings. */}
      {failingChecks.map((c, index) => (
        <p className="px-1 text-[0.7rem] text-muted-foreground" key={`${index}:${c.label}`}>
          <AlertTriangle className="mr-1 inline size-3" />
          {c.label}: {c.message}
        </p>
      ))}

      {status.error && (
        <p className="px-1 text-[0.7rem] text-muted-foreground">
          <AlertTriangle className="mr-1 inline size-3" />
          {status.error}
        </p>
      )}

      {status.ready ? (
        <div className="flex items-center gap-1.5 px-1 text-xs text-muted-foreground">
          <Check className="size-3.5" />
          Computer Use is ready. Ask the agent to capture an app and click around.
        </div>
      ) : (
        status.can_grant && (
          <Button disabled={granting} onClick={() => void grant()} size="sm">
            {granting ? <Loader2 className="size-3.5 animate-spin" /> : <ExternalLink className="size-3.5" />}
            {granting ? 'Waiting for approval…' : 'Grant permissions'}
          </Button>
        )
      )}
    </div>
  )
}
|
||||
|
|
@ -17,6 +17,7 @@ import { useRefreshHotkey } from '../hooks/use-refresh-hotkey'
|
|||
import { useRouteEnumParam } from '../hooks/use-route-enum-param'
|
||||
import { PAGE_INSET_X } from '../layout-constants'
|
||||
import { PageSearchShell } from '../page-search-shell'
|
||||
import { ComputerUsePanel } from '../settings/computer-use-panel'
|
||||
import { asText, includesQuery, prettyName, toolNames, toolsetDisplayLabel } from '../settings/helpers'
|
||||
import { ToolsetConfigPanel } from '../settings/toolset-config-panel'
|
||||
import type { SetStatusbarItemGroup } from '../shell/statusbar-controls'
|
||||
|
|
@ -334,6 +335,9 @@ export function SkillsView({ setStatusbarItemGroup: _setStatusbarItemGroup, ...p
|
|||
))}
|
||||
</div>
|
||||
)}
|
||||
{expanded && toolset.name === 'computer_use' && (
|
||||
<ComputerUsePanel onConfiguredChange={refreshToolsets} />
|
||||
)}
|
||||
{expanded && <ToolsetConfigPanel onConfiguredChange={refreshToolsets} toolset={toolset.name} />}
|
||||
</div>
|
||||
)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import type {
|
|||
AudioTranscriptionResponse,
|
||||
AuxiliaryModelsResponse,
|
||||
BackendUpdateCheckResponse,
|
||||
ComputerUseStatus,
|
||||
ConfigSchemaResponse,
|
||||
CronJob,
|
||||
CronJobCreatePayload,
|
||||
|
|
@ -59,6 +60,9 @@ export type {
|
|||
AudioTranscriptionResponse,
|
||||
AuxiliaryModelsResponse,
|
||||
BackendUpdateCheckResponse,
|
||||
ComputerUseCheck,
|
||||
ComputerUsePermissionSource,
|
||||
ComputerUseStatus,
|
||||
ConfigFieldSchema,
|
||||
ConfigSchemaResponse,
|
||||
CronJob,
|
||||
|
|
@ -516,6 +520,21 @@ export function runToolsetPostSetup(name: string, key: string): Promise<ActionRe
|
|||
})
|
||||
}
|
||||
|
||||
/**
 * Fetch the Computer Use readiness payload from
 * `/api/tools/computer-use/status`, merged with whatever `profileScoped()`
 * contributes to the request.
 */
export function getComputerUseStatus(): Promise<ComputerUseStatus> {
  const request = {
    ...profileScoped(),
    path: '/api/tools/computer-use/status'
  }

  return window.hermesDesktop.api<ComputerUseStatus>(request)
}
|
||||
|
||||
/**
 * POST to `/api/tools/computer-use/permissions/grant` to start the permission
 * grant action; resolves with the backend's action response.
 */
export function grantComputerUsePermissions(): Promise<ActionResponse> {
  return window.hermesDesktop.api<ActionResponse>({
    ...profileScoped(),
    method: 'POST',
    path: '/api/tools/computer-use/permissions/grant'
  })
}
|
||||
|
||||
export function getMessagingPlatforms(): Promise<MessagingPlatformsResponse> {
|
||||
return window.hermesDesktop.api<MessagingPlatformsResponse>({
|
||||
path: '/api/messaging/platforms'
|
||||
|
|
|
|||
|
|
@ -579,6 +579,51 @@ export interface ToolsetConfig {
|
|||
active_provider: string | null
|
||||
}
|
||||
|
||||
/**
 * The `source` object reported by `cua-driver permissions status --json`,
 * passed through by the backend without interpretation. Every field is
 * optional because the backend only checks that the value is an object.
 */
export interface ComputerUsePermissionSource {
  attribution?: string
  executable?: string
  note?: string
  pid?: number
  responsible_ppid?: number
}

/** One probe result from `cua-driver doctor`; `status` is `"ok"` when the probe passed. */
export interface ComputerUseCheck {
  label: string
  status: string
  message: string
}

/** Shape of `GET /api/tools/computer-use/status`.
 *
 * cua-driver runs on macOS, Windows, and Linux. `ready` is the single OS-aware
 * readiness signal: on macOS both TCC grants (Accessibility + Screen
 * Recording, which attach to cua-driver's own `com.trycua.driver` identity,
 * not Hermes); elsewhere, driver health from `cua-driver doctor`. `null`
 * means unknown (binary missing / probe failed). */
export interface ComputerUseStatus {
  /** `sys.platform`: "darwin" | "win32" | "linux" | ... */
  platform: string
  /** cua-driver has a runtime backend for this platform. */
  platform_supported: boolean
  /** cua-driver binary resolved on PATH. */
  installed: boolean
  /** e.g. "cua-driver 0.5.1", or null when unknown. */
  version: string | null
  /** Unified readiness — both TCC grants (macOS) or driver health (else). */
  ready: boolean | null
  /** Whether a permission grant flow exists (macOS-only TCC). */
  can_grant: boolean
  /** Cross-platform `cua-driver doctor` probes. */
  checks: ComputerUseCheck[]
  /** macOS TCC detail — `null` off macOS or when unknown. */
  accessibility: boolean | null
  screen_recording: boolean | null
  screen_recording_capturable: boolean | null
  source: ComputerUsePermissionSource | null
  /** Populated when the status probe itself failed. */
  error: string | null
}
|
||||
|
||||
export interface SessionSearchResult {
|
||||
/** Lineage root of the matched conversation. Stable across compression and
|
||||
* used as the durable pin id; falls back to session_id when absent. */
|
||||
|
|
|
|||
|
|
@ -12507,6 +12507,33 @@ def main():
|
|||
action="store_true",
|
||||
help="Emit the raw structured payload as JSON (same shape as `tools/call`).",
|
||||
)
|
||||
computer_use_perms = computer_use_sub.add_parser(
|
||||
"permissions",
|
||||
help="Check or grant macOS Accessibility + Screen Recording (macOS)",
|
||||
description=(
|
||||
"Computer Use drives the Mac through cua-driver, whose TCC grants\n"
|
||||
"attach to cua-driver's own identity (com.trycua.driver) — not the\n"
|
||||
"terminal or the Hermes app. `status` reports the driver's grant\n"
|
||||
"state; `grant` launches CuaDriver via LaunchServices so the macOS\n"
|
||||
"permission dialog is attributed to the process that does the work."
|
||||
),
|
||||
)
|
||||
computer_use_perms_sub = computer_use_perms.add_subparsers(
|
||||
dest="computer_use_perms_action"
|
||||
)
|
||||
computer_use_perms_status = computer_use_perms_sub.add_parser(
|
||||
"status",
|
||||
help="Report Accessibility + Screen Recording grant state (read-only)",
|
||||
)
|
||||
computer_use_perms_status.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Emit the normalized permission payload as JSON.",
|
||||
)
|
||||
computer_use_perms_sub.add_parser(
|
||||
"grant",
|
||||
help="Request the grants (opens the dialog attributed to CuaDriver)",
|
||||
)
|
||||
|
||||
def cmd_computer_use(args):
|
||||
action = getattr(args, "computer_use_action", None)
|
||||
|
|
@ -12564,6 +12591,41 @@ def main():
|
|||
json_output=bool(getattr(args, "json", False)),
|
||||
)
|
||||
sys.exit(code)
|
||||
if action == "permissions":
|
||||
perms_action = getattr(args, "computer_use_perms_action", None)
|
||||
if perms_action == "grant":
|
||||
from tools.computer_use.permissions import request_permissions_grant
|
||||
sys.exit(request_permissions_grant())
|
||||
if perms_action == "status":
|
||||
import json as _json
|
||||
from tools.computer_use.permissions import computer_use_status
|
||||
st = computer_use_status()
|
||||
if bool(getattr(args, "json", False)):
|
||||
print(_json.dumps(st, indent=2, sort_keys=True))
|
||||
sys.exit(0 if st["ready"] else 1)
|
||||
if not st["platform_supported"]:
|
||||
print(f"Computer Use is not supported on {st['platform']}.")
|
||||
sys.exit(1)
|
||||
if not st["installed"]:
|
||||
print("cua-driver: not installed. Run: hermes computer-use install")
|
||||
sys.exit(1)
|
||||
glyph = lambda v: "✅" if v is True else ("❌" if v is False else "•") # noqa: E731
|
||||
print(f"cua-driver: {st['version'] or 'installed'} ({st['platform']})")
|
||||
if st["can_grant"]: # macOS TCC permissions
|
||||
print(f" {glyph(st['accessibility'])} Accessibility")
|
||||
print(f" {glyph(st['screen_recording'])} Screen Recording")
|
||||
if not st["ready"]:
|
||||
print(" Grant: hermes computer-use permissions grant")
|
||||
else: # no TCC model — readiness is driver health
|
||||
print(f" {glyph(st['ready'])} driver health (no permission toggles on {st['platform']})")
|
||||
for c in st["checks"]:
|
||||
if c["status"] != "ok":
|
||||
print(f" ⚠ {c['label']}: {c['message']}")
|
||||
if st["error"]:
|
||||
print(f" ⚠ {st['error']}")
|
||||
sys.exit(0 if st["ready"] else 1)
|
||||
computer_use_perms.print_help()
|
||||
return
|
||||
# No subcommand → show help
|
||||
computer_use_parser.print_help()
|
||||
|
||||
|
|
|
|||
|
|
@ -8349,6 +8349,7 @@ async def install_mcp_catalog_entry(body: MCPCatalogInstall, profile: Optional[s
|
|||
|
||||
# Register the mcp-install action log so /api/actions/mcp-install/status works.
|
||||
_ACTION_LOG_FILES.setdefault("mcp-install", "action-mcp-install.log")
|
||||
_ACTION_LOG_FILES.setdefault("computer-use-grant", "action-computer-use-grant.log")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -10671,6 +10672,63 @@ async def run_toolset_post_setup(
|
|||
return {"ok": True, "pid": proc.pid, "name": "tools-post-setup", "key": body.key}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Computer Use (cua-driver) — cross-platform readiness + macOS permission grant
|
||||
#
|
||||
# cua-driver runs on macOS, Windows, and Linux. The desktop card reflects
|
||||
# per-OS readiness: on macOS the Accessibility + Screen Recording TCC grants
|
||||
# (which attach to cua-driver's OWN identity, com.trycua.driver — not Hermes,
|
||||
# so no app entitlement is involved); elsewhere, driver health from
|
||||
# `cua-driver doctor`. The grant flow is macOS-only (no TCC toggles to request
|
||||
# on Windows/Linux).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.get("/api/tools/computer-use/status")
async def get_computer_use_status(profile: Optional[str] = None):
    """Return the Computer Use readiness payload for the desktop card.

    Delegates to ``tools.computer_use.permissions.computer_use_status`` (see
    that function for the payload shape), evaluated inside the scope of the
    requested ``profile``. Read-only: it only shells out to ``cua-driver``
    status/doctor probes.
    """
    from tools.computer_use.permissions import computer_use_status as _probe_status

    with _profile_scope(profile):
        payload = _probe_status()
    return payload
|
||||
|
||||
|
||||
@app.post("/api/tools/computer-use/permissions/grant")
async def grant_computer_use_permissions(profile: Optional[str] = None):
    """Start ``hermes computer-use permissions grant`` as a background action.

    macOS-only: the CLI runs ``cua-driver permissions grant``, which launches
    CuaDriver via LaunchServices so the TCC dialog is attributed to
    com.trycua.driver, then waits for approval. The frontend polls
    ``GET /api/actions/computer-use-grant/status`` and re-reads ``/status``
    once the action exits.

    Raises:
        HTTPException: 400 on any non-macOS platform (there are no TCC toggles
            to grant); 500 when the action could not be spawned. An
            ``HTTPException`` raised by the spawn helper is passed through
            unchanged.
    """
    if sys.platform != "darwin":
        raise HTTPException(
            status_code=400,
            detail="Computer Use permission grants are a macOS concept.",
        )

    action_name = "computer-use-grant"
    try:
        cli_args = _profile_cli_args(profile) + ["computer-use", "permissions", "grant"]
        proc = _spawn_hermes_action(cli_args, action_name)
    except HTTPException:
        raise
    except Exception as exc:
        _log.exception("Failed to spawn computer-use permissions grant")
        raise HTTPException(
            status_code=500, detail=f"Failed to request permissions: {exc}"
        )
    return {"ok": True, "pid": proc.pid, "name": action_name}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Raw YAML config endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
189
tools/computer_use/permissions.py
Normal file
189
tools/computer_use/permissions.py
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
"""
|
||||
Cross-platform Computer Use readiness + macOS permission helpers.
|
||||
|
||||
cua-driver runs on macOS, Windows, and Linux, but "ready to drive" means
|
||||
something different on each:
|
||||
|
||||
* macOS — explicit TCC grants (Accessibility + Screen Recording). cua-driver
|
||||
reports/requests them via ``permissions status`` / ``permissions grant``.
|
||||
The grants attach to cua-driver's OWN identity (``com.trycua.driver`` /
|
||||
the installed ``CuaDriver.app``), NOT Hermes — so no Hermes entitlement is
|
||||
involved, and ``grant`` launches CuaDriver via LaunchServices so the macOS
|
||||
dialog is attributed correctly.
|
||||
* Windows — no TCC toggles; the UIAccess worker (``cua-driver-uia.exe``) may
|
||||
trip a SmartScreen prompt on first run. Readiness == driver health.
|
||||
* Linux — assistive control via the X11/XWayland stack. Readiness == driver
|
||||
health.
|
||||
|
||||
The universal signal on every platform is ``cua-driver doctor --json`` (binary
|
||||
integrity + platform support). ``computer_use_status`` folds that together with
|
||||
the macOS permission detail into one payload for the desktop card, the
|
||||
``hermes computer-use permissions`` CLI, and ``/api/tools/computer-use/status``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Platforms with a cua-driver runtime backend (mirrors the toolset platform_gate).
|
||||
_RUNTIME_PLATFORMS = frozenset({"darwin", "win32", "linux"})
|
||||
_BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable")
|
||||
|
||||
|
||||
def _driver_cmd(override: Optional[str]) -> str:
|
||||
if override:
|
||||
return override
|
||||
try:
|
||||
from hermes_cli.tools_config import _cua_driver_cmd
|
||||
|
||||
return _cua_driver_cmd()
|
||||
except Exception:
|
||||
return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver"
|
||||
|
||||
|
||||
def _child_env() -> Dict[str, str]:
|
||||
"""cua-driver child env honoring the Hermes telemetry opt-in policy."""
|
||||
try:
|
||||
from tools.computer_use.cua_backend import cua_driver_child_env
|
||||
|
||||
return cua_driver_child_env()
|
||||
except Exception:
|
||||
return dict(os.environ)
|
||||
|
||||
|
||||
def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess:
|
||||
return subprocess.run(
|
||||
[binary, *args],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout,
|
||||
env=_child_env(),
|
||||
stdin=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
|
||||
def _json_out(binary: str, *args: str, timeout: float) -> Any:
|
||||
"""Run ``binary args`` and parse stdout as JSON, or ``None`` on any failure."""
|
||||
raw = (_run(binary, *args, timeout=timeout).stdout or "").strip()
|
||||
return json.loads(raw) if raw else None
|
||||
|
||||
|
||||
def _doctor(binary: str) -> Optional[Dict[str, Any]]:
|
||||
"""``cua-driver doctor --json`` → ``{ok, checks:[{label,status,message}]}``."""
|
||||
try:
|
||||
data = _json_out(binary, "doctor", "--json", timeout=12)
|
||||
except Exception:
|
||||
return None
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
checks: List[Dict[str, str]] = [
|
||||
{
|
||||
"label": str(p.get("label", "")),
|
||||
"status": str(p.get("status", "")),
|
||||
"message": str(p.get("message", "")),
|
||||
}
|
||||
for p in data.get("probes", [])
|
||||
if isinstance(p, dict)
|
||||
]
|
||||
return {"ok": bool(data.get("ok")), "checks": checks}
|
||||
|
||||
|
||||
def _mac_permissions(binary: str, out: Dict[str, Any]) -> None:
|
||||
"""Fold ``cua-driver permissions status --json`` booleans into ``out``."""
|
||||
try:
|
||||
data = _json_out(binary, "permissions", "status", "--json", timeout=10)
|
||||
except subprocess.TimeoutExpired:
|
||||
out["error"] = "cua-driver permissions status timed out"
|
||||
return
|
||||
except Exception as exc: # spawn failure or malformed JSON
|
||||
out["error"] = f"cua-driver permissions status failed: {exc}"
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)})
|
||||
if isinstance(data.get("source"), dict):
|
||||
out["source"] = data["source"]
|
||||
|
||||
|
||||
def computer_use_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
    """Build the single, OS-aware Computer Use readiness payload.

    The payload always carries every key (platform, platform_supported,
    installed, version, ready, can_grant, checks, source, error, plus the
    ``_BOOLS`` permission flags). ``ready`` is what the UI keys off:

    * macOS — ``True`` only when both Accessibility and Screen Recording are
      granted; left ``None`` when the permission probe reported an error.
    * elsewhere — the ``ok`` flag from ``cua-driver doctor`` (no TCC model).
    * ``None`` — unknown (binary missing / probe failed).

    ``can_grant`` is true on macOS only.
    """
    plat = sys.platform
    binary = shutil.which(_driver_cmd(driver_cmd))
    report: Dict[str, Any] = {
        "platform": plat,
        "platform_supported": plat in _RUNTIME_PLATFORMS,
        "installed": bool(binary),
        "version": None,
        "ready": None,
        "can_grant": plat == "darwin",
        "checks": [],
        "source": None,
        "error": None,
    }
    for flag in _BOOLS:
        report[flag] = None

    if binary:
        # Version banner is best-effort; any failure leaves it as None.
        try:
            banner = _run(binary, "--version", timeout=5).stdout
            report["version"] = (banner or "").strip() or None
        except Exception:
            pass

        health = _doctor(binary)
        if health is not None:
            report["checks"] = health["checks"]

        if plat != "darwin":
            # No TCC model off macOS — readiness is driver health.
            if health is not None:
                report["ready"] = health["ok"]
        else:
            _mac_permissions(binary, report)
            if report["error"] is None:
                report["ready"] = all(
                    report[grant] is True
                    for grant in ("accessibility", "screen_recording")
                )

    return report
|
||||
|
||||
|
||||
def request_permissions_grant(driver_cmd: Optional[str] = None) -> int:
    """Run ``cua-driver permissions grant`` (macOS); stream its output.

    Launches CuaDriver via LaunchServices so the TCC dialog is attributed to
    ``com.trycua.driver``, then waits for the grant. The child inherits this
    process's stdout/stderr, so every message printed here is flushed first;
    otherwise, when stdout is a file or pipe, buffered text would appear after
    the driver's own output.

    Returns:
        The driver's exit code (0 ok); 2 if the binary is missing or could not
        be run; 64 on a non-macOS platform (which has no TCC permission model
        to grant); 130 when interrupted with Ctrl-C.
    """
    if sys.platform != "darwin":
        print(
            "Computer Use permissions are a macOS concept; nothing to grant here.",
            flush=True,
        )
        return 64

    binary = shutil.which(_driver_cmd(driver_cmd))
    if not binary:
        print("cua-driver: not installed. Run: hermes computer-use install", flush=True)
        return 2

    print(
        "Requesting Accessibility + Screen Recording for CuaDriver.\n"
        "macOS will show a dialog attributed to CuaDriver (com.trycua.driver) — "
        "approve it, then return here.",
        flush=True,  # must reach the shared stdout before the child writes to it
    )
    try:
        return int(
            subprocess.run(
                [binary, "permissions", "grant"],
                env=_child_env(),
                stdin=subprocess.DEVNULL,
            ).returncode
        )
    except KeyboardInterrupt:  # pragma: no cover - interactive
        return 130
    except Exception as exc:  # pragma: no cover - defensive
        print(f"cua-driver permissions grant failed: {exc}", file=sys.stderr)
        return 2
|
||||
Loading…
Add table
Add a link
Reference in a new issue