mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
Merge remote-tracking branch 'origin/main' into pr-50994
# Conflicts: # tools/computer_use/cua_backend.py
This commit is contained in:
commit
833710d33e
53 changed files with 3438 additions and 576 deletions
|
|
@ -27,6 +27,131 @@ from typing import Any, Dict, List, Optional
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Background-review aux-model selector + routed digest.
|
||||
#
|
||||
# The review fork runs on the MAIN model by default ("auto"), replaying the
|
||||
# full conversation — already warm in the prompt cache, so cheap cache reads.
|
||||
# Optimal and unchanged. A user can route the review to a different, cheaper
|
||||
# model via auxiliary.background_review.{provider,model}. A different model
|
||||
# cannot reuse the parent's cache (different key), so the fork is cold
|
||||
# regardless — replaying the full transcript would just cold-write it. So when
|
||||
# (and only when) routed to a different model, we replay a compact DIGEST to
|
||||
# minimise cold-written tokens. Same model -> full replay; different model ->
|
||||
# digest. That's the whole policy.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_review_runtime(agent: Any) -> Dict[str, Any]:
|
||||
"""Resolve provider/model/credentials for the review fork.
|
||||
|
||||
Default (auto / unset / same as parent): inherit the parent's live runtime
|
||||
(with codex_app_server -> codex_responses downgrade). ``routed`` is False —
|
||||
the fork uses the main model and the warm cache, exactly as before. When
|
||||
``auxiliary.background_review.{provider,model}`` names a concrete model
|
||||
different from the parent's, resolve that runtime and set ``routed=True``.
|
||||
"""
|
||||
parent_runtime = agent._current_main_runtime()
|
||||
parent_api_mode = parent_runtime.get("api_mode") or None
|
||||
if parent_api_mode == "codex_app_server":
|
||||
parent_api_mode = "codex_responses"
|
||||
parent = {
|
||||
"provider": agent.provider,
|
||||
"model": agent.model,
|
||||
"api_key": parent_runtime.get("api_key") or None,
|
||||
"base_url": parent_runtime.get("base_url") or None,
|
||||
"api_mode": parent_api_mode,
|
||||
"routed": False,
|
||||
}
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception:
|
||||
return parent
|
||||
aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {}
|
||||
task = aux.get("background_review", {}) if isinstance(aux.get("background_review"), dict) else {}
|
||||
task_provider = (str(task.get("provider", "")).strip() or None)
|
||||
task_model = (str(task.get("model", "")).strip() or None)
|
||||
task_base_url = (str(task.get("base_url", "")).strip() or None)
|
||||
task_api_key = (str(task.get("api_key", "")).strip() or None)
|
||||
if not (task_provider and task_provider != "auto" and task_model):
|
||||
return parent
|
||||
if task_provider == (agent.provider or "") and task_model == (agent.model or ""):
|
||||
return parent # same model/provider as parent -> not routed
|
||||
try:
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
rp = resolve_runtime_provider(
|
||||
requested=task_provider,
|
||||
target_model=task_model,
|
||||
explicit_api_key=task_api_key,
|
||||
explicit_base_url=task_base_url,
|
||||
)
|
||||
return {
|
||||
"provider": rp.get("provider") or task_provider,
|
||||
"model": task_model,
|
||||
"api_key": rp.get("api_key"),
|
||||
"base_url": rp.get("base_url"),
|
||||
"api_mode": rp.get("api_mode"),
|
||||
"routed": True,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug("background-review aux routing failed (%s); using main model", e)
|
||||
return parent
|
||||
|
||||
|
||||
def _msg_text(m: Dict) -> str:
|
||||
c = m.get("content")
|
||||
if isinstance(c, str):
|
||||
return c.strip()
|
||||
if isinstance(c, list):
|
||||
return " ".join(b.get("text", "") for b in c if isinstance(b, dict)).strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _digest_history(messages_snapshot: List[Dict], tail: int = 24) -> List[Dict]:
|
||||
"""Compact replay for the routed (different-model) path only.
|
||||
|
||||
Keeps the recent ``tail`` messages verbatim, collapses older turns into one
|
||||
synthetic user-role digest, preserving role alternation. Used ONLY when
|
||||
routed to a different model (cache cold regardless, so fewer cold-written
|
||||
tokens is a pure win). Never on the main-model path (full replay stays warm).
|
||||
"""
|
||||
msgs = list(messages_snapshot or [])
|
||||
if len(msgs) <= tail:
|
||||
return msgs
|
||||
keep = msgs[-tail:]
|
||||
while keep and isinstance(keep[0], dict) and keep[0].get("role") == "tool":
|
||||
tail += 1
|
||||
if len(msgs) <= tail:
|
||||
return msgs
|
||||
keep = msgs[-tail:]
|
||||
old = msgs[:-len(keep)]
|
||||
lines: List[str] = []
|
||||
for m in old:
|
||||
if not isinstance(m, dict):
|
||||
continue
|
||||
role = m.get("role")
|
||||
text = _msg_text(m).replace("\n", " ")
|
||||
if role == "user" and text:
|
||||
lines.append(f"USER: {text[:300]}")
|
||||
elif role == "assistant":
|
||||
tcs = m.get("tool_calls") or []
|
||||
if tcs:
|
||||
names = [(tc.get("function") or {}).get("name", "?") for tc in tcs if isinstance(tc, dict)]
|
||||
lines.append(f"ASSISTANT[tools: {', '.join(names)}]")
|
||||
if text:
|
||||
lines.append(f"ASSISTANT: {text[:200]}")
|
||||
digest = {
|
||||
"role": "user",
|
||||
"content": (
|
||||
"[Earlier conversation digest — older turns summarised to bound the "
|
||||
"review's cold-write cost on the routed aux model. Recent turns "
|
||||
"follow verbatim below.]\n" + "\n".join(lines)
|
||||
),
|
||||
}
|
||||
return [digest] + keep
|
||||
|
||||
|
||||
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
|
||||
# the user-message that the forked review agent receives. AIAgent exposes
|
||||
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
|
||||
|
|
@ -488,18 +613,13 @@ def _run_review_in_thread(
|
|||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = agent._current_main_runtime()
|
||||
_parent_api_mode = _parent_runtime.get("api_mode") or None
|
||||
# The review fork needs to call agent-loop tools (memory,
|
||||
# skill_manage). Those tools require Hermes' own dispatch,
|
||||
# which the codex_app_server runtime bypasses entirely
|
||||
# (it runs the turn inside codex's subprocess). So when
|
||||
# the parent is on codex_app_server, downgrade the review
|
||||
# fork to codex_responses — same auth/credentials, but
|
||||
# talks to the OpenAI Responses API directly so Hermes
|
||||
# owns the loop and the agent-loop tools dispatch.
|
||||
if _parent_api_mode == "codex_app_server":
|
||||
_parent_api_mode = "codex_responses"
|
||||
# _resolve_review_runtime() returns the parent's live runtime by
|
||||
# default (routed=False; main model, warm cache), or — when the user
|
||||
# set auxiliary.background_review.{provider,model} to a different
|
||||
# model — that model's runtime (routed=True). The codex_app_server
|
||||
# -> codex_responses downgrade is applied inside the resolver.
|
||||
_rt = _resolve_review_runtime(agent)
|
||||
_routed = bool(_rt.get("routed"))
|
||||
# skip_memory=True keeps the review fork from
|
||||
# touching external memory plugins (honcho, mem0,
|
||||
# supermemory, etc.). Without it, the fork's
|
||||
|
|
@ -519,14 +639,14 @@ def _run_review_in_thread(
|
|||
# in the request body — Anthropic's cache key includes it.
|
||||
# (The runtime whitelist below still restricts dispatch.)
|
||||
review_agent = AIAgent(
|
||||
model=agent.model,
|
||||
model=_rt.get("model") or agent.model,
|
||||
max_iterations=16,
|
||||
quiet_mode=True,
|
||||
platform=agent.platform,
|
||||
provider=agent.provider,
|
||||
api_mode=_parent_api_mode,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
provider=_rt.get("provider") or agent.provider,
|
||||
api_mode=_rt.get("api_mode"),
|
||||
base_url=_rt.get("base_url") or None,
|
||||
api_key=_rt.get("api_key") or None,
|
||||
credential_pool=getattr(agent, "_credential_pool", None),
|
||||
parent_session_id=agent.session_id,
|
||||
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
|
||||
|
|
@ -565,15 +685,20 @@ def _run_review_in_thread(
|
|||
# issue #25322 and PR #17276 for the full analysis +
|
||||
# measured impact (~26% end-to-end cost reduction on
|
||||
# Sonnet 4.5).
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
# Share the parent's warm cached system prompt ONLY when the review
|
||||
# runs on the SAME model (not routed). When routed to a different
|
||||
# model the parent's cached prompt is for the wrong model/cache key
|
||||
# and would miss anyway, so let the routed fork build its own.
|
||||
if not _routed:
|
||||
review_agent._cached_system_prompt = agent._cached_system_prompt
|
||||
# Defensive: pin session_start + session_id to the
|
||||
# parent's so any code path that re-renders parts of
|
||||
# the system prompt (compression, plugin hooks) still
|
||||
# produces byte-identical output. The cached-prompt
|
||||
# assignment above already short-circuits the normal
|
||||
# rebuild path, but these pins guarantee parity even
|
||||
# if a future code path bypasses the cache.
|
||||
review_agent.session_start = agent.session_start
|
||||
review_agent.session_id = agent.session_id
|
||||
# The fork shares the parent's live session_id (pinned above for
|
||||
# prefix-cache parity). It is single-lifecycle and calls close()
|
||||
|
|
@ -615,6 +740,13 @@ def _run_review_in_thread(
|
|||
),
|
||||
)
|
||||
try:
|
||||
# Routed to a different model -> replay a digest (cache is cold
|
||||
# on that model anyway, so minimise cold-written tokens). Same
|
||||
# model -> replay the full snapshot (warm cache reads).
|
||||
_review_history = (
|
||||
_digest_history(messages_snapshot) if _routed
|
||||
else messages_snapshot
|
||||
)
|
||||
review_agent.run_conversation(
|
||||
user_message=(
|
||||
prompt
|
||||
|
|
@ -622,7 +754,7 @@ def _run_review_in_thread(
|
|||
"management tools. Other tools will be denied "
|
||||
"at runtime — do not attempt them."
|
||||
),
|
||||
conversation_history=messages_snapshot,
|
||||
conversation_history=_review_history,
|
||||
)
|
||||
finally:
|
||||
clear_thread_tool_whitelist()
|
||||
|
|
|
|||
|
|
@ -805,10 +805,11 @@ def try_shrink_image_parts_in_messages(
|
|||
Pillow couldn't help (caller should surface the original error).
|
||||
|
||||
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
|
||||
``data:image/...;base64,...`` payload. For each one whose encoded
|
||||
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
|
||||
ceiling with header overhead) or whose longest side exceeds
|
||||
``max_dimension``, write the base64 to a tempfile, call
|
||||
``data:image/...;base64,...`` payload, plus Anthropic-native
|
||||
``{"type": "image", "source": {"type": "base64", ...}}`` blocks.
|
||||
For each one whose encoded size exceeds 4 MB (a safe target that slides
|
||||
under Anthropic's 5 MB ceiling with header overhead) or whose longest side
|
||||
exceeds ``max_dimension``, write the base64 to a tempfile, call
|
||||
``vision_tools._resize_image_for_vision`` to produce a smaller data
|
||||
URL, and substitute it in place.
|
||||
|
||||
|
|
@ -964,6 +965,28 @@ def try_shrink_image_parts_in_messages(
|
|||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None, triggered_by is not None
|
||||
|
||||
def _source_to_data_url(source: Any) -> Optional[str]:
|
||||
if not isinstance(source, dict) or source.get("type") != "base64":
|
||||
return None
|
||||
data = source.get("data")
|
||||
if not isinstance(data, str) or not data:
|
||||
return None
|
||||
media_type = str(source.get("media_type") or "image/jpeg").strip()
|
||||
if not media_type.startswith("image/"):
|
||||
media_type = "image/jpeg"
|
||||
return f"data:{media_type};base64,{data}"
|
||||
|
||||
def _write_data_url_to_source(source: dict, data_url: str) -> None:
|
||||
header, _, data = data_url.partition(",")
|
||||
media_type = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
candidate = header[len("data:"):].split(";", 1)[0].strip()
|
||||
if candidate.startswith("image/"):
|
||||
media_type = candidate
|
||||
source["type"] = "base64"
|
||||
source["media_type"] = media_type
|
||||
source["data"] = data
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
|
|
@ -974,6 +997,16 @@ def try_shrink_image_parts_in_messages(
|
|||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype == "image":
|
||||
source = part.get("source")
|
||||
url = _source_to_data_url(source)
|
||||
resized, unshrinkable = _shrink_data_url(url or "")
|
||||
if resized and isinstance(source, dict):
|
||||
_write_data_url_to_source(source, resized)
|
||||
changed_count += 1
|
||||
elif unshrinkable:
|
||||
unshrinkable_oversized += 1
|
||||
continue
|
||||
if ptype not in {"image_url", "input_image"}:
|
||||
continue
|
||||
image_value = part.get("image_url")
|
||||
|
|
|
|||
|
|
@ -122,10 +122,14 @@ def finalize_turn(
|
|||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
normal_text_response = str(_turn_exit_reason).startswith("text_response(")
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
and (
|
||||
api_call_count < agent.max_iterations
|
||||
or normal_text_response
|
||||
)
|
||||
)
|
||||
|
||||
# Post-loop cleanup must never lose the response. Trajectory save,
|
||||
|
|
|
|||
239
apps/desktop/src/app/settings/computer-use-panel.tsx
Normal file
239
apps/desktop/src/app/settings/computer-use-panel.tsx
Normal file
|
|
@ -0,0 +1,239 @@
|
|||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { getActionStatus, getComputerUseStatus, grantComputerUsePermissions } from '@/hermes'
|
||||
import { AlertTriangle, Check, ExternalLink, Loader2, RefreshCw, X } from '@/lib/icons'
|
||||
import { upsertDesktopActionTask } from '@/store/activity'
|
||||
import { notify, notifyError } from '@/store/notifications'
|
||||
import type { ComputerUseStatus } from '@/types/hermes'
|
||||
|
||||
import { Pill } from './primitives'
|
||||
|
||||
interface ComputerUsePanelProps {
|
||||
/** Re-read the parent toolset list after a permission/install change so the
|
||||
* "Configured / Needs keys" pill stays in sync. */
|
||||
onConfiguredChange?: () => void
|
||||
}
|
||||
|
||||
// Per-OS one-liner shown when there's no TCC grant flow (Windows/Linux). macOS
|
||||
// drives the permission rows instead, so it has no entry here.
|
||||
const PLATFORM_NOTE: Record<string, string> = {
|
||||
linux: 'Drives your desktop via the X11/XWayland accessibility stack — no permission prompt.',
|
||||
win32: 'First run may trigger a Windows SmartScreen prompt for the cua-driver UIAccess worker — allow it.'
|
||||
}
|
||||
|
||||
function tone(granted: boolean | null) {
|
||||
return granted === true ? 'primary' : 'muted'
|
||||
}
|
||||
|
||||
function GrantIcon({ granted }: { granted: boolean | null }) {
|
||||
const Icon = granted === true ? Check : granted === false ? X : AlertTriangle
|
||||
|
||||
return <Icon className="size-3" />
|
||||
}
|
||||
|
||||
function PermissionRow({ granted, label, hint }: { granted: boolean | null; label: string; hint: string }) {
|
||||
return (
|
||||
<div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
|
||||
<div className="min-w-0">
|
||||
<span className="text-sm font-medium">{label}</span>
|
||||
<p className="mt-0.5 text-[0.7rem] text-muted-foreground">{hint}</p>
|
||||
</div>
|
||||
<Pill tone={tone(granted)}>
|
||||
<GrantIcon granted={granted} />
|
||||
{granted === true ? 'Granted' : granted === false ? 'Not granted' : 'Unknown'}
|
||||
</Pill>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Cross-platform Computer Use preflight card.
|
||||
*
|
||||
* cua-driver runs on macOS, Windows, and Linux, but readiness differs: macOS
|
||||
* needs two TCC grants (Accessibility + Screen Recording) that attach to
|
||||
* cua-driver's own `com.trycua.driver` identity — not Hermes — and are
|
||||
* requested via `cua-driver permissions grant` (dialog attributed to
|
||||
* CuaDriver). Windows/Linux have no TCC toggles, so readiness is driver health
|
||||
* from `cua-driver doctor`. The backend folds both into one `ready` signal.
|
||||
*
|
||||
* Binary install/upgrade stays in the cua-driver provider's post-setup runner
|
||||
* below this card (the generic ToolsetConfigPanel).
|
||||
*/
|
||||
export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) {
|
||||
const [status, setStatus] = useState<ComputerUseStatus | null>(null)
|
||||
const [loading, setLoading] = useState(true)
|
||||
const [granting, setGranting] = useState(false)
|
||||
const activeRef = useRef(false)
|
||||
|
||||
const refresh = useCallback(async () => {
|
||||
try {
|
||||
setStatus(await getComputerUseStatus())
|
||||
} catch (err) {
|
||||
notifyError(err, 'Could not read Computer Use status')
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
activeRef.current = true
|
||||
void refresh()
|
||||
|
||||
return () => void (activeRef.current = false)
|
||||
}, [refresh])
|
||||
|
||||
const grant = useCallback(async () => {
|
||||
setGranting(true)
|
||||
|
||||
try {
|
||||
const started = await grantComputerUsePermissions()
|
||||
|
||||
if (!started.ok) {
|
||||
notifyError(new Error('spawn failed'), 'Could not request permissions')
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
notify({
|
||||
kind: 'info',
|
||||
title: 'Approve in System Settings',
|
||||
message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.'
|
||||
})
|
||||
|
||||
// The driver waits for the user to flip the switch — poll until it exits.
|
||||
for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) {
|
||||
await new Promise(resolve => window.setTimeout(resolve, 1500))
|
||||
|
||||
if (!activeRef.current) {
|
||||
break
|
||||
}
|
||||
|
||||
const polled = await getActionStatus(started.name, 200)
|
||||
upsertDesktopActionTask(polled)
|
||||
|
||||
if (!polled.running) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if (activeRef.current) {
|
||||
await refresh()
|
||||
onConfiguredChange?.()
|
||||
}
|
||||
} catch (err) {
|
||||
if (activeRef.current) {
|
||||
notifyError(err, 'Could not request permissions')
|
||||
}
|
||||
} finally {
|
||||
if (activeRef.current) {
|
||||
setGranting(false)
|
||||
}
|
||||
}
|
||||
}, [onConfiguredChange, refresh])
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="mt-3 flex items-center gap-2 px-1 text-xs text-muted-foreground">
|
||||
<Loader2 className="size-3.5 animate-spin" />
|
||||
Checking Computer Use status…
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (!status) {
|
||||
return null
|
||||
}
|
||||
|
||||
if (!status.platform_supported) {
|
||||
return (
|
||||
<p className="mt-3 px-1 text-xs text-muted-foreground">
|
||||
Computer Use isn't supported on this platform ({status.platform}).
|
||||
</p>
|
||||
)
|
||||
}
|
||||
|
||||
if (!status.installed) {
|
||||
return (
|
||||
<p className="mt-3 px-1 text-xs text-muted-foreground">
|
||||
Install the cua-driver backend below to drive this machine.
|
||||
{status.can_grant && ' Then grant Accessibility and Screen Recording here.'}
|
||||
</p>
|
||||
)
|
||||
}
|
||||
|
||||
const failingChecks = status.checks.filter(c => c.status !== 'ok')
|
||||
|
||||
return (
|
||||
<div className="mt-3 grid gap-2">
|
||||
<div className="flex flex-wrap items-center justify-between gap-2 px-1">
|
||||
<div className="min-w-0">
|
||||
{status.can_grant ? (
|
||||
<p className="text-[0.72rem] text-muted-foreground">
|
||||
Grants attach to CuaDriver's own identity (com.trycua.driver), not Hermes — so the dialog is
|
||||
attributed to the process that drives your Mac.
|
||||
</p>
|
||||
) : (
|
||||
<p className="text-[0.72rem] text-muted-foreground">{PLATFORM_NOTE[status.platform] ?? ''}</p>
|
||||
)}
|
||||
{status.version && <p className="text-[0.68rem] text-muted-foreground/80">{status.version}</p>}
|
||||
</div>
|
||||
<Button onClick={() => void refresh()} size="sm" variant="text">
|
||||
<RefreshCw className="size-3.5" />
|
||||
Recheck
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{status.can_grant ? (
|
||||
<>
|
||||
<PermissionRow
|
||||
granted={status.accessibility}
|
||||
hint="Lets cua-driver post clicks, keystrokes, and read the accessibility tree."
|
||||
label="Accessibility"
|
||||
/>
|
||||
<PermissionRow
|
||||
granted={status.screen_recording}
|
||||
hint="Lets cua-driver capture screenshots of app windows."
|
||||
label="Screen Recording"
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
<div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
|
||||
<span className="text-sm font-medium">Driver health</span>
|
||||
<Pill tone={tone(status.ready)}>
|
||||
<GrantIcon granted={status.ready} />
|
||||
{status.ready === true ? 'Ready' : status.ready === false ? 'Not ready' : 'Unknown'}
|
||||
</Pill>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{failingChecks.map(c => (
|
||||
<p className="px-1 text-[0.7rem] text-muted-foreground" key={c.label}>
|
||||
<AlertTriangle className="mr-1 inline size-3" />
|
||||
{c.label}: {c.message}
|
||||
</p>
|
||||
))}
|
||||
|
||||
{status.error && (
|
||||
<p className="px-1 text-[0.7rem] text-muted-foreground">
|
||||
<AlertTriangle className="mr-1 inline size-3" />
|
||||
{status.error}
|
||||
</p>
|
||||
)}
|
||||
|
||||
{status.ready ? (
|
||||
<div className="flex items-center gap-1.5 px-1 text-xs text-muted-foreground">
|
||||
<Check className="size-3.5" />
|
||||
Computer Use is ready. Ask the agent to capture an app and click around.
|
||||
</div>
|
||||
) : (
|
||||
status.can_grant && (
|
||||
<Button disabled={granting} onClick={() => void grant()} size="sm">
|
||||
{granting ? <Loader2 className="size-3.5 animate-spin" /> : <ExternalLink className="size-3.5" />}
|
||||
{granting ? 'Waiting for approval…' : 'Grant permissions'}
|
||||
</Button>
|
||||
)
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
@ -17,6 +17,7 @@ import { useRefreshHotkey } from '../hooks/use-refresh-hotkey'
|
|||
import { useRouteEnumParam } from '../hooks/use-route-enum-param'
|
||||
import { PAGE_INSET_X } from '../layout-constants'
|
||||
import { PageSearchShell } from '../page-search-shell'
|
||||
import { ComputerUsePanel } from '../settings/computer-use-panel'
|
||||
import { asText, includesQuery, prettyName, toolNames, toolsetDisplayLabel } from '../settings/helpers'
|
||||
import { ToolsetConfigPanel } from '../settings/toolset-config-panel'
|
||||
import type { SetStatusbarItemGroup } from '../shell/statusbar-controls'
|
||||
|
|
@ -334,6 +335,9 @@ export function SkillsView({ setStatusbarItemGroup: _setStatusbarItemGroup, ...p
|
|||
))}
|
||||
</div>
|
||||
)}
|
||||
{expanded && toolset.name === 'computer_use' && (
|
||||
<ComputerUsePanel onConfiguredChange={refreshToolsets} />
|
||||
)}
|
||||
{expanded && <ToolsetConfigPanel onConfiguredChange={refreshToolsets} toolset={toolset.name} />}
|
||||
</div>
|
||||
)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { activeTimelineIndex, deriveTimelineEntries, timelinePreview } from './thread-timeline-data'
|
||||
|
||||
describe('timelinePreview', () => {
|
||||
it('collapses whitespace to a single line', () => {
|
||||
expect(timelinePreview('hello\n\n world\tagain')).toBe('hello world again')
|
||||
})
|
||||
|
||||
it('truncates with an ellipsis past the limit', () => {
|
||||
const out = timelinePreview('abcdefghij', 5)
|
||||
expect(out).toBe('abcd…')
|
||||
expect(out.length).toBe(5)
|
||||
})
|
||||
})
|
||||
|
||||
describe('deriveTimelineEntries', () => {
|
||||
it('keeps non-empty user prompts in order', () => {
|
||||
expect(
|
||||
deriveTimelineEntries([
|
||||
{ id: 'u1', role: 'user', text: 'first' },
|
||||
{ id: 'a1', role: 'assistant', text: 'answer' },
|
||||
{ id: 'u2', role: 'user', text: ' second ' }
|
||||
])
|
||||
).toEqual([
|
||||
{ id: 'u1', preview: 'first' },
|
||||
{ id: 'u2', preview: 'second' }
|
||||
])
|
||||
})
|
||||
|
||||
it('drops blanks and background-process notifications', () => {
|
||||
expect(
|
||||
deriveTimelineEntries([
|
||||
{ id: 'u1', role: 'user', text: ' ' },
|
||||
{ id: 'u2', role: 'user', text: '[IMPORTANT: Background process 123 finished]' },
|
||||
{ id: 'u3', role: 'user', text: 'real prompt' }
|
||||
]).map(e => e.id)
|
||||
).toEqual(['u3'])
|
||||
})
|
||||
})
|
||||
|
||||
describe('activeTimelineIndex', () => {
|
||||
it('returns the last prompt scrolled to or above the top edge', () => {
|
||||
expect(activeTimelineIndex([-400, -10, 320])).toBe(1)
|
||||
})
|
||||
|
||||
it('falls back to the first rendered entry', () => {
|
||||
expect(activeTimelineIndex([null, 120, 480])).toBe(1)
|
||||
expect(activeTimelineIndex([null, null])).toBe(0)
|
||||
})
|
||||
})
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
// Pure timeline helpers — no React/DOM; tested in thread-timeline-data.test.ts.
|
||||
|
||||
export interface TimelineSourceMessage {
|
||||
id: string
|
||||
role: string
|
||||
text: string
|
||||
}
|
||||
|
||||
export interface TimelineEntry {
|
||||
id: string
|
||||
preview: string
|
||||
}
|
||||
|
||||
// Injected as user messages for alternation; not human prompts (thread.tsx).
|
||||
const PROCESS_NOTIFICATION_RE = /^\[IMPORTANT: Background process [\s\S]*\]$/
|
||||
|
||||
const PREVIEW_MAX = 120
|
||||
|
||||
export function timelinePreview(text: string, max: number = PREVIEW_MAX): string {
|
||||
const collapsed = text.replace(/\s+/g, ' ').trim()
|
||||
|
||||
if (collapsed.length <= max) {
|
||||
return collapsed
|
||||
}
|
||||
|
||||
return `${collapsed.slice(0, max - 1).trimEnd()}…`
|
||||
}
|
||||
|
||||
export function deriveTimelineEntries(messages: readonly TimelineSourceMessage[]): TimelineEntry[] {
|
||||
const entries: TimelineEntry[] = []
|
||||
|
||||
for (const message of messages) {
|
||||
if (message.role !== 'user') {
|
||||
continue
|
||||
}
|
||||
|
||||
const text = message.text.trim()
|
||||
|
||||
if (!text || PROCESS_NOTIFICATION_RE.test(text)) {
|
||||
continue
|
||||
}
|
||||
|
||||
entries.push({ id: message.id, preview: timelinePreview(text) })
|
||||
}
|
||||
|
||||
return entries
|
||||
}
|
||||
|
||||
/** Last user prompt at/above the viewport top (with slack); else first rendered. */
|
||||
export function activeTimelineIndex(offsets: readonly (number | null)[], slack: number = 8): number {
|
||||
let active = -1
|
||||
let firstRendered = -1
|
||||
|
||||
for (let i = 0; i < offsets.length; i++) {
|
||||
const offset = offsets[i]
|
||||
|
||||
if (offset == null) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (firstRendered === -1) {
|
||||
firstRendered = i
|
||||
}
|
||||
|
||||
if (offset <= slack) {
|
||||
active = i
|
||||
}
|
||||
}
|
||||
|
||||
if (active !== -1) {
|
||||
return active
|
||||
}
|
||||
|
||||
return firstRendered === -1 ? 0 : firstRendered
|
||||
}
|
||||
272
apps/desktop/src/components/assistant-ui/thread-timeline.tsx
Normal file
272
apps/desktop/src/components/assistant-ui/thread-timeline.tsx
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
import { useAuiState } from '@assistant-ui/react'
|
||||
import { type FC, useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
|
||||
import { composerPanelCard } from '@/components/chat/composer-dock'
|
||||
import { triggerHaptic } from '@/lib/haptics'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { setPaneHoverRevealSuppressed } from '@/store/panes'
|
||||
|
||||
import {
|
||||
activeTimelineIndex,
|
||||
deriveTimelineEntries,
|
||||
type TimelineEntry,
|
||||
type TimelineSourceMessage
|
||||
} from './thread-timeline-data'
|
||||
|
||||
const MIN_ENTRIES = 4
|
||||
const VIEWPORT = '[data-slot="aui_thread-viewport"]'
|
||||
const HOVER_CLOSE_MS = 140
|
||||
|
||||
const ROW_CLASS =
|
||||
'relative flex w-full min-w-0 max-w-full cursor-pointer select-none overflow-hidden rounded-md px-2 py-1 text-left outline-hidden transition-colors duration-100 ease-out hover:bg-(--ui-row-hover-background) hover:transition-none'
|
||||
|
||||
const POPOVER_SHELL = cn(
|
||||
'absolute right-full top-1/2 z-50 mr-1.5 max-h-[min(22rem,calc(100vh-8rem))] w-80 max-w-[min(20rem,calc(100vw-2rem))] -translate-y-1/2 overflow-x-hidden overflow-y-auto overscroll-contain p-1 text-popover-foreground transition-[opacity,transform] duration-100 ease-out group-hover/timeline:transition-none',
|
||||
composerPanelCard,
|
||||
// Solid fill — composerPanelCard is deliberately translucent; without this,
|
||||
// directive chips in the transcript bleed through and look like popover overflow.
|
||||
'bg-(--composer-fill)'
|
||||
)
|
||||
|
||||
function userPromptText(content: unknown): string {
|
||||
if (typeof content === 'string') {
|
||||
return content
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) {
|
||||
return ''
|
||||
}
|
||||
|
||||
let out = ''
|
||||
|
||||
for (const part of content) {
|
||||
if (typeof part === 'string') {
|
||||
out += part
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if (!part || typeof part !== 'object') {
|
||||
continue
|
||||
}
|
||||
|
||||
const row = part as { text?: unknown; type?: unknown }
|
||||
|
||||
if ((!row.type || row.type === 'text') && typeof row.text === 'string') {
|
||||
out += row.text
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
function scrollToPrompt(id: string) {
|
||||
const viewport = document.querySelector<HTMLElement>(VIEWPORT)
|
||||
const node = viewport?.querySelector<HTMLElement>(`[data-message-id="${CSS.escape(id)}"]`)
|
||||
|
||||
if (!viewport || !node) {
|
||||
return
|
||||
}
|
||||
|
||||
const top = viewport.scrollTop + (node.getBoundingClientRect().top - viewport.getBoundingClientRect().top) - 8
|
||||
|
||||
triggerHaptic('selection')
|
||||
viewport.scrollTo({ behavior: 'smooth', top: Math.max(0, top) })
|
||||
}
|
||||
|
||||
/** Right-edge prompt rail — hover previews, click to jump. ≥4 user turns only. */
|
||||
export const ThreadTimeline: FC = () => {
|
||||
const sourceSignature = useAuiState(s => {
|
||||
const rows: TimelineSourceMessage[] = []
|
||||
|
||||
for (const message of s.thread.messages) {
|
||||
if (message.role !== 'user') {
|
||||
continue
|
||||
}
|
||||
|
||||
rows.push({ id: message.id, role: 'user', text: userPromptText(message.content) })
|
||||
}
|
||||
|
||||
return JSON.stringify(rows)
|
||||
})
|
||||
|
||||
const entries = useMemo(
|
||||
() => deriveTimelineEntries(JSON.parse(sourceSignature) as TimelineSourceMessage[]),
|
||||
[sourceSignature]
|
||||
)
|
||||
|
||||
const [activeIndex, setActiveIndex] = useState(0)
|
||||
const [hoverIndex, setHoverIndex] = useState<number | null>(null)
|
||||
const [open, setOpen] = useState(false)
|
||||
const closeTimerRef = useRef<number | undefined>(undefined)
|
||||
|
||||
const keepOpen = useCallback(() => {
|
||||
window.clearTimeout(closeTimerRef.current)
|
||||
setPaneHoverRevealSuppressed(true)
|
||||
setOpen(true)
|
||||
}, [])
|
||||
|
||||
const closeSoon = useCallback(() => {
|
||||
window.clearTimeout(closeTimerRef.current)
|
||||
setHoverIndex(null)
|
||||
setPaneHoverRevealSuppressed(false)
|
||||
closeTimerRef.current = window.setTimeout(() => setOpen(false), HOVER_CLOSE_MS)
|
||||
}, [])
|
||||
|
||||
useEffect(
|
||||
() => () => {
|
||||
window.clearTimeout(closeTimerRef.current)
|
||||
setPaneHoverRevealSuppressed(false)
|
||||
},
|
||||
[]
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
if (entries.length < MIN_ENTRIES) {
|
||||
setPaneHoverRevealSuppressed(false)
|
||||
}
|
||||
}, [entries.length])
|
||||
|
||||
useEffect(() => {
|
||||
const viewport = document.querySelector<HTMLElement>(VIEWPORT)
|
||||
|
||||
if (!viewport || entries.length === 0) {
|
||||
return
|
||||
}
|
||||
|
||||
let raf = 0
|
||||
|
||||
const compute = () => {
|
||||
raf = 0
|
||||
|
||||
const top = viewport.getBoundingClientRect().top
|
||||
|
||||
const offsets = entries.map(entry => {
|
||||
const node = viewport.querySelector<HTMLElement>(`[data-message-id="${CSS.escape(entry.id)}"]`)
|
||||
|
||||
return node ? node.getBoundingClientRect().top - top : null
|
||||
})
|
||||
|
||||
const next = activeTimelineIndex(offsets)
|
||||
|
||||
setActiveIndex(prev => (prev === next ? prev : next))
|
||||
}
|
||||
|
||||
const onScroll = () => {
|
||||
if (!raf) {
|
||||
raf = requestAnimationFrame(compute)
|
||||
}
|
||||
}
|
||||
|
||||
compute()
|
||||
viewport.addEventListener('scroll', onScroll, { passive: true })
|
||||
|
||||
return () => {
|
||||
viewport.removeEventListener('scroll', onScroll)
|
||||
|
||||
if (raf) {
|
||||
cancelAnimationFrame(raf)
|
||||
}
|
||||
}
|
||||
}, [entries])
|
||||
|
||||
if (entries.length < MIN_ENTRIES) {
|
||||
return null
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
aria-label="Conversation timeline"
|
||||
className="group/timeline pointer-events-auto absolute right-0 top-1/2 z-40 flex -translate-y-1/2 flex-col items-end"
|
||||
data-slot="thread-timeline"
|
||||
onMouseEnter={keepOpen}
|
||||
onMouseLeave={closeSoon}
|
||||
role="navigation"
|
||||
>
|
||||
<TimelineTicks
|
||||
activeIndex={activeIndex}
|
||||
entries={entries}
|
||||
onHover={setHoverIndex}
|
||||
onJump={scrollToPrompt}
|
||||
/>
|
||||
<TimelinePopover
|
||||
activeIndex={activeIndex}
|
||||
entries={entries}
|
||||
hoverIndex={hoverIndex}
|
||||
onHover={setHoverIndex}
|
||||
onJump={scrollToPrompt}
|
||||
open={open}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const TimelinePopover: FC<{
|
||||
activeIndex: number
|
||||
entries: TimelineEntry[]
|
||||
hoverIndex: number | null
|
||||
onHover: (index: number) => void
|
||||
onJump: (id: string) => void
|
||||
open: boolean
|
||||
}> = ({ activeIndex, entries, hoverIndex, onHover, onJump, open }) => (
|
||||
<div
|
||||
className={cn(
|
||||
POPOVER_SHELL,
|
||||
open ? 'pointer-events-auto opacity-100 translate-x-0' : 'pointer-events-none translate-x-1 opacity-0'
|
||||
)}
|
||||
data-slot="thread-timeline-popover"
|
||||
>
|
||||
{entries.map((entry, index) => {
|
||||
const hovered = index === hoverIndex
|
||||
const active = index === activeIndex
|
||||
|
||||
return (
|
||||
<button
|
||||
aria-label={entry.preview}
|
||||
className={cn(
|
||||
ROW_CLASS,
|
||||
active && 'bg-(--ui-row-active-background) text-foreground',
|
||||
hovered && 'bg-(--ui-row-hover-background) text-foreground transition-none'
|
||||
)}
|
||||
key={entry.id}
|
||||
onClick={() => onJump(entry.id)}
|
||||
onMouseEnter={() => onHover(index)}
|
||||
type="button"
|
||||
>
|
||||
<span className="block w-full min-w-0 truncate font-medium leading-snug text-foreground">
|
||||
{entry.preview}
|
||||
</span>
|
||||
</button>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)
|
||||
|
||||
const TimelineTicks: FC<{
|
||||
activeIndex: number
|
||||
entries: TimelineEntry[]
|
||||
onHover: (index: number) => void
|
||||
onJump: (id: string) => void
|
||||
}> = ({ activeIndex, entries, onHover, onJump }) => (
|
||||
<div className="flex flex-col items-end py-1" data-slot="thread-timeline-ticks">
|
||||
{entries.map((entry, index) => (
|
||||
<button
|
||||
aria-label={entry.preview}
|
||||
className="group/tick flex h-2 w-7 cursor-pointer items-center justify-end pr-1"
|
||||
key={entry.id}
|
||||
onClick={() => onJump(entry.id)}
|
||||
onMouseEnter={() => onHover(index)}
|
||||
type="button"
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
'block h-px w-3 transition-opacity duration-100 ease-out',
|
||||
index === activeIndex
|
||||
? 'bg-(--theme-primary)'
|
||||
: 'dither text-(--ui-text-quaternary) opacity-70 group-hover/tick:opacity-100 group-hover/tick:transition-none'
|
||||
)}
|
||||
/>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)
|
||||
|
|
@ -64,6 +64,7 @@ import { ClarifyTool } from '@/components/assistant-ui/clarify-tool'
|
|||
import { DirectiveContent, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
|
||||
import { MarkdownText, MarkdownTextContent } from '@/components/assistant-ui/markdown-text'
|
||||
import { ThreadMessageList } from '@/components/assistant-ui/thread-list'
|
||||
import { ThreadTimeline } from '@/components/assistant-ui/thread-timeline'
|
||||
import { ToolFallback, ToolGroupSlot } from '@/components/assistant-ui/tool-fallback'
|
||||
import { TooltipIconButton } from '@/components/assistant-ui/tooltip-icon-button'
|
||||
import { UserMessageText } from '@/components/assistant-ui/user-message-text'
|
||||
|
|
@ -212,6 +213,7 @@ export const Thread: FC<{
|
|||
sessionKey={sessionKey}
|
||||
/>
|
||||
{loading === 'session' && <CenteredThreadSpinner />}
|
||||
<ThreadTimeline />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
|
@ -797,7 +799,15 @@ function messageAttachmentRefs(value: unknown): string[] {
|
|||
return value.every(ref => typeof ref === 'string') ? value : EMPTY_ATTACHMENT_REFS
|
||||
}
|
||||
|
||||
function StickyHumanMessageContainer({ attachments, children }: { attachments?: ReactNode; children: ReactNode }) {
|
||||
function StickyHumanMessageContainer({
|
||||
attachments,
|
||||
children,
|
||||
messageId
|
||||
}: {
|
||||
attachments?: ReactNode
|
||||
children: ReactNode
|
||||
messageId?: string
|
||||
}) {
|
||||
return (
|
||||
// Fragment, not a wrapper: a wrapping element becomes the sticky's
|
||||
// containing block (it'd stick within its own height = never). The bubble
|
||||
|
|
@ -806,6 +816,7 @@ function StickyHumanMessageContainer({ attachments, children }: { attachments?:
|
|||
<>
|
||||
<div
|
||||
className="group/user-message sticky z-40 -mx-4 flex w-[calc(100%+2rem)] min-w-0 max-w-none flex-col items-stretch gap-0 self-end overflow-visible bg-(--ui-chat-surface-background) px-4 pb-(--conversation-turn-gap) pt-1"
|
||||
data-message-id={messageId}
|
||||
data-role="user"
|
||||
data-slot="aui_user-message-root"
|
||||
>
|
||||
|
|
@ -990,6 +1001,7 @@ const UserMessage: FC<{
|
|||
return (
|
||||
<MessagePrimitive.Root asChild>
|
||||
<StickyHumanMessageContainer
|
||||
messageId={messageId}
|
||||
attachments={
|
||||
// Attachments live BELOW the sticky bubble in normal flow, so they
|
||||
// scroll away behind the pinned bubble instead of riding along with
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import {
|
|||
} from 'react'
|
||||
|
||||
import { cn } from '@/lib/utils'
|
||||
import { $paneStates, ensurePaneRegistered, setPaneWidthOverride } from '@/store/panes'
|
||||
import { $paneHoverRevealSuppressed, $paneStates, ensurePaneRegistered, setPaneWidthOverride } from '@/store/panes'
|
||||
|
||||
import { PaneShellContext, type PaneShellContextValue, type PaneSlot } from './context'
|
||||
|
||||
|
|
@ -250,6 +250,7 @@ export function Pane({
|
|||
}: PaneProps) {
|
||||
const ctx = useContext(PaneShellContext)
|
||||
const paneStates = useStore($paneStates)
|
||||
const hoverRevealSuppressed = useStore($paneHoverRevealSuppressed)
|
||||
const registered = useRef(false)
|
||||
const paneRef = useRef<HTMLDivElement | null>(null)
|
||||
// Keyboard (mod+b / mod+j) pins the reveal open while collapsed; hover is CSS.
|
||||
|
|
@ -378,7 +379,10 @@ export function Pane({
|
|||
>
|
||||
<div
|
||||
aria-hidden="true"
|
||||
className="pointer-events-auto absolute inset-y-0 z-30 [-webkit-app-region:no-drag]"
|
||||
className={cn(
|
||||
'absolute inset-y-0 z-30 [-webkit-app-region:no-drag]',
|
||||
hoverRevealSuppressed ? 'pointer-events-none' : 'pointer-events-auto'
|
||||
)}
|
||||
style={{ [edge]: HOVER_REVEAL_EDGE_GUTTER, width: HOVER_REVEAL_TRIGGER_WIDTH }}
|
||||
/>
|
||||
|
||||
|
|
@ -388,7 +392,8 @@ export function Pane({
|
|||
className={cn(
|
||||
'pointer-events-none absolute inset-y-0 z-30 overflow-hidden transition-transform delay-0',
|
||||
offscreen,
|
||||
'group-hover/reveal:pointer-events-auto group-hover/reveal:translate-x-0 group-hover/reveal:delay-[var(--reveal-enter-delay)] group-hover/reveal:shadow-[var(--reveal-shadow)]',
|
||||
!hoverRevealSuppressed &&
|
||||
'group-hover/reveal:pointer-events-auto group-hover/reveal:translate-x-0 group-hover/reveal:delay-[var(--reveal-enter-delay)] group-hover/reveal:shadow-[var(--reveal-shadow)]',
|
||||
'group-data-[forced]/reveal:pointer-events-auto group-data-[forced]/reveal:translate-x-0 group-data-[forced]/reveal:delay-0 group-data-[forced]/reveal:shadow-[var(--reveal-shadow)]'
|
||||
)}
|
||||
key={edge}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import type {
|
|||
AudioTranscriptionResponse,
|
||||
AuxiliaryModelsResponse,
|
||||
BackendUpdateCheckResponse,
|
||||
ComputerUseStatus,
|
||||
ConfigSchemaResponse,
|
||||
CronJob,
|
||||
CronJobCreatePayload,
|
||||
|
|
@ -59,6 +60,9 @@ export type {
|
|||
AudioTranscriptionResponse,
|
||||
AuxiliaryModelsResponse,
|
||||
BackendUpdateCheckResponse,
|
||||
ComputerUseCheck,
|
||||
ComputerUsePermissionSource,
|
||||
ComputerUseStatus,
|
||||
ConfigFieldSchema,
|
||||
ConfigSchemaResponse,
|
||||
CronJob,
|
||||
|
|
@ -516,6 +520,21 @@ export function runToolsetPostSetup(name: string, key: string): Promise<ActionRe
|
|||
})
|
||||
}
|
||||
|
||||
export function getComputerUseStatus(): Promise<ComputerUseStatus> {
|
||||
return window.hermesDesktop.api<ComputerUseStatus>({
|
||||
...profileScoped(),
|
||||
path: '/api/tools/computer-use/status'
|
||||
})
|
||||
}
|
||||
|
||||
export function grantComputerUsePermissions(): Promise<ActionResponse> {
|
||||
return window.hermesDesktop.api<ActionResponse>({
|
||||
...profileScoped(),
|
||||
path: '/api/tools/computer-use/permissions/grant',
|
||||
method: 'POST'
|
||||
})
|
||||
}
|
||||
|
||||
export function getMessagingPlatforms(): Promise<MessagingPlatformsResponse> {
|
||||
return window.hermesDesktop.api<MessagingPlatformsResponse>({
|
||||
path: '/api/messaging/platforms'
|
||||
|
|
|
|||
|
|
@ -32,4 +32,13 @@ describe('extractEmbeddedImages', () => {
|
|||
expect(result.cleanedText).toBe('first mid tail')
|
||||
expect(result.images).toEqual([SAMPLE_PNG_DATA_URL, second])
|
||||
})
|
||||
|
||||
it('handles multi-megabyte data URLs without overflowing the JS stack', () => {
|
||||
const hugeDataUrl = 'data:image/png;base64,' + 'A'.repeat(8_000_000)
|
||||
const result = extractEmbeddedImages(`describe this ${hugeDataUrl} thanks`)
|
||||
|
||||
expect(result.cleanedText).toBe('describe this thanks')
|
||||
expect(result.images).toHaveLength(1)
|
||||
expect(result.images[0]).toHaveLength(hugeDataUrl.length)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
const EMBEDDED_IMAGE_RE =
|
||||
/(\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*")?(data:image\/[\w.+-]+;base64,[A-Za-z0-9+/=]{64,})("\s*\}\s*\})?/g
|
||||
|
||||
const DATA_URL_RE = /^data:([\w./+-]+);base64,(.*)$/i
|
||||
const DATA_IMAGE_PREFIX = 'data:image/'
|
||||
const BASE64_MARKER = ';base64,'
|
||||
const MIN_EMBEDDED_IMAGE_BASE64_LENGTH = 64
|
||||
const JSON_IMAGE_OPEN_RE = /\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*"$/
|
||||
const JSON_IMAGE_CLOSE_RE = /^"\s*\}\s*\}/
|
||||
const JSON_IMAGE_OPEN_MAX = 96
|
||||
const JSON_IMAGE_CLOSE_MAX = 16
|
||||
|
||||
export const DATA_IMAGE_URL_RE = /^data:image\/[\w.+-]+;base64,/i
|
||||
|
||||
|
|
@ -31,24 +35,119 @@ export function dataUrlToBlob(dataUrl: string): Blob | null {
|
|||
}
|
||||
}
|
||||
|
||||
function isImageMimeCode(code: number): boolean {
|
||||
return (
|
||||
(code >= 48 && code <= 57) ||
|
||||
(code >= 65 && code <= 90) ||
|
||||
(code >= 97 && code <= 122) ||
|
||||
code === 43 ||
|
||||
code === 45 ||
|
||||
code === 46 ||
|
||||
code === 95
|
||||
)
|
||||
}
|
||||
|
||||
function isBase64Code(code: number): boolean {
|
||||
return (
|
||||
(code >= 48 && code <= 57) ||
|
||||
(code >= 65 && code <= 90) ||
|
||||
(code >= 97 && code <= 122) ||
|
||||
code === 43 ||
|
||||
code === 47 ||
|
||||
code === 61
|
||||
)
|
||||
}
|
||||
|
||||
function readDataImageUrl(text: string, start: number): { end: number; url: string } | null {
|
||||
if (!text.startsWith(DATA_IMAGE_PREFIX, start)) {
|
||||
return null
|
||||
}
|
||||
|
||||
let cursor = start + DATA_IMAGE_PREFIX.length
|
||||
|
||||
while (cursor < text.length && isImageMimeCode(text.charCodeAt(cursor))) {
|
||||
cursor += 1
|
||||
}
|
||||
|
||||
if (cursor === start + DATA_IMAGE_PREFIX.length || !text.startsWith(BASE64_MARKER, cursor)) {
|
||||
return null
|
||||
}
|
||||
|
||||
cursor += BASE64_MARKER.length
|
||||
const base64Start = cursor
|
||||
|
||||
while (cursor < text.length && isBase64Code(text.charCodeAt(cursor))) {
|
||||
cursor += 1
|
||||
}
|
||||
|
||||
if (cursor - base64Start < MIN_EMBEDDED_IMAGE_BASE64_LENGTH) {
|
||||
return null
|
||||
}
|
||||
|
||||
return { end: cursor, url: text.slice(start, cursor) }
|
||||
}
|
||||
|
||||
function embeddedImageRemovalRange(text: string, dataStart: number, dataEnd: number): { end: number; start: number } {
|
||||
let start = dataStart
|
||||
let end = dataEnd
|
||||
const openSearchStart = Math.max(0, dataStart - JSON_IMAGE_OPEN_MAX)
|
||||
const openMatch = text.slice(openSearchStart, dataStart).match(JSON_IMAGE_OPEN_RE)
|
||||
|
||||
if (openMatch?.index !== undefined) {
|
||||
const close = text.slice(dataEnd, dataEnd + JSON_IMAGE_CLOSE_MAX).match(JSON_IMAGE_CLOSE_RE)
|
||||
|
||||
if (close) {
|
||||
start = openSearchStart + openMatch.index
|
||||
end = dataEnd + close[0].length
|
||||
}
|
||||
}
|
||||
|
||||
return { end, start }
|
||||
}
|
||||
|
||||
function normalizeCleanedText(text: string): string {
|
||||
return text.replace(/[ \t]+\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim()
|
||||
}
|
||||
|
||||
export function extractEmbeddedImages(text: string): EmbeddedImageExtraction {
|
||||
if (!text || !text.includes('data:image/')) {
|
||||
if (!text || !text.includes(DATA_IMAGE_PREFIX)) {
|
||||
return { cleanedText: text, images: [] }
|
||||
}
|
||||
|
||||
const images: string[] = []
|
||||
const pieces: string[] = []
|
||||
let appendCursor = 0
|
||||
let searchCursor = 0
|
||||
|
||||
const cleanedText = text
|
||||
.replace(EMBEDDED_IMAGE_RE, (_match, _open, dataUrl: string) => {
|
||||
images.push(dataUrl)
|
||||
while (searchCursor < text.length) {
|
||||
const dataStart = text.indexOf(DATA_IMAGE_PREFIX, searchCursor)
|
||||
|
||||
return ''
|
||||
})
|
||||
.replace(/[ \t]+\n/g, '\n')
|
||||
.replace(/\n{3,}/g, '\n\n')
|
||||
.trim()
|
||||
if (dataStart === -1) {
|
||||
break
|
||||
}
|
||||
|
||||
return { cleanedText, images }
|
||||
const dataUrl = readDataImageUrl(text, dataStart)
|
||||
|
||||
if (!dataUrl) {
|
||||
searchCursor = dataStart + DATA_IMAGE_PREFIX.length
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
const range = embeddedImageRemovalRange(text, dataStart, dataUrl.end)
|
||||
pieces.push(text.slice(appendCursor, range.start))
|
||||
images.push(dataUrl.url)
|
||||
appendCursor = range.end
|
||||
searchCursor = range.end
|
||||
}
|
||||
|
||||
if (!images.length) {
|
||||
return { cleanedText: text, images: [] }
|
||||
}
|
||||
|
||||
pieces.push(text.slice(appendCursor))
|
||||
|
||||
return { cleanedText: normalizeCleanedText(pieces.join('')), images }
|
||||
}
|
||||
|
||||
export function embeddedImageUrls(text: string): string[] {
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ function persist(states: Record<string, PaneStateSnapshot>) {
|
|||
}
|
||||
|
||||
export const $paneStates = atom<Record<string, PaneStateSnapshot>>(load())
|
||||
export const $paneHoverRevealSuppressed = atom(false)
|
||||
|
||||
$paneStates.subscribe(persist)
|
||||
|
||||
|
|
@ -143,3 +144,4 @@ export function setPaneWidthOverride(id: string, width: number | undefined) {
|
|||
|
||||
export const clearPaneWidthOverride = (id: string) => setPaneWidthOverride(id, undefined)
|
||||
export const getPaneStateSnapshot = (id: string) => $paneStates.get()[id]
|
||||
export const setPaneHoverRevealSuppressed = (suppressed: boolean) => $paneHoverRevealSuppressed.set(suppressed)
|
||||
|
|
|
|||
|
|
@ -579,6 +579,51 @@ export interface ToolsetConfig {
|
|||
active_provider: string | null
|
||||
}
|
||||
|
||||
/** Shape of `GET /api/tools/computer-use/status`.
|
||||
*
|
||||
* cua-driver runs on macOS, Windows, and Linux. `ready` is the single OS-aware
|
||||
* readiness signal: on macOS both TCC grants (Accessibility + Screen
|
||||
* Recording, which attach to cua-driver's own `com.trycua.driver` identity,
|
||||
* not Hermes); elsewhere, driver health from `cua-driver doctor`. `null`
|
||||
* means unknown (binary missing / probe failed). */
|
||||
export interface ComputerUsePermissionSource {
|
||||
attribution?: string
|
||||
executable?: string
|
||||
note?: string
|
||||
pid?: number
|
||||
responsible_ppid?: number
|
||||
}
|
||||
|
||||
export interface ComputerUseCheck {
|
||||
label: string
|
||||
status: string
|
||||
message: string
|
||||
}
|
||||
|
||||
export interface ComputerUseStatus {
|
||||
/** `sys.platform`: "darwin" | "win32" | "linux" | ... */
|
||||
platform: string
|
||||
/** cua-driver has a runtime backend for this platform. */
|
||||
platform_supported: boolean
|
||||
/** cua-driver binary resolved on PATH. */
|
||||
installed: boolean
|
||||
/** e.g. "cua-driver 0.5.1", or null when unknown. */
|
||||
version: string | null
|
||||
/** Unified readiness — both TCC grants (macOS) or driver health (else). */
|
||||
ready: boolean | null
|
||||
/** Whether a permission grant flow exists (macOS-only TCC). */
|
||||
can_grant: boolean
|
||||
/** Cross-platform `cua-driver doctor` probes. */
|
||||
checks: ComputerUseCheck[]
|
||||
/** macOS TCC detail — `null` off macOS or when unknown. */
|
||||
accessibility: boolean | null
|
||||
screen_recording: boolean | null
|
||||
screen_recording_capturable: boolean | null
|
||||
source: ComputerUsePermissionSource | null
|
||||
/** Populated when the status probe itself failed. */
|
||||
error: string | null
|
||||
}
|
||||
|
||||
export interface SessionSearchResult {
|
||||
/** Lineage root of the matched conversation. Stable across compression and
|
||||
* used as the durable pin id; falls back to session_id when absent. */
|
||||
|
|
|
|||
57
cron/jobs.py
57
cron/jobs.py
|
|
@ -248,6 +248,12 @@ def _normalize_job_record(job: Dict[str, Any]) -> Dict[str, Any]:
|
|||
state = "scheduled" if normalized.get("enabled", True) else "paused"
|
||||
normalized["state"] = state
|
||||
|
||||
# Legacy jobs (created before per-job profile scoping) have no profile
|
||||
# field. Default them to "default" so the scheduler treats them as
|
||||
# root-profile jobs — matching their pre-existing behaviour.
|
||||
prof = normalized.get("profile")
|
||||
normalized["profile"] = (str(prof).strip() if isinstance(prof, str) and prof.strip() else "default")
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
|
|
@ -268,6 +274,43 @@ def _secure_file(path: Path):
|
|||
pass
|
||||
|
||||
|
||||
def current_profile_name() -> str:
|
||||
"""Return the active profile name for the process creating a job.
|
||||
|
||||
``~/.hermes`` -> ``"default"``
|
||||
``~/.hermes/profiles/X`` -> ``"X"``
|
||||
|
||||
Used at create time to tag a job with the profile whose environment
|
||||
(.env / config.yaml / credentials) it should execute under, so the
|
||||
job runs as its owning profile regardless of which profile's ticker
|
||||
picks it up from the shared root store (#32091).
|
||||
"""
|
||||
try:
|
||||
from agent.file_safety import _resolve_active_profile_name
|
||||
return _resolve_active_profile_name() or "default"
|
||||
except Exception:
|
||||
return "default"
|
||||
|
||||
|
||||
def resolve_profile_home(profile_name: Optional[str]) -> Optional[Path]:
|
||||
"""Map a job's ``profile`` name to the HERMES_HOME it should run under.
|
||||
|
||||
``"default"`` / empty / ``None`` -> the root home (``get_default_hermes_root()``).
|
||||
``"<name>"`` -> ``<root>/profiles/<name>``.
|
||||
|
||||
Returns ``None`` when the named profile directory does not exist, so the
|
||||
scheduler can fall back to the ticker's own home and log a warning rather
|
||||
than pointing a job at a missing profile.
|
||||
"""
|
||||
name = (profile_name or "").strip()
|
||||
if not name or name == "default":
|
||||
return get_default_hermes_root().resolve()
|
||||
candidate = (get_default_hermes_root() / "profiles" / name).resolve()
|
||||
if candidate.is_dir():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def ensure_dirs():
|
||||
"""Ensure cron directories exist with secure permissions."""
|
||||
CRON_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -772,6 +815,7 @@ def create_job(
|
|||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
no_agent: bool = False,
|
||||
profile: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
|
|
@ -816,6 +860,13 @@ def create_job(
|
|||
and deliver its stdout directly. Empty stdout = silent (no
|
||||
delivery). Requires ``script`` to be set. Ideal for classic
|
||||
watchdogs and periodic alerts that don't need LLM reasoning.
|
||||
profile: Optional Hermes profile name the job should EXECUTE under
|
||||
(its .env / config.yaml / credentials). Defaults to the active
|
||||
profile of the session creating the job. The shared root store
|
||||
holds every profile's jobs (#32091); this field is what scopes
|
||||
a job's runtime environment to its owning profile so it runs
|
||||
with that profile's permissions regardless of which ticker
|
||||
picks it up.
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
|
|
@ -850,6 +901,11 @@ def create_job(
|
|||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
normalized_no_agent = bool(no_agent)
|
||||
# Tag the job with the profile whose environment it should execute under.
|
||||
# When the caller does not pass one explicitly, capture the active profile
|
||||
# of the session creating the job so a job created under `hermes -p donna`
|
||||
# runs as donna even though it now lives in the shared root store (#32091).
|
||||
normalized_profile = (str(profile).strip() if isinstance(profile, str) else "") or current_profile_name()
|
||||
|
||||
# no_agent jobs are meaningless without a script — the script IS the job.
|
||||
# Surface this as a clear ValueError at create time so bad configs never
|
||||
|
|
@ -903,6 +959,7 @@ def create_job(
|
|||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
"workdir": normalized_workdir,
|
||||
"profile": normalized_profile,
|
||||
}
|
||||
|
||||
with _jobs_lock():
|
||||
|
|
|
|||
|
|
@ -1857,6 +1857,32 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
os.environ["TERMINAL_CWD"] = _job_workdir
|
||||
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
|
||||
|
||||
# Scope this job's execution to its owning profile's HERMES_HOME (#32091).
|
||||
# The shared root store holds every profile's jobs, but a job must run with
|
||||
# the .env / config.yaml / credentials of the profile that created it — not
|
||||
# whichever profile's ticker happened to pick it up. We set both the
|
||||
# in-process ContextVar override (consumed by _get_hermes_home() for the
|
||||
# config/.env/script loads below) AND os.environ["HERMES_HOME"] (inherited
|
||||
# by any child subprocess the agent spawns). tick() routes profile-scoped
|
||||
# jobs to the single-worker sequential pool, so mutating os.environ here is
|
||||
# safe — they never overlap. Restored in the finally block.
|
||||
from cron.jobs import resolve_profile_home
|
||||
from hermes_constants import set_hermes_home_override
|
||||
_job_profile = (job.get("profile") or "default").strip() or "default"
|
||||
_profile_home = resolve_profile_home(_job_profile)
|
||||
_prior_hermes_home = os.environ.get("HERMES_HOME", "_UNSET_")
|
||||
_hermes_home_token = None
|
||||
if _profile_home is not None and _profile_home != _get_hermes_home().resolve():
|
||||
os.environ["HERMES_HOME"] = str(_profile_home)
|
||||
_hermes_home_token = set_hermes_home_override(str(_profile_home))
|
||||
logger.info("Job '%s': executing under profile %r (HERMES_HOME=%s)",
|
||||
job_id, _job_profile, _profile_home)
|
||||
elif _profile_home is None and _job_profile != "default":
|
||||
logger.warning(
|
||||
"Job '%s': profile %r no longer exists — running under the "
|
||||
"ticker's profile instead", job_id, _job_profile,
|
||||
)
|
||||
|
||||
try:
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
|
|
@ -2189,13 +2215,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
# would otherwise be delivered as if it were the agent's reply and the
|
||||
# job's `last_status` set to "ok". Raise so the except handler below
|
||||
# builds the proper failure tuple. (issue #17855)
|
||||
if result.get("failed") is True or result.get("completed") is False:
|
||||
turn_exit_reason = str(result.get("turn_exit_reason") or "")
|
||||
final_response_text = (result.get("final_response") or "").strip()
|
||||
max_iteration_summary = (
|
||||
result.get("failed") is not True
|
||||
and result.get("completed") is False
|
||||
and turn_exit_reason.startswith("max_iterations_reached(")
|
||||
and bool(final_response_text)
|
||||
)
|
||||
if result.get("failed") is True or (result.get("completed") is False and not max_iteration_summary):
|
||||
_err_text = (
|
||||
result.get("error")
|
||||
or (result.get("final_response") or "").strip()
|
||||
or final_response_text
|
||||
or "agent reported failure"
|
||||
)
|
||||
raise RuntimeError(_err_text)
|
||||
if max_iteration_summary:
|
||||
logger.warning(
|
||||
"Job '%s' reached the iteration limit but produced a final fallback response; "
|
||||
"delivering the response instead of failing the cron run",
|
||||
job_name,
|
||||
)
|
||||
|
||||
final_response = result.get("final_response", "") or ""
|
||||
# Strip leaked placeholder text that upstream may inject on empty completions.
|
||||
|
|
@ -2254,6 +2294,19 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
os.environ.pop("TERMINAL_CWD", None)
|
||||
else:
|
||||
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
|
||||
# Restore HERMES_HOME to the ticker's value when this job overrode it
|
||||
# for profile-scoped execution (#32091). Mirrors the TERMINAL_CWD
|
||||
# restore above; the sequential pool guarantees no overlap.
|
||||
if _hermes_home_token is not None:
|
||||
try:
|
||||
from hermes_constants import reset_hermes_home_override
|
||||
reset_hermes_home_override(_hermes_home_token)
|
||||
except Exception:
|
||||
pass
|
||||
if _prior_hermes_home == "_UNSET_":
|
||||
os.environ.pop("HERMES_HOME", None)
|
||||
else:
|
||||
os.environ["HERMES_HOME"] = _prior_hermes_home
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
for _var_name in _cron_delivery_vars:
|
||||
|
|
@ -2459,12 +2512,26 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
|
|||
body."""
|
||||
return run_one_job(job, adapters=adapters, loop=loop, verbose=verbose)
|
||||
|
||||
# Partition due jobs: those with a per-job workdir mutate
|
||||
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
|
||||
# so they MUST run sequentially to avoid corrupting each other. Jobs
|
||||
# without a workdir leave env untouched and stay parallel-safe.
|
||||
sequential_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
|
||||
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
|
||||
# Partition due jobs: those that mutate process-global os.environ
|
||||
# inside run_job MUST run sequentially to avoid corrupting each other.
|
||||
# Two cases mutate env:
|
||||
# - a per-job workdir sets os.environ["TERMINAL_CWD"].
|
||||
# - a per-job profile whose HERMES_HOME differs from the ticker's
|
||||
# sets os.environ["HERMES_HOME"] to scope execution (#32091).
|
||||
# Jobs that need neither leave env untouched and stay parallel-safe.
|
||||
def _needs_sequential(j: dict) -> bool:
|
||||
if (j.get("workdir") or "").strip():
|
||||
return True
|
||||
prof = (j.get("profile") or "default").strip() or "default"
|
||||
try:
|
||||
from cron.jobs import resolve_profile_home
|
||||
phome = resolve_profile_home(prof)
|
||||
except Exception:
|
||||
phome = None
|
||||
return phome is not None and phome != _get_hermes_home().resolve()
|
||||
|
||||
sequential_jobs = [j for j in due_jobs if _needs_sequential(j)]
|
||||
parallel_jobs = [j for j in due_jobs if not _needs_sequential(j)]
|
||||
|
||||
_results: list = []
|
||||
_all_futures: list = []
|
||||
|
|
|
|||
|
|
@ -1066,12 +1066,48 @@ def _media_delivery_denied_paths() -> List[Path]:
|
|||
denied.append(home / sub)
|
||||
# The active Hermes profile and shared Hermes root both contain control
|
||||
# files and credentials. Only cache subdirectories under them are
|
||||
# explicitly allowlisted above.
|
||||
# explicitly allowlisted above (matched BEFORE this denylist in
|
||||
# validate_media_delivery_path, so generated media still delivers).
|
||||
#
|
||||
# These are the per-file credential / secret stores that live at the
|
||||
# HERMES_HOME root. The set mirrors the canonical read guard in
|
||||
# agent/file_safety.py (get_read_block_error / build_write_denied_*) so the
|
||||
# delivery (read/exfil) side can't trail the write side: a credential the
|
||||
# agent is forbidden to write or read must also never be auto-attached to a
|
||||
# chat reply. Enumerated explicitly per-file rather than denying the whole
|
||||
# tree, so skills/, logs/, and ad-hoc agent-written files under ~/.hermes
|
||||
# stay deliverable (see #32090, #34425).
|
||||
_ROOT_CREDENTIAL_FILES = (
|
||||
".env",
|
||||
"auth.json",
|
||||
"auth.lock",
|
||||
"credentials",
|
||||
"config.yaml",
|
||||
# Anthropic PKCE / OAuth refresh credential store.
|
||||
".anthropic_oauth.json",
|
||||
# Google Workspace skill: auto-refreshing OAuth token (mtime bumps
|
||||
# every turn, which defeated the strict-mode recency window) plus the
|
||||
# pending-exchange session/verifier file.
|
||||
"google_token.json",
|
||||
"google_oauth_pending.json",
|
||||
os.path.join("auth", "google_oauth.json"),
|
||||
# Webhook subscription HMAC secrets.
|
||||
"webhook_subscriptions.json",
|
||||
# Bitwarden Secrets Manager plaintext disk cache.
|
||||
os.path.join("cache", "bws_cache.json"),
|
||||
)
|
||||
# Directory trees whose every child is credential material. (MCP OAuth
|
||||
# tokens under mcp-tokens/ are handled by the sibling targeted PR #37222;
|
||||
# session/kanban SQLite stores by #41071 — kept out of this diff to avoid
|
||||
# overlap.)
|
||||
_ROOT_CREDENTIAL_DIRS = (
|
||||
"pairing",
|
||||
)
|
||||
for hermes_root in (_HERMES_HOME, _HERMES_ROOT):
|
||||
denied.append(hermes_root / ".env")
|
||||
denied.append(hermes_root / "auth.json")
|
||||
denied.append(hermes_root / "credentials")
|
||||
denied.append(hermes_root / "config.yaml")
|
||||
for rel in _ROOT_CREDENTIAL_FILES:
|
||||
denied.append(hermes_root / rel)
|
||||
for rel in _ROOT_CREDENTIAL_DIRS:
|
||||
denied.append(hermes_root / rel)
|
||||
return denied
|
||||
|
||||
|
||||
|
|
@ -1190,9 +1226,12 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
|
|||
return str(resolved)
|
||||
|
||||
# Non-strict mode (default): accept anything not on the denylist.
|
||||
# The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
|
||||
# ~/.hermes/auth.json, etc. — so the obvious prompt-injection sites
|
||||
# (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``) remain rejected.
|
||||
# The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, and the
|
||||
# credential/secret stores under the Hermes root (~/.hermes/.env,
|
||||
# auth.json, .anthropic_oauth.json, google_token.json, pairing/, ...) —
|
||||
# so the obvious prompt-injection / credential-exfil sites
|
||||
# (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``,
|
||||
# ``MEDIA:~/.hermes/google_token.json``) remain rejected.
|
||||
if not _media_delivery_strict_mode():
|
||||
if _path_under_denied_prefix(resolved):
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -2343,7 +2343,7 @@ class GatewaySlashCommandsMixin:
|
|||
from gateway.run import _hermes_home
|
||||
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
||||
from tools import write_approval as wa
|
||||
from tools.memory_tool import MemoryStore
|
||||
from tools.memory_tool import load_on_disk_store
|
||||
|
||||
raw_args = event.get_command_args().strip()
|
||||
args = raw_args.split() if raw_args else []
|
||||
|
|
@ -2363,8 +2363,8 @@ class GatewaySlashCommandsMixin:
|
|||
|
||||
# Apply approved writes against a fresh on-disk store (the gateway has
|
||||
# no long-lived agent; the store persists to the same MEMORY/USER.md).
|
||||
store = MemoryStore()
|
||||
store.load_from_disk()
|
||||
# load_on_disk_store() honors the user's configured char limits.
|
||||
store = load_on_disk_store()
|
||||
|
||||
out = handle_pending_subcommand(
|
||||
wa.MEMORY, args, memory_store=store, set_mode_fn=_set_approval,
|
||||
|
|
|
|||
|
|
@ -1361,6 +1361,17 @@ class CLICommandsMixin:
|
|||
parts = cmd.strip().split()
|
||||
args = parts[1:] if len(parts) > 1 else []
|
||||
store = getattr(self.agent, "_memory_store", None) if getattr(self, "agent", None) else None
|
||||
if store is None:
|
||||
# No live agent store (e.g. /memory approve invoked from the Desktop
|
||||
# GUI, or any context without an active agent). Apply against a freshly
|
||||
# loaded on-disk store, mirroring the gateway path
|
||||
# (gateway/slash_commands.py): it persists to the same MEMORY/USER.md
|
||||
# and creates MEMORY.md on the first approved write. Without this the
|
||||
# shared handler returns "memory store unavailable". See #46783.
|
||||
# load_on_disk_store() honors the user's configured char limits, so
|
||||
# an approval here enforces the same caps as the live agent would.
|
||||
from tools.memory_tool import load_on_disk_store
|
||||
store = load_on_disk_store()
|
||||
out = handle_pending_subcommand(
|
||||
wa.MEMORY, args,
|
||||
memory_store=store,
|
||||
|
|
|
|||
|
|
@ -1535,6 +1535,25 @@ DEFAULT_CONFIG = {
|
|||
"timeout": 60,
|
||||
"extra_body": {},
|
||||
},
|
||||
# Background review — the post-turn self-improvement fork that decides
|
||||
# whether to save a memory / patch a skill. "auto" (default) = run on
|
||||
# the main chat model, replaying the full conversation, which is already
|
||||
# warm in the prompt cache (cheap cache reads) — unchanged, optimal.
|
||||
# Set provider/model to a cheaper model (e.g. openrouter
|
||||
# google/gemini-3-flash-preview) to run the review there for ~3-5x lower
|
||||
# cost. A different model can't reuse the main prompt cache anyway, so
|
||||
# the fork automatically replays a compact digest instead of the full
|
||||
# transcript when routed (minimises the cold-write). Same model = full
|
||||
# replay; different model = digest. Quality holds (memory capture
|
||||
# identical, skill near-identical in benchmarks).
|
||||
"background_review": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 120,
|
||||
"extra_body": {},
|
||||
},
|
||||
},
|
||||
|
||||
"display": {
|
||||
|
|
|
|||
|
|
@ -120,6 +120,9 @@ def cron_list(show_all: bool = False):
|
|||
workdir = job.get("workdir")
|
||||
if workdir:
|
||||
print(f" Workdir: {workdir}")
|
||||
_prof = job.get("profile")
|
||||
if _prof and _prof != "default":
|
||||
print(f" Profile: {_prof}")
|
||||
|
||||
# Execution history
|
||||
last_status = job.get("last_status")
|
||||
|
|
@ -259,6 +262,7 @@ def cron_create(args):
|
|||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
no_agent=getattr(args, "no_agent", False) or None,
|
||||
profile=getattr(args, "profile", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
|
|
@ -275,6 +279,9 @@ def cron_create(args):
|
|||
print(" Mode: no-agent (script stdout delivered directly)")
|
||||
if job_data.get("workdir"):
|
||||
print(f" Workdir: {job_data['workdir']}")
|
||||
_prof = job_data.get("profile")
|
||||
if _prof and _prof != "default":
|
||||
print(f" Profile: {_prof}")
|
||||
print(f" Next run: {result['next_run_at']}")
|
||||
return 0
|
||||
|
||||
|
|
|
|||
|
|
@ -12507,6 +12507,33 @@ def main():
|
|||
action="store_true",
|
||||
help="Emit the raw structured payload as JSON (same shape as `tools/call`).",
|
||||
)
|
||||
computer_use_perms = computer_use_sub.add_parser(
|
||||
"permissions",
|
||||
help="Check or grant macOS Accessibility + Screen Recording (macOS)",
|
||||
description=(
|
||||
"Computer Use drives the Mac through cua-driver, whose TCC grants\n"
|
||||
"attach to cua-driver's own identity (com.trycua.driver) — not the\n"
|
||||
"terminal or the Hermes app. `status` reports the driver's grant\n"
|
||||
"state; `grant` launches CuaDriver via LaunchServices so the macOS\n"
|
||||
"permission dialog is attributed to the process that does the work."
|
||||
),
|
||||
)
|
||||
computer_use_perms_sub = computer_use_perms.add_subparsers(
|
||||
dest="computer_use_perms_action"
|
||||
)
|
||||
computer_use_perms_status = computer_use_perms_sub.add_parser(
|
||||
"status",
|
||||
help="Report Accessibility + Screen Recording grant state (read-only)",
|
||||
)
|
||||
computer_use_perms_status.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Emit the normalized permission payload as JSON.",
|
||||
)
|
||||
computer_use_perms_sub.add_parser(
|
||||
"grant",
|
||||
help="Request the grants (opens the dialog attributed to CuaDriver)",
|
||||
)
|
||||
|
||||
def cmd_computer_use(args):
|
||||
action = getattr(args, "computer_use_action", None)
|
||||
|
|
@ -12564,6 +12591,41 @@ def main():
|
|||
json_output=bool(getattr(args, "json", False)),
|
||||
)
|
||||
sys.exit(code)
|
||||
if action == "permissions":
|
||||
perms_action = getattr(args, "computer_use_perms_action", None)
|
||||
if perms_action == "grant":
|
||||
from tools.computer_use.permissions import request_permissions_grant
|
||||
sys.exit(request_permissions_grant())
|
||||
if perms_action == "status":
|
||||
import json as _json
|
||||
from tools.computer_use.permissions import computer_use_status
|
||||
st = computer_use_status()
|
||||
if bool(getattr(args, "json", False)):
|
||||
print(_json.dumps(st, indent=2, sort_keys=True))
|
||||
sys.exit(0 if st["ready"] else 1)
|
||||
if not st["platform_supported"]:
|
||||
print(f"Computer Use is not supported on {st['platform']}.")
|
||||
sys.exit(1)
|
||||
if not st["installed"]:
|
||||
print("cua-driver: not installed. Run: hermes computer-use install")
|
||||
sys.exit(1)
|
||||
glyph = lambda v: "✅" if v is True else ("❌" if v is False else "•") # noqa: E731
|
||||
print(f"cua-driver: {st['version'] or 'installed'} ({st['platform']})")
|
||||
if st["can_grant"]: # macOS TCC permissions
|
||||
print(f" {glyph(st['accessibility'])} Accessibility")
|
||||
print(f" {glyph(st['screen_recording'])} Screen Recording")
|
||||
if not st["ready"]:
|
||||
print(" Grant: hermes computer-use permissions grant")
|
||||
else: # no TCC model — readiness is driver health
|
||||
print(f" {glyph(st['ready'])} driver health (no permission toggles on {st['platform']})")
|
||||
for c in st["checks"]:
|
||||
if c["status"] != "ok":
|
||||
print(f" ⚠ {c['label']}: {c['message']}")
|
||||
if st["error"]:
|
||||
print(f" ⚠ {st['error']}")
|
||||
sys.exit(0 if st["ready"] else 1)
|
||||
computer_use_perms.print_help()
|
||||
return
|
||||
# No subcommand → show help
|
||||
computer_use_parser.print_help()
|
||||
|
||||
|
|
|
|||
|
|
@ -70,6 +70,10 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None:
|
|||
"--workdir",
|
||||
help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).",
|
||||
)
|
||||
cron_create.add_argument(
|
||||
"--profile",
|
||||
help="Hermes profile the job should EXECUTE under (its .env / config.yaml / credentials). Defaults to the profile that created the job. Jobs live in one shared root store (#32091); this scopes a job's runtime environment to the named profile so it runs with that profile's permissions.",
|
||||
)
|
||||
|
||||
# cron edit
|
||||
cron_edit = cron_subparsers.add_parser(
|
||||
|
|
|
|||
|
|
@ -667,102 +667,31 @@ def _pip_install(
|
|||
|
||||
|
||||
|
||||
def _check_cua_driver_asset_for_arch() -> bool:
|
||||
"""Check whether the latest CUA release ships an asset for this OS+arch.
|
||||
|
||||
Returns True if the asset likely exists (or if we cannot determine it).
|
||||
Returns False and prints a warning when the asset is confirmed missing,
|
||||
so callers can skip the install attempt and avoid a raw 404.
|
||||
|
||||
Recognizes release-asset names across all supported platforms:
|
||||
|
||||
* macOS (``Darwin``) — arm64 always ships; x86_64/amd64 probed.
|
||||
* Windows (``AMD64``/``ARM64``) — amd64/x86_64 and arm64 probed.
|
||||
* Linux (``x86_64``/``aarch64``) — x86_64/amd64 and aarch64/arm64 probed.
|
||||
"""
|
||||
import platform as _plat
|
||||
import urllib.request
|
||||
|
||||
system = _plat.system()
|
||||
machine = _plat.machine().lower() # e.g. "x86_64", "arm64", "amd64", "aarch64"
|
||||
|
||||
# arm64 (Apple Silicon) macOS assets are always published — short-circuit
|
||||
# to preserve the original fail-open behaviour and avoid a network call.
|
||||
if system == "Darwin" and machine == "arm64":
|
||||
return True
|
||||
|
||||
# Map this host's arch to the set of asset-name substrings we'll accept.
|
||||
# Asset names vary by OS (darwin-x86_64, windows-amd64, linux-aarch64, …),
|
||||
# so we match on the architecture token only and let any of the common
|
||||
# aliases satisfy the probe.
|
||||
if machine in {"x86_64", "amd64", "x64"}:
|
||||
arch_names = {"x86_64", "amd64", "x64"}
|
||||
arch_label = "x86_64/amd64"
|
||||
elif machine in {"arm64", "aarch64"}:
|
||||
arch_names = {"arm64", "aarch64"}
|
||||
arch_label = "arm64/aarch64"
|
||||
else:
|
||||
# Unknown arch — fail open and let the installer surface the error.
|
||||
return True
|
||||
|
||||
# Probe the cua-driver release for an OS+arch asset before falling through
|
||||
# to the upstream installer.
|
||||
#
|
||||
# The cua-driver-rs binaries are published to the trycua/cua monorepo under
|
||||
# tag prefix ``cua-driver-rs-v*``. The repo's ``releases/latest`` is NOT
|
||||
# that — it floats across the monorepo's other components (agent-*,
|
||||
# computer-*, lume-*, train-*), most of which ship zero binary assets. So
|
||||
# we list releases and pick the newest ``cua-driver-rs-v*`` tag, matching
|
||||
# what the upstream install.sh does. Failing to find one => fail open and
|
||||
# let the installer (which resolves the tag itself) be the source of truth.
|
||||
driver_tag_prefix = "cua-driver-rs-v"
|
||||
api_url = (
|
||||
"https://api.github.com/repos/trycua/cua/releases?per_page=100"
|
||||
)
|
||||
try:
|
||||
req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github+json"})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
releases = _json.loads(resp.read().decode())
|
||||
if not isinstance(releases, list):
|
||||
return True
|
||||
# GitHub returns releases newest-first; take the first cua-driver-rs tag.
|
||||
driver_release = next(
|
||||
(
|
||||
r for r in releases
|
||||
if str(r.get("tag_name", "")).startswith(driver_tag_prefix)
|
||||
),
|
||||
None,
|
||||
)
|
||||
if driver_release is None:
|
||||
# No cua-driver-rs release surfaced (API hiccup / unexpected shape).
|
||||
# Fail open — the installer resolves the tag on its own.
|
||||
return True
|
||||
tag = driver_release.get("tag_name", "")
|
||||
assets = driver_release.get("assets", [])
|
||||
# OS token gates the asset alongside arch so a darwin asset can't
|
||||
# satisfy a Linux probe (every cua-driver-rs release ships all three
|
||||
# OSes, so the arch token alone would always match).
|
||||
os_token = {"Darwin": "darwin", "Windows": "windows", "Linux": "linux"}.get(system, "")
|
||||
has_asset = any(
|
||||
os_token in (name := a_info.get("name", "").lower())
|
||||
and any(a in name for a in arch_names)
|
||||
for a_info in assets
|
||||
)
|
||||
if not has_asset:
|
||||
_print_warning(
|
||||
f" Latest cua-driver release ({tag}) has no {system} {arch_label} asset."
|
||||
)
|
||||
_print_info(
|
||||
" CUA Driver may not yet ship a build for this platform."
|
||||
)
|
||||
_print_info(
|
||||
" See: https://github.com/trycua/cua/releases"
|
||||
)
|
||||
return False
|
||||
except Exception:
|
||||
# Network / API failure — proceed and let the installer handle it.
|
||||
pass
|
||||
return True
|
||||
# The asset-probe that lived here used to hit `/releases/latest` on
|
||||
# trycua/cua and inspect the release's asset list before piping the
|
||||
# installer to bash. It was broken in two places:
|
||||
#
|
||||
# 1. cua-driver-rs releases are marked **prerelease** on every cut,
|
||||
# and GitHub's `/releases/latest` endpoint explicitly skips
|
||||
# prereleases. On the live trycua/cua repo today, `/releases/latest`
|
||||
# returns the Python `cua-agent v0.8.3` package (zero binary
|
||||
# assets) instead of `cua-driver-rs-v0.6.0` (19 binary assets).
|
||||
# The probe then reported "no asset for this arch" and skipped the
|
||||
# install on every non-arm64 host — Linux x86_64, Windows, macOS
|
||||
# Intel, Linux arm64 — even when the upstream installer would have
|
||||
# succeeded.
|
||||
# 2. Even with the right endpoint, we'd be duplicating tag-resolution
|
||||
# logic the upstream installer already does correctly via
|
||||
# `CUA_DRIVER_RS_BAKED_VERSION` (auto-baked by CD on every release,
|
||||
# with an API fallback). Drift between our probe and theirs is a
|
||||
# maintenance hazard.
|
||||
#
|
||||
# Resolution: trust the upstream installer. For fresh installs, run
|
||||
# install.sh directly — it errors clean if the target arch has no
|
||||
# asset. For the upgrade path, `cua_driver_update_check()` (which calls
|
||||
# `cua-driver check-update --json`) gives us the canonical update
|
||||
# answer from the binary itself — same tag-resolution as the installer,
|
||||
# no Python-side duplication.
|
||||
|
||||
|
||||
def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
|
|
@ -811,8 +740,9 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
|||
_print_warning(f" {fetch_tool} not found — install manually:")
|
||||
_print_info(" https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
|
||||
return False
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return False
|
||||
# Pre-install asset probe deleted — see comment near the top of
|
||||
# tools_config.py for why. install.sh has CUA_DRIVER_RS_BAKED_VERSION
|
||||
# baked in by CD and errors cleanly on missing-arch assets.
|
||||
return _run_cua_driver_installer(label="Installing")
|
||||
|
||||
# Already installed and caller didn't ask to upgrade → just confirm.
|
||||
|
|
@ -841,8 +771,10 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
|||
_print_warning(f" {fetch_tool} not found — cannot refresh cua-driver.")
|
||||
return bool(binary)
|
||||
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return bool(binary)
|
||||
# Pre-install asset probe deleted (see top-of-file comment). The
|
||||
# `cua_driver_update_check()` call further down asks the installed
|
||||
# cua-driver binary itself whether an update exists — same
|
||||
# tag-resolution as the installer, no duplication.
|
||||
|
||||
# Skip the (network) re-install when the driver itself reports it's already
|
||||
# on the latest release. Best-effort: an older driver (no check-update
|
||||
|
|
|
|||
|
|
@ -1322,13 +1322,35 @@ def _dashboard_local_update_managed_externally() -> bool:
|
|||
in-browser local update action. Keep this dashboard capability separate
|
||||
from install-method detection: manual git/pip installs inside containers can
|
||||
still behave like their actual install method in the CLI.
|
||||
|
||||
However, when the install method is ``git`` (a bind-mounted checkout inside
|
||||
a container — e.g. the hermes-webui image sharing the Hermes source tree),
|
||||
the dashboard's ``hermes update`` button is the correct update path and
|
||||
should not be suppressed. Other containerized install methods remain
|
||||
externally managed unless their apply path is proven safe inside the
|
||||
running container filesystem.
|
||||
"""
|
||||
if _default_hermes_root_is_opt_data():
|
||||
return True
|
||||
try:
|
||||
from hermes_constants import is_container
|
||||
|
||||
return is_container()
|
||||
if not is_container():
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
# We are inside a container, but the install may still be self-managed.
|
||||
# If the install method is git, the dashboard update button works against
|
||||
# the mounted checkout and should be offered. Keep pip blocked inside
|
||||
# containers: its apply path mutates the running container filesystem and
|
||||
# is not the bind-mounted checkout case this gate is meant to recover.
|
||||
try:
|
||||
method = detect_install_method(PROJECT_ROOT)
|
||||
if method == "git":
|
||||
return False
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _managed_files_policy(request: Request, *, create_root: bool = True) -> ManagedFilesPolicy:
|
||||
|
|
@ -8327,6 +8349,7 @@ async def install_mcp_catalog_entry(body: MCPCatalogInstall, profile: Optional[s
|
|||
|
||||
# Register the mcp-install action log so /api/actions/mcp-install/status works.
|
||||
_ACTION_LOG_FILES.setdefault("mcp-install", "action-mcp-install.log")
|
||||
_ACTION_LOG_FILES.setdefault("computer-use-grant", "action-computer-use-grant.log")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -10649,6 +10672,63 @@ async def run_toolset_post_setup(
|
|||
return {"ok": True, "pid": proc.pid, "name": "tools-post-setup", "key": body.key}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Computer Use (cua-driver) — cross-platform readiness + macOS permission grant
|
||||
#
|
||||
# cua-driver runs on macOS, Windows, and Linux. The desktop card reflects
|
||||
# per-OS readiness: on macOS the Accessibility + Screen Recording TCC grants
|
||||
# (which attach to cua-driver's OWN identity, com.trycua.driver — not Hermes,
|
||||
# so no app entitlement is involved); elsewhere, driver health from
|
||||
# `cua-driver doctor`. The grant flow is macOS-only (no TCC toggles to request
|
||||
# on Windows/Linux).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.get("/api/tools/computer-use/status")
|
||||
async def get_computer_use_status(profile: Optional[str] = None):
|
||||
"""Cross-platform Computer Use readiness for the desktop card.
|
||||
|
||||
See ``tools.computer_use.permissions.computer_use_status`` for the payload
|
||||
shape. Read-only and fast (shells ``cua-driver doctor`` + macOS
|
||||
``permissions status``).
|
||||
"""
|
||||
from tools.computer_use.permissions import computer_use_status
|
||||
|
||||
with _profile_scope(profile):
|
||||
return computer_use_status()
|
||||
|
||||
|
||||
@app.post("/api/tools/computer-use/permissions/grant")
|
||||
async def grant_computer_use_permissions(profile: Optional[str] = None):
|
||||
"""Spawn ``hermes computer-use permissions grant`` as a background action.
|
||||
|
||||
macOS-only: ``cua-driver permissions grant`` launches CuaDriver via
|
||||
LaunchServices so the TCC dialog is attributed to com.trycua.driver, then
|
||||
waits for approval. The frontend polls ``GET /api/actions/computer-use-
|
||||
grant/status`` and re-reads ``/status`` once it exits. Windows/Linux have
|
||||
no TCC toggles to grant, so this returns 400 there.
|
||||
"""
|
||||
if sys.platform != "darwin":
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Computer Use permission grants are a macOS concept.",
|
||||
)
|
||||
try:
|
||||
proc = _spawn_hermes_action(
|
||||
_profile_cli_args(profile)
|
||||
+ ["computer-use", "permissions", "grant"],
|
||||
"computer-use-grant",
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
_log.exception("Failed to spawn computer-use permissions grant")
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to request permissions: {exc}"
|
||||
)
|
||||
return {"ok": True, "pid": proc.pid, "name": "computer-use-grant"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Raw YAML config endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -4598,6 +4598,83 @@ class SessionDB:
|
|||
return None
|
||||
return dict(row) if row else None
|
||||
|
||||
def delete_telegram_topic_binding(
|
||||
self,
|
||||
*,
|
||||
chat_id: str,
|
||||
thread_id: str,
|
||||
) -> int:
|
||||
"""Remove the binding row for a single (chat, thread) pair.
|
||||
|
||||
Called when the Telegram Bot API confirms a topic was deleted
|
||||
externally (``Thread not found`` after the same-thread retry
|
||||
already failed). Without this prune, the stale row keeps
|
||||
living in ``telegram_dm_topic_bindings`` and the
|
||||
recovery logic in ``gateway.run._recover_telegram_topic_thread_id``
|
||||
cheerfully redirects future inbound messages to the deleted
|
||||
topic, causing tool progress, approvals, and replies to land
|
||||
in the wrong place. Issue #31501.
|
||||
|
||||
When this prune removes the chat's *last* remaining binding,
|
||||
the chat's row in ``telegram_dm_topic_mode`` is also flipped to
|
||||
``enabled = 0`` in the same transaction. Otherwise the chat
|
||||
would be left in topic mode with zero lanes — and
|
||||
``gateway.run._recover_telegram_topic_thread_id`` keeps treating
|
||||
the chat as topic-enabled, lobby messages keep hunting for a
|
||||
binding that no longer exists, and a user who disabled topics in
|
||||
the Telegram client (rather than via ``/topic off``) stays stuck
|
||||
until the next send happens to fail. Clearing the flag makes
|
||||
recovery fully stand down once the dead topics are gone.
|
||||
|
||||
Returns the number of binding rows deleted (0 when the binding
|
||||
was already absent or the topic-mode tables haven't been
|
||||
migrated yet — both are silent no-ops; we never raise from
|
||||
a cleanup hot path).
|
||||
"""
|
||||
chat_id = str(chat_id)
|
||||
thread_id = str(thread_id)
|
||||
deleted = {"count": 0}
|
||||
|
||||
def _do(conn):
|
||||
try:
|
||||
cursor = conn.execute(
|
||||
"""
|
||||
DELETE FROM telegram_dm_topic_bindings
|
||||
WHERE chat_id = ? AND thread_id = ?
|
||||
""",
|
||||
(chat_id, thread_id),
|
||||
)
|
||||
deleted["count"] = cursor.rowcount or 0
|
||||
except sqlite3.OperationalError:
|
||||
# Tables don't exist yet — nothing to prune.
|
||||
deleted["count"] = 0
|
||||
return
|
||||
if not deleted["count"]:
|
||||
return
|
||||
# If that was the chat's last binding, disable topic mode for
|
||||
# the chat so recovery stops steering lobby messages at a now
|
||||
# empty lane set. Same transaction → no read-after-prune race.
|
||||
try:
|
||||
remaining = conn.execute(
|
||||
"""
|
||||
SELECT 1 FROM telegram_dm_topic_bindings
|
||||
WHERE chat_id = ? LIMIT 1
|
||||
""",
|
||||
(chat_id,),
|
||||
).fetchone()
|
||||
if remaining is None:
|
||||
conn.execute(
|
||||
"UPDATE telegram_dm_topic_mode "
|
||||
"SET enabled = 0, updated_at = ? WHERE chat_id = ?",
|
||||
(time.time(), chat_id),
|
||||
)
|
||||
except sqlite3.OperationalError:
|
||||
# telegram_dm_topic_mode absent — binding prune still stands.
|
||||
pass
|
||||
|
||||
self._execute_write(_do)
|
||||
return deleted["count"]
|
||||
|
||||
def bind_telegram_topic(
|
||||
self,
|
||||
*,
|
||||
|
|
|
|||
|
|
@ -1590,6 +1590,19 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
mutation_count += 1
|
||||
return result
|
||||
|
||||
# Delete obsolete commands FIRST to stay under Discord's 100-command
|
||||
# limit. Discord rejects an upsert that would push the live total over
|
||||
# 100 (error 30032), which silently breaks ALL slash commands. If a new
|
||||
# command is created before the obsolete ones are removed, an app that
|
||||
# is already at the cap momentarily exceeds it and the whole sync fails.
|
||||
# Removing the no-longer-desired commands up front guarantees the live
|
||||
# total never rises above the cap mid-sync.
|
||||
obsolete_keys = set(existing_by_key.keys()) - set(desired_by_key.keys())
|
||||
for key in obsolete_keys:
|
||||
current = existing_by_key.pop(key)
|
||||
await mutate(http.delete_global_command, app_id, current.id)
|
||||
deleted += 1
|
||||
|
||||
for key, desired in desired_by_key.items():
|
||||
current = existing_by_key.pop(key, None)
|
||||
if current is None:
|
||||
|
|
@ -1613,10 +1626,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
await mutate(http.edit_global_command, app_id, current.id, desired)
|
||||
updated += 1
|
||||
|
||||
for current in existing_by_key.values():
|
||||
await mutate(http.delete_global_command, app_id, current.id)
|
||||
deleted += 1
|
||||
|
||||
return {
|
||||
"total": len(desired_payloads),
|
||||
"unchanged": unchanged,
|
||||
|
|
|
|||
|
|
@ -810,6 +810,47 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
def _is_thread_not_found_error(error: Exception) -> bool:
|
||||
return "thread not found" in str(error).lower()
|
||||
|
||||
def _prune_stale_dm_topic_binding(
|
||||
self, chat_id: Any, thread_id: Any,
|
||||
) -> None:
|
||||
"""Drop the stale ``telegram_dm_topic_bindings`` row for a
|
||||
topic Telegram has confirmed deleted.
|
||||
|
||||
Without this prune the recovery logic in
|
||||
``gateway.run._recover_telegram_topic_thread_id`` keeps
|
||||
steering future inbound messages to the dead thread (the
|
||||
bug behind #31501 — tool progress, approvals, replies all
|
||||
end up in the wrong place even though the user has moved
|
||||
on to a fresh topic). Best-effort: we never raise from a
|
||||
send-fallback path — a failed cleanup must not turn into a
|
||||
failed user-facing send.
|
||||
"""
|
||||
if chat_id is None or thread_id is None:
|
||||
return
|
||||
store = getattr(self, "_session_store", None)
|
||||
if store is None:
|
||||
return
|
||||
db = getattr(store, "_db", None)
|
||||
if db is None or not hasattr(db, "delete_telegram_topic_binding"):
|
||||
return
|
||||
try:
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id=str(chat_id), thread_id=str(thread_id),
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] delete_telegram_topic_binding failed for "
|
||||
"chat=%s thread=%s — skipping prune",
|
||||
self.name, chat_id, thread_id, exc_info=True,
|
||||
)
|
||||
return
|
||||
if removed:
|
||||
logger.info(
|
||||
"[%s] Pruned stale Telegram DM topic binding "
|
||||
"chat=%s thread=%s (Bot API: thread not found)",
|
||||
self.name, chat_id, thread_id,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_bad_request_error(error: Exception) -> bool:
|
||||
name = error.__class__.__name__.lower()
|
||||
|
|
@ -2670,11 +2711,17 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
continue
|
||||
# Second failure: the thread is genuinely gone.
|
||||
# Retry without ``message_thread_id`` so the
|
||||
# message still reaches the chat.
|
||||
# message still reaches the chat, and prune
|
||||
# the stale binding so future inbound
|
||||
# messages aren't redirected back to it
|
||||
# (#31501).
|
||||
logger.warning(
|
||||
"[%s] Thread %s not found, retrying without message_thread_id",
|
||||
self.name, effective_thread_id,
|
||||
)
|
||||
self._prune_stale_dm_topic_binding(
|
||||
chat_id, effective_thread_id,
|
||||
)
|
||||
used_thread_fallback = True
|
||||
effective_thread_id = None
|
||||
thread_kwargs = {"message_thread_id": None}
|
||||
|
|
@ -3355,6 +3402,13 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self.name,
|
||||
message_thread_id,
|
||||
)
|
||||
# Same prune as the streaming send path — the
|
||||
# control-message retry tells us the topic is gone,
|
||||
# so the binding row in state.db must go too
|
||||
# (#31501).
|
||||
self._prune_stale_dm_topic_binding(
|
||||
kwargs.get("chat_id"), message_thread_id,
|
||||
)
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("message_thread_id", None)
|
||||
return await self._bot.send_message(**retry_kwargs)
|
||||
|
|
|
|||
|
|
@ -108,6 +108,7 @@ AUTHOR_MAP = {
|
|||
"804436395@qq.com": "LaPhilosophie",
|
||||
"maxmitcham@mac.home": "maxtrigify",
|
||||
"ccook@nvms.com": "ccook1963",
|
||||
"libre-7@users.noreply.github.com": "libre-7",
|
||||
"kristian@agrointel.no": "kristianvast",
|
||||
"thomas.paquette@gmail.com": "RyTsYdUp",
|
||||
"techxacm@gmail.com": "ProgramCaiCai",
|
||||
|
|
|
|||
|
|
@ -100,7 +100,13 @@ class _StubAgent:
|
|||
pass
|
||||
|
||||
|
||||
def _run(agent):
|
||||
def _run(
|
||||
agent,
|
||||
*,
|
||||
final_response=None,
|
||||
api_call_count=3,
|
||||
turn_exit_reason="unknown",
|
||||
):
|
||||
messages = [
|
||||
{"role": "user", "content": "do a thing"},
|
||||
{
|
||||
|
|
@ -114,8 +120,8 @@ def _run(agent):
|
|||
]
|
||||
return finalize_turn(
|
||||
agent,
|
||||
final_response=None, # forces the max-iterations summary path
|
||||
api_call_count=3,
|
||||
final_response=final_response,
|
||||
api_call_count=api_call_count,
|
||||
interrupted=False,
|
||||
failed=False,
|
||||
messages=messages,
|
||||
|
|
@ -125,7 +131,7 @@ def _run(agent):
|
|||
user_message="do a thing",
|
||||
original_user_message="do a thing",
|
||||
_should_review_memory=False,
|
||||
_turn_exit_reason="unknown",
|
||||
_turn_exit_reason=turn_exit_reason,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -162,4 +168,17 @@ def test_clean_turn_has_no_cleanup_errors_key():
|
|||
agent = _StubAgent(raise_in=())
|
||||
result = _run(agent)
|
||||
assert result["final_response"] == "PARTIAL SUMMARY FROM MODEL"
|
||||
assert result["completed"] is False
|
||||
assert "cleanup_errors" not in result
|
||||
|
||||
|
||||
def test_text_response_on_last_allowed_call_is_completed():
|
||||
agent = _StubAgent(raise_in=())
|
||||
result = _run(
|
||||
agent,
|
||||
final_response="final report",
|
||||
api_call_count=agent.max_iterations,
|
||||
turn_exit_reason="text_response(finish_reason=stop)",
|
||||
)
|
||||
assert result["final_response"] == "final report"
|
||||
assert result["completed"] is True
|
||||
|
|
|
|||
|
|
@ -103,3 +103,139 @@ def test_get_default_hermes_root_docker_layouts(tmp_path, monkeypatch):
|
|||
# Docker profile layout: <custom>/profiles/<name> -> <custom>.
|
||||
monkeypatch.setenv("HERMES_HOME", "/opt/data/profiles/coder")
|
||||
assert hermes_constants.get_default_hermes_root() == Path("/opt/data")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-job profile EXECUTION scoping (#32091 follow-up).
|
||||
#
|
||||
# The storage half of #32091 (above) moved every profile's jobs into one shared
|
||||
# root store. But a job must still EXECUTE under its owning profile's
|
||||
# environment (.env / config.yaml / credentials) — not whichever profile's
|
||||
# ticker picks it up. These tests cover the execution-scoping half.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _profile_env(tmp_path, monkeypatch, active="default"):
|
||||
"""Set up a root home with a 'donna' profile dir and point the platform
|
||||
default at it. Returns (root, donna_home). ``active`` selects which
|
||||
HERMES_HOME the process runs under."""
|
||||
root = tmp_path / "hermes_home"
|
||||
(root / "cron").mkdir(parents=True)
|
||||
donna_home = root / "profiles" / "donna"
|
||||
(donna_home / "cron").mkdir(parents=True)
|
||||
import hermes_constants
|
||||
monkeypatch.setattr(hermes_constants, "_get_platform_default_hermes_home",
|
||||
lambda: root)
|
||||
monkeypatch.setenv("HERMES_HOME", str(root if active == "default" else donna_home))
|
||||
return root, donna_home
|
||||
|
||||
|
||||
def test_create_job_autocaptures_active_profile(tmp_path, monkeypatch):
|
||||
"""A job created from inside a profile session is tagged with that profile,
|
||||
so the scheduler can later scope its execution back to it."""
|
||||
root, donna_home = _profile_env(tmp_path, monkeypatch, active="donna")
|
||||
import cron.jobs as jobs
|
||||
importlib.reload(jobs)
|
||||
try:
|
||||
job = jobs.create_job(prompt="audit", schedule="every 1h", name="a")
|
||||
# auto-captured from the active (donna) session
|
||||
assert job["profile"] == "donna"
|
||||
# and it landed in the SHARED ROOT store, not donna's profile-local one
|
||||
assert jobs.JOBS_FILE.resolve() == (root / "cron" / "jobs.json").resolve()
|
||||
assert jobs.JOBS_FILE.exists()
|
||||
assert not (donna_home / "cron" / "jobs.json").exists()
|
||||
finally:
|
||||
monkeypatch.undo()
|
||||
importlib.reload(jobs)
|
||||
|
||||
|
||||
def test_create_job_explicit_profile_override(tmp_path, monkeypatch):
|
||||
"""An explicit profile= wins over the auto-captured active profile."""
|
||||
root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
|
||||
(root / "profiles" / "ops" / "cron").mkdir(parents=True)
|
||||
import cron.jobs as jobs
|
||||
importlib.reload(jobs)
|
||||
try:
|
||||
job = jobs.create_job(prompt="x", schedule="every 2h", profile="ops")
|
||||
assert job["profile"] == "ops"
|
||||
finally:
|
||||
monkeypatch.undo()
|
||||
importlib.reload(jobs)
|
||||
|
||||
|
||||
def test_resolve_profile_home_maps_names(tmp_path, monkeypatch):
|
||||
"""resolve_profile_home maps default/named profiles to homes and returns
|
||||
None for a missing profile."""
|
||||
root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
|
||||
import cron.jobs as jobs
|
||||
importlib.reload(jobs)
|
||||
try:
|
||||
assert jobs.resolve_profile_home("default").resolve() == root.resolve()
|
||||
assert jobs.resolve_profile_home("").resolve() == root.resolve()
|
||||
assert jobs.resolve_profile_home("donna").resolve() == donna_home.resolve()
|
||||
assert jobs.resolve_profile_home("ghost") is None
|
||||
finally:
|
||||
monkeypatch.undo()
|
||||
importlib.reload(jobs)
|
||||
|
||||
|
||||
def test_normalize_backfills_legacy_profile_to_default(tmp_path, monkeypatch):
|
||||
"""A pre-feature job with no profile field reads back as 'default'."""
|
||||
import cron.jobs as jobs
|
||||
legacy = {"id": "l1", "name": "old", "prompt": "x",
|
||||
"schedule": {"kind": "interval", "minutes": 60}}
|
||||
assert jobs._normalize_job_record(legacy)["profile"] == "default"
|
||||
|
||||
|
||||
def test_run_job_scopes_execution_to_job_profile(tmp_path, monkeypatch):
|
||||
"""The decisive test: a ticker running as the ROOT profile executes a
|
||||
job tagged profile='donna' with HERMES_HOME pointed at donna's home
|
||||
(both the env var and the in-process override), then restores the
|
||||
ticker's env afterward."""
|
||||
from unittest.mock import MagicMock, patch
|
||||
root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
|
||||
(donna_home / "config.yaml").write_text("model:\n default: openrouter/test\n")
|
||||
|
||||
import hermes_constants
|
||||
import cron.jobs as jobs
|
||||
import cron.scheduler as sched
|
||||
importlib.reload(jobs)
|
||||
importlib.reload(sched)
|
||||
|
||||
captured = {}
|
||||
|
||||
def fake_run_conversation(prompt, *a, **k):
|
||||
captured["env"] = os.environ.get("HERMES_HOME")
|
||||
captured["override"] = hermes_constants.get_hermes_home_override()
|
||||
captured["resolved"] = str(hermes_constants.get_hermes_home())
|
||||
return {"final_response": "done", "completed": True, "failed": False,
|
||||
"turn_exit_reason": "text_response(finish_reason=stop)"}
|
||||
|
||||
job = {"id": "j-donna", "name": "donna-audit", "prompt": "audit",
|
||||
"profile": "donna", "schedule": {"kind": "interval", "minutes": 60},
|
||||
"deliver": "local", "model": "openrouter/test"}
|
||||
|
||||
before = os.environ.get("HERMES_HOME")
|
||||
try:
|
||||
fake_agent = MagicMock()
|
||||
fake_agent.run_conversation.side_effect = fake_run_conversation
|
||||
with patch("cron.scheduler._resolve_origin", return_value=None), \
|
||||
patch("dotenv.load_dotenv"), \
|
||||
patch("hermes_state.SessionDB", return_value=MagicMock()), \
|
||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value={"api_key": "k", "base_url": "https://x/v1",
|
||||
"provider": "openrouter", "api_mode": "chat_completions"}), \
|
||||
patch("run_agent.AIAgent", return_value=fake_agent):
|
||||
success, output, final, err = sched.run_job(job)
|
||||
|
||||
assert success is True, (success, err)
|
||||
# During execution the job ran AS donna:
|
||||
assert captured["env"] == str(donna_home)
|
||||
assert captured["override"] == str(donna_home)
|
||||
assert captured["resolved"] == str(donna_home)
|
||||
# After the job, the ticker's HERMES_HOME is restored (no leak):
|
||||
assert os.environ.get("HERMES_HOME") == before
|
||||
finally:
|
||||
monkeypatch.undo()
|
||||
importlib.reload(jobs)
|
||||
importlib.reload(sched)
|
||||
|
|
|
|||
|
|
@ -1394,6 +1394,52 @@ class TestRunJobSessionPersistence:
|
|||
assert error is None
|
||||
assert final_response == "all good"
|
||||
|
||||
def test_run_job_delivers_max_iteration_fallback_summary(self, tmp_path):
|
||||
"""Cron should deliver a usable max-iteration fallback summary.
|
||||
|
||||
A cron run can exhaust the iteration budget, get a final text summary
|
||||
from the no-tools fallback call, and still have ``completed=False`` in
|
||||
the generic agent result. That should not make cron raise the report
|
||||
text as a RuntimeError.
|
||||
"""
|
||||
job = {
|
||||
"id": "summary-job",
|
||||
"name": "summary",
|
||||
"prompt": "finish the report",
|
||||
}
|
||||
fake_db = MagicMock()
|
||||
|
||||
with patch("cron.scheduler._hermes_home", tmp_path), \
|
||||
patch("cron.scheduler._resolve_origin", return_value=None), \
|
||||
patch("dotenv.load_dotenv"), \
|
||||
patch("hermes_state.SessionDB", return_value=fake_db), \
|
||||
patch(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value={
|
||||
"api_key": "***",
|
||||
"base_url": "https://example.invalid/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
},
|
||||
), \
|
||||
patch("run_agent.AIAgent") as mock_agent_cls:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {
|
||||
"final_response": "final fallback report",
|
||||
"completed": False,
|
||||
"failed": False,
|
||||
"turn_exit_reason": "max_iterations_reached(60/60)",
|
||||
}
|
||||
mock_agent_cls.return_value = mock_agent
|
||||
|
||||
success, output, final_response, error = run_job(job)
|
||||
|
||||
assert success is True
|
||||
assert error is None
|
||||
assert final_response == "final fallback report"
|
||||
assert "final fallback report" in output
|
||||
assert "(FAILED)" not in output
|
||||
|
||||
def test_tick_marks_empty_response_as_error(self, tmp_path):
|
||||
"""When run_job returns success=True but final_response is empty,
|
||||
tick() should mark the job as error so last_status != 'ok'.
|
||||
|
|
|
|||
140
tests/gateway/test_discord_sync_limit.py
Normal file
140
tests/gateway/test_discord_sync_limit.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Test Discord slash command sync respects the 100-command hard limit."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
|
||||
|
||||
def _ensure_discord_mock():
|
||||
if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
|
||||
return
|
||||
if sys.modules.get("discord") is None:
|
||||
discord_mod = MagicMock()
|
||||
discord_mod.Intents.default.return_value = MagicMock()
|
||||
sys.modules["discord"] = discord_mod
|
||||
sys.modules["discord.ext"] = MagicMock()
|
||||
sys.modules["discord.ext.commands"] = MagicMock()
|
||||
|
||||
|
||||
_ensure_discord_mock()
|
||||
|
||||
from plugins.platforms.discord.adapter import DiscordAdapter
|
||||
|
||||
|
||||
class _FakeTreeCommand:
|
||||
"""Minimal command stub matching discord.py tree command API."""
|
||||
|
||||
def __init__(self, name: str, command_type: int = 1):
|
||||
self.name = name
|
||||
self.type = command_type
|
||||
|
||||
def to_dict(self, _tree):
|
||||
return {"name": self.name, "type": self.type}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def adapter():
|
||||
"""Create a Discord adapter with mocked Discord client."""
|
||||
_ensure_discord_mock()
|
||||
config = PlatformConfig(enabled=True, token="fake-token")
|
||||
adapter = DiscordAdapter(config)
|
||||
|
||||
# Mock the Discord client and tree
|
||||
adapter._client = MagicMock()
|
||||
adapter._client.tree = MagicMock()
|
||||
adapter._client.http = AsyncMock()
|
||||
adapter._client.application_id = "test_app_id"
|
||||
|
||||
adapter._sleep_between_command_sync_mutations = AsyncMock()
|
||||
adapter._existing_command_to_payload = MagicMock(side_effect=lambda cmd: {"name": cmd.name})
|
||||
adapter._canonicalize_app_command_payload = MagicMock(side_effect=lambda p: p)
|
||||
adapter._patchable_app_command_payload = MagicMock(side_effect=lambda p: p)
|
||||
|
||||
return adapter
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_deletes_before_creating():
|
||||
"""Sync must delete obsolete commands BEFORE creating new ones.
|
||||
|
||||
Discord's 100-command limit is enforced when trying to upsert. If we
|
||||
have 100 commands on Discord, try to add 1 new one, and haven't deleted
|
||||
any yet, Discord rejects with error 30032.
|
||||
|
||||
The fix: identify and delete obsolete commands first, then create/update.
|
||||
This ensures we never temporarily exceed 100 during the sync operation.
|
||||
|
||||
This is a regression guard for the samuraiheart bug where sync would fail
|
||||
with error 30032 even though the registration code properly capped at 100.
|
||||
"""
|
||||
_ensure_discord_mock()
|
||||
config = PlatformConfig(enabled=True, token="fake-token")
|
||||
adapter = DiscordAdapter(config)
|
||||
|
||||
adapter._client = MagicMock()
|
||||
adapter._client.tree = MagicMock()
|
||||
adapter._client.http = AsyncMock()
|
||||
adapter._client.application_id = "test_app_id"
|
||||
adapter._sleep_between_command_sync_mutations = AsyncMock()
|
||||
adapter._existing_command_to_payload = MagicMock(side_effect=lambda cmd: {"name": cmd.name})
|
||||
adapter._canonicalize_app_command_payload = MagicMock(side_effect=lambda p: p)
|
||||
adapter._patchable_app_command_payload = MagicMock(side_effect=lambda p: p)
|
||||
|
||||
# Simulate having 100 commands on Discord, with 1 that's no longer desired
|
||||
# and 1 new command that should be created.
|
||||
# Existing on Discord: cmd_0, cmd_1, ..., cmd_99 (100 total)
|
||||
# Desired locally: cmd_1, cmd_2, ..., cmd_99, cmd_new (100 total)
|
||||
# So: delete cmd_0 (1 deletion), create cmd_new (1 creation)
|
||||
|
||||
existing_commands = [
|
||||
SimpleNamespace(id=f"id_{i}", name=f"cmd_{i}", type=1)
|
||||
for i in range(100)
|
||||
]
|
||||
adapter._client.tree.fetch_commands = AsyncMock(return_value=existing_commands)
|
||||
|
||||
adapter._client.tree.get_commands = MagicMock(
|
||||
return_value=[
|
||||
_FakeTreeCommand(name=f"cmd_{i}", command_type=1)
|
||||
for i in range(1, 100)
|
||||
] + [_FakeTreeCommand(name="cmd_new", command_type=1)]
|
||||
)
|
||||
|
||||
# Track the order of mutations
|
||||
mutation_log = []
|
||||
|
||||
async def mock_delete(*args):
|
||||
mutation_log.append(("delete", args[-1]))
|
||||
|
||||
async def mock_upsert(*args):
|
||||
mutation_log.append(("create", args[-1].get("name")))
|
||||
|
||||
adapter._client.http.delete_global_command = mock_delete
|
||||
adapter._client.http.upsert_global_command = mock_upsert
|
||||
adapter._client.http.edit_global_command = AsyncMock()
|
||||
|
||||
# Call sync
|
||||
await adapter._safe_sync_slash_commands()
|
||||
|
||||
# Verify that:
|
||||
# 1. A deletion happened (cmd_0)
|
||||
# 2. It happened BEFORE any creation
|
||||
# 3. The creation of cmd_new happened AFTER deletion
|
||||
deletes = [m for m in mutation_log if m[0] == "delete"]
|
||||
creates = [m for m in mutation_log if m[0] == "create"]
|
||||
|
||||
assert len(deletes) >= 1, "At least one command should be deleted"
|
||||
assert len(creates) >= 1, "At least one command should be created"
|
||||
|
||||
# The key assertion: all deletions should come before all creations.
|
||||
# Find the index of the last delete and the first create.
|
||||
last_delete_idx = max(i for i, m in enumerate(mutation_log) if m[0] == "delete")
|
||||
first_create_idx = min(i for i, m in enumerate(mutation_log) if m[0] == "create")
|
||||
|
||||
assert last_delete_idx < first_create_idx, (
|
||||
f"Deletions must happen before creations to avoid exceeding 100-command limit. "
|
||||
f"Last delete at index {last_delete_idx}, first create at index {first_create_idx}"
|
||||
)
|
||||
|
|
@ -967,6 +967,105 @@ class TestMediaDeliveryDefaultMode:
|
|||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(config_file)) is None
|
||||
|
||||
def test_denylist_blocks_google_token_default_mode(self, tmp_path, monkeypatch):
|
||||
"""Integration credentials at the HERMES_HOME root (google_token.json)
|
||||
must never be deliverable, even though they aren't the historically
|
||||
enumerated .env/auth.json/config.yaml files. Regression for a
|
||||
refreshed google_token.json being auto-attached to a Slack reply
|
||||
(#50912).
|
||||
"""
|
||||
self._patch_roots(monkeypatch)
|
||||
|
||||
fake_home = tmp_path / "home"
|
||||
hermes_dir = fake_home / ".hermes"
|
||||
hermes_dir.mkdir(parents=True)
|
||||
token = hermes_dir / "google_token.json"
|
||||
token.write_text('{"access_token": "***", "refresh_token": "***"}')
|
||||
monkeypatch.setenv("HOME", str(fake_home))
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(token)) is None
|
||||
|
||||
def test_denylist_blocks_google_token_even_when_freshly_refreshed(self, tmp_path, monkeypatch):
|
||||
"""The exploit was that the Google integration rewrites
|
||||
google_token.json every turn, bumping its mtime to ~now, so the
|
||||
strict-mode recency window (trust_recent_files) kept re-trusting it
|
||||
and it re-sent on every reply. An explicit denylist entry must win
|
||||
over recency trust.
|
||||
"""
|
||||
self._patch_roots(monkeypatch) # zero cache allowlist, strict mode on
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
|
||||
|
||||
fake_home = tmp_path / "home"
|
||||
hermes_dir = fake_home / ".hermes"
|
||||
hermes_dir.mkdir(parents=True)
|
||||
token = hermes_dir / "google_token.json"
|
||||
token.write_text('{"access_token": "***"}') # mtime = now → "recent"
|
||||
monkeypatch.setenv("HOME", str(fake_home))
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(token)) is None
|
||||
|
||||
def test_denylist_blocks_pairing_directory_contents(self, tmp_path, monkeypatch):
|
||||
"""Files under ~/.hermes/pairing/ (platform pairing tokens) are
|
||||
credential material and must not be deliverable.
|
||||
"""
|
||||
self._patch_roots(monkeypatch)
|
||||
|
||||
fake_home = tmp_path / "home"
|
||||
hermes_dir = fake_home / ".hermes"
|
||||
pairing = hermes_dir / "pairing"
|
||||
pairing.mkdir(parents=True)
|
||||
token = pairing / "telegram-approved.json"
|
||||
token.write_text('{"approved": ["123"]}')
|
||||
monkeypatch.setenv("HOME", str(fake_home))
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(token)) is None
|
||||
|
||||
def test_hermes_cache_still_delivers_under_denied_home(self, tmp_path, monkeypatch):
|
||||
"""The targeted credential denylist must not break legitimate cache
|
||||
deliveries: a generated artifact under the allowlisted cache root is
|
||||
matched before the denylist and still delivers.
|
||||
"""
|
||||
fake_home = tmp_path / "home"
|
||||
hermes_dir = fake_home / ".hermes"
|
||||
cache_dir = hermes_dir / "cache" / "documents"
|
||||
cache_dir.mkdir(parents=True)
|
||||
artifact = cache_dir / "report.pdf"
|
||||
artifact.write_bytes(b"%PDF-1.4")
|
||||
self._patch_roots(monkeypatch, cache_dir)
|
||||
monkeypatch.setenv("HOME", str(fake_home))
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(artifact)) == str(artifact.resolve())
|
||||
|
||||
def test_denylist_blocks_non_cache_file_under_hermes_home(self, tmp_path, monkeypatch):
|
||||
"""A non-credential file the agent wrote directly under ~/.hermes
|
||||
(not in a cache subdir) is still deliverable via recency trust — we
|
||||
did NOT blanket-deny the tree (per #32090/#34425). This guards against
|
||||
accidentally re-introducing the rejected whole-tree deny.
|
||||
"""
|
||||
self._patch_roots(monkeypatch) # strict mode on
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
|
||||
|
||||
fake_home = tmp_path / "home"
|
||||
hermes_dir = fake_home / ".hermes"
|
||||
hermes_dir.mkdir(parents=True)
|
||||
artifact = hermes_dir / "adhoc_report.pdf"
|
||||
artifact.write_bytes(b"%PDF-1.4") # fresh mtime
|
||||
monkeypatch.setenv("HOME", str(fake_home))
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
|
||||
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(artifact)) == str(artifact.resolve())
|
||||
|
||||
def test_strict_mode_envvar_restores_legacy_behavior(self, tmp_path, monkeypatch):
|
||||
"""Setting HERMES_MEDIA_DELIVERY_STRICT=1 reactivates the older
|
||||
allowlist+recency logic. A stale file outside the allowlist is
|
||||
|
|
|
|||
459
tests/gateway/test_telegram_prune_stale_topic_binding_31501.py
Normal file
459
tests/gateway/test_telegram_prune_stale_topic_binding_31501.py
Normal file
|
|
@ -0,0 +1,459 @@
|
|||
"""Regression tests for #31501 — prune stale Telegram DM topic bindings.
|
||||
|
||||
When a Telegram user deletes a DM topic in the client, the Bot API
|
||||
responds to the gateway's next send with ``Thread not found``. The
|
||||
adapter falls back to a plain send (no ``message_thread_id``), but
|
||||
prior to this fix it left the corresponding row in
|
||||
``telegram_dm_topic_bindings`` untouched.
|
||||
``gateway.run._recover_telegram_topic_thread_id`` then walked the
|
||||
user's bindings newest-first on every later inbound message and
|
||||
cheerfully redirected them back to the deleted topic — tool
|
||||
progress, approvals and replies all silently landed in the wrong
|
||||
place until the operator manually ran ``DELETE`` on ``state.db``.
|
||||
|
||||
The fix has three pieces — these tests pin all three:
|
||||
|
||||
1. ``SessionDB.delete_telegram_topic_binding`` — the targeted
|
||||
prune helper (new public API).
|
||||
2. ``TelegramAdapter._prune_stale_dm_topic_binding`` — the
|
||||
adapter glue that calls the helper from a send-fallback hot
|
||||
path without raising on cleanup failure.
|
||||
3. The two "Thread not found" call sites in the streaming send
|
||||
loop and the control-message helper now invoke (2) — we pin
|
||||
this with a source-level guard rather than spinning the full
|
||||
send pipeline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SessionDB.delete_telegram_topic_binding
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _seed_binding(
|
||||
db: SessionDB,
|
||||
*,
|
||||
chat_id: str = "5595856929",
|
||||
thread_id: str = "15287",
|
||||
user_id: str = "5595856929",
|
||||
session_id: str = "sess-target",
|
||||
) -> None:
|
||||
db.create_session(
|
||||
session_id=session_id,
|
||||
source="telegram",
|
||||
user_id=user_id,
|
||||
)
|
||||
db.bind_telegram_topic(
|
||||
chat_id=chat_id,
|
||||
thread_id=thread_id,
|
||||
user_id=user_id,
|
||||
session_key=f"agent:main:telegram:dm:{chat_id}:{thread_id}",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
|
||||
class TestDeleteTelegramTopicBinding:
|
||||
def test_removes_matching_row_and_returns_count(self, tmp_path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
_seed_binding(db, thread_id="15287")
|
||||
# Sanity check — binding present before prune.
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
) is not None
|
||||
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
)
|
||||
|
||||
assert removed == 1
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
) is None
|
||||
db.close()
|
||||
|
||||
def test_does_not_touch_unrelated_bindings(self, tmp_path):
|
||||
# Critical for the fix: a chat with multiple topics must
|
||||
# only lose the one Telegram confirmed deleted, never the
|
||||
# rest. Otherwise the user's healthy topics also vanish
|
||||
# from recovery's view.
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
_seed_binding(db, thread_id="15287", session_id="sess-stale")
|
||||
_seed_binding(db, thread_id="15418", session_id="sess-fresh")
|
||||
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
)
|
||||
assert removed == 1
|
||||
|
||||
# Stale binding is gone; the fresh one survives.
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
) is None
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15418",
|
||||
) is not None
|
||||
db.close()
|
||||
|
||||
def test_missing_row_returns_zero_silently(self, tmp_path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
_seed_binding(db, thread_id="15287")
|
||||
|
||||
# Different thread_id — must not raise, just report 0.
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="99999",
|
||||
)
|
||||
assert removed == 0
|
||||
# Original binding still intact.
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
) is not None
|
||||
db.close()
|
||||
|
||||
def test_pristine_database_with_no_topic_tables_is_silent_noop(self, tmp_path):
|
||||
# Fresh profile that has never run /topic — the topic-mode
|
||||
# tables don't exist yet. The send-fallback hot path can
|
||||
# still hit this code, so we must not crash.
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
# Confirm precondition: tables really aren't there.
|
||||
tables = {
|
||||
row[0]
|
||||
for row in db._conn.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' "
|
||||
"AND name LIKE 'telegram_dm%'"
|
||||
).fetchall()
|
||||
}
|
||||
assert "telegram_dm_topic_bindings" not in tables
|
||||
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id="any", thread_id="any",
|
||||
)
|
||||
assert removed == 0
|
||||
db.close()
|
||||
|
||||
def test_idempotent_under_repeated_calls(self, tmp_path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
_seed_binding(db, thread_id="15287")
|
||||
|
||||
first = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
)
|
||||
second = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
)
|
||||
|
||||
assert first == 1
|
||||
assert second == 0 # already gone, no spurious "1"
|
||||
db.close()
|
||||
|
||||
|
||||
class TestPruneClearsTopicModeWhenLastBindingGone:
|
||||
"""Proactive cleanup (#31501 follow-up): pruning the chat's final
|
||||
binding must also flip ``telegram_dm_topic_mode.enabled`` to 0 so
|
||||
recovery fully stands down — covers the user who disabled topics in
|
||||
the Telegram client without ever running ``/topic off``."""
|
||||
|
||||
def test_clears_enabled_when_last_binding_pruned(self, tmp_path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.enable_telegram_topic_mode(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
)
|
||||
_seed_binding(db, thread_id="15287")
|
||||
assert db.is_telegram_topic_mode_enabled(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
) is True
|
||||
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
)
|
||||
|
||||
assert removed == 1
|
||||
assert db.is_telegram_topic_mode_enabled(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
) is False
|
||||
db.close()
|
||||
|
||||
def test_keeps_enabled_while_other_bindings_remain(self, tmp_path):
|
||||
# Deleting one of several topics must NOT disable topic mode —
|
||||
# the chat still has healthy lanes that recovery should serve.
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.enable_telegram_topic_mode(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
)
|
||||
_seed_binding(db, thread_id="15287", session_id="sess-stale")
|
||||
_seed_binding(db, thread_id="15418", session_id="sess-fresh")
|
||||
|
||||
db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
)
|
||||
|
||||
assert db.is_telegram_topic_mode_enabled(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
) is True
|
||||
db.close()
|
||||
|
||||
def test_noop_prune_leaves_enabled_untouched(self, tmp_path):
|
||||
# A prune that matches no row must not flip the flag — there's
|
||||
# still a live binding the (wrong) thread_id didn't match.
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.enable_telegram_topic_mode(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
)
|
||||
_seed_binding(db, thread_id="15287")
|
||||
|
||||
removed = db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="99999",
|
||||
)
|
||||
|
||||
assert removed == 0
|
||||
assert db.is_telegram_topic_mode_enabled(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
) is True
|
||||
db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter glue — _prune_stale_dm_topic_binding
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _bare_adapter(db: SessionDB | None = None):
|
||||
# The adapter accesses the SessionDB via
|
||||
# ``self._session_store._db`` (set by GatewayRunner via
|
||||
# ``set_session_store``). Build a minimal stand-in with just
|
||||
# the surface the prune helper touches; we don't need the
|
||||
# python-telegram-bot import-graph here. ``name`` is a
|
||||
# property that delegates to ``platform.value.title()``, so
|
||||
# we set ``platform`` rather than poking ``name`` directly.
|
||||
from gateway.config import Platform
|
||||
from plugins.platforms.telegram.adapter import TelegramAdapter
|
||||
|
||||
adapter = object.__new__(TelegramAdapter)
|
||||
adapter.platform = Platform.TELEGRAM
|
||||
if db is not None:
|
||||
adapter._session_store = SimpleNamespace(_db=db)
|
||||
return adapter
|
||||
|
||||
|
||||
class TestPruneStaleDmTopicBindingHelper:
|
||||
def test_drops_binding_when_session_store_db_is_present(self, tmp_path):
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
_seed_binding(db, thread_id="15287")
|
||||
|
||||
adapter = _bare_adapter(db)
|
||||
adapter._prune_stale_dm_topic_binding("5595856929", 15287)
|
||||
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
) is None
|
||||
db.close()
|
||||
|
||||
def test_silent_when_session_store_unavailable(self):
|
||||
# No ``_session_store`` attribute — the helper must not
|
||||
# explode (the streaming send path hits this in tests
|
||||
# that bypass the gateway runner).
|
||||
adapter = _bare_adapter()
|
||||
adapter._prune_stale_dm_topic_binding("123", "456")
|
||||
|
||||
def test_silent_when_db_lacks_helper(self):
|
||||
# Old SessionDB without the new method (e.g. running
|
||||
# against an older state.db schema). Must be a no-op
|
||||
# rather than AttributeError.
|
||||
adapter = _bare_adapter()
|
||||
adapter._session_store = SimpleNamespace(
|
||||
_db=SimpleNamespace(), # no methods at all
|
||||
)
|
||||
adapter._prune_stale_dm_topic_binding("123", "456")
|
||||
|
||||
def test_swallows_db_exceptions_so_send_continues(self):
|
||||
class ExplodingDb:
|
||||
def delete_telegram_topic_binding(self, **_):
|
||||
raise RuntimeError("disk full or whatever")
|
||||
|
||||
adapter = _bare_adapter()
|
||||
adapter._session_store = SimpleNamespace(_db=ExplodingDb())
|
||||
|
||||
# The point of the helper is that a failed cleanup must
|
||||
# NEVER turn into a failed user-facing send. No exception
|
||||
# should escape.
|
||||
adapter._prune_stale_dm_topic_binding("123", "456")
|
||||
|
||||
def test_skips_when_chat_or_thread_missing(self, tmp_path):
|
||||
# Defensive — control-message paths sometimes call us
|
||||
# with chat_id=None when kwargs lack the key. We must
|
||||
# not produce a spurious DELETE that matches every row
|
||||
# with a NULL chat_id.
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
_seed_binding(db, thread_id="15287")
|
||||
|
||||
adapter = _bare_adapter(db)
|
||||
|
||||
adapter._prune_stale_dm_topic_binding(None, "15287")
|
||||
adapter._prune_stale_dm_topic_binding("5595856929", None)
|
||||
|
||||
# Still there — neither call generated a DELETE.
|
||||
assert db.get_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="15287",
|
||||
) is not None
|
||||
db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source-level wiring guards — both fallback sites must call the helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestThreadNotFoundFallbackSitesPruneBinding:
|
||||
"""Pin that the two ``Thread not found`` warning sites in the
|
||||
Telegram adapter actually invoke ``_prune_stale_dm_topic_binding``.
|
||||
These guards stop a future refactor from quietly losing the
|
||||
cleanup wire — re-opening #31501.
|
||||
"""
|
||||
|
||||
def test_streaming_send_fallback_calls_prune(self):
|
||||
from plugins.platforms.telegram import adapter as telegram_mod
|
||||
|
||||
src = inspect.getsource(telegram_mod.TelegramAdapter.send)
|
||||
# Locate the second-failure branch (the one that flips
|
||||
# ``used_thread_fallback``). It must invoke the prune
|
||||
# helper before flipping the flag.
|
||||
marker = "retrying without message_thread_id"
|
||||
idx = src.find(marker)
|
||||
assert idx != -1, (
|
||||
"Streaming send must keep its 'thread not found' "
|
||||
"fallback log line — the prune wiring is anchored "
|
||||
"next to it."
|
||||
)
|
||||
# 600 char window is enough to cover the warning, the
|
||||
# prune call, and the ``used_thread_fallback = True``
|
||||
# assignment that follows.
|
||||
window = src[idx:idx + 600]
|
||||
assert "_prune_stale_dm_topic_binding" in window, (
|
||||
"Streaming send 'Thread not found' fallback must call "
|
||||
"_prune_stale_dm_topic_binding so the stale row in "
|
||||
"telegram_dm_topic_bindings doesn't keep redirecting "
|
||||
"future inbound messages to the deleted topic (#31501)."
|
||||
)
|
||||
|
||||
def test_control_message_helper_calls_prune(self):
|
||||
from plugins.platforms.telegram import adapter as telegram_mod
|
||||
|
||||
src = inspect.getsource(
|
||||
telegram_mod.TelegramAdapter._send_message_with_thread_fallback
|
||||
)
|
||||
# The helper has a single retry path; the prune call
|
||||
# must sit inside it, not in dead code outside the
|
||||
# ``if message_thread_id is not None and …`` guard.
|
||||
assert "_prune_stale_dm_topic_binding" in src, (
|
||||
"_send_message_with_thread_fallback must call "
|
||||
"_prune_stale_dm_topic_binding when Telegram returns "
|
||||
"BadRequest('Thread not found') for a control message "
|
||||
"(#31501)."
|
||||
)
|
||||
# Belt-and-braces: the call must precede the retry
|
||||
# ``send_message`` so the prune happens whether or not
|
||||
# the retry itself succeeds.
|
||||
prune_idx = src.find("_prune_stale_dm_topic_binding")
|
||||
retry_idx = src.find("send_message(**retry_kwargs)")
|
||||
assert 0 <= prune_idx < retry_idx, (
|
||||
"_prune_stale_dm_topic_binding must run before the "
|
||||
"fallback send_message retry."
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end semantic — prune + recovery returns None for deleted topic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRecoveryAfterPrune:
|
||||
"""The whole point of the fix: once a topic is pruned, the
|
||||
GatewayRunner's ``_recover_telegram_topic_thread_id`` must no
|
||||
longer steer future inbound messages to it.
|
||||
"""
|
||||
|
||||
def test_recovery_no_longer_returns_pruned_topic(self, tmp_path):
|
||||
# Build the same fixture used elsewhere: two topic bindings
|
||||
# for the same user, then prune the most-recent one.
|
||||
# ``_recover_telegram_topic_thread_id`` walks bindings
|
||||
# newest-first, so without the prune it would pick the
|
||||
# one we just removed.
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.enable_telegram_topic_mode(
|
||||
chat_id="5595856929", user_id="5595856929",
|
||||
)
|
||||
|
||||
for sid, thread in (("sess-A", "111"), ("sess-B", "222")):
|
||||
db.create_session(
|
||||
session_id=sid, source="telegram",
|
||||
user_id="5595856929",
|
||||
)
|
||||
db.bind_telegram_topic(
|
||||
chat_id="5595856929",
|
||||
thread_id=thread,
|
||||
user_id="5595856929",
|
||||
session_key=build_session_key(SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="5595856929",
|
||||
chat_id="5595856929",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
thread_id=thread,
|
||||
)),
|
||||
session_id=sid,
|
||||
)
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={
|
||||
Platform.TELEGRAM: PlatformConfig(enabled=True, token="***"),
|
||||
}
|
||||
)
|
||||
runner._session_db = db
|
||||
runner._telegram_topic_mode_enabled = lambda _src: True
|
||||
|
||||
# Sanity: before the prune, recovery picks "222" (newest).
|
||||
# Recovery only fires for a lobby-shaped inbound (omitted
|
||||
# message_thread_id or General topic "1"); a non-lobby
|
||||
# unknown thread is preserved as a brand-new topic. Use the
|
||||
# General topic id so the recovery walk actually runs.
|
||||
before = runner._recover_telegram_topic_thread_id(SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="5595856929",
|
||||
chat_id="5595856929",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
thread_id="1", # General/stripped reply — triggers recovery
|
||||
))
|
||||
assert before == "222"
|
||||
|
||||
# User deletes topic 222 in Telegram → adapter prunes.
|
||||
db.delete_telegram_topic_binding(
|
||||
chat_id="5595856929", thread_id="222",
|
||||
)
|
||||
|
||||
# Now recovery falls back to topic 111 (the surviving
|
||||
# binding) instead of the dead one. This is the exact
|
||||
# behaviour change the bug report asks for.
|
||||
after = runner._recover_telegram_topic_thread_id(SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="5595856929",
|
||||
chat_id="5595856929",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
thread_id="1",
|
||||
))
|
||||
assert after == "111"
|
||||
db.close()
|
||||
66
tests/gateway/test_tui_approval_redaction.py
Normal file
66
tests/gateway/test_tui_approval_redaction.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""Regression test for TUI approval-prompt credential redaction (#48456).
|
||||
|
||||
Follow-up to #50767, which redacted the chat-platform and SSE/API approval
|
||||
transports. The TUI JSON-RPC transport is the third egress: three
|
||||
`register_gateway_notify` callbacks in `tui_gateway/server.py` emit the raw
|
||||
`approval_data` (with an unredacted `command`) to the TUI client. They now
|
||||
route through the module-level `_emit_approval_request` helper, which redacts
|
||||
`payload["command"]` via the shared `gateway.run._redact_approval_command` seam
|
||||
before emitting.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestTuiApprovalEmitRedaction:
|
||||
def test_emit_approval_request_redacts_command_in_payload(self, monkeypatch):
|
||||
from tui_gateway import server as tui_server
|
||||
|
||||
emitted = {}
|
||||
monkeypatch.setattr(
|
||||
tui_server, "_emit",
|
||||
lambda event, sid, payload=None: emitted.update(
|
||||
{"event": event, "sid": sid, "payload": payload}
|
||||
),
|
||||
)
|
||||
raw = "curl -H 'Authorization: token ghp_01...6789' https://api.github.com"
|
||||
tui_server._emit_approval_request("sess-1", {"command": raw, "description": "x"})
|
||||
|
||||
assert emitted["event"] == "approval.request"
|
||||
# credential removed, non-command field + command structure preserved
|
||||
assert "ghp_01...6789" not in emitted["payload"]["command"]
|
||||
assert emitted["payload"]["description"] == "x"
|
||||
assert "github.com" in emitted["payload"]["command"]
|
||||
|
||||
def test_emit_approval_request_handles_missing_command(self, monkeypatch):
|
||||
from tui_gateway import server as tui_server
|
||||
|
||||
emitted = {}
|
||||
monkeypatch.setattr(
|
||||
tui_server, "_emit",
|
||||
lambda event, sid, payload=None: emitted.update({"payload": payload}),
|
||||
)
|
||||
tui_server._emit_approval_request("s", {"description": "no command here"})
|
||||
assert emitted["payload"] == {"description": "no command here"}
|
||||
tui_server._emit_approval_request("s", None)
|
||||
assert emitted["payload"] == {}
|
||||
|
||||
def test_no_raw_command_emit_in_approval_registrations(self):
|
||||
"""Every register_gateway_notify approval callback must route through the
|
||||
redacting `_emit_approval_request` helper — no registration may emit the
|
||||
raw payload via `_emit("approval.request", ...)` directly. The ONLY
|
||||
allowed raw emit is inside the helper itself."""
|
||||
from tui_gateway import server as tui_server
|
||||
|
||||
src = inspect.getsource(tui_server)
|
||||
raw_emits = src.count('_emit("approval.request"')
|
||||
assert raw_emits == 1, (
|
||||
f'expected exactly 1 raw _emit("approval.request") (inside the '
|
||||
f"redacting helper), found {raw_emits} — a registration may be "
|
||||
f"emitting the unredacted command"
|
||||
)
|
||||
assert "_emit_approval_request(sid, data)" in src, (
|
||||
"registration lambdas must route through _emit_approval_request"
|
||||
)
|
||||
|
|
@ -1,26 +1,27 @@
|
|||
"""Tests for ``install_cua_driver`` upgrade semantics and architecture pre-check.
|
||||
"""Tests for ``install_cua_driver`` upgrade semantics.
|
||||
|
||||
The cua-driver upstream installer always pulls the latest release tag, so
|
||||
re-running it is the canonical upgrade path. ``install_cua_driver(upgrade=True)``
|
||||
must:
|
||||
|
||||
* Be cross-platform — run on macOS, Windows, and Linux. Only genuinely
|
||||
unsupported platforms no-op silently on upgrade so ``hermes update`` can
|
||||
call it unconditionally without warning those users.
|
||||
* Choose the right installer per OS: ``install.sh`` via ``curl | bash`` on
|
||||
macOS/Linux, ``install.ps1`` via PowerShell ``irm | iex`` on Windows.
|
||||
* Be macOS-only — no-op silently on Linux/Windows so ``hermes update`` can
|
||||
call it unconditionally without warning every non-macOS user.
|
||||
* Re-run the installer even when the binary is already on PATH (this is the
|
||||
fix for the "we only pulled cua-driver once on enable" complaint).
|
||||
* Preserve original ``upgrade=False`` behaviour for the toolset-enable flow:
|
||||
skip if installed, install otherwise, warn on unsupported platforms.
|
||||
* Pre-check architecture compatibility before downloading to avoid raw 404
|
||||
errors when the upstream release lacks an asset for this OS+arch.
|
||||
skip if installed, install otherwise, warn on non-macOS.
|
||||
|
||||
The pre-install arch probe that used to live alongside this function was
|
||||
deleted (see top-of-file comment in tools_config.py) — the upstream
|
||||
installer has CUA_DRIVER_RS_BAKED_VERSION baked in by CD and errors
|
||||
cleanly on missing-arch assets, and the upgrade path uses
|
||||
``cua_driver_update_check()`` (which shells `cua-driver check-update
|
||||
--json` against the already-installed binary).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
class TestInstallCuaDriverUpgrade:
|
||||
|
|
@ -47,8 +48,6 @@ class TestInstallCuaDriverUpgrade:
|
|||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in {"cua-driver", "curl"} else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner, \
|
||||
patch("subprocess.run"):
|
||||
|
|
@ -63,8 +62,6 @@ class TestInstallCuaDriverUpgrade:
|
|||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
|
|
@ -88,359 +85,75 @@ class TestInstallCuaDriverUpgrade:
|
|||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
|
||||
class TestCheckCuaDriverAssetForArch:
|
||||
def test_arm64_macos_always_returns_true(self):
|
||||
class TestArchProbeRemoval:
|
||||
"""Regression tests for the deletion of `_check_cua_driver_asset_for_arch`.
|
||||
|
||||
The old probe queried ``/releases/latest`` on trycua/cua and inspected
|
||||
asset names. That was wrong in two ways:
|
||||
|
||||
1. cua-driver-rs releases are marked **prerelease** on every cut, so
|
||||
``/releases/latest`` returns the Python ``cua-agent`` / ``cua-computer``
|
||||
package instead — a release with zero binary assets. The probe then
|
||||
reported "no asset for $arch" on Linux x86_64, Windows, macOS Intel,
|
||||
Linux arm64 — every non-Apple-Silicon host.
|
||||
2. Even with the right endpoint, it duplicated tag-resolution the upstream
|
||||
installer already does correctly via ``CUA_DRIVER_RS_BAKED_VERSION``
|
||||
(auto-baked by CD on every release).
|
||||
|
||||
The fix: stop probing. Trust the upstream installer for fresh installs
|
||||
(it has the baked version + correct API fallback) and the
|
||||
``cua-driver check-update --json`` MCP-binary native command for the
|
||||
upgrade path.
|
||||
"""
|
||||
|
||||
def test_probe_function_is_gone(self):
|
||||
from hermes_cli import tools_config
|
||||
assert not hasattr(tools_config, "_check_cua_driver_asset_for_arch")
|
||||
assert not hasattr(tools_config, "_latest_cua_driver_rs_release")
|
||||
|
||||
# Apple Silicon assets are always published — short-circuits without
|
||||
# a network probe.
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch("platform.machine", return_value="arm64"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_x86_64_with_asset_returns_true(self):
|
||||
def test_fresh_install_does_not_call_github_api(self):
|
||||
"""Pre-install no longer probes the GitHub API — the upstream
|
||||
``install.sh`` resolves the tag from its baked CUA_DRIVER_RS_BAKED_VERSION
|
||||
line. install.sh errors cleanly when the arch has no asset, so the
|
||||
probe was duplicate gatekeeping.
|
||||
"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
releases = [{
|
||||
"tag_name": "cua-driver-rs-v0.1.6",
|
||||
"assets": [
|
||||
{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"},
|
||||
{"name": "cua-driver-rs-0.1.6-darwin-x86_64.tar.gz"},
|
||||
],
|
||||
}]
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(releases).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_x86_64_without_asset_returns_false(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
releases = [{
|
||||
"tag_name": "cua-driver-rs-v0.1.6",
|
||||
"assets": [
|
||||
{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"},
|
||||
{"name": "cua-driver-rs.tar.gz"},
|
||||
],
|
||||
}]
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(releases).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning") as warn, \
|
||||
patch.object(tools_config, "_print_info"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is False
|
||||
warn.assert_called_once()
|
||||
assert "no Intel" in warn.call_args[0][0].lower() or "x86_64" in warn.call_args[0][0]
|
||||
|
||||
def test_x86_64_api_failure_returns_true(self):
|
||||
"""Network failure should fail open — let the installer handle it."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", side_effect=Exception("timeout")):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_fresh_install_x86_64_no_asset_skips_installer(self):
|
||||
"""When the latest release has no Intel asset, skip the installer."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
releases = [{
|
||||
"tag_name": "cua-driver-rs-v0.1.6",
|
||||
"assets": [{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"}],
|
||||
}]
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(releases).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is False
|
||||
runner.assert_not_called()
|
||||
patch("urllib.request.urlopen") as urlopen, \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is True
|
||||
runner.assert_called_once()
|
||||
urlopen.assert_not_called()
|
||||
|
||||
def test_upgrade_x86_64_no_asset_returns_existing_status(self):
|
||||
"""On upgrade with no Intel asset, return whether binary existed."""
|
||||
def test_upgrade_with_binary_does_not_call_github_api_directly(self):
|
||||
"""The upgrade path no longer hits GitHub from Python — it delegates
|
||||
to the upstream ``install.sh`` (which has the baked release tag and
|
||||
the proper API fallback). When cua-driver is already installed,
|
||||
``cua_driver_update_check()`` (added in a separate change) further
|
||||
short-circuits the network re-install via the binary's native
|
||||
``check-update --json`` verb.
|
||||
"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
releases = [{
|
||||
"tag_name": "cua-driver-rs-v0.1.6",
|
||||
"assets": [{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"}],
|
||||
}]
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(releases).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
# With binary installed — returns True (binary exists)
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in ("cua-driver", "curl") else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_not_called()
|
||||
|
||||
# Without binary — returns False
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is False
|
||||
runner.assert_not_called()
|
||||
|
||||
|
||||
class TestInstallCuaDriverWindows:
|
||||
"""install_cua_driver dispatch on Windows hosts."""
|
||||
|
||||
def test_fresh_install_runs_installer(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
# PowerShell present, cua-driver not yet installed.
|
||||
with patch("platform.system", return_value="Windows"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: r"C:\\Windows\\powershell.exe"
|
||||
if n == "powershell" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch("urllib.request.urlopen") as urlopen, \
|
||||
patch("subprocess.run"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
def test_fresh_install_without_powershell_fails(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Windows"), \
|
||||
patch.object(tools_config.shutil, "which", lambda n: None), \
|
||||
patch.object(tools_config, "_print_warning") as warn, \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is False
|
||||
runner.assert_not_called()
|
||||
# The warning should name the missing fetch tool (powershell).
|
||||
assert "powershell" in warn.call_args[0][0].lower()
|
||||
|
||||
def test_upgrade_with_binary_runs_installer(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Windows"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: r"C:\\bin\\" + n
|
||||
if n in {"cua-driver", "powershell"} else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner, \
|
||||
patch("subprocess.run"):
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_called_once()
|
||||
assert runner.call_args.kwargs.get("verbose") is False
|
||||
|
||||
def test_installer_uses_powershell_irm_command(self):
|
||||
"""_run_cua_driver_installer must shell out to PowerShell irm|iex."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
completed = MagicMock(returncode=0)
|
||||
with patch("platform.system", return_value="Windows"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: r"C:\\bin\\" + n
|
||||
if n == "cua-driver" else None), \
|
||||
patch("subprocess.run", return_value=completed) as run, \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_print_success"), \
|
||||
patch.object(tools_config, "_print_warning"):
|
||||
assert tools_config._run_cua_driver_installer() is True
|
||||
cmd = run.call_args[0][0]
|
||||
# Argument list (shell=False), not a string.
|
||||
assert isinstance(cmd, list)
|
||||
assert cmd[0] == "powershell"
|
||||
assert run.call_args.kwargs.get("shell") is False
|
||||
joined = " ".join(cmd)
|
||||
assert "install.ps1" in joined
|
||||
assert "iex" in joined
|
||||
|
||||
|
||||
class TestInstallCuaDriverLinux:
|
||||
"""install_cua_driver dispatch on Linux hosts (alpha)."""
|
||||
|
||||
def test_fresh_install_runs_installer(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
def test_upgrade_with_binary_runs_installer(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in {"cua-driver", "curl"} else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner, \
|
||||
patch("subprocess.run"):
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
def test_installer_uses_curl_bash_command(self):
|
||||
"""_run_cua_driver_installer must shell out to curl | bash install.sh."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
completed = MagicMock(returncode=0)
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n == "cua-driver" else None), \
|
||||
patch("subprocess.run", return_value=completed) as run, \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_print_success"), \
|
||||
patch.object(tools_config, "_print_warning"):
|
||||
assert tools_config._run_cua_driver_installer() is True
|
||||
cmd = run.call_args[0][0]
|
||||
assert isinstance(cmd, str) # shell string on POSIX
|
||||
assert run.call_args.kwargs.get("shell") is True
|
||||
assert "install.sh" in cmd
|
||||
assert "curl" in cmd
|
||||
|
||||
|
||||
class TestCheckCuaDriverAssetCrossPlatform:
|
||||
"""_check_cua_driver_asset_for_arch recognizes Windows/Linux asset names."""
|
||||
|
||||
@staticmethod
|
||||
def _mock_release(asset_names):
|
||||
# The probe lists /releases and picks the newest cua-driver-rs-v* tag,
|
||||
# so the mock returns a LIST of releases with that tag prefix.
|
||||
releases = [{"tag_name": "cua-driver-rs-v0.5.0",
|
||||
"assets": [{"name": n} for n in asset_names]}]
|
||||
resp = MagicMock()
|
||||
resp.read.return_value = json.dumps(releases).encode()
|
||||
resp.__enter__ = lambda s: s
|
||||
resp.__exit__ = MagicMock(return_value=False)
|
||||
return resp
|
||||
|
||||
def test_windows_amd64_with_asset_returns_true(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
resp = self._mock_release([
|
||||
"cua-driver-rs-0.5.0-windows-x86_64.zip",
|
||||
"cua-driver-rs-0.5.0-darwin-arm64.tar.gz",
|
||||
])
|
||||
with patch("platform.system", return_value="Windows"), \
|
||||
patch("platform.machine", return_value="AMD64"), \
|
||||
patch("urllib.request.urlopen", return_value=resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_windows_arm64_without_asset_returns_false(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
resp = self._mock_release([
|
||||
"cua-driver-rs-0.5.0-windows-x86_64.zip",
|
||||
])
|
||||
with patch("platform.system", return_value="Windows"), \
|
||||
patch("platform.machine", return_value="ARM64"), \
|
||||
patch("urllib.request.urlopen", return_value=resp), \
|
||||
patch.object(tools_config, "_print_warning") as warn, \
|
||||
patch.object(tools_config, "_print_info"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is False
|
||||
warn.assert_called_once()
|
||||
assert "arm64" in warn.call_args[0][0].lower()
|
||||
|
||||
def test_linux_x86_64_with_asset_returns_true(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
resp = self._mock_release([
|
||||
"cua-driver-rs-0.5.0-linux-x86_64.tar.gz",
|
||||
])
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_linux_aarch64_with_asset_returns_true(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
resp = self._mock_release([
|
||||
"cua-driver-rs-0.5.0-linux-arm64.tar.gz",
|
||||
])
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch("platform.machine", return_value="aarch64"), \
|
||||
patch("urllib.request.urlopen", return_value=resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_linux_aarch64_without_asset_returns_false(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
resp = self._mock_release([
|
||||
"cua-driver-rs-0.5.0-linux-x86_64.tar.gz",
|
||||
])
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch("platform.machine", return_value="aarch64"), \
|
||||
patch("urllib.request.urlopen", return_value=resp), \
|
||||
patch.object(tools_config, "_print_warning") as warn, \
|
||||
patch.object(tools_config, "_print_info"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is False
|
||||
warn.assert_called_once()
|
||||
|
||||
def test_releases_latest_tag_ignored_picks_driver_rs_tag(self):
|
||||
"""A non-driver tag at the head of the list must not gate the probe.
|
||||
|
||||
Regression guard: the monorepo's newest release is often a Python
|
||||
component (agent-*, computer-*) with zero binary assets. The probe
|
||||
must skip past it to the newest cua-driver-rs-v* release.
|
||||
"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
releases = [
|
||||
{"tag_name": "agent-v0.8.3", "assets": []},
|
||||
{"tag_name": "computer-v0.5.19", "assets": []},
|
||||
{"tag_name": "cua-driver-rs-v0.6.0",
|
||||
"assets": [{"name": "cua-driver-rs-0.6.0-linux-x86_64-binary.tar.gz"}]},
|
||||
]
|
||||
resp = MagicMock()
|
||||
resp.read.return_value = json.dumps(releases).encode()
|
||||
resp.__enter__ = lambda s: s
|
||||
resp.__exit__ = MagicMock(return_value=False)
|
||||
with patch("platform.system", return_value="Linux"), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
# Probe deleted — no direct GitHub API call from Python.
|
||||
urlopen.assert_not_called()
|
||||
|
|
|
|||
|
|
@ -263,6 +263,29 @@ class TestWebServerEndpoints:
|
|||
import hermes_cli.web_server as web_server
|
||||
|
||||
monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
|
||||
# A docker install inside a container should be managed externally.
|
||||
monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "docker")
|
||||
|
||||
assert web_server._dashboard_local_update_managed_externally() is True
|
||||
|
||||
def test_dashboard_update_capability_allows_git_in_container(self, monkeypatch):
|
||||
"""A git checkout inside a container (e.g. bind-mounted in hermes-webui)
|
||||
should still offer dashboard updates — the checkout is self-managed."""
|
||||
import hermes_constants
|
||||
import hermes_cli.web_server as web_server
|
||||
|
||||
monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
|
||||
monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "git")
|
||||
|
||||
assert web_server._dashboard_local_update_managed_externally() is False
|
||||
|
||||
def test_dashboard_update_capability_blocks_pip_in_container(self, monkeypatch):
|
||||
"""A pip install inside a container is still managed externally."""
|
||||
import hermes_constants
|
||||
import hermes_cli.web_server as web_server
|
||||
|
||||
monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
|
||||
monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "pip")
|
||||
|
||||
assert web_server._dashboard_local_update_managed_externally() is True
|
||||
|
||||
|
|
@ -1011,6 +1034,8 @@ class TestWebServerEndpoints:
|
|||
spawned = True
|
||||
raise AssertionError("docker update guard should not spawn hermes update")
|
||||
|
||||
# Bypass the managed-externally gate so we reach the docker install check.
|
||||
monkeypatch.setattr(web_server, "_dashboard_local_update_managed_externally", lambda: False)
|
||||
monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "docker")
|
||||
monkeypatch.setattr(web_server, "_spawn_hermes_action", fail_spawn)
|
||||
web_server._ACTION_PROCS.pop("hermes-update", None)
|
||||
|
|
|
|||
138
tests/run_agent/test_background_review_cost_controls.py
Normal file
138
tests/run_agent/test_background_review_cost_controls.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""Unit coverage for the background-review aux-model selector + routed digest.
|
||||
|
||||
Covers the two behaviors this change adds:
|
||||
• _resolve_review_runtime — auto/same-model → not routed (main model, warm
|
||||
cache); a configured different model → routed with resolved credentials.
|
||||
• _digest_history — compact replay used ONLY on the routed path (recent tail
|
||||
verbatim + a digest of older turns), preserving role alternation.
|
||||
|
||||
Pure-function / config-driven; no live model calls.
|
||||
"""
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent import background_review as br
|
||||
|
||||
|
||||
def _msg(role, content, tool_calls=None):
|
||||
m = {"role": role, "content": content}
|
||||
if tool_calls:
|
||||
m["tool_calls"] = tool_calls
|
||||
return m
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resolve_review_runtime — the aux-model selector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _FakeAgent:
|
||||
def __init__(self, provider="openai-codex", model="gpt-5.5"):
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
|
||||
def _current_main_runtime(self):
|
||||
return {
|
||||
"api_key": "parent-key",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
"api_mode": "codex_app_server",
|
||||
}
|
||||
|
||||
|
||||
def test_routing_auto_inherits_parent_and_downgrades_codex_app_server():
|
||||
agent = _FakeAgent()
|
||||
cfg = {"auxiliary": {"background_review": {"provider": "auto", "model": ""}}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
rt = br._resolve_review_runtime(agent)
|
||||
assert rt["routed"] is False
|
||||
assert rt["provider"] == "openai-codex"
|
||||
assert rt["model"] == "gpt-5.5"
|
||||
assert rt["api_mode"] == "codex_responses" # downgraded so agent-loop tools dispatch
|
||||
|
||||
|
||||
def test_routing_to_different_model_marks_routed_and_resolves_credentials():
|
||||
agent = _FakeAgent()
|
||||
cfg = {"auxiliary": {"background_review": {
|
||||
"provider": "openrouter", "model": "google/gemini-3-flash-preview",
|
||||
}}}
|
||||
fake_rp = {
|
||||
"provider": "openrouter", "api_key": "or-key",
|
||||
"base_url": "https://openrouter.ai/api/v1", "api_mode": "chat_completions",
|
||||
}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg), \
|
||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=fake_rp):
|
||||
rt = br._resolve_review_runtime(agent)
|
||||
assert rt["routed"] is True
|
||||
assert rt["provider"] == "openrouter"
|
||||
assert rt["model"] == "google/gemini-3-flash-preview"
|
||||
assert rt["api_key"] == "or-key"
|
||||
|
||||
|
||||
def test_routing_same_model_as_parent_is_not_routed():
|
||||
agent = _FakeAgent(provider="openrouter", model="anthropic/claude-opus-4.8")
|
||||
cfg = {"auxiliary": {"background_review": {
|
||||
"provider": "openrouter", "model": "anthropic/claude-opus-4.8",
|
||||
}}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
rt = br._resolve_review_runtime(agent)
|
||||
assert rt["routed"] is False # same model/provider → keep full-replay path
|
||||
|
||||
|
||||
def test_routing_resolution_failure_falls_back_to_parent():
|
||||
agent = _FakeAgent()
|
||||
cfg = {"auxiliary": {"background_review": {
|
||||
"provider": "openrouter", "model": "google/gemini-3-flash-preview",
|
||||
}}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg), \
|
||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
side_effect=RuntimeError("boom")):
|
||||
rt = br._resolve_review_runtime(agent)
|
||||
assert rt["routed"] is False
|
||||
assert rt["provider"] == "openai-codex"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _digest_history — routed-path compact replay
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_digest_under_tail_returns_full():
|
||||
msgs = [_msg("user", "hi"), _msg("assistant", "hello")]
|
||||
assert br._digest_history(msgs, tail=24) == msgs
|
||||
|
||||
|
||||
def test_digest_collapses_old_keeps_tail_verbatim():
|
||||
msgs = []
|
||||
for i in range(60):
|
||||
msgs.append(_msg("user", f"u{i} " + "x" * 50))
|
||||
msgs.append(_msg("assistant", f"a{i} " + "y" * 50))
|
||||
out = br._digest_history(msgs, tail=10)
|
||||
# First message is the synthetic digest (user role → alternation preserved).
|
||||
assert out[0]["role"] == "user"
|
||||
assert out[0]["content"].startswith("[Earlier conversation digest")
|
||||
# Recent tail preserved verbatim.
|
||||
assert out[-1] == msgs[-1]
|
||||
assert len(out) == 11 # 1 digest + 10 tail
|
||||
|
||||
|
||||
def test_digest_does_not_open_tail_on_a_tool_message():
|
||||
msgs = []
|
||||
for i in range(40):
|
||||
msgs.append(_msg("user", "u" + "x" * 50))
|
||||
msgs.append(_msg("assistant", "", tool_calls=[
|
||||
{"function": {"name": "terminal", "arguments": "{}"}}]))
|
||||
msgs.append({"role": "tool", "content": "result " + "w" * 50})
|
||||
out = br._digest_history(msgs, tail=2)
|
||||
# The verbatim tail (after the digest) must not begin on a bare tool message.
|
||||
assert out[1]["role"] != "tool"
|
||||
|
||||
|
||||
def test_digest_records_tool_names_in_arc():
|
||||
old = [
|
||||
_msg("user", "do the thing"),
|
||||
_msg("assistant", "", tool_calls=[
|
||||
{"function": {"name": "skill_view", "arguments": "{}"}},
|
||||
{"function": {"name": "patch", "arguments": "{}"}}]),
|
||||
]
|
||||
msgs = old + [_msg("user", f"tail{i}") for i in range(30)]
|
||||
out = br._digest_history(msgs, tail=10)
|
||||
digest = out[0]["content"]
|
||||
assert "USER: do the thing" in digest
|
||||
assert "tools: skill_view, patch" in digest
|
||||
|
|
@ -260,6 +260,52 @@ class TestShrinkImagePartsHelper:
|
|||
assert seen["max_dimension"] == 2000
|
||||
assert msgs[0]["content"][0]["image_url"]["url"] == shrunk
|
||||
|
||||
def test_anthropic_base64_image_source_rewritten(self, monkeypatch):
|
||||
"""Anthropic-native image blocks are shrinkable after adapter conversion."""
|
||||
agent = _make_agent()
|
||||
_install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))
|
||||
original = _big_png_data_url(100)
|
||||
_, _, original_data = original.partition(",")
|
||||
shrunk = "data:image/jpeg;base64," + "N" * 1000
|
||||
seen = {}
|
||||
|
||||
def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
|
||||
seen["mime_type"] = mime_type
|
||||
seen["max_dimension"] = max_dimension
|
||||
return shrunk
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.vision_tools._resize_image_for_vision",
|
||||
_fake_resize,
|
||||
raising=False,
|
||||
)
|
||||
|
||||
msgs = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": "image/png",
|
||||
"data": original_data,
|
||||
},
|
||||
},
|
||||
],
|
||||
}]
|
||||
changed = agent._try_shrink_image_parts_in_messages(
|
||||
msgs,
|
||||
max_dimension=2000,
|
||||
)
|
||||
source = msgs[0]["content"][0]["source"]
|
||||
|
||||
assert changed is True
|
||||
assert seen["mime_type"] == "image/png"
|
||||
assert seen["max_dimension"] == 2000
|
||||
assert source["type"] == "base64"
|
||||
assert source["media_type"] == "image/jpeg"
|
||||
assert source["data"] == "N" * 1000
|
||||
|
||||
def test_oversized_input_image_string_shape_rewritten(self, monkeypatch):
|
||||
"""OpenAI Responses shape: {type: input_image, image_url: "data:..."}."""
|
||||
agent = _make_agent()
|
||||
|
|
|
|||
|
|
@ -2139,14 +2139,18 @@ class TestStructuredElementsConsumption:
|
|||
# Markdown surface doesn't carry bounds — lossy by design.
|
||||
assert cap.elements[0].bounds == (0, 0, 0, 0)
|
||||
|
||||
def test_vision_capture_uses_get_window_state_not_removed_screenshot_tool(self):
|
||||
"""cua-driver 0.6.x returns vision screenshots from
|
||||
get_window_state(capture_mode="vision"); the old standalone
|
||||
screenshot tool is no longer available."""
|
||||
def test_vision_capture_falls_back_to_get_window_state_when_screenshot_dropped(self):
|
||||
"""cua-driver >=0.5.x dropped the standalone `screenshot` MCP tool and
|
||||
folded full-window PNG capture into `get_window_state`. When the driver
|
||||
no longer advertises `screenshot`, vision capture must route through
|
||||
`get_window_state` (discarding the AX tree) and still return a PNG."""
|
||||
from tools.computer_use.cua_backend import CuaDriverBackend
|
||||
|
||||
backend = CuaDriverBackend()
|
||||
backend._session = MagicMock()
|
||||
# Modern driver: capabilities discovered, `screenshot` not advertised.
|
||||
backend._session._has_tool.return_value = False
|
||||
backend._session.capabilities_discovered = True
|
||||
|
||||
windows_payload = {
|
||||
"windows": [{
|
||||
|
|
@ -2164,12 +2168,11 @@ class TestStructuredElementsConsumption:
|
|||
return {"data": "", "images": [], "image_mime_types": [],
|
||||
"structuredContent": windows_payload, "isError": False}
|
||||
if name == "get_window_state":
|
||||
assert args["capture_mode"] == "vision"
|
||||
return {"data": "", "images": [png_b64],
|
||||
"image_mime_types": ["image/png"],
|
||||
"structuredContent": None, "isError": False}
|
||||
if name == "screenshot":
|
||||
raise AssertionError("vision capture must not call removed screenshot tool")
|
||||
raise AssertionError("driver dropped screenshot; must not be called")
|
||||
return {"data": "", "images": [], "image_mime_types": [],
|
||||
"structuredContent": None, "isError": False}
|
||||
|
||||
|
|
@ -2182,6 +2185,8 @@ class TestStructuredElementsConsumption:
|
|||
assert cap.image_mime_type == "image/png"
|
||||
assert cap.width == 1
|
||||
assert cap.height == 1
|
||||
# Vision mode stays free of AX element noise.
|
||||
assert cap.elements == []
|
||||
|
||||
def test_capture_app_screen_targets_desktop_window(self):
|
||||
"""capture(app='screen') resolves to the OS shell/desktop window
|
||||
|
|
|
|||
109
tests/tools/test_file_tools_tilde_profile.py
Normal file
109
tests/tools/test_file_tools_tilde_profile.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
"""Regression tests for profile-aware tilde expansion in file tools.
|
||||
|
||||
The bug (#48552): in-process file tools (write_file, read_file, patch,
|
||||
search_files) resolved ``~`` via ``os.path.expanduser()``, which reads the
|
||||
gateway process's ``HOME``. In profile mode (Docker, systemd, s6) the gateway
|
||||
``HOME`` differs from the profile ``HOME`` that interactive sessions use, so
|
||||
``~`` expanded to the wrong directory and file operations failed with
|
||||
"no such file or directory".
|
||||
|
||||
The fix adds ``_expand_tilde()`` which delegates to
|
||||
``hermes_constants.get_subprocess_home()`` — the same policy the terminal tool
|
||||
uses for subprocess environments.
|
||||
|
||||
See: https://github.com/NousResearch/hermes-agent/issues/48552
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import tools.file_tools as ft
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _expand_tilde() unit tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExpandTilde:
|
||||
"""Verify the _expand_tilde() helper resolves ~ to the profile home."""
|
||||
|
||||
def test_tilde_expands_to_profile_home(self):
|
||||
"""When get_subprocess_home returns a value, ~/path uses it."""
|
||||
with patch("hermes_constants.get_subprocess_home", return_value="/opt/data/profiles/coder/home"):
|
||||
result = ft._expand_tilde("~/scratch/file.txt")
|
||||
assert result == "/opt/data/profiles/coder/home/scratch/file.txt"
|
||||
|
||||
def test_bare_tilde_expands_to_profile_home(self):
|
||||
"""Bare ~ expands to the profile home."""
|
||||
with patch("hermes_constants.get_subprocess_home", return_value="/opt/data/profiles/coder/home"):
|
||||
result = ft._expand_tilde("~")
|
||||
assert result == "/opt/data/profiles/coder/home"
|
||||
|
||||
def test_falls_back_when_no_profile_home(self):
|
||||
"""When get_subprocess_home returns None, use os.path.expanduser."""
|
||||
with patch("hermes_constants.get_subprocess_home", return_value=None):
|
||||
result = ft._expand_tilde("~/Documents")
|
||||
assert result == os.path.expanduser("~/Documents")
|
||||
|
||||
def test_other_user_tilde_not_overridden(self):
|
||||
"""~user/path must NOT use the profile home — it's a different user."""
|
||||
with patch("hermes_constants.get_subprocess_home", return_value="/opt/data/profiles/coder/home"):
|
||||
result = ft._expand_tilde("~root/file.txt")
|
||||
# Should use os.path.expanduser, not the profile home
|
||||
assert "/opt/data/profiles/coder/home" not in result
|
||||
|
||||
def test_no_tilde_unchanged(self):
|
||||
"""Paths without ~ are returned unchanged (modulo expanduser)."""
|
||||
with patch("hermes_constants.get_subprocess_home", return_value="/opt/data/profiles/coder/home"):
|
||||
result = ft._expand_tilde("/etc/passwd")
|
||||
assert result == "/etc/passwd"
|
||||
|
||||
def test_empty_path_unchanged(self):
|
||||
"""Empty string returns empty."""
|
||||
with patch("hermes_constants.get_subprocess_home", return_value="/opt/data/profiles/coder/home"):
|
||||
assert ft._expand_tilde("") == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: _resolve_path_for_task uses profile home
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResolvePathUsesProfileHome:
|
||||
"""Verify _resolve_path_for_task resolves ~ to the profile home."""
|
||||
|
||||
def test_relative_tilde_resolves_to_profile_home(self, tmp_path, monkeypatch):
|
||||
"""A ~/path argument resolves under the profile home, not process HOME."""
|
||||
profile_home = tmp_path / "profile_home"
|
||||
profile_home.mkdir()
|
||||
process_home = tmp_path / "process_home"
|
||||
process_home.mkdir()
|
||||
|
||||
monkeypatch.setenv("HOME", str(process_home))
|
||||
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
|
||||
|
||||
with patch("hermes_constants.get_subprocess_home", return_value=str(profile_home)):
|
||||
resolved = ft._resolve_path_for_task("~/test_file.txt", task_id="test")
|
||||
|
||||
assert str(resolved).startswith(str(profile_home))
|
||||
assert "process_home" not in str(resolved)
|
||||
|
||||
def test_absolute_tilde_in_workspace_root(self, tmp_path, monkeypatch):
|
||||
"""A workspace root specified with ~ resolves to profile home."""
|
||||
profile_home = tmp_path / "profile_home"
|
||||
profile_home.mkdir()
|
||||
process_home = tmp_path / "process_home"
|
||||
process_home.mkdir()
|
||||
|
||||
monkeypatch.setenv("HOME", str(process_home))
|
||||
monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
|
||||
|
||||
with patch("hermes_constants.get_subprocess_home", return_value=str(profile_home)):
|
||||
# _resolve_base_dir uses the workspace root from config; if it contains ~,
|
||||
# it should resolve to profile home
|
||||
resolved = ft._resolve_path_for_task("~/data/config.json", task_id="test")
|
||||
|
||||
assert str(profile_home) in str(resolved)
|
||||
assert str(process_home) not in str(resolved)
|
||||
|
|
@ -107,6 +107,63 @@ def test_memory_gate_on_then_apply(hermes_home):
|
|||
assert "approved entry" in store.user_entries[0]
|
||||
|
||||
|
||||
def test_cli_memory_approve_without_live_agent_uses_fresh_store(hermes_home, capsys):
|
||||
"""#46783: ``/memory approve`` from a context with no live agent (e.g. the
|
||||
Desktop GUI) passed ``memory_store=None`` into the shared handler, which
|
||||
returned "memory store unavailable" and applied nothing. The CLI handler must
|
||||
fall back to a freshly loaded on-disk store, like the gateway path does."""
|
||||
import json
|
||||
from tools.memory_tool import memory_tool, MemoryStore
|
||||
from tools import write_approval as wa
|
||||
from hermes_cli.cli_commands_mixin import CLICommandsMixin
|
||||
|
||||
_set_approval("memory", True)
|
||||
staging = MemoryStore(); staging.load_from_disk()
|
||||
r = json.loads(memory_tool("add", "memory", "remember the launch date", store=staging))
|
||||
assert r.get("pending_id"), r
|
||||
assert wa.pending_count("memory") == 1
|
||||
|
||||
# Bare CLI handler with no live agent → store resolves to None pre-fix.
|
||||
handler = CLICommandsMixin.__new__(CLICommandsMixin)
|
||||
handler.agent = None
|
||||
handler._handle_memory_command("/memory approve all")
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "memory store unavailable" not in out, out
|
||||
assert "Approved 1" in out, out
|
||||
assert wa.pending_count("memory") == 0
|
||||
# The approved write landed in a freshly loaded on-disk store (MEMORY.md).
|
||||
reloaded = MemoryStore(); reloaded.load_from_disk()
|
||||
assert any("remember the launch date" in e for e in reloaded.memory_entries)
|
||||
|
||||
|
||||
def test_load_on_disk_store_honors_configured_char_limits(hermes_home, monkeypatch):
|
||||
"""load_on_disk_store() must read memory.memory_char_limit /
|
||||
user_char_limit from config so approvals applied without a live agent
|
||||
enforce the SAME caps as the live agent (agent_init.py). Falls back to
|
||||
defaults when config can't be loaded.
|
||||
"""
|
||||
from tools.memory_tool import load_on_disk_store
|
||||
|
||||
# Config override path: helper picks up the configured limits.
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"memory": {"memory_char_limit": 999, "user_char_limit": 444}},
|
||||
)
|
||||
store = load_on_disk_store()
|
||||
assert store.memory_char_limit == 999
|
||||
assert store.user_char_limit == 444
|
||||
|
||||
# Failure path: config raises → defaults, never blows up.
|
||||
def _boom():
|
||||
raise RuntimeError("no config")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.config.load_config", _boom)
|
||||
fallback = load_on_disk_store()
|
||||
assert fallback.memory_char_limit == 2200
|
||||
assert fallback.user_char_limit == 1375
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Skill gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -746,6 +746,28 @@ class _CuaDriverSession:
|
|||
return capability in self._capabilities.get(tool, set())
|
||||
return any(capability in caps for caps in self._capabilities.values())
|
||||
|
||||
def _has_tool(self, name: str) -> bool:
|
||||
"""Return True when ``tools/list`` advertised a tool by this name.
|
||||
|
||||
Used to route capture(): cua-driver dropped the standalone
|
||||
``screenshot`` tool and folded full-window PNG capture into
|
||||
``get_window_state`` (whose own description notes it "Also captures
|
||||
a PNG screenshot of the specified window"). Older drivers that still
|
||||
expose ``screenshot`` keep using it; newer ones fall through to
|
||||
``get_window_state``.
|
||||
|
||||
Returns False when discovery hasn't populated the map yet — callers
|
||||
treat that as "unknown" and probe defensively rather than trusting it.
|
||||
"""
|
||||
return name in self._capabilities
|
||||
|
||||
@property
|
||||
def capabilities_discovered(self) -> bool:
|
||||
"""True once ``tools/list`` populated the per-tool map. When False,
|
||||
``_has_tool`` answers are not trustworthy (discovery failed or the
|
||||
session hasn't started) and capture() should probe defensively."""
|
||||
return bool(self._capabilities)
|
||||
|
||||
@property
|
||||
def capability_version(self) -> str:
|
||||
"""Driver-advertised capability vocabulary version (empty string
|
||||
|
|
@ -848,6 +870,45 @@ def _extract_tool_result(mcp_result: Any) -> Dict[str, Any]:
|
|||
}
|
||||
|
||||
|
||||
def _image_from_tool_result(out: Dict[str, Any]) -> tuple[Optional[str], Optional[str]]:
|
||||
"""Pull a (png_b64, mime_type) pair out of a flattened tool result.
|
||||
|
||||
cua-driver delivers window screenshots in two shapes depending on tool +
|
||||
transport:
|
||||
|
||||
* As an MCP ``image`` content part — surfaced by ``_extract_tool_result``
|
||||
in ``out["images"]`` with a parallel ``image_mime_types`` entry. This
|
||||
is what ``get_window_state`` emits over the stdio MCP transport.
|
||||
* As a base64 field inside ``structuredContent`` —
|
||||
``screenshot_png_b64`` (+ ``screenshot_mime_type``). This is what
|
||||
``get_window_state`` returns when its structured payload carries the
|
||||
image instead of a content part (newer driver builds; also the shape
|
||||
seen via the ``cua-driver call`` CLI surface).
|
||||
|
||||
Checking both makes capture() robust to either delivery shape, so the
|
||||
image never silently drops just because the driver moved it between the
|
||||
content list and structuredContent. Returns ``(None, None)`` when neither
|
||||
location carries an image.
|
||||
"""
|
||||
images = out.get("images") or []
|
||||
if images and images[0]:
|
||||
mimes = out.get("image_mime_types") or []
|
||||
mime = mimes[0] if mimes and mimes[0] else None
|
||||
return images[0], mime
|
||||
|
||||
structured = out.get("structuredContent") or {}
|
||||
b64 = structured.get("screenshot_png_b64") or structured.get("png_b64")
|
||||
if b64:
|
||||
mime = (
|
||||
structured.get("screenshot_mime_type")
|
||||
or structured.get("mime_type")
|
||||
or None
|
||||
)
|
||||
return b64, mime
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# The backend itself
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -1062,28 +1123,61 @@ class CuaDriverBackend(ComputerUseBackend):
|
|||
window_title = ""
|
||||
|
||||
if mode == "vision":
|
||||
# Newer cua-driver releases no longer expose a standalone
|
||||
# `screenshot` MCP tool. Request a screenshot-only capture via
|
||||
# get_window_state instead; this keeps vision mode working while
|
||||
# avoiding the AX walk used by som/ax captures.
|
||||
sc_out = self._session.call_tool(
|
||||
"get_window_state",
|
||||
{
|
||||
"pid": self._active_pid,
|
||||
"window_id": self._active_window_id,
|
||||
"capture_mode": "vision",
|
||||
"session": self._session_id,
|
||||
},
|
||||
# Plain screenshot, no AX walk. cua-driver dropped the standalone
|
||||
# `screenshot` tool (≥0.5.x) and folded full-window PNG capture
|
||||
# into `get_window_state`. Route accordingly:
|
||||
# * Driver advertises `screenshot` (older builds) → use it; it's
|
||||
# the cheapest path (no AX tree walked server-side).
|
||||
# * Otherwise (current drivers) → call `get_window_state` but
|
||||
# DISCARD the AX tree/elements, returning only the PNG. Vision
|
||||
# mode's whole contract is "just the pixels, no element noise",
|
||||
# so we drop everything but the image.
|
||||
# When capability discovery hasn't run (empty map), we don't trust
|
||||
# a negative `_has_tool` answer — we still try `screenshot` first
|
||||
# and fall back if the driver rejects it, so the path self-heals on
|
||||
# any driver version.
|
||||
use_screenshot = (
|
||||
self._session._has_tool("screenshot")
|
||||
or not self._session.capabilities_discovered
|
||||
)
|
||||
if sc_out["images"]:
|
||||
png_b64 = sc_out["images"][0]
|
||||
# Pick up the explicit mimeType cua-driver attaches to image
|
||||
# parts (Surface 7). Empty string means the driver didn't
|
||||
# carry one — callers will fall back to magic-byte sniffing.
|
||||
mimes = sc_out.get("image_mime_types") or []
|
||||
image_mime_type = mimes[0] if mimes and mimes[0] else None
|
||||
sc_out: Optional[Dict[str, Any]] = None
|
||||
if use_screenshot:
|
||||
sc_out = self._session.call_tool(
|
||||
"screenshot",
|
||||
{
|
||||
"window_id": self._active_window_id,
|
||||
"format": "jpeg",
|
||||
"quality": 85,
|
||||
"session": self._session_id,
|
||||
},
|
||||
)
|
||||
png_b64, image_mime_type = _image_from_tool_result(sc_out)
|
||||
if not png_b64:
|
||||
# Driver had no usable `screenshot` (e.g. "Unknown tool:
|
||||
# screenshot" on ≥0.5.x, or an empty image part). Fall
|
||||
# through to the get_window_state path below.
|
||||
sc_out = None
|
||||
|
||||
if sc_out is None:
|
||||
gws_out = self._session.call_tool(
|
||||
"get_window_state",
|
||||
{
|
||||
"pid": self._active_pid,
|
||||
"window_id": self._active_window_id,
|
||||
"session": self._session_id,
|
||||
},
|
||||
)
|
||||
png_b64, image_mime_type = _image_from_tool_result(gws_out)
|
||||
# Still grab the window title — it's cheap and useful in the
|
||||
# vision response — but deliberately leave `elements` empty so
|
||||
# vision stays free of AX-tree noise.
|
||||
text = gws_out["data"] if isinstance(gws_out["data"], str) else ""
|
||||
_, tree = _split_tree_text(text)
|
||||
wt = re.search(r'AXWindow\s+"([^"]+)"', tree)
|
||||
if wt:
|
||||
window_title = wt.group(1)
|
||||
else:
|
||||
# get_window_state: AX tree + optional screenshot.
|
||||
# get_window_state: AX tree + screenshot.
|
||||
gws_out = self._session.call_tool(
|
||||
"get_window_state",
|
||||
{
|
||||
|
|
@ -1120,10 +1214,10 @@ class CuaDriverBackend(ComputerUseBackend):
|
|||
if e.element_token
|
||||
}
|
||||
|
||||
if gws_out["images"]:
|
||||
png_b64 = gws_out["images"][0]
|
||||
mimes = gws_out.get("image_mime_types") or []
|
||||
image_mime_type = mimes[0] if mimes and mimes[0] else None
|
||||
# Image may arrive as an MCP image part or inside
|
||||
# structuredContent (screenshot_png_b64) depending on the driver
|
||||
# build — _image_from_tool_result handles both.
|
||||
png_b64, image_mime_type = _image_from_tool_result(gws_out)
|
||||
|
||||
# Extract window title from the AX tree first AXWindow line.
|
||||
wt = re.search(r'AXWindow\s+"([^"]+)"', tree)
|
||||
|
|
|
|||
189
tools/computer_use/permissions.py
Normal file
189
tools/computer_use/permissions.py
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
"""
|
||||
Cross-platform Computer Use readiness + macOS permission helpers.
|
||||
|
||||
cua-driver runs on macOS, Windows, and Linux, but "ready to drive" means
|
||||
something different on each:
|
||||
|
||||
* macOS — explicit TCC grants (Accessibility + Screen Recording). cua-driver
|
||||
reports/requests them via ``permissions status`` / ``permissions grant``.
|
||||
The grants attach to cua-driver's OWN identity (``com.trycua.driver`` /
|
||||
the installed ``CuaDriver.app``), NOT Hermes — so no Hermes entitlement is
|
||||
involved, and ``grant`` launches CuaDriver via LaunchServices so the macOS
|
||||
dialog is attributed correctly.
|
||||
* Windows — no TCC toggles; the UIAccess worker (``cua-driver-uia.exe``) may
|
||||
trip a SmartScreen prompt on first run. Readiness == driver health.
|
||||
* Linux — assistive control via the X11/XWayland stack. Readiness == driver
|
||||
health.
|
||||
|
||||
The universal signal on every platform is ``cua-driver doctor --json`` (binary
|
||||
integrity + platform support). ``computer_use_status`` folds that together with
|
||||
the macOS permission detail into one payload for the desktop card, the
|
||||
``hermes computer-use permissions`` CLI, and ``/api/tools/computer-use/status``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Platforms with a cua-driver runtime backend (mirrors the toolset platform_gate).
|
||||
_RUNTIME_PLATFORMS = frozenset({"darwin", "win32", "linux"})
|
||||
_BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable")
|
||||
|
||||
|
||||
def _driver_cmd(override: Optional[str]) -> str:
|
||||
if override:
|
||||
return override
|
||||
try:
|
||||
from hermes_cli.tools_config import _cua_driver_cmd
|
||||
|
||||
return _cua_driver_cmd()
|
||||
except Exception:
|
||||
return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver"
|
||||
|
||||
|
||||
def _child_env() -> Dict[str, str]:
|
||||
"""cua-driver child env honoring the Hermes telemetry opt-in policy."""
|
||||
try:
|
||||
from tools.computer_use.cua_backend import cua_driver_child_env
|
||||
|
||||
return cua_driver_child_env()
|
||||
except Exception:
|
||||
return dict(os.environ)
|
||||
|
||||
|
||||
def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess:
|
||||
return subprocess.run(
|
||||
[binary, *args],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout,
|
||||
env=_child_env(),
|
||||
stdin=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
|
||||
def _json_out(binary: str, *args: str, timeout: float) -> Any:
|
||||
"""Run ``binary args`` and parse stdout as JSON, or ``None`` on any failure."""
|
||||
raw = (_run(binary, *args, timeout=timeout).stdout or "").strip()
|
||||
return json.loads(raw) if raw else None
|
||||
|
||||
|
||||
def _doctor(binary: str) -> Optional[Dict[str, Any]]:
|
||||
"""``cua-driver doctor --json`` → ``{ok, checks:[{label,status,message}]}``."""
|
||||
try:
|
||||
data = _json_out(binary, "doctor", "--json", timeout=12)
|
||||
except Exception:
|
||||
return None
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
checks: List[Dict[str, str]] = [
|
||||
{
|
||||
"label": str(p.get("label", "")),
|
||||
"status": str(p.get("status", "")),
|
||||
"message": str(p.get("message", "")),
|
||||
}
|
||||
for p in data.get("probes", [])
|
||||
if isinstance(p, dict)
|
||||
]
|
||||
return {"ok": bool(data.get("ok")), "checks": checks}
|
||||
|
||||
|
||||
def _mac_permissions(binary: str, out: Dict[str, Any]) -> None:
|
||||
"""Fold ``cua-driver permissions status --json`` booleans into ``out``."""
|
||||
try:
|
||||
data = _json_out(binary, "permissions", "status", "--json", timeout=10)
|
||||
except subprocess.TimeoutExpired:
|
||||
out["error"] = "cua-driver permissions status timed out"
|
||||
return
|
||||
except Exception as exc: # spawn failure or malformed JSON
|
||||
out["error"] = f"cua-driver permissions status failed: {exc}"
|
||||
return
|
||||
if isinstance(data, dict):
|
||||
out.update({k: data[k] for k in _BOOLS if isinstance(data.get(k), bool)})
|
||||
if isinstance(data.get("source"), dict):
|
||||
out["source"] = data["source"]
|
||||
|
||||
|
||||
def computer_use_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Unified, OS-aware Computer Use readiness for the desktop card.
|
||||
|
||||
``ready`` is the single signal the UI keys off: on macOS it's both TCC
|
||||
grants; elsewhere it's driver health (no TCC model). ``None`` means
|
||||
unknown (binary missing / probe failed). ``can_grant`` is macOS-only.
|
||||
"""
|
||||
plat = sys.platform
|
||||
binary = shutil.which(_driver_cmd(driver_cmd))
|
||||
out: Dict[str, Any] = {
|
||||
"platform": plat,
|
||||
"platform_supported": plat in _RUNTIME_PLATFORMS,
|
||||
"installed": bool(binary),
|
||||
"version": None,
|
||||
"ready": None,
|
||||
"can_grant": plat == "darwin",
|
||||
"checks": [],
|
||||
"source": None,
|
||||
"error": None,
|
||||
**{k: None for k in _BOOLS},
|
||||
}
|
||||
if not binary:
|
||||
return out
|
||||
|
||||
try:
|
||||
out["version"] = (_run(binary, "--version", timeout=5).stdout or "").strip() or None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
doctor = _doctor(binary)
|
||||
if doctor is not None:
|
||||
out["checks"] = doctor["checks"]
|
||||
|
||||
if plat == "darwin":
|
||||
_mac_permissions(binary, out)
|
||||
if out["error"] is None:
|
||||
out["ready"] = out["accessibility"] is True and out["screen_recording"] is True
|
||||
elif doctor is not None:
|
||||
# No TCC model off macOS — readiness is driver health.
|
||||
out["ready"] = doctor["ok"]
|
||||
return out
|
||||
|
||||
|
||||
def request_permissions_grant(driver_cmd: Optional[str] = None) -> int:
|
||||
"""Run ``cua-driver permissions grant`` (macOS); stream its output.
|
||||
|
||||
Launches CuaDriver via LaunchServices so the TCC dialog is attributed to
|
||||
``com.trycua.driver``, then waits for the grant. Returns the driver's exit
|
||||
code (0 ok), 2 if the binary is missing, 64 on a non-macOS platform (which
|
||||
has no TCC permission model to grant).
|
||||
"""
|
||||
if sys.platform != "darwin":
|
||||
print("Computer Use permissions are a macOS concept; nothing to grant here.")
|
||||
return 64
|
||||
|
||||
binary = shutil.which(_driver_cmd(driver_cmd))
|
||||
if not binary:
|
||||
print("cua-driver: not installed. Run: hermes computer-use install")
|
||||
return 2
|
||||
|
||||
print(
|
||||
"Requesting Accessibility + Screen Recording for CuaDriver.\n"
|
||||
"macOS will show a dialog attributed to CuaDriver (com.trycua.driver) — "
|
||||
"approve it, then return here."
|
||||
)
|
||||
try:
|
||||
return int(
|
||||
subprocess.run(
|
||||
[binary, "permissions", "grant"],
|
||||
env=_child_env(),
|
||||
stdin=subprocess.DEVNULL,
|
||||
).returncode
|
||||
)
|
||||
except KeyboardInterrupt: # pragma: no cover - interactive
|
||||
return 130
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
print(f"cua-driver permissions grant failed: {exc}", file=sys.stderr)
|
||||
return 2
|
||||
|
|
@ -539,6 +539,7 @@ def cronjob(
|
|||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
no_agent: Optional[bool] = None,
|
||||
profile: Optional[str] = None,
|
||||
task_id: str = None,
|
||||
) -> str:
|
||||
"""Unified cron job management tool."""
|
||||
|
|
@ -605,6 +606,7 @@ def cronjob(
|
|||
enabled_toolsets=enabled_toolsets or None,
|
||||
workdir=_normalize_optional_job_value(workdir),
|
||||
no_agent=_no_agent,
|
||||
profile=_normalize_optional_job_value(profile),
|
||||
)
|
||||
_notify_provider_jobs_changed_safe()
|
||||
return json.dumps(
|
||||
|
|
|
|||
|
|
@ -23,6 +23,29 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
_EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
|
||||
|
||||
|
||||
def _expand_tilde(path: str) -> str:
|
||||
"""Expand ``~`` using the effective profile home when available.
|
||||
|
||||
In-process file tools share the gateway process's HOME, which may differ
|
||||
from the profile-specific HOME that interactive CLI sessions use. This
|
||||
mirrors ``hermes_constants.get_subprocess_home()`` so that ``~`` resolves
|
||||
consistently regardless of whether the tool runs interactively or inside a
|
||||
gateway-driven cron job (#48552).
|
||||
"""
|
||||
if not path or "~" not in path:
|
||||
return path
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
home = get_subprocess_home()
|
||||
except Exception:
|
||||
home = None
|
||||
if home and (path == "~" or path.startswith("~/")):
|
||||
return home if path == "~" else os.path.join(home, path[2:])
|
||||
return os.path.expanduser(path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Read-size guard: cap the character count returned to the model.
|
||||
# We're model-agnostic so we can't count tokens; characters are a safe proxy.
|
||||
|
|
@ -107,7 +130,7 @@ def _sentinel_free_abs_cwd(raw: str | None) -> str | None:
|
|||
raw = str(raw or "").strip()
|
||||
if raw.lower() in _TERMINAL_CWD_SENTINELS:
|
||||
return None
|
||||
expanded = os.path.expanduser(raw)
|
||||
expanded = _expand_tilde(raw)
|
||||
if not os.path.isabs(expanded):
|
||||
return None
|
||||
return expanded
|
||||
|
|
@ -222,7 +245,7 @@ def _resolve_base_dir(task_id: str = "default") -> Path:
|
|||
"""
|
||||
root = _authoritative_workspace_root(task_id)
|
||||
if root:
|
||||
base = Path(root).expanduser()
|
||||
base = Path(_expand_tilde(root))
|
||||
else:
|
||||
base = Path(os.getcwd())
|
||||
if not base.is_absolute():
|
||||
|
|
@ -239,7 +262,7 @@ def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
|
|||
See :func:`_resolve_base_dir` for how the base is chosen. Absolute input
|
||||
paths are returned resolved-but-unanchored.
|
||||
"""
|
||||
p = Path(filepath).expanduser()
|
||||
p = Path(_expand_tilde(filepath))
|
||||
if p.is_absolute():
|
||||
return p.resolve()
|
||||
return (_resolve_base_dir(task_id) / p).resolve()
|
||||
|
|
@ -261,12 +284,12 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa
|
|||
(no ``cd`` run yet) is warned on the very first write.
|
||||
"""
|
||||
try:
|
||||
if Path(filepath).expanduser().is_absolute():
|
||||
if Path(_expand_tilde(filepath)).is_absolute():
|
||||
return None
|
||||
workspace_root = _authoritative_workspace_root(task_id)
|
||||
if not workspace_root:
|
||||
return None # No authoritative workspace root to compare against.
|
||||
root = Path(workspace_root).expanduser().resolve()
|
||||
root = Path(_expand_tilde(workspace_root)).resolve()
|
||||
# Is `resolved` inside `root`?
|
||||
try:
|
||||
resolved.relative_to(root)
|
||||
|
|
@ -285,7 +308,7 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa
|
|||
|
||||
def _is_blocked_device_path(path: str) -> bool:
|
||||
"""Return True for concrete device/fd paths that can hang reads."""
|
||||
normalized = os.path.normpath(os.path.expanduser(path))
|
||||
normalized = os.path.normpath(_expand_tilde(path))
|
||||
if normalized in _BLOCKED_DEVICE_PATHS:
|
||||
return True
|
||||
# /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
|
||||
|
|
@ -309,7 +332,7 @@ def _is_blocked_device(filepath: str, base_dir: str | Path | None = None) -> boo
|
|||
they resolve to terminal-specific paths. Then check each symlink hop before
|
||||
the final resolved path so aliases to devices cannot bypass the guard.
|
||||
"""
|
||||
expanded = os.path.expanduser(filepath)
|
||||
expanded = _expand_tilde(filepath)
|
||||
if base_dir is not None and not os.path.isabs(expanded):
|
||||
expanded = os.path.join(os.fspath(base_dir), expanded)
|
||||
normalized = os.path.normpath(expanded)
|
||||
|
|
@ -365,7 +388,7 @@ def _get_hermes_config_resolved() -> str | None:
|
|||
_hermes_config_resolved = str(get_config_path().resolve())
|
||||
except Exception:
|
||||
try:
|
||||
_hermes_config_resolved = str(Path("~/.hermes/config.yaml").expanduser().resolve())
|
||||
_hermes_config_resolved = str(Path(_expand_tilde("~/.hermes/config.yaml")).resolve())
|
||||
except Exception:
|
||||
_hermes_config_resolved = None
|
||||
return _hermes_config_resolved
|
||||
|
|
@ -377,7 +400,7 @@ def _check_sensitive_path(filepath: str, task_id: str = "default") -> str | None
|
|||
resolved = str(_resolve_path_for_task(filepath, task_id))
|
||||
except (OSError, ValueError):
|
||||
resolved = filepath
|
||||
normalized = os.path.normpath(os.path.expanduser(filepath))
|
||||
normalized = os.path.normpath(_expand_tilde(filepath))
|
||||
_err = (
|
||||
f"Refusing to write to sensitive system path: {filepath}\n"
|
||||
"Use the terminal tool with sudo if you need to modify system files."
|
||||
|
|
|
|||
|
|
@ -1184,11 +1184,13 @@ IMAGE_GENERATE_SCHEMA = {
|
|||
"`reference_image_urls` for style/composition references; omit both "
|
||||
"for text-to-image. The underlying backend (FAL, OpenAI, xAI, etc.) "
|
||||
"and model are user-configured and not selectable by the agent. "
|
||||
"Returns either a URL or an absolute file path in the `image` field; "
|
||||
"display it with markdown  and the gateway "
|
||||
"will deliver it. When the active terminal backend has a different "
|
||||
"filesystem, successful local-file results may also include "
|
||||
"`agent_visible_image` for follow-up terminal/file operations."
|
||||
"Returns the result in the `image` field — either a URL or an absolute "
|
||||
"file path. To show it to the user, reference that path/URL in your "
|
||||
"response using the file-delivery convention for the current platform "
|
||||
"(your platform guidance describes how files are delivered here). When "
|
||||
"the active terminal backend has a different filesystem, successful "
|
||||
"local-file results may also include `agent_visible_image` for "
|
||||
"follow-up terminal/file operations."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
|
|
|
|||
|
|
@ -731,6 +731,38 @@ class MemoryStore:
|
|||
raise RuntimeError(f"Failed to write memory file {path}: {e}")
|
||||
|
||||
|
||||
def load_on_disk_store() -> "MemoryStore":
|
||||
"""Build a fresh on-disk :class:`MemoryStore`, honoring configured char limits.
|
||||
|
||||
Use this from any context that has no live agent (the messaging gateway, the
|
||||
Desktop GUI, the bare CLI ``/memory`` handler) but still needs to read or
|
||||
apply approved memory writes. Mirrors how the live agent constructs its store
|
||||
in ``agent/agent_init.py`` — including the user's ``memory.memory_char_limit``
|
||||
/ ``memory.user_char_limit`` overrides — so an approval applied without a live
|
||||
agent enforces the SAME caps as one applied with one.
|
||||
|
||||
Falls back to the built-in defaults if config can't be loaded, so this can
|
||||
never raise on a missing/unreadable config.
|
||||
"""
|
||||
memory_char_limit = 2200
|
||||
user_char_limit = 1375
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
mem_cfg = (load_config() or {}).get("memory", {}) or {}
|
||||
memory_char_limit = int(mem_cfg.get("memory_char_limit", memory_char_limit))
|
||||
user_char_limit = int(mem_cfg.get("user_char_limit", user_char_limit))
|
||||
except Exception:
|
||||
pass # config optional — fall back to defaults rather than break /memory
|
||||
|
||||
store = MemoryStore(
|
||||
memory_char_limit=memory_char_limit,
|
||||
user_char_limit=user_char_limit,
|
||||
)
|
||||
store.load_from_disk()
|
||||
return store
|
||||
|
||||
|
||||
def _apply_write_gate(action: str, target: str, content: Optional[str],
|
||||
old_text: Optional[str]) -> Optional[str]:
|
||||
"""Evaluate the memory write gate. Returns a JSON tool-result string when
|
||||
|
|
|
|||
|
|
@ -419,9 +419,11 @@ _GENERIC_DESCRIPTION = (
|
|||
"endpoint. The backend and model family are user-configured via "
|
||||
"`hermes tools` → Video Generation; the agent does not pick them. "
|
||||
"Long-running generations may take 30 seconds to several minutes — "
|
||||
"the call blocks until the video is ready. Returns either an HTTP "
|
||||
"URL or an absolute file path in the `video` field; display it with "
|
||||
"markdown  and the gateway will deliver it."
|
||||
"the call blocks until the video is ready. Returns the result in the "
|
||||
"`video` field — either an HTTP URL or an absolute file path. To show "
|
||||
"it to the user, reference that path/URL in your response using the "
|
||||
"file-delivery convention for the current platform (your platform "
|
||||
"guidance describes how files are delivered here)."
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -806,6 +806,21 @@ def _emit(event: str, sid: str, payload: dict | None = None):
|
|||
write_json({"jsonrpc": "2.0", "method": "event", "params": params})
|
||||
|
||||
|
||||
def _emit_approval_request(sid: str, data: dict | None) -> None:
|
||||
"""Emit an ``approval.request`` event to the TUI client with the command
|
||||
redacted. The approval payload is built from the RAW command string, so a
|
||||
credential-shaped value Tirith flagged would otherwise be echoed verbatim
|
||||
to the TUI client (#48456 — third egress transport alongside the chat
|
||||
platforms and the SSE/API stream fixed in #50767). Reuse the shared gateway
|
||||
seam so all approval transports redact consistently."""
|
||||
payload = dict(data or {})
|
||||
if "command" in payload:
|
||||
from gateway.run import _redact_approval_command
|
||||
|
||||
payload["command"] = _redact_approval_command(payload.get("command"))
|
||||
_emit("approval.request", sid, payload)
|
||||
|
||||
|
||||
def _status_update(sid: str, kind: str, text: str | None = None):
|
||||
body = (text if text is not None else kind).strip()
|
||||
if not body:
|
||||
|
|
@ -1040,7 +1055,7 @@ def _start_agent_build(sid: str, session: dict) -> None:
|
|||
)
|
||||
|
||||
register_gateway_notify(
|
||||
key, lambda data: _emit("approval.request", sid, data)
|
||||
key, lambda data: _emit_approval_request(sid, data)
|
||||
)
|
||||
notify_registered = True
|
||||
load_permanent_allowlist()
|
||||
|
|
@ -2554,7 +2569,7 @@ def _sync_session_key_after_compress(
|
|||
try:
|
||||
register_gateway_notify(
|
||||
new_session_id,
|
||||
lambda data: _emit("approval.request", sid, data),
|
||||
lambda data: _emit_approval_request(sid, data),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
|
@ -3916,7 +3931,7 @@ def _init_session(
|
|||
try:
|
||||
from tools.approval import register_gateway_notify, load_permanent_allowlist
|
||||
|
||||
register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
|
||||
register_gateway_notify(key, lambda data: _emit_approval_request(sid, data))
|
||||
load_permanent_allowlist()
|
||||
except Exception:
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -270,6 +270,31 @@ display:
|
|||
> writes to your memory/skill stores, are unaffected by this setting. Set it
|
||||
> per-platform via `display.platforms.<platform>.memory_notifications`.
|
||||
|
||||
## Running the review on a cheaper model (`auxiliary.background_review`)
|
||||
|
||||
The review runs on your **main chat model** by default, replaying the
|
||||
conversation — which is already warm in the prompt cache, so it's cheap cache
|
||||
reads. On an expensive main model you can run the review on a cheaper model
|
||||
instead:
|
||||
|
||||
```yaml
|
||||
auxiliary:
|
||||
background_review:
|
||||
provider: openrouter
|
||||
model: google/gemini-3-flash-preview # auto (default) = main chat model
|
||||
```
|
||||
|
||||
When you point it at a model **different** from your main one, the review runs
|
||||
there for substantially lower cost (~3–5× in benchmarks). Because a different
|
||||
model can't reuse your main model's prompt cache anyway, the fork automatically
|
||||
replays a compact **digest** of the conversation (recent turns verbatim + a
|
||||
summary of older ones) rather than the full transcript — minimizing what it
|
||||
writes to the new cache. Capture holds: in testing, memory capture was
|
||||
identical and skill capture near-identical to the main-model review.
|
||||
|
||||
Leave it at `auto` (or set it to your main model) and nothing changes — the
|
||||
review keeps running on the main model with the full warm-cache replay.
|
||||
|
||||
## Controlling skill writes (`skills.write_approval`)
|
||||
|
||||
Skills use the same on/off gate, but the review UX differs because a
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue