Merge remote-tracking branch 'origin/main' into pr-50994

# Conflicts:
#	tools/computer_use/cua_backend.py
This commit is contained in:
Brooklyn Nicholson 2026-06-22 18:48:07 -05:00
commit 833710d33e
53 changed files with 3438 additions and 576 deletions

View file

@ -27,6 +27,131 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Background-review aux-model selector + routed digest.
#
# The review fork runs on the MAIN model by default ("auto"), replaying the
# full conversation — already warm in the prompt cache, so cheap cache reads.
# Optimal and unchanged. A user can route the review to a different, cheaper
# model via auxiliary.background_review.{provider,model}. A different model
# cannot reuse the parent's cache (different key), so the fork is cold
# regardless — replaying the full transcript would just cold-write it. So when
# (and only when) routed to a different model, we replay a compact DIGEST to
# minimise cold-written tokens. Same model -> full replay; different model ->
# digest. That's the whole policy.
# ---------------------------------------------------------------------------
def _resolve_review_runtime(agent: Any) -> Dict[str, Any]:
    """Resolve provider/model/credentials for the review fork.

    Default (auto / unset / same as parent): inherit the parent's live runtime
    (with the codex_app_server -> codex_responses downgrade). ``routed`` is
    False, so the fork uses the main model and its warm cache. When
    ``auxiliary.background_review.{provider,model}`` names a concrete model
    different from the parent's, resolve that runtime and set ``routed=True``.

    Args:
        agent: The parent agent. Must expose ``_current_main_runtime()`` plus
            ``provider`` and ``model`` attributes.

    Returns:
        A dict with ``provider``, ``model``, ``api_key``, ``base_url``,
        ``api_mode`` and ``routed`` keys. Any failure to load config or to
        resolve the routed runtime falls back to the parent runtime
        (``routed=False``) instead of raising.
    """
    parent_runtime = agent._current_main_runtime()
    parent_api_mode = parent_runtime.get("api_mode") or None
    if parent_api_mode == "codex_app_server":
        parent_api_mode = "codex_responses"
    parent = {
        "provider": agent.provider,
        "model": agent.model,
        "api_key": parent_runtime.get("api_key") or None,
        "base_url": parent_runtime.get("base_url") or None,
        "api_mode": parent_api_mode,
        "routed": False,
    }

    try:
        from hermes_cli.config import load_config

        cfg = load_config()
    except Exception:
        # Best-effort: without a readable config the review runs on the parent.
        return parent

    aux = cfg.get("auxiliary")
    task = aux.get("background_review") if isinstance(aux, dict) else None
    if not isinstance(task, dict):
        return parent

    # Normalise through _clean_config_str so an explicit ``null`` in the config
    # is treated as unset; a bare str(None) would yield the string "None".
    task_provider = _clean_config_str(task.get("provider"))
    task_model = _clean_config_str(task.get("model"))
    task_base_url = _clean_config_str(task.get("base_url"))
    task_api_key = _clean_config_str(task.get("api_key"))

    if not (task_provider and task_provider != "auto" and task_model):
        return parent
    if task_provider == (agent.provider or "") and task_model == (agent.model or ""):
        return parent  # same model/provider as parent -> not routed

    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        rp = resolve_runtime_provider(
            requested=task_provider,
            target_model=task_model,
            explicit_api_key=task_api_key,
            explicit_base_url=task_base_url,
        )
        return {
            "provider": rp.get("provider") or task_provider,
            "model": task_model,
            "api_key": rp.get("api_key"),
            "base_url": rp.get("base_url"),
            "api_mode": rp.get("api_mode"),
            "routed": True,
        }
    except Exception as e:
        logger.debug("background-review aux routing failed (%s); using main model", e)
        return parent


def _clean_config_str(value: Any) -> Optional[str]:
    """Return ``value`` as a stripped string, or ``None`` when unset or blank."""
    if value is None:
        return None
    return str(value).strip() or None
def _msg_text(m: Dict) -> str:
    """Return the plain text of a chat message, stripped of outer whitespace.

    String content is returned as-is (stripped). List content is treated as a
    sequence of blocks: the ``text`` value of every dict block is joined with
    single spaces. Any other content shape yields ``""``.
    """
    content = m.get("content")
    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        texts = (b.get("text") for b in content if isinstance(b, dict))
        # Keep only real, non-empty strings: a None/non-string ``text`` would
        # make str.join raise, and empty entries only add stray spaces.
        return " ".join(t for t in texts if isinstance(t, str) and t).strip()
    return ""


def _digest_history(messages_snapshot: List[Dict], tail: int = 24) -> List[Dict]:
    """Compact replay for the routed (different-model) path only.

    Keeps the most recent ``tail`` messages verbatim and collapses everything
    older into one synthetic user-role digest message placed in front of them.
    The verbatim tail is widened backwards so it never begins with a ``tool``
    message. If that widening reaches the start of the history, or the history
    already fits in ``tail``, the snapshot is returned unchanged (as a copy).

    NOTE(review): the digest is a user-role message; if the verbatim tail
    itself starts with a user message the result has two consecutive user
    turns. Confirm the target provider tolerates that.

    Args:
        messages_snapshot: Conversation messages, oldest first. ``None`` or an
            empty list yields ``[]``.
        tail: Number of trailing messages to keep verbatim. Values <= 0 mean
            "digest everything".

    Returns:
        A new list: either a copy of the snapshot, or ``[digest] + tail``.
    """
    msgs = list(messages_snapshot or [])
    tail = max(0, tail)
    if len(msgs) <= tail:
        return msgs

    # Index of the first message replayed verbatim. An explicit split index
    # keeps tail=0 well-defined; ``msgs[-0:]`` would be the whole list.
    start = len(msgs) - tail
    while (
        start < len(msgs)
        and isinstance(msgs[start], dict)
        and msgs[start].get("role") == "tool"
    ):
        start -= 1
        if start <= 0:
            return msgs
    old, keep = msgs[:start], msgs[start:]

    lines: List[str] = []
    for m in old:
        if not isinstance(m, dict):
            continue
        role = m.get("role")
        text = _msg_text(m).replace("\n", " ")
        if role == "user":
            if text:
                lines.append(f"USER: {text[:300]}")
        elif role == "assistant":
            names: List[str] = []
            for tc in m.get("tool_calls") or []:
                if not isinstance(tc, dict):
                    continue
                fn = tc.get("function")
                name = fn.get("name") if isinstance(fn, dict) else None
                # Malformed tool calls are summarised as "?" rather than
                # aborting the whole digest.
                names.append(str(name) if name else "?")
            if names:
                lines.append(f"ASSISTANT[tools: {', '.join(names)}]")
            if text:
                lines.append(f"ASSISTANT: {text[:200]}")

    digest = {
        "role": "user",
        "content": (
            "[Earlier conversation digest — older turns summarised to bound the "
            "review's cold-write cost on the routed aux model. Recent turns "
            "follow verbatim below.]\n" + "\n".join(lines)
        ),
    }
    return [digest] + keep
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
# the user-message that the forked review agent receives. AIAgent exposes
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
@ -488,18 +613,13 @@ def _run_review_in_thread(
# creds, or credential-pool setups where the resolver can't
# reconstruct auth from scratch -- producing the spurious
# "No LLM provider configured" warning at end of turn.
_parent_runtime = agent._current_main_runtime()
_parent_api_mode = _parent_runtime.get("api_mode") or None
# The review fork needs to call agent-loop tools (memory,
# skill_manage). Those tools require Hermes' own dispatch,
# which the codex_app_server runtime bypasses entirely
# (it runs the turn inside codex's subprocess). So when
# the parent is on codex_app_server, downgrade the review
# fork to codex_responses — same auth/credentials, but
# talks to the OpenAI Responses API directly so Hermes
# owns the loop and the agent-loop tools dispatch.
if _parent_api_mode == "codex_app_server":
_parent_api_mode = "codex_responses"
# _resolve_review_runtime() returns the parent's live runtime by
# default (routed=False; main model, warm cache), or — when the user
# set auxiliary.background_review.{provider,model} to a different
# model — that model's runtime (routed=True). The codex_app_server
# -> codex_responses downgrade is applied inside the resolver.
_rt = _resolve_review_runtime(agent)
_routed = bool(_rt.get("routed"))
# skip_memory=True keeps the review fork from
# touching external memory plugins (honcho, mem0,
# supermemory, etc.). Without it, the fork's
@ -519,14 +639,14 @@ def _run_review_in_thread(
# in the request body — Anthropic's cache key includes it.
# (The runtime whitelist below still restricts dispatch.)
review_agent = AIAgent(
model=agent.model,
model=_rt.get("model") or agent.model,
max_iterations=16,
quiet_mode=True,
platform=agent.platform,
provider=agent.provider,
api_mode=_parent_api_mode,
base_url=_parent_runtime.get("base_url") or None,
api_key=_parent_runtime.get("api_key") or None,
provider=_rt.get("provider") or agent.provider,
api_mode=_rt.get("api_mode"),
base_url=_rt.get("base_url") or None,
api_key=_rt.get("api_key") or None,
credential_pool=getattr(agent, "_credential_pool", None),
parent_session_id=agent.session_id,
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
@ -565,15 +685,20 @@ def _run_review_in_thread(
# issue #25322 and PR #17276 for the full analysis +
# measured impact (~26% end-to-end cost reduction on
# Sonnet 4.5).
review_agent._cached_system_prompt = agent._cached_system_prompt
# Defensive: pin session_start + session_id to the
# parent's so any code path that re-renders parts of
# the system prompt (compression, plugin hooks) still
# produces byte-identical output. The cached-prompt
# assignment above already short-circuits the normal
# rebuild path, but these pins guarantee parity even
# if a future code path bypasses the cache.
review_agent.session_start = agent.session_start
# Share the parent's warm cached system prompt ONLY when the review
# runs on the SAME model (not routed). When routed to a different
# model the parent's cached prompt is for the wrong model/cache key
# and would miss anyway, so let the routed fork build its own.
if not _routed:
review_agent._cached_system_prompt = agent._cached_system_prompt
# Defensive: pin session_start + session_id to the
# parent's so any code path that re-renders parts of
# the system prompt (compression, plugin hooks) still
# produces byte-identical output. The cached-prompt
# assignment above already short-circuits the normal
# rebuild path, but these pins guarantee parity even
# if a future code path bypasses the cache.
review_agent.session_start = agent.session_start
review_agent.session_id = agent.session_id
# The fork shares the parent's live session_id (pinned above for
# prefix-cache parity). It is single-lifecycle and calls close()
@ -615,6 +740,13 @@ def _run_review_in_thread(
),
)
try:
# Routed to a different model -> replay a digest (cache is cold
# on that model anyway, so minimise cold-written tokens). Same
# model -> replay the full snapshot (warm cache reads).
_review_history = (
_digest_history(messages_snapshot) if _routed
else messages_snapshot
)
review_agent.run_conversation(
user_message=(
prompt
@ -622,7 +754,7 @@ def _run_review_in_thread(
"management tools. Other tools will be denied "
"at runtime — do not attempt them."
),
conversation_history=messages_snapshot,
conversation_history=_review_history,
)
finally:
clear_thread_tool_whitelist()

View file

@ -805,10 +805,11 @@ def try_shrink_image_parts_in_messages(
Pillow couldn't help (caller should surface the original error).
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
``data:image/...;base64,...`` payload. For each one whose encoded
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
ceiling with header overhead) or whose longest side exceeds
``max_dimension``, write the base64 to a tempfile, call
``data:image/...;base64,...`` payload, plus Anthropic-native
``{"type": "image", "source": {"type": "base64", ...}}`` blocks.
For each one whose encoded size exceeds 4 MB (a safe target that slides
under Anthropic's 5 MB ceiling with header overhead) or whose longest side
exceeds ``max_dimension``, write the base64 to a tempfile, call
``vision_tools._resize_image_for_vision`` to produce a smaller data
URL, and substitute it in place.
@ -964,6 +965,28 @@ def try_shrink_image_parts_in_messages(
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
return None, triggered_by is not None
def _source_to_data_url(source: Any) -> Optional[str]:
    """Build a ``data:`` URL from a base64 image ``source`` dict.

    Returns ``None`` unless ``source`` is a dict with ``type == "base64"`` and
    a non-empty string ``data``. A missing or non-``image/*`` ``media_type``
    falls back to ``image/jpeg``.
    """
    if not isinstance(source, dict):
        return None
    if source.get("type") != "base64":
        return None

    payload = source.get("data")
    if not (isinstance(payload, str) and payload):
        return None

    mime = str(source.get("media_type") or "image/jpeg").strip()
    mime = mime if mime.startswith("image/") else "image/jpeg"
    return f"data:{mime};base64,{payload}"
def _write_data_url_to_source(source: dict, data_url: str) -> None:
    """Write a ``data:`` URL back into a base64 ``source`` dict, in place.

    Everything after the first comma becomes ``data``. The media type is read
    from the ``data:<type>;...`` header when it is an ``image/*`` type, and
    defaults to ``image/jpeg`` otherwise. ``type`` is always set to
    ``"base64"``.
    """
    prefix, _sep, payload = data_url.partition(",")

    mime = "image/jpeg"
    if prefix.startswith("data:"):
        declared = prefix[len("data:"):].split(";", 1)[0].strip()
        if declared.startswith("image/"):
            mime = declared

    source.update(type="base64", media_type=mime, data=payload)
for msg in api_messages:
if not isinstance(msg, dict):
continue
@ -974,6 +997,16 @@ def try_shrink_image_parts_in_messages(
if not isinstance(part, dict):
continue
ptype = part.get("type")
if ptype == "image":
source = part.get("source")
url = _source_to_data_url(source)
resized, unshrinkable = _shrink_data_url(url or "")
if resized and isinstance(source, dict):
_write_data_url_to_source(source, resized)
changed_count += 1
elif unshrinkable:
unshrinkable_oversized += 1
continue
if ptype not in {"image_url", "input_image"}:
continue
image_value = part.get("image_url")

View file

@ -122,10 +122,14 @@ def finalize_turn(
)
# Determine if conversation completed successfully
normal_text_response = str(_turn_exit_reason).startswith("text_response(")
completed = (
final_response is not None
and api_call_count < agent.max_iterations
and not failed
and (
api_call_count < agent.max_iterations
or normal_text_response
)
)
# Post-loop cleanup must never lose the response. Trajectory save,

View file

@ -0,0 +1,239 @@
import { useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import { getActionStatus, getComputerUseStatus, grantComputerUsePermissions } from '@/hermes'
import { AlertTriangle, Check, ExternalLink, Loader2, RefreshCw, X } from '@/lib/icons'
import { upsertDesktopActionTask } from '@/store/activity'
import { notify, notifyError } from '@/store/notifications'
import type { ComputerUseStatus } from '@/types/hermes'
import { Pill } from './primitives'
/** Props accepted by the ComputerUsePanel component. */
interface ComputerUsePanelProps {
/** Re-read the parent toolset list after a permission/install change so the
* "Configured / Needs keys" pill stays in sync. Optional; invoked after a
* grant attempt finishes and the status has been refreshed. */
onConfiguredChange?: () => void
}
// Per-OS one-liner shown when there's no TCC grant flow (Windows/Linux). macOS
// drives the permission rows instead, so it has no entry here.
// Looked up by `status.platform`; a platform without an entry renders an
// empty note.
const PLATFORM_NOTE: Record<string, string> = {
linux: 'Drives your desktop via the X11/XWayland accessibility stack — no permission prompt.',
win32: 'First run may trigger a Windows SmartScreen prompt for the cua-driver UIAccess worker — allow it.'
}
/** Pill tone for a tri-state flag: only an explicit `true` is highlighted. */
function tone(granted: boolean | null) {
  if (granted === true) {
    return 'primary'
  }

  return 'muted'
}
/** Status glyph: check for `true`, cross for `false`, warning for unknown (`null`). */
function GrantIcon({ granted }: { granted: boolean | null }) {
  if (granted === true) {
    return <Check className="size-3" />
  }

  if (granted === false) {
    return <X className="size-3" />
  }

  return <AlertTriangle className="size-3" />
}
/** One labelled permission line: name + hint on the left, tri-state status pill on the right. */
function PermissionRow(props: { granted: boolean | null; label: string; hint: string }) {
  const { granted, hint, label } = props

  let statusText = 'Unknown'

  if (granted === true) {
    statusText = 'Granted'
  } else if (granted === false) {
    statusText = 'Not granted'
  }

  return (
    <div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
      <div className="min-w-0">
        <span className="text-sm font-medium">{label}</span>
        <p className="mt-0.5 text-[0.7rem] text-muted-foreground">{hint}</p>
      </div>
      <Pill tone={tone(granted)}>
        <GrantIcon granted={granted} />
        {statusText}
      </Pill>
    </div>
  )
}
/**
 * Cross-platform Computer Use preflight card.
 *
 * cua-driver runs on macOS, Windows, and Linux, but readiness differs: macOS
 * needs two TCC grants (Accessibility + Screen Recording) that attach to
 * cua-driver's own `com.trycua.driver` identity — not Hermes — and are
 * requested via `cua-driver permissions grant` (dialog attributed to
 * CuaDriver). Windows/Linux have no TCC toggles, so readiness is driver health
 * from `cua-driver doctor`. The backend folds both into one `ready` signal.
 *
 * Binary install/upgrade stays in the cua-driver provider's post-setup runner
 * below this card (the generic ToolsetConfigPanel).
 *
 * Render states, in order: loading spinner, nothing (status could not be
 * read), "unsupported platform" note, "install first" note, then the full
 * card with permission rows (when `can_grant`) or a single driver-health row.
 */
export function ComputerUsePanel({ onConfiguredChange }: ComputerUsePanelProps) {
  const [status, setStatus] = useState<ComputerUseStatus | null>(null)
  const [loading, setLoading] = useState(true)
  const [granting, setGranting] = useState(false)
  // True while mounted; lets the long-running grant poll stop touching state
  // after the panel is closed.
  const activeRef = useRef(false)

  const refresh = useCallback(async () => {
    try {
      setStatus(await getComputerUseStatus())
    } catch (err) {
      notifyError(err, 'Could not read Computer Use status')
    } finally {
      setLoading(false)
    }
  }, [])

  useEffect(() => {
    activeRef.current = true
    void refresh()

    return () => void (activeRef.current = false)
  }, [refresh])

  const grant = useCallback(async () => {
    setGranting(true)

    try {
      const started = await grantComputerUsePermissions()

      if (!started.ok) {
        notifyError(new Error('spawn failed'), 'Could not request permissions')

        return
      }

      notify({
        kind: 'info',
        title: 'Approve in System Settings',
        message: 'macOS will show a permission dialog attributed to CuaDriver. Approve it, then return here.'
      })

      // The driver waits for the user to flip the switch — poll until it exits.
      // Bounded at 150 polls of 1.5 s each so an abandoned dialog cannot poll forever.
      for (let attempt = 0; attempt < 150 && activeRef.current; attempt += 1) {
        await new Promise(resolve => window.setTimeout(resolve, 1500))

        if (!activeRef.current) {
          break
        }

        const polled = await getActionStatus(started.name, 200)
        upsertDesktopActionTask(polled)

        if (!polled.running) {
          break
        }
      }

      if (activeRef.current) {
        await refresh()
        onConfiguredChange?.()
      }
    } catch (err) {
      if (activeRef.current) {
        notifyError(err, 'Could not request permissions')
      }
    } finally {
      if (activeRef.current) {
        setGranting(false)
      }
    }
  }, [onConfiguredChange, refresh])

  if (loading) {
    return (
      <div className="mt-3 flex items-center gap-2 px-1 text-xs text-muted-foreground">
        <Loader2 className="size-3.5 animate-spin" />
        Checking Computer Use status…
      </div>
    )
  }

  if (!status) {
    return null
  }

  if (!status.platform_supported) {
    return (
      <p className="mt-3 px-1 text-xs text-muted-foreground">
        Computer Use isn&apos;t supported on this platform ({status.platform}).
      </p>
    )
  }

  if (!status.installed) {
    return (
      <p className="mt-3 px-1 text-xs text-muted-foreground">
        Install the cua-driver backend below to drive this machine.
        {status.can_grant && ' Then grant Accessibility and Screen Recording here.'}
      </p>
    )
  }

  const failingChecks = status.checks.filter(c => c.status !== 'ok')

  return (
    <div className="mt-3 grid gap-2">
      <div className="flex flex-wrap items-center justify-between gap-2 px-1">
        <div className="min-w-0">
          {status.can_grant ? (
            <p className="text-[0.72rem] text-muted-foreground">
              Grants attach to CuaDriver&apos;s own identity (com.trycua.driver), not Hermes — so the dialog is
              attributed to the process that drives your Mac.
            </p>
          ) : (
            <p className="text-[0.72rem] text-muted-foreground">{PLATFORM_NOTE[status.platform] ?? ''}</p>
          )}
          {status.version && <p className="text-[0.68rem] text-muted-foreground/80">{status.version}</p>}
        </div>
        <Button onClick={() => void refresh()} size="sm" variant="text">
          <RefreshCw className="size-3.5" />
          Recheck
        </Button>
      </div>

      {status.can_grant ? (
        <>
          <PermissionRow
            granted={status.accessibility}
            hint="Lets cua-driver post clicks, keystrokes, and read the accessibility tree."
            label="Accessibility"
          />
          <PermissionRow
            granted={status.screen_recording}
            hint="Lets cua-driver capture screenshots of app windows."
            label="Screen Recording"
          />
        </>
      ) : (
        <div className="flex flex-wrap items-center justify-between gap-2 rounded-lg bg-background/55 p-2.5">
          <span className="text-sm font-medium">Driver health</span>
          <Pill tone={tone(status.ready)}>
            <GrantIcon granted={status.ready} />
            {status.ready === true ? 'Ready' : status.ready === false ? 'Not ready' : 'Unknown'}
          </Pill>
        </div>
      )}

      {failingChecks.map(c => (
        <p className="px-1 text-[0.7rem] text-muted-foreground" key={c.label}>
          <AlertTriangle className="mr-1 inline size-3" />
          {c.label}: {c.message}
        </p>
      ))}

      {status.error && (
        <p className="px-1 text-[0.7rem] text-muted-foreground">
          <AlertTriangle className="mr-1 inline size-3" />
          {status.error}
        </p>
      )}

      {status.ready ? (
        <div className="flex items-center gap-1.5 px-1 text-xs text-muted-foreground">
          <Check className="size-3.5" />
          Computer Use is ready. Ask the agent to capture an app and click around.
        </div>
      ) : (
        status.can_grant && (
          <Button disabled={granting} onClick={() => void grant()} size="sm">
            {granting ? <Loader2 className="size-3.5 animate-spin" /> : <ExternalLink className="size-3.5" />}
            {granting ? 'Waiting for approval…' : 'Grant permissions'}
          </Button>
        )
      )}
    </div>
  )
}

View file

@ -17,6 +17,7 @@ import { useRefreshHotkey } from '../hooks/use-refresh-hotkey'
import { useRouteEnumParam } from '../hooks/use-route-enum-param'
import { PAGE_INSET_X } from '../layout-constants'
import { PageSearchShell } from '../page-search-shell'
import { ComputerUsePanel } from '../settings/computer-use-panel'
import { asText, includesQuery, prettyName, toolNames, toolsetDisplayLabel } from '../settings/helpers'
import { ToolsetConfigPanel } from '../settings/toolset-config-panel'
import type { SetStatusbarItemGroup } from '../shell/statusbar-controls'
@ -334,6 +335,9 @@ export function SkillsView({ setStatusbarItemGroup: _setStatusbarItemGroup, ...p
))}
</div>
)}
{expanded && toolset.name === 'computer_use' && (
<ComputerUsePanel onConfiguredChange={refreshToolsets} />
)}
{expanded && <ToolsetConfigPanel onConfiguredChange={refreshToolsets} toolset={toolset.name} />}
</div>
)

View file

@ -0,0 +1,51 @@
import { describe, expect, it } from 'vitest'
import { activeTimelineIndex, deriveTimelineEntries, timelinePreview } from './thread-timeline-data'
// timelinePreview: whitespace collapsing and length-capped truncation.
describe('timelinePreview', () => {
it('collapses whitespace to a single line', () => {
expect(timelinePreview('hello\n\n world\tagain')).toBe('hello world again')
})
it('truncates with an ellipsis past the limit', () => {
const out = timelinePreview('abcdefghij', 5)
// The ellipsis counts toward the limit: 4 kept characters + '…' = 5.
expect(out).toBe('abcd…')
expect(out.length).toBe(5)
})
})
// deriveTimelineEntries: only real user prompts become timeline entries.
describe('deriveTimelineEntries', () => {
it('keeps non-empty user prompts in order', () => {
expect(
deriveTimelineEntries([
{ id: 'u1', role: 'user', text: 'first' },
{ id: 'a1', role: 'assistant', text: 'answer' },
{ id: 'u2', role: 'user', text: ' second ' }
])
).toEqual([
{ id: 'u1', preview: 'first' },
{ id: 'u2', preview: 'second' }
])
})
it('drops blanks and background-process notifications', () => {
expect(
deriveTimelineEntries([
{ id: 'u1', role: 'user', text: ' ' },
{ id: 'u2', role: 'user', text: '[IMPORTANT: Background process 123 finished]' },
{ id: 'u3', role: 'user', text: 'real prompt' }
]).map(e => e.id)
).toEqual(['u3'])
})
})
// activeTimelineIndex: offsets are distances from the viewport top; null
// marks an entry with no rendered node.
describe('activeTimelineIndex', () => {
it('returns the last prompt scrolled to or above the top edge', () => {
expect(activeTimelineIndex([-400, -10, 320])).toBe(1)
})
it('falls back to the first rendered entry', () => {
expect(activeTimelineIndex([null, 120, 480])).toBe(1)
// Nothing rendered at all: index 0 is the final fallback.
expect(activeTimelineIndex([null, null])).toBe(0)
})
})

View file

@ -0,0 +1,75 @@
// Pure timeline helpers — no React/DOM; tested in thread-timeline-data.test.ts.
/** Minimal message shape consumed by deriveTimelineEntries. */
export interface TimelineSourceMessage {
// Carried through unchanged as the resulting entry's id.
id: string
// Only messages whose role is exactly 'user' produce entries.
role: string
// Raw message text; trimmed and collapsed before it becomes a preview.
text: string
}
/** One timeline row: the source message id plus its single-line preview. */
export interface TimelineEntry {
// Id of the user message this entry was derived from.
id: string
// Output of timelinePreview for the message's trimmed text.
preview: string
}
// Injected as user messages for alternation; not human prompts (thread.tsx).
// Matches a whole (already trimmed) text that starts with
// "[IMPORTANT: Background process " and ends with "]"; [\s\S]* lets the body
// span multiple lines.
const PROCESS_NOTIFICATION_RE = /^\[IMPORTANT: Background process [\s\S]*\]$/
// Default character cap for a timeline preview (the ellipsis counts toward it).
const PREVIEW_MAX = 120

/**
 * Collapse `text` to a single line and cap it at `max` characters.
 *
 * Every whitespace run becomes one space and the ends are trimmed. When the
 * collapsed text is longer than `max`, it is cut to `max - 1` characters,
 * trailing whitespace is dropped, and a single '…' is appended, so the result
 * never exceeds `max`. A `max` below 1 leaves no room for the ellipsis and
 * yields an empty string.
 */
export function timelinePreview(text: string, max: number = PREVIEW_MAX): string {
  const collapsed = text.replace(/\s+/g, ' ').trim()

  if (collapsed.length <= max) {
    return collapsed
  }

  if (max < 1) {
    // slice(0, max - 1) with a negative end would cut from the end of the
    // string rather than cap the length.
    return ''
  }

  return `${collapsed.slice(0, max - 1).trimEnd()}…`
}
/**
 * Turn a message list into timeline entries, preserving order.
 *
 * Keeps only `role === 'user'` messages whose trimmed text is non-empty and
 * is not a background-process notification; each survivor becomes
 * `{ id, preview }` with the preview built by timelinePreview.
 */
export function deriveTimelineEntries(messages: readonly TimelineSourceMessage[]): TimelineEntry[] {
  return messages
    .filter(message => message.role === 'user')
    .map(message => ({ id: message.id, text: message.text.trim() }))
    .filter(({ text }) => text !== '' && !PROCESS_NOTIFICATION_RE.test(text))
    .map(({ id, text }) => ({ id, preview: timelinePreview(text) }))
}
/**
 * Pick the entry to highlight from per-entry offsets relative to the viewport top.
 *
 * Returns the index of the last entry whose offset is at most `slack`. If no
 * entry qualifies, returns the first entry with a non-null offset, and 0 when
 * every offset is null (or the list is empty).
 */
export function activeTimelineIndex(offsets: readonly (number | null)[], slack: number = 8): number {
  let firstRendered = -1
  let lastAtOrAbove = -1

  offsets.forEach((offset, index) => {
    if (offset == null) {
      return
    }

    if (firstRendered < 0) {
      firstRendered = index
    }

    if (offset <= slack) {
      lastAtOrAbove = index
    }
  })

  if (lastAtOrAbove >= 0) {
    return lastAtOrAbove
  }

  return Math.max(firstRendered, 0)
}

View file

@ -0,0 +1,272 @@
import { useAuiState } from '@assistant-ui/react'
import { type FC, useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { composerPanelCard } from '@/components/chat/composer-dock'
import { triggerHaptic } from '@/lib/haptics'
import { cn } from '@/lib/utils'
import { setPaneHoverRevealSuppressed } from '@/store/panes'
import {
activeTimelineIndex,
deriveTimelineEntries,
type TimelineEntry,
type TimelineSourceMessage
} from './thread-timeline-data'
// The rail renders nothing until the thread has at least this many entries.
const MIN_ENTRIES = 4
// Selector for the scrollable thread viewport that offsets and jumps are measured against.
const VIEWPORT = '[data-slot="aui_thread-viewport"]'
// Delay (ms) between the pointer leaving the rail and the popover closing.
const HOVER_CLOSE_MS = 140
// Classes for each clickable preview row inside the popover.
const ROW_CLASS =
'relative flex w-full min-w-0 max-w-full cursor-pointer select-none overflow-hidden rounded-md px-2 py-1 text-left outline-hidden transition-colors duration-100 ease-out hover:bg-(--ui-row-hover-background) hover:transition-none'
// Classes for the popover container, positioned to the left of the tick rail.
const POPOVER_SHELL = cn(
'absolute right-full top-1/2 z-50 mr-1.5 max-h-[min(22rem,calc(100vh-8rem))] w-80 max-w-[min(20rem,calc(100vw-2rem))] -translate-y-1/2 overflow-x-hidden overflow-y-auto overscroll-contain p-1 text-popover-foreground transition-[opacity,transform] duration-100 ease-out group-hover/timeline:transition-none',
composerPanelCard,
// Solid fill — composerPanelCard is deliberately translucent; without this,
// directive chips in the transcript bleed through and look like popover overflow.
'bg-(--composer-fill)'
)
/**
 * Flatten message content into plain prompt text.
 *
 * A string is returned as-is. For an array, string parts and the `text` of
 * object parts that are untyped or typed 'text' are concatenated in order;
 * every other part contributes nothing. Any other content yields ''.
 */
function userPromptText(content: unknown): string {
  if (typeof content === 'string') {
    return content
  }

  if (!Array.isArray(content)) {
    return ''
  }

  return content
    .map((part: unknown): string => {
      if (typeof part === 'string') {
        return part
      }

      if (!part || typeof part !== 'object') {
        return ''
      }

      const { text, type } = part as { text?: unknown; type?: unknown }
      const isTextPart = !type || type === 'text'

      return isTextPart && typeof text === 'string' ? text : ''
    })
    .join('')
}
/**
 * Smooth-scroll the thread viewport so the message with `id` sits 8px below
 * its top edge. Does nothing when the viewport or the message node is absent.
 */
function scrollToPrompt(id: string) {
  const viewport = document.querySelector<HTMLElement>(VIEWPORT)

  if (!viewport) {
    return
  }

  const node = viewport.querySelector<HTMLElement>(`[data-message-id="${CSS.escape(id)}"]`)

  if (!node) {
    return
  }

  // Distance of the message from the viewport's visible top, converted into
  // an absolute scroll position.
  const offsetWithinViewport = node.getBoundingClientRect().top - viewport.getBoundingClientRect().top
  const target = viewport.scrollTop + offsetWithinViewport - 8

  triggerHaptic('selection')
  viewport.scrollTo({ behavior: 'smooth', top: Math.max(0, target) })
}
/**
 * Right-edge prompt rail — hover previews, click to jump. ≥4 user turns only.
 *
 * Reads the thread's user messages from the assistant-ui store, derives
 * timeline entries, tracks which entry is at the top of the viewport while
 * scrolling, and renders the tick rail plus the hover popover. While the rail
 * is hovered it suppresses the pane hover-reveal and restores it on leave,
 * on unmount, and whenever the rail stops rendering.
 */
export const ThreadTimeline: FC = () => {
  // The selector returns a JSON string so the store comparison is by value:
  // the component re-renders only when user-message ids or text change.
  const sourceSignature = useAuiState(s => {
    const rows: TimelineSourceMessage[] = []

    for (const message of s.thread.messages) {
      if (message.role !== 'user') {
        continue
      }

      rows.push({ id: message.id, role: 'user', text: userPromptText(message.content) })
    }

    return JSON.stringify(rows)
  })

  const entries = useMemo(
    () => deriveTimelineEntries(JSON.parse(sourceSignature) as TimelineSourceMessage[]),
    [sourceSignature]
  )

  const [activeIndex, setActiveIndex] = useState(0)
  const [hoverIndex, setHoverIndex] = useState<number | null>(null)
  const [open, setOpen] = useState(false)
  const closeTimerRef = useRef<number | undefined>(undefined)

  const keepOpen = useCallback(() => {
    window.clearTimeout(closeTimerRef.current)
    setPaneHoverRevealSuppressed(true)
    setOpen(true)
  }, [])

  const closeSoon = useCallback(() => {
    window.clearTimeout(closeTimerRef.current)
    setHoverIndex(null)
    setPaneHoverRevealSuppressed(false)
    closeTimerRef.current = window.setTimeout(() => setOpen(false), HOVER_CLOSE_MS)
  }, [])

  // Unmount: drop the pending close timer and release the pane suppression.
  useEffect(
    () => () => {
      window.clearTimeout(closeTimerRef.current)
      setPaneHoverRevealSuppressed(false)
    },
    []
  )

  // The rail disappears below MIN_ENTRIES, so no mouse-leave will fire to
  // release the suppression — release it here.
  useEffect(() => {
    if (entries.length < MIN_ENTRIES) {
      setPaneHoverRevealSuppressed(false)
    }
  }, [entries.length])

  useEffect(() => {
    // Nothing is rendered below MIN_ENTRIES, so skip the scroll listener and
    // the per-scroll layout reads entirely for short threads.
    if (entries.length < MIN_ENTRIES) {
      return
    }

    const viewport = document.querySelector<HTMLElement>(VIEWPORT)

    if (!viewport) {
      return
    }

    let raf = 0

    const compute = () => {
      raf = 0
      const top = viewport.getBoundingClientRect().top

      const offsets = entries.map(entry => {
        const node = viewport.querySelector<HTMLElement>(`[data-message-id="${CSS.escape(entry.id)}"]`)

        return node ? node.getBoundingClientRect().top - top : null
      })

      const next = activeTimelineIndex(offsets)
      setActiveIndex(prev => (prev === next ? prev : next))
    }

    // Coalesce scroll events to at most one measurement per animation frame.
    const onScroll = () => {
      if (!raf) {
        raf = requestAnimationFrame(compute)
      }
    }

    compute()
    viewport.addEventListener('scroll', onScroll, { passive: true })

    return () => {
      viewport.removeEventListener('scroll', onScroll)

      if (raf) {
        cancelAnimationFrame(raf)
      }
    }
  }, [entries])

  if (entries.length < MIN_ENTRIES) {
    return null
  }

  return (
    <div
      aria-label="Conversation timeline"
      className="group/timeline pointer-events-auto absolute right-0 top-1/2 z-40 flex -translate-y-1/2 flex-col items-end"
      data-slot="thread-timeline"
      onMouseEnter={keepOpen}
      onMouseLeave={closeSoon}
      role="navigation"
    >
      <TimelineTicks
        activeIndex={activeIndex}
        entries={entries}
        onHover={setHoverIndex}
        onJump={scrollToPrompt}
      />
      <TimelinePopover
        activeIndex={activeIndex}
        entries={entries}
        hoverIndex={hoverIndex}
        onHover={setHoverIndex}
        onJump={scrollToPrompt}
        open={open}
      />
    </div>
  )
}
/**
 * Popover listing every entry's preview as a clickable row. Always mounted;
 * `open` only toggles its visibility and pointer-events classes.
 */
const TimelinePopover: FC<{
  activeIndex: number
  entries: TimelineEntry[]
  hoverIndex: number | null
  onHover: (index: number) => void
  onJump: (id: string) => void
  open: boolean
}> = props => {
  const { activeIndex, entries, hoverIndex, onHover, onJump, open } = props

  const visibility = open
    ? 'pointer-events-auto opacity-100 translate-x-0'
    : 'pointer-events-none translate-x-1 opacity-0'

  const rows = entries.map((entry, index) => {
    const rowClass = cn(
      ROW_CLASS,
      index === activeIndex && 'bg-(--ui-row-active-background) text-foreground',
      index === hoverIndex && 'bg-(--ui-row-hover-background) text-foreground transition-none'
    )

    return (
      <button
        aria-label={entry.preview}
        className={rowClass}
        key={entry.id}
        onClick={() => onJump(entry.id)}
        onMouseEnter={() => onHover(index)}
        type="button"
      >
        <span className="block w-full min-w-0 truncate font-medium leading-snug text-foreground">
          {entry.preview}
        </span>
      </button>
    )
  })

  return (
    <div className={cn(POPOVER_SHELL, visibility)} data-slot="thread-timeline-popover">
      {rows}
    </div>
  )
}
/**
 * Vertical rail with one tick button per entry; the active entry's tick uses
 * the theme primary colour. Hovering a tick reports its index, clicking jumps.
 */
const TimelineTicks: FC<{
  activeIndex: number
  entries: TimelineEntry[]
  onHover: (index: number) => void
  onJump: (id: string) => void
}> = props => {
  const { activeIndex, entries, onHover, onJump } = props

  const ticks = entries.map((entry, index) => {
    const markClass = cn(
      'block h-px w-3 transition-opacity duration-100 ease-out',
      index === activeIndex
        ? 'bg-(--theme-primary)'
        : 'dither text-(--ui-text-quaternary) opacity-70 group-hover/tick:opacity-100 group-hover/tick:transition-none'
    )

    return (
      <button
        aria-label={entry.preview}
        className="group/tick flex h-2 w-7 cursor-pointer items-center justify-end pr-1"
        key={entry.id}
        onClick={() => onJump(entry.id)}
        onMouseEnter={() => onHover(index)}
        type="button"
      >
        <span className={markClass} />
      </button>
    )
  })

  return (
    <div className="flex flex-col items-end py-1" data-slot="thread-timeline-ticks">
      {ticks}
    </div>
  )
}

View file

@ -64,6 +64,7 @@ import { ClarifyTool } from '@/components/assistant-ui/clarify-tool'
import { DirectiveContent, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
import { MarkdownText, MarkdownTextContent } from '@/components/assistant-ui/markdown-text'
import { ThreadMessageList } from '@/components/assistant-ui/thread-list'
import { ThreadTimeline } from '@/components/assistant-ui/thread-timeline'
import { ToolFallback, ToolGroupSlot } from '@/components/assistant-ui/tool-fallback'
import { TooltipIconButton } from '@/components/assistant-ui/tooltip-icon-button'
import { UserMessageText } from '@/components/assistant-ui/user-message-text'
@ -212,6 +213,7 @@ export const Thread: FC<{
sessionKey={sessionKey}
/>
{loading === 'session' && <CenteredThreadSpinner />}
<ThreadTimeline />
</div>
)
}
@ -797,7 +799,15 @@ function messageAttachmentRefs(value: unknown): string[] {
return value.every(ref => typeof ref === 'string') ? value : EMPTY_ATTACHMENT_REFS
}
function StickyHumanMessageContainer({ attachments, children }: { attachments?: ReactNode; children: ReactNode }) {
function StickyHumanMessageContainer({
attachments,
children,
messageId
}: {
attachments?: ReactNode
children: ReactNode
messageId?: string
}) {
return (
// Fragment, not a wrapper: a wrapping element becomes the sticky's
// containing block (it'd stick within its own height = never). The bubble
@ -806,6 +816,7 @@ function StickyHumanMessageContainer({ attachments, children }: { attachments?:
<>
<div
className="group/user-message sticky z-40 -mx-4 flex w-[calc(100%+2rem)] min-w-0 max-w-none flex-col items-stretch gap-0 self-end overflow-visible bg-(--ui-chat-surface-background) px-4 pb-(--conversation-turn-gap) pt-1"
data-message-id={messageId}
data-role="user"
data-slot="aui_user-message-root"
>
@ -990,6 +1001,7 @@ const UserMessage: FC<{
return (
<MessagePrimitive.Root asChild>
<StickyHumanMessageContainer
messageId={messageId}
attachments={
// Attachments live BELOW the sticky bubble in normal flow, so they
// scroll away behind the pinned bubble instead of riding along with

View file

@ -15,7 +15,7 @@ import {
} from 'react'
import { cn } from '@/lib/utils'
import { $paneStates, ensurePaneRegistered, setPaneWidthOverride } from '@/store/panes'
import { $paneHoverRevealSuppressed, $paneStates, ensurePaneRegistered, setPaneWidthOverride } from '@/store/panes'
import { PaneShellContext, type PaneShellContextValue, type PaneSlot } from './context'
@ -250,6 +250,7 @@ export function Pane({
}: PaneProps) {
const ctx = useContext(PaneShellContext)
const paneStates = useStore($paneStates)
const hoverRevealSuppressed = useStore($paneHoverRevealSuppressed)
const registered = useRef(false)
const paneRef = useRef<HTMLDivElement | null>(null)
// Keyboard (mod+b / mod+j) pins the reveal open while collapsed; hover is CSS.
@ -378,7 +379,10 @@ export function Pane({
>
<div
aria-hidden="true"
className="pointer-events-auto absolute inset-y-0 z-30 [-webkit-app-region:no-drag]"
className={cn(
'absolute inset-y-0 z-30 [-webkit-app-region:no-drag]',
hoverRevealSuppressed ? 'pointer-events-none' : 'pointer-events-auto'
)}
style={{ [edge]: HOVER_REVEAL_EDGE_GUTTER, width: HOVER_REVEAL_TRIGGER_WIDTH }}
/>
@ -388,7 +392,8 @@ export function Pane({
className={cn(
'pointer-events-none absolute inset-y-0 z-30 overflow-hidden transition-transform delay-0',
offscreen,
'group-hover/reveal:pointer-events-auto group-hover/reveal:translate-x-0 group-hover/reveal:delay-[var(--reveal-enter-delay)] group-hover/reveal:shadow-[var(--reveal-shadow)]',
!hoverRevealSuppressed &&
'group-hover/reveal:pointer-events-auto group-hover/reveal:translate-x-0 group-hover/reveal:delay-[var(--reveal-enter-delay)] group-hover/reveal:shadow-[var(--reveal-shadow)]',
'group-data-[forced]/reveal:pointer-events-auto group-data-[forced]/reveal:translate-x-0 group-data-[forced]/reveal:delay-0 group-data-[forced]/reveal:shadow-[var(--reveal-shadow)]'
)}
key={edge}

View file

@ -8,6 +8,7 @@ import type {
AudioTranscriptionResponse,
AuxiliaryModelsResponse,
BackendUpdateCheckResponse,
ComputerUseStatus,
ConfigSchemaResponse,
CronJob,
CronJobCreatePayload,
@ -59,6 +60,9 @@ export type {
AudioTranscriptionResponse,
AuxiliaryModelsResponse,
BackendUpdateCheckResponse,
ComputerUseCheck,
ComputerUsePermissionSource,
ComputerUseStatus,
ConfigFieldSchema,
ConfigSchemaResponse,
CronJob,
@ -516,6 +520,21 @@ export function runToolsetPostSetup(name: string, key: string): Promise<ActionRe
})
}
/**
 * Fetch the Computer Use status payload from the backend.
 *
 * Calls `/api/tools/computer-use/status` through the desktop API bridge, with
 * the request options spread from `profileScoped()`.
 *
 * @returns A promise resolving to the `ComputerUseStatus` payload.
 */
export function getComputerUseStatus(): Promise<ComputerUseStatus> {
return window.hermesDesktop.api<ComputerUseStatus>({
...profileScoped(),
path: '/api/tools/computer-use/status'
})
}
/**
 * Ask the backend to start the Computer Use permission grant flow.
 *
 * Sends a `POST` to `/api/tools/computer-use/permissions/grant` through the
 * desktop API bridge, with the request options spread from `profileScoped()`.
 *
 * @returns A promise resolving to the backend's `ActionResponse`.
 */
export function grantComputerUsePermissions(): Promise<ActionResponse> {
return window.hermesDesktop.api<ActionResponse>({
...profileScoped(),
path: '/api/tools/computer-use/permissions/grant',
method: 'POST'
})
}
export function getMessagingPlatforms(): Promise<MessagingPlatformsResponse> {
return window.hermesDesktop.api<MessagingPlatformsResponse>({
path: '/api/messaging/platforms'

View file

@ -32,4 +32,13 @@ describe('extractEmbeddedImages', () => {
expect(result.cleanedText).toBe('first mid tail')
expect(result.images).toEqual([SAMPLE_PNG_DATA_URL, second])
})
it('handles multi-megabyte data URLs without overflowing the JS stack', () => {
const hugeDataUrl = 'data:image/png;base64,' + 'A'.repeat(8_000_000)
const result = extractEmbeddedImages(`describe this ${hugeDataUrl} thanks`)
expect(result.cleanedText).toBe('describe this thanks')
expect(result.images).toHaveLength(1)
expect(result.images[0]).toHaveLength(hugeDataUrl.length)
})
})

View file

@ -1,7 +1,11 @@
const EMBEDDED_IMAGE_RE =
/(\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*")?(data:image\/[\w.+-]+;base64,[A-Za-z0-9+/=]{64,})("\s*\}\s*\})?/g
const DATA_URL_RE = /^data:([\w./+-]+);base64,(.*)$/i
const DATA_IMAGE_PREFIX = 'data:image/'
const BASE64_MARKER = ';base64,'
const MIN_EMBEDDED_IMAGE_BASE64_LENGTH = 64
const JSON_IMAGE_OPEN_RE = /\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*"$/
const JSON_IMAGE_CLOSE_RE = /^"\s*\}\s*\}/
const JSON_IMAGE_OPEN_MAX = 96
const JSON_IMAGE_CLOSE_MAX = 16
export const DATA_IMAGE_URL_RE = /^data:image\/[\w.+-]+;base64,/i
@ -31,24 +35,119 @@ export function dataUrlToBlob(dataUrl: string): Blob | null {
}
}
/**
 * Whether a UTF-16 code unit may appear in the subtype of a `data:image/...`
 * MIME type: ASCII letters, digits, and the punctuation `+ - . _`.
 */
function isImageMimeCode(code: number): boolean {
  // '+' (43), '-' (45), '.' (46), '_' (95)
  if (code === 43 || code === 45 || code === 46 || code === 95) {
    return true
  }

  const isDigit = code >= 48 && code <= 57
  const isUpper = code >= 65 && code <= 90
  const isLower = code >= 97 && code <= 122

  return isDigit || isUpper || isLower
}
/**
 * Whether a UTF-16 code unit belongs to the standard base64 alphabet,
 * including the `=` padding character.
 */
function isBase64Code(code: number): boolean {
  // '+' (43), '/' (47), '=' (61)
  if (code === 43 || code === 47 || code === 61) {
    return true
  }

  const isDigit = code >= 48 && code <= 57
  const isUpper = code >= 65 && code <= 90
  const isLower = code >= 97 && code <= 122

  return isDigit || isUpper || isLower
}
/**
 * Try to read a `data:image/<subtype>;base64,<payload>` URL that begins
 * exactly at `start`.
 *
 * Scans code unit by code unit instead of using a regex, so arbitrarily long
 * payloads are handled with a simple linear walk.
 *
 * @returns The URL text and the index just past it, or `null` when the text
 * at `start` is not a data image URL with a non-empty subtype and at least
 * `MIN_EMBEDDED_IMAGE_BASE64_LENGTH` base64 characters.
 */
function readDataImageUrl(text: string, start: number): { end: number; url: string } | null {
  if (!text.startsWith(DATA_IMAGE_PREFIX, start)) {
    return null
  }

  // Walk the MIME subtype that follows "data:image/".
  const subtypeStart = start + DATA_IMAGE_PREFIX.length
  let subtypeEnd = subtypeStart

  for (; subtypeEnd < text.length; subtypeEnd += 1) {
    if (!isImageMimeCode(text.charCodeAt(subtypeEnd))) {
      break
    }
  }

  const hasSubtype = subtypeEnd !== subtypeStart

  if (!hasSubtype || !text.startsWith(BASE64_MARKER, subtypeEnd)) {
    return null
  }

  // Walk the base64 payload that follows ";base64,".
  const payloadStart = subtypeEnd + BASE64_MARKER.length
  let payloadEnd = payloadStart

  for (; payloadEnd < text.length; payloadEnd += 1) {
    if (!isBase64Code(text.charCodeAt(payloadEnd))) {
      break
    }
  }

  // Short runs are treated as ordinary text rather than embedded images.
  if (payloadEnd - payloadStart < MIN_EMBEDDED_IMAGE_BASE64_LENGTH) {
    return null
  }

  return { end: payloadEnd, url: text.slice(start, payloadEnd) }
}
/**
 * Compute the span of `text` to remove for a data URL found at
 * `[dataStart, dataEnd)`.
 *
 * When the URL is wrapped in a JSON `{"type":"image_url","image_url":{"url":"…"}}`
 * envelope — opener ending right before the URL AND closer starting right
 * after it — the span is widened to cover the whole envelope. Otherwise only
 * the URL itself is removed. Both lookups are limited to small fixed windows
 * (`JSON_IMAGE_OPEN_MAX` / `JSON_IMAGE_CLOSE_MAX`) around the URL.
 */
function embeddedImageRemovalRange(text: string, dataStart: number, dataEnd: number): { end: number; start: number } {
  const urlOnly = { end: dataEnd, start: dataStart }

  const windowStart = Math.max(0, dataStart - JSON_IMAGE_OPEN_MAX)
  const opener = text.slice(windowStart, dataStart).match(JSON_IMAGE_OPEN_RE)

  if (!opener || opener.index === undefined) {
    return urlOnly
  }

  const closer = text.slice(dataEnd, dataEnd + JSON_IMAGE_CLOSE_MAX).match(JSON_IMAGE_CLOSE_RE)

  if (!closer) {
    return urlOnly
  }

  // opener.index is relative to the sliced window, so shift it back.
  return { end: dataEnd + closer[0].length, start: windowStart + opener.index }
}
/**
 * Tidy text after embedded images were cut out: drop spaces/tabs that sit
 * directly before a newline, cap runs of blank lines at one, and trim both
 * ends.
 */
function normalizeCleanedText(text: string): string {
  const withoutTrailingBlanks = text.replace(/[ \t]+\n/g, '\n')
  const withCappedBlankLines = withoutTrailingBlanks.replace(/\n{3,}/g, '\n\n')

  return withCappedBlankLines.trim()
}
export function extractEmbeddedImages(text: string): EmbeddedImageExtraction {
if (!text || !text.includes('data:image/')) {
if (!text || !text.includes(DATA_IMAGE_PREFIX)) {
return { cleanedText: text, images: [] }
}
const images: string[] = []
const pieces: string[] = []
let appendCursor = 0
let searchCursor = 0
const cleanedText = text
.replace(EMBEDDED_IMAGE_RE, (_match, _open, dataUrl: string) => {
images.push(dataUrl)
while (searchCursor < text.length) {
const dataStart = text.indexOf(DATA_IMAGE_PREFIX, searchCursor)
return ''
})
.replace(/[ \t]+\n/g, '\n')
.replace(/\n{3,}/g, '\n\n')
.trim()
if (dataStart === -1) {
break
}
return { cleanedText, images }
const dataUrl = readDataImageUrl(text, dataStart)
if (!dataUrl) {
searchCursor = dataStart + DATA_IMAGE_PREFIX.length
continue
}
const range = embeddedImageRemovalRange(text, dataStart, dataUrl.end)
pieces.push(text.slice(appendCursor, range.start))
images.push(dataUrl.url)
appendCursor = range.end
searchCursor = range.end
}
if (!images.length) {
return { cleanedText: text, images: [] }
}
pieces.push(text.slice(appendCursor))
return { cleanedText: normalizeCleanedText(pieces.join('')), images }
}
export function embeddedImageUrls(text: string): string[] {

View file

@ -76,6 +76,7 @@ function persist(states: Record<string, PaneStateSnapshot>) {
}
export const $paneStates = atom<Record<string, PaneStateSnapshot>>(load())
export const $paneHoverRevealSuppressed = atom(false)
$paneStates.subscribe(persist)
@ -143,3 +144,4 @@ export function setPaneWidthOverride(id: string, width: number | undefined) {
export const clearPaneWidthOverride = (id: string) => setPaneWidthOverride(id, undefined)
export const getPaneStateSnapshot = (id: string) => $paneStates.get()[id]
export const setPaneHoverRevealSuppressed = (suppressed: boolean) => $paneHoverRevealSuppressed.set(suppressed)

View file

@ -579,6 +579,51 @@ export interface ToolsetConfig {
active_provider: string | null
}
/** Value of `ComputerUseStatus.source`. Every field is optional.
 *
 * NOTE(review): field semantics are not visible in this file — presumably
 * process-attribution detail for the permission probe; confirm against the
 * backend payload. */
export interface ComputerUsePermissionSource {
attribution?: string
executable?: string
note?: string
pid?: number
responsible_ppid?: number
}
/** One entry of `ComputerUseStatus.checks` (a `cua-driver doctor` probe). */
export interface ComputerUseCheck {
label: string
status: string
message: string
}
/** Shape of `GET /api/tools/computer-use/status`.
 *
 * cua-driver runs on macOS, Windows, and Linux. `ready` is the single OS-aware
 * readiness signal: on macOS both TCC grants (Accessibility + Screen
 * Recording, which attach to cua-driver's own `com.trycua.driver` identity,
 * not Hermes); elsewhere, driver health from `cua-driver doctor`. `null`
 * means unknown (binary missing / probe failed). */
export interface ComputerUseStatus {
/** `sys.platform`: "darwin" | "win32" | "linux" | ... */
platform: string
/** cua-driver has a runtime backend for this platform. */
platform_supported: boolean
/** cua-driver binary resolved on PATH. */
installed: boolean
/** e.g. "cua-driver 0.5.1", or null when unknown. */
version: string | null
/** Unified readiness — both TCC grants (macOS) or driver health (else). */
ready: boolean | null
/** Whether a permission grant flow exists (macOS-only TCC). */
can_grant: boolean
/** Cross-platform `cua-driver doctor` probes. */
checks: ComputerUseCheck[]
/** macOS TCC detail — `null` off macOS or when unknown. */
accessibility: boolean | null
screen_recording: boolean | null
screen_recording_capturable: boolean | null
source: ComputerUsePermissionSource | null
/** Populated when the status probe itself failed. */
error: string | null
}
export interface SessionSearchResult {
/** Lineage root of the matched conversation. Stable across compression and
* used as the durable pin id; falls back to session_id when absent. */

View file

@ -248,6 +248,12 @@ def _normalize_job_record(job: Dict[str, Any]) -> Dict[str, Any]:
state = "scheduled" if normalized.get("enabled", True) else "paused"
normalized["state"] = state
# Legacy jobs (created before per-job profile scoping) have no profile
# field. Default them to "default" so the scheduler treats them as
# root-profile jobs — matching their pre-existing behaviour.
prof = normalized.get("profile")
normalized["profile"] = (str(prof).strip() if isinstance(prof, str) and prof.strip() else "default")
return normalized
@ -268,6 +274,43 @@ def _secure_file(path: Path):
pass
def current_profile_name() -> str:
    """Name the profile that is active in the process creating a job.

    ``~/.hermes``            -> ``"default"``
    ``~/.hermes/profiles/X`` -> ``"X"``

    The value is recorded on a job at create time so the job later executes
    under its owning profile's environment (.env / config.yaml /
    credentials), whichever profile's ticker picks it up from the shared
    root store (#32091).

    Best-effort: any failure while importing or calling the resolver, or an
    empty result from it, yields ``"default"``.
    """
    fallback = "default"
    try:
        from agent.file_safety import _resolve_active_profile_name as _active_profile
        resolved = _active_profile()
    except Exception:
        return fallback
    else:
        return resolved or fallback
def resolve_profile_home(profile_name: Optional[str]) -> Optional[Path]:
    """Map a job's ``profile`` name to the HERMES_HOME it should run under.

    ``"default"`` / empty / ``None`` -> the root home (``get_default_hermes_root()``).
    ``"<name>"``                     -> ``<root>/profiles/<name>``.

    Args:
        profile_name: Profile name stored on the job record (may be ``None``
            or blank for legacy jobs).

    Returns:
        The resolved home directory, or ``None`` when the name cannot be
        mapped to an existing profile directory. ``None`` lets the scheduler
        fall back to the ticker's own home and log a warning rather than
        pointing a job at a missing profile. A name that is not a single
        path component (``..``, ``a/b``, an absolute path) is also answered
        with ``None``: joined as-is it would resolve outside
        ``<root>/profiles`` and hand the job an arbitrary directory as its
        HERMES_HOME.
    """
    name = (profile_name or "").strip()
    root = get_default_hermes_root()
    if not name or name == "default":
        return root.resolve()

    # Path(name).name != name catches separators and absolute paths;
    # "." and ".." are single components, so they are rejected explicitly.
    if name in {".", ".."} or Path(name).name != name:
        return None

    candidate = (root / "profiles" / name).resolve()
    return candidate if candidate.is_dir() else None
def ensure_dirs():
"""Ensure cron directories exist with secure permissions."""
CRON_DIR.mkdir(parents=True, exist_ok=True)
@ -772,6 +815,7 @@ def create_job(
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
no_agent: bool = False,
profile: Optional[str] = None,
) -> Dict[str, Any]:
"""
Create a new cron job.
@ -816,6 +860,13 @@ def create_job(
and deliver its stdout directly. Empty stdout = silent (no
delivery). Requires ``script`` to be set. Ideal for classic
watchdogs and periodic alerts that don't need LLM reasoning.
profile: Optional Hermes profile name the job should EXECUTE under
(its .env / config.yaml / credentials). Defaults to the active
profile of the session creating the job. The shared root store
holds every profile's jobs (#32091); this field is what scopes
a job's runtime environment to its owning profile so it runs
with that profile's permissions regardless of which ticker
picks it up.
Returns:
The created job dict
@ -850,6 +901,11 @@ def create_job(
normalized_toolsets = normalized_toolsets or None
normalized_workdir = _normalize_workdir(workdir)
normalized_no_agent = bool(no_agent)
# Tag the job with the profile whose environment it should execute under.
# When the caller does not pass one explicitly, capture the active profile
# of the session creating the job so a job created under `hermes -p donna`
# runs as donna even though it now lives in the shared root store (#32091).
normalized_profile = (str(profile).strip() if isinstance(profile, str) else "") or current_profile_name()
# no_agent jobs are meaningless without a script — the script IS the job.
# Surface this as a clear ValueError at create time so bad configs never
@ -903,6 +959,7 @@ def create_job(
"origin": origin, # Tracks where job was created for "origin" delivery
"enabled_toolsets": normalized_toolsets,
"workdir": normalized_workdir,
"profile": normalized_profile,
}
with _jobs_lock():

View file

@ -1857,6 +1857,32 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ["TERMINAL_CWD"] = _job_workdir
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
# Scope this job's execution to its owning profile's HERMES_HOME (#32091).
# The shared root store holds every profile's jobs, but a job must run with
# the .env / config.yaml / credentials of the profile that created it — not
# whichever profile's ticker happened to pick it up. We set both the
# in-process ContextVar override (consumed by _get_hermes_home() for the
# config/.env/script loads below) AND os.environ["HERMES_HOME"] (inherited
# by any child subprocess the agent spawns). tick() routes profile-scoped
# jobs to the single-worker sequential pool, so mutating os.environ here is
# safe — they never overlap. Restored in the finally block.
from cron.jobs import resolve_profile_home
from hermes_constants import set_hermes_home_override
_job_profile = (job.get("profile") or "default").strip() or "default"
_profile_home = resolve_profile_home(_job_profile)
_prior_hermes_home = os.environ.get("HERMES_HOME", "_UNSET_")
_hermes_home_token = None
if _profile_home is not None and _profile_home != _get_hermes_home().resolve():
os.environ["HERMES_HOME"] = str(_profile_home)
_hermes_home_token = set_hermes_home_override(str(_profile_home))
logger.info("Job '%s': executing under profile %r (HERMES_HOME=%s)",
job_id, _job_profile, _profile_home)
elif _profile_home is None and _job_profile != "default":
logger.warning(
"Job '%s': profile %r no longer exists — running under the "
"ticker's profile instead", job_id, _job_profile,
)
try:
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
@ -2189,13 +2215,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
# would otherwise be delivered as if it were the agent's reply and the
# job's `last_status` set to "ok". Raise so the except handler below
# builds the proper failure tuple. (issue #17855)
if result.get("failed") is True or result.get("completed") is False:
turn_exit_reason = str(result.get("turn_exit_reason") or "")
final_response_text = (result.get("final_response") or "").strip()
max_iteration_summary = (
result.get("failed") is not True
and result.get("completed") is False
and turn_exit_reason.startswith("max_iterations_reached(")
and bool(final_response_text)
)
if result.get("failed") is True or (result.get("completed") is False and not max_iteration_summary):
_err_text = (
result.get("error")
or (result.get("final_response") or "").strip()
or final_response_text
or "agent reported failure"
)
raise RuntimeError(_err_text)
if max_iteration_summary:
logger.warning(
"Job '%s' reached the iteration limit but produced a final fallback response; "
"delivering the response instead of failing the cron run",
job_name,
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
@ -2254,6 +2294,19 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
os.environ.pop("TERMINAL_CWD", None)
else:
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
# Restore HERMES_HOME to the ticker's value when this job overrode it
# for profile-scoped execution (#32091). Mirrors the TERMINAL_CWD
# restore above; the sequential pool guarantees no overlap.
if _hermes_home_token is not None:
try:
from hermes_constants import reset_hermes_home_override
reset_hermes_home_override(_hermes_home_token)
except Exception:
pass
if _prior_hermes_home == "_UNSET_":
os.environ.pop("HERMES_HOME", None)
else:
os.environ["HERMES_HOME"] = _prior_hermes_home
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
for _var_name in _cron_delivery_vars:
@ -2459,12 +2512,26 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
body."""
return run_one_job(job, adapters=adapters, loop=loop, verbose=verbose)
# Partition due jobs: those with a per-job workdir mutate
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
# so they MUST run sequentially to avoid corrupting each other. Jobs
# without a workdir leave env untouched and stay parallel-safe.
sequential_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
# Partition due jobs: those that mutate process-global os.environ
# inside run_job MUST run sequentially to avoid corrupting each other.
# Two cases mutate env:
# - a per-job workdir sets os.environ["TERMINAL_CWD"].
# - a per-job profile whose HERMES_HOME differs from the ticker's
# sets os.environ["HERMES_HOME"] to scope execution (#32091).
# Jobs that need neither leave env untouched and stay parallel-safe.
def _needs_sequential(j: dict) -> bool:
    """Return True when running job ``j`` mutates process-global os.environ.

    That is the case when the job has a non-blank ``workdir``, or when its
    profile resolves to a home different from the ticker's own. A failure
    while resolving the profile home is treated as "no override".
    """
    if (j.get("workdir") or "").strip():
        return True
    prof = (j.get("profile") or "default").strip() or "default"
    try:
        from cron.jobs import resolve_profile_home
        phome = resolve_profile_home(prof)
    except Exception:
        phome = None
    return phome is not None and phome != _get_hermes_home().resolve()

# Classify every due job exactly once. The predicate consults the
# filesystem, so evaluating it separately for each list could give two
# different answers for the same job (profile dir created/removed in
# between) and leave it in both pools — or in neither.
sequential_jobs: list = []
parallel_jobs: list = []
for _due_job in due_jobs:
    if _needs_sequential(_due_job):
        sequential_jobs.append(_due_job)
    else:
        parallel_jobs.append(_due_job)
_results: list = []
_all_futures: list = []

View file

@ -1066,12 +1066,48 @@ def _media_delivery_denied_paths() -> List[Path]:
denied.append(home / sub)
# The active Hermes profile and shared Hermes root both contain control
# files and credentials. Only cache subdirectories under them are
# explicitly allowlisted above.
# explicitly allowlisted above (matched BEFORE this denylist in
# validate_media_delivery_path, so generated media still delivers).
#
# These are the per-file credential / secret stores that live at the
# HERMES_HOME root. The set mirrors the canonical read guard in
# agent/file_safety.py (get_read_block_error / build_write_denied_*) so the
# delivery (read/exfil) side can't trail the write side: a credential the
# agent is forbidden to write or read must also never be auto-attached to a
# chat reply. Enumerated explicitly per-file rather than denying the whole
# tree, so skills/, logs/, and ad-hoc agent-written files under ~/.hermes
# stay deliverable (see #32090, #34425).
_ROOT_CREDENTIAL_FILES = (
".env",
"auth.json",
"auth.lock",
"credentials",
"config.yaml",
# Anthropic PKCE / OAuth refresh credential store.
".anthropic_oauth.json",
# Google Workspace skill: auto-refreshing OAuth token (mtime bumps
# every turn, which defeated the strict-mode recency window) plus the
# pending-exchange session/verifier file.
"google_token.json",
"google_oauth_pending.json",
os.path.join("auth", "google_oauth.json"),
# Webhook subscription HMAC secrets.
"webhook_subscriptions.json",
# Bitwarden Secrets Manager plaintext disk cache.
os.path.join("cache", "bws_cache.json"),
)
# Directory trees whose every child is credential material. (MCP OAuth
# tokens under mcp-tokens/ are handled by the sibling targeted PR #37222;
# session/kanban SQLite stores by #41071 — kept out of this diff to avoid
# overlap.)
_ROOT_CREDENTIAL_DIRS = (
"pairing",
)
for hermes_root in (_HERMES_HOME, _HERMES_ROOT):
denied.append(hermes_root / ".env")
denied.append(hermes_root / "auth.json")
denied.append(hermes_root / "credentials")
denied.append(hermes_root / "config.yaml")
for rel in _ROOT_CREDENTIAL_FILES:
denied.append(hermes_root / rel)
for rel in _ROOT_CREDENTIAL_DIRS:
denied.append(hermes_root / rel)
return denied
@ -1190,9 +1226,12 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
return str(resolved)
# Non-strict mode (default): accept anything not on the denylist.
# The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, ~/.hermes/.env,
# ~/.hermes/auth.json, etc. — so the obvious prompt-injection sites
# (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``) remain rejected.
# The denylist still blocks /etc, /proc, ~/.ssh, ~/.aws, and the
# credential/secret stores under the Hermes root (~/.hermes/.env,
# auth.json, .anthropic_oauth.json, google_token.json, pairing/, ...) —
# so the obvious prompt-injection / credential-exfil sites
# (``MEDIA:/etc/passwd``, ``MEDIA:~/.ssh/id_rsa``,
# ``MEDIA:~/.hermes/google_token.json``) remain rejected.
if not _media_delivery_strict_mode():
if _path_under_denied_prefix(resolved):
return None

View file

@ -2343,7 +2343,7 @@ class GatewaySlashCommandsMixin:
from gateway.run import _hermes_home
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
from tools.memory_tool import MemoryStore
from tools.memory_tool import load_on_disk_store
raw_args = event.get_command_args().strip()
args = raw_args.split() if raw_args else []
@ -2363,8 +2363,8 @@ class GatewaySlashCommandsMixin:
# Apply approved writes against a fresh on-disk store (the gateway has
# no long-lived agent; the store persists to the same MEMORY/USER.md).
store = MemoryStore()
store.load_from_disk()
# load_on_disk_store() honors the user's configured char limits.
store = load_on_disk_store()
out = handle_pending_subcommand(
wa.MEMORY, args, memory_store=store, set_mode_fn=_set_approval,

View file

@ -1361,6 +1361,17 @@ class CLICommandsMixin:
parts = cmd.strip().split()
args = parts[1:] if len(parts) > 1 else []
store = getattr(self.agent, "_memory_store", None) if getattr(self, "agent", None) else None
if store is None:
# No live agent store (e.g. /memory approve invoked from the Desktop
# GUI, or any context without an active agent). Apply against a freshly
# loaded on-disk store, mirroring the gateway path
# (gateway/slash_commands.py): it persists to the same MEMORY/USER.md
# and creates MEMORY.md on the first approved write. Without this the
# shared handler returns "memory store unavailable". See #46783.
# load_on_disk_store() honors the user's configured char limits, so
# an approval here enforces the same caps as the live agent would.
from tools.memory_tool import load_on_disk_store
store = load_on_disk_store()
out = handle_pending_subcommand(
wa.MEMORY, args,
memory_store=store,

View file

@ -1535,6 +1535,25 @@ DEFAULT_CONFIG = {
"timeout": 60,
"extra_body": {},
},
# Background review — the post-turn self-improvement fork that decides
# whether to save a memory / patch a skill. "auto" (default) = run on
# the main chat model, replaying the full conversation, which is already
# warm in the prompt cache (cheap cache reads) — unchanged, optimal.
# Set provider/model to a cheaper model (e.g. openrouter
# google/gemini-3-flash-preview) to run the review there for ~3-5x lower
# cost. A different model can't reuse the main prompt cache anyway, so
# the fork automatically replays a compact digest instead of the full
# transcript when routed (minimises the cold-write). Same model = full
# replay; different model = digest. Quality holds (memory capture
# identical, skill near-identical in benchmarks).
"background_review": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 120,
"extra_body": {},
},
},
"display": {

View file

@ -120,6 +120,9 @@ def cron_list(show_all: bool = False):
workdir = job.get("workdir")
if workdir:
print(f" Workdir: {workdir}")
_prof = job.get("profile")
if _prof and _prof != "default":
print(f" Profile: {_prof}")
# Execution history
last_status = job.get("last_status")
@ -259,6 +262,7 @@ def cron_create(args):
script=getattr(args, "script", None),
workdir=getattr(args, "workdir", None),
no_agent=getattr(args, "no_agent", False) or None,
profile=getattr(args, "profile", None),
)
if not result.get("success"):
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@ -275,6 +279,9 @@ def cron_create(args):
print(" Mode: no-agent (script stdout delivered directly)")
if job_data.get("workdir"):
print(f" Workdir: {job_data['workdir']}")
_prof = job_data.get("profile")
if _prof and _prof != "default":
print(f" Profile: {_prof}")
print(f" Next run: {result['next_run_at']}")
return 0

View file

@ -12507,6 +12507,33 @@ def main():
action="store_true",
help="Emit the raw structured payload as JSON (same shape as `tools/call`).",
)
computer_use_perms = computer_use_sub.add_parser(
"permissions",
help="Check or grant macOS Accessibility + Screen Recording (macOS)",
description=(
"Computer Use drives the Mac through cua-driver, whose TCC grants\n"
"attach to cua-driver's own identity (com.trycua.driver) — not the\n"
"terminal or the Hermes app. `status` reports the driver's grant\n"
"state; `grant` launches CuaDriver via LaunchServices so the macOS\n"
"permission dialog is attributed to the process that does the work."
),
)
computer_use_perms_sub = computer_use_perms.add_subparsers(
dest="computer_use_perms_action"
)
computer_use_perms_status = computer_use_perms_sub.add_parser(
"status",
help="Report Accessibility + Screen Recording grant state (read-only)",
)
computer_use_perms_status.add_argument(
"--json",
action="store_true",
help="Emit the normalized permission payload as JSON.",
)
computer_use_perms_sub.add_parser(
"grant",
help="Request the grants (opens the dialog attributed to CuaDriver)",
)
def cmd_computer_use(args):
action = getattr(args, "computer_use_action", None)
@ -12564,6 +12591,41 @@ def main():
json_output=bool(getattr(args, "json", False)),
)
sys.exit(code)
if action == "permissions":
perms_action = getattr(args, "computer_use_perms_action", None)
if perms_action == "grant":
from tools.computer_use.permissions import request_permissions_grant
sys.exit(request_permissions_grant())
if perms_action == "status":
import json as _json
from tools.computer_use.permissions import computer_use_status
st = computer_use_status()
if bool(getattr(args, "json", False)):
print(_json.dumps(st, indent=2, sort_keys=True))
sys.exit(0 if st["ready"] else 1)
if not st["platform_supported"]:
print(f"Computer Use is not supported on {st['platform']}.")
sys.exit(1)
if not st["installed"]:
print("cua-driver: not installed. Run: hermes computer-use install")
sys.exit(1)
glyph = lambda v: "" if v is True else ("" if v is False else "") # noqa: E731
print(f"cua-driver: {st['version'] or 'installed'} ({st['platform']})")
if st["can_grant"]: # macOS TCC permissions
print(f" {glyph(st['accessibility'])} Accessibility")
print(f" {glyph(st['screen_recording'])} Screen Recording")
if not st["ready"]:
print(" Grant: hermes computer-use permissions grant")
else: # no TCC model — readiness is driver health
print(f" {glyph(st['ready'])} driver health (no permission toggles on {st['platform']})")
for c in st["checks"]:
if c["status"] != "ok":
print(f"{c['label']}: {c['message']}")
if st["error"]:
print(f"{st['error']}")
sys.exit(0 if st["ready"] else 1)
computer_use_perms.print_help()
return
# No subcommand → show help
computer_use_parser.print_help()

View file

@ -70,6 +70,10 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None:
"--workdir",
help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).",
)
cron_create.add_argument(
"--profile",
help="Hermes profile the job should EXECUTE under (its .env / config.yaml / credentials). Defaults to the profile that created the job. Jobs live in one shared root store (#32091); this scopes a job's runtime environment to the named profile so it runs with that profile's permissions.",
)
# cron edit
cron_edit = cron_subparsers.add_parser(

View file

@ -667,102 +667,31 @@ def _pip_install(
def _check_cua_driver_asset_for_arch() -> bool:
    """Check whether the latest CUA release ships an asset for this OS+arch.

    Returns True if the asset likely exists (or if we cannot determine it).
    Returns False and prints a warning when the asset is confirmed missing,
    so callers can skip the install attempt and avoid a raw 404.

    Recognizes release-asset names across all supported platforms:

    * macOS (``Darwin``): arm64 always ships; x86_64/amd64 probed.
    * Windows (``AMD64``/``ARM64``): amd64/x86_64 and arm64 probed.
    * Linux (``x86_64``/``aarch64``): x86_64/amd64 and aarch64/arm64 probed.

    The probe is fail-open: an unrecognized architecture, a non-list API
    payload, no ``cua-driver-rs-v*`` release in the listing, or any exception
    raised while fetching/parsing all result in True.
    """
    import platform as _plat
    import urllib.request
    system = _plat.system()
    machine = _plat.machine().lower()  # e.g. "x86_64", "arm64", "amd64", "aarch64"
    # arm64 (Apple Silicon) macOS assets are always published — short-circuit
    # to preserve the original fail-open behaviour and avoid a network call.
    if system == "Darwin" and machine == "arm64":
        return True
    # Map this host's arch to the set of asset-name substrings we'll accept.
    # Asset names vary by OS (darwin-x86_64, windows-amd64, linux-aarch64, …),
    # so we match on the architecture token only and let any of the common
    # aliases satisfy the probe.
    if machine in {"x86_64", "amd64", "x64"}:
        arch_names = {"x86_64", "amd64", "x64"}
        arch_label = "x86_64/amd64"
    elif machine in {"arm64", "aarch64"}:
        arch_names = {"arm64", "aarch64"}
        arch_label = "arm64/aarch64"
    else:
        # Unknown arch — fail open and let the installer surface the error.
        return True
    # Probe the cua-driver release for an OS+arch asset before falling through
    # to the upstream installer.
    #
    # The cua-driver-rs binaries are published to the trycua/cua monorepo under
    # tag prefix ``cua-driver-rs-v*``. The repo's ``releases/latest`` is NOT
    # that — it floats across the monorepo's other components (agent-*,
    # computer-*, lume-*, train-*), most of which ship zero binary assets. So
    # we list releases and pick the newest ``cua-driver-rs-v*`` tag, matching
    # what the upstream install.sh does. Failing to find one => fail open and
    # let the installer (which resolves the tag itself) be the source of truth.
    driver_tag_prefix = "cua-driver-rs-v"
    api_url = (
        "https://api.github.com/repos/trycua/cua/releases?per_page=100"
    )
    try:
        req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github+json"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            # NOTE(review): `_json` is not imported in this block — presumably a
            # module-level alias for the stdlib json module; confirm.
            releases = _json.loads(resp.read().decode())
        if not isinstance(releases, list):
            # Unexpected payload shape (e.g. an error object) — fail open.
            return True
        # GitHub returns releases newest-first; take the first cua-driver-rs tag.
        driver_release = next(
            (
                r for r in releases
                if str(r.get("tag_name", "")).startswith(driver_tag_prefix)
            ),
            None,
        )
        if driver_release is None:
            # No cua-driver-rs release surfaced (API hiccup / unexpected shape).
            # Fail open — the installer resolves the tag on its own.
            return True
        tag = driver_release.get("tag_name", "")
        assets = driver_release.get("assets", [])
        # OS token gates the asset alongside arch so a darwin asset can't
        # satisfy a Linux probe (every cua-driver-rs release ships all three
        # OSes, so the arch token alone would always match).
        # For a system outside the mapping the token is "", which is a
        # substring of every name, so only the arch token is checked.
        os_token = {"Darwin": "darwin", "Windows": "windows", "Linux": "linux"}.get(system, "")
        has_asset = any(
            os_token in (name := a_info.get("name", "").lower())
            and any(a in name for a in arch_names)
            for a_info in assets
        )
        if not has_asset:
            _print_warning(
                f" Latest cua-driver release ({tag}) has no {system} {arch_label} asset."
            )
            _print_info(
                " CUA Driver may not yet ship a build for this platform."
            )
            _print_info(
                " See: https://github.com/trycua/cua/releases"
            )
            return False
    except Exception:
        # Network / API failure — proceed and let the installer handle it.
        pass
    return True
# The asset-probe that lived here used to hit `/releases/latest` on
# trycua/cua and inspect the release's asset list before piping the
# installer to bash. It was broken in two places:
#
# 1. cua-driver-rs releases are marked **prerelease** on every cut,
# and GitHub's `/releases/latest` endpoint explicitly skips
# prereleases. On the live trycua/cua repo today, `/releases/latest`
# returns the Python `cua-agent v0.8.3` package (zero binary
# assets) instead of `cua-driver-rs-v0.6.0` (19 binary assets).
# The probe then reported "no asset for this arch" and skipped the
# install on every non-arm64 host — Linux x86_64, Windows, macOS
# Intel, Linux arm64 — even when the upstream installer would have
# succeeded.
# 2. Even with the right endpoint, we'd be duplicating tag-resolution
# logic the upstream installer already does correctly via
# `CUA_DRIVER_RS_BAKED_VERSION` (auto-baked by CD on every release,
# with an API fallback). Drift between our probe and theirs is a
# maintenance hazard.
#
# Resolution: trust the upstream installer. For fresh installs, run
# install.sh directly — it exits with a clean error if the target arch
# has no asset. For the upgrade path, `cua_driver_update_check()` (which calls
# `cua-driver check-update --json`) gives us the canonical update
# answer from the binary itself — same tag-resolution as the installer,
# no Python-side duplication.
def install_cua_driver(upgrade: bool = False) -> bool:
@ -811,8 +740,9 @@ def install_cua_driver(upgrade: bool = False) -> bool:
_print_warning(f" {fetch_tool} not found — install manually:")
_print_info(" https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
return False
if not _check_cua_driver_asset_for_arch():
return False
# Pre-install asset probe deleted — see comment near the top of
# tools_config.py for why. install.sh has CUA_DRIVER_RS_BAKED_VERSION
# baked in by CD and errors cleanly on missing-arch assets.
return _run_cua_driver_installer(label="Installing")
# Already installed and caller didn't ask to upgrade → just confirm.
@ -841,8 +771,10 @@ def install_cua_driver(upgrade: bool = False) -> bool:
_print_warning(f" {fetch_tool} not found — cannot refresh cua-driver.")
return bool(binary)
if not _check_cua_driver_asset_for_arch():
return bool(binary)
# Pre-install asset probe deleted (see top-of-file comment). The
# `cua_driver_update_check()` call further down asks the installed
# cua-driver binary itself whether an update exists — same
# tag-resolution as the installer, no duplication.
# Skip the (network) re-install when the driver itself reports it's already
# on the latest release. Best-effort: an older driver (no check-update

View file

@ -1322,13 +1322,35 @@ def _dashboard_local_update_managed_externally() -> bool:
in-browser local update action. Keep this dashboard capability separate
from install-method detection: manual git/pip installs inside containers can
still behave like their actual install method in the CLI.
However, when the install method is ``git`` (a bind-mounted checkout inside
a container e.g. the hermes-webui image sharing the Hermes source tree),
the dashboard's ``hermes update`` button is the correct update path and
should not be suppressed. Other containerized install methods remain
externally managed unless their apply path is proven safe inside the
running container filesystem.
"""
if _default_hermes_root_is_opt_data():
return True
try:
from hermes_constants import is_container
return is_container()
if not is_container():
return False
except Exception:
return False
# We are inside a container, but the install may still be self-managed.
# If the install method is git, the dashboard update button works against
# the mounted checkout and should be offered. Keep pip blocked inside
# containers: its apply path mutates the running container filesystem and
# is not the bind-mounted checkout case this gate is meant to recover.
try:
method = detect_install_method(PROJECT_ROOT)
if method == "git":
return False
except Exception:
pass
return True
def _managed_files_policy(request: Request, *, create_root: bool = True) -> ManagedFilesPolicy:
@ -8327,6 +8349,7 @@ async def install_mcp_catalog_entry(body: MCPCatalogInstall, profile: Optional[s
# Register the mcp-install action log so /api/actions/mcp-install/status works.
_ACTION_LOG_FILES.setdefault("mcp-install", "action-mcp-install.log")
_ACTION_LOG_FILES.setdefault("computer-use-grant", "action-computer-use-grant.log")
# ---------------------------------------------------------------------------
@ -10649,6 +10672,63 @@ async def run_toolset_post_setup(
return {"ok": True, "pid": proc.pid, "name": "tools-post-setup", "key": body.key}
# ---------------------------------------------------------------------------
# Computer Use (cua-driver) — cross-platform readiness + macOS permission grant
#
# cua-driver runs on macOS, Windows, and Linux. The desktop card reflects
# per-OS readiness: on macOS the Accessibility + Screen Recording TCC grants
# (which attach to cua-driver's OWN identity, com.trycua.driver — not Hermes,
# so no app entitlement is involved); elsewhere, driver health from
# `cua-driver doctor`. The grant flow is macOS-only (no TCC toggles to request
# on Windows/Linux).
# ---------------------------------------------------------------------------
@app.get("/api/tools/computer-use/status")
async def get_computer_use_status(profile: Optional[str] = None):
    """Return the Computer Use readiness payload for the desktop card.

    The payload is whatever ``tools.computer_use.permissions.computer_use_status``
    returns (see that function for its shape); it is evaluated while the
    requested profile's scope is active.

    Args:
        profile: Optional profile name passed to ``_profile_scope``.
    """
    from tools.computer_use.permissions import computer_use_status

    with _profile_scope(profile):
        status_payload = computer_use_status()
    return status_payload
@app.post("/api/tools/computer-use/permissions/grant")
async def grant_computer_use_permissions(profile: Optional[str] = None):
    """Spawn ``hermes computer-use permissions grant`` as a background action.

    macOS-only: ``cua-driver permissions grant`` launches CuaDriver via
    LaunchServices so the TCC dialog is attributed to com.trycua.driver, then
    waits for approval. The frontend polls ``GET /api/actions/computer-use-
    grant/status`` and re-reads ``/status`` once it exits. Windows/Linux have
    no TCC toggles to grant, so this returns 400 there.

    Args:
        profile: Optional profile name; converted to CLI arguments with
            ``_profile_cli_args`` and prepended to the spawned command.

    Returns:
        ``{"ok": True, "pid": <spawned pid>, "name": "computer-use-grant"}``.

    Raises:
        HTTPException: 400 when not running on macOS; 500 when spawning the
            action fails. An ``HTTPException`` raised by the spawn helper
            itself is propagated unchanged.
    """
    if sys.platform != "darwin":
        raise HTTPException(
            status_code=400,
            detail="Computer Use permission grants are a macOS concept.",
        )
    try:
        proc = _spawn_hermes_action(
            _profile_cli_args(profile)
            + ["computer-use", "permissions", "grant"],
            "computer-use-grant",
        )
    except HTTPException:
        # Already carries the intended status code — don't rewrap as a 500.
        raise
    except Exception as exc:
        _log.exception("Failed to spawn computer-use permissions grant")
        # Chain explicitly so the original failure is recorded as the cause
        # rather than as an incidental "during handling" context.
        raise HTTPException(
            status_code=500, detail=f"Failed to request permissions: {exc}"
        ) from exc
    return {"ok": True, "pid": proc.pid, "name": "computer-use-grant"}
# ---------------------------------------------------------------------------
# Raw YAML config endpoint
# ---------------------------------------------------------------------------

View file

@ -4598,6 +4598,83 @@ class SessionDB:
return None
return dict(row) if row else None
def delete_telegram_topic_binding(
    self,
    *,
    chat_id: str,
    thread_id: str,
) -> int:
    """Delete the topic binding for one (chat, thread) pair.

    Intended for the case where the Telegram Bot API has confirmed a topic
    was deleted externally (``Thread not found`` after the same-thread retry
    already failed). Leaving the row in ``telegram_dm_topic_bindings`` lets
    ``gateway.run._recover_telegram_topic_thread_id`` keep redirecting
    inbound messages to the deleted topic (issue #31501).

    If the deleted row was the chat's last binding, the chat's row in
    ``telegram_dm_topic_mode`` is set to ``enabled = 0`` within the same
    write callback, so a chat is not left in topic mode with zero lanes.

    Both ids are coerced to ``str`` before being bound to the queries.

    Returns:
        The number of binding rows deleted. 0 when the binding was already
        absent or when the DELETE raises ``sqlite3.OperationalError`` (e.g.
        the tables have not been migrated yet); this method does not raise
        for those cases.
    """
    chat_key = str(chat_id)
    thread_key = str(thread_id)
    removed = 0

    def _prune(conn):
        nonlocal removed
        try:
            removed = conn.execute(
                """
                DELETE FROM telegram_dm_topic_bindings
                WHERE chat_id = ? AND thread_id = ?
                """,
                (chat_key, thread_key),
            ).rowcount or 0
        except sqlite3.OperationalError:
            # Tables don't exist yet — nothing to prune.
            removed = 0
            return
        if not removed:
            return
        # The chat may now have no lanes left; if so, switch topic mode off
        # inside this same write so recovery cannot observe the gap.
        try:
            survivor = conn.execute(
                """
                SELECT 1 FROM telegram_dm_topic_bindings
                WHERE chat_id = ? LIMIT 1
                """,
                (chat_key,),
            ).fetchone()
            if survivor is None:
                conn.execute(
                    "UPDATE telegram_dm_topic_mode "
                    "SET enabled = 0, updated_at = ? WHERE chat_id = ?",
                    (time.time(), chat_key),
                )
        except sqlite3.OperationalError:
            # telegram_dm_topic_mode absent — binding prune still stands.
            pass

    self._execute_write(_prune)
    return removed
def bind_telegram_topic(
self,
*,

View file

@ -1590,6 +1590,19 @@ class DiscordAdapter(BasePlatformAdapter):
mutation_count += 1
return result
# Delete obsolete commands FIRST to stay under Discord's 100-command
# limit. Discord rejects an upsert that would push the live total over
# 100 (error 30032), which silently breaks ALL slash commands. If a new
# command is created before the obsolete ones are removed, an app that
# is already at the cap momentarily exceeds it and the whole sync fails.
# Removing the no-longer-desired commands up front guarantees the live
# total never rises above the cap mid-sync.
obsolete_keys = set(existing_by_key.keys()) - set(desired_by_key.keys())
for key in obsolete_keys:
current = existing_by_key.pop(key)
await mutate(http.delete_global_command, app_id, current.id)
deleted += 1
for key, desired in desired_by_key.items():
current = existing_by_key.pop(key, None)
if current is None:
@ -1613,10 +1626,6 @@ class DiscordAdapter(BasePlatformAdapter):
await mutate(http.edit_global_command, app_id, current.id, desired)
updated += 1
for current in existing_by_key.values():
await mutate(http.delete_global_command, app_id, current.id)
deleted += 1
return {
"total": len(desired_payloads),
"unchanged": unchanged,

View file

@ -810,6 +810,47 @@ class TelegramAdapter(BasePlatformAdapter):
def _is_thread_not_found_error(error: Exception) -> bool:
    """Return True when the error text contains "thread not found" (case-insensitive)."""
    message = str(error).lower()
    return "thread not found" in message
def _prune_stale_dm_topic_binding(
    self, chat_id: Any, thread_id: Any,
) -> None:
    """Best-effort removal of the ``telegram_dm_topic_bindings`` row for a
    topic Telegram has reported as gone.

    A stale row lets ``gateway.run._recover_telegram_topic_thread_id`` keep
    routing inbound messages to the dead thread (#31501). This helper is
    called from send-fallback paths, so it never raises: it returns silently
    when either id is ``None``, when no session store / database handle is
    attached, or when the handle lacks ``delete_telegram_topic_binding``,
    and it logs (at debug level) instead of propagating a failed delete.
    """
    if chat_id is None or thread_id is None:
        return
    session_store = getattr(self, "_session_store", None)
    if session_store is None:
        return
    state_db = getattr(session_store, "_db", None)
    if state_db is None:
        return
    if not hasattr(state_db, "delete_telegram_topic_binding"):
        return
    try:
        pruned_rows = state_db.delete_telegram_topic_binding(
            chat_id=str(chat_id), thread_id=str(thread_id),
        )
    except Exception:
        # A failed cleanup must not turn into a failed user-facing send.
        logger.debug(
            "[%s] delete_telegram_topic_binding failed for "
            "chat=%s thread=%s — skipping prune",
            self.name, chat_id, thread_id, exc_info=True,
        )
        return
    if not pruned_rows:
        return
    logger.info(
        "[%s] Pruned stale Telegram DM topic binding "
        "chat=%s thread=%s (Bot API: thread not found)",
        self.name, chat_id, thread_id,
    )
@staticmethod
def _is_bad_request_error(error: Exception) -> bool:
name = error.__class__.__name__.lower()
@ -2670,11 +2711,17 @@ class TelegramAdapter(BasePlatformAdapter):
continue
# Second failure: the thread is genuinely gone.
# Retry without ``message_thread_id`` so the
# message still reaches the chat.
# message still reaches the chat, and prune
# the stale binding so future inbound
# messages aren't redirected back to it
# (#31501).
logger.warning(
"[%s] Thread %s not found, retrying without message_thread_id",
self.name, effective_thread_id,
)
self._prune_stale_dm_topic_binding(
chat_id, effective_thread_id,
)
used_thread_fallback = True
effective_thread_id = None
thread_kwargs = {"message_thread_id": None}
@ -3355,6 +3402,13 @@ class TelegramAdapter(BasePlatformAdapter):
self.name,
message_thread_id,
)
# Same prune as the streaming send path — the
# control-message retry tells us the topic is gone,
# so the binding row in state.db must go too
# (#31501).
self._prune_stale_dm_topic_binding(
kwargs.get("chat_id"), message_thread_id,
)
retry_kwargs = dict(kwargs)
retry_kwargs.pop("message_thread_id", None)
return await self._bot.send_message(**retry_kwargs)

View file

@ -108,6 +108,7 @@ AUTHOR_MAP = {
"804436395@qq.com": "LaPhilosophie",
"maxmitcham@mac.home": "maxtrigify",
"ccook@nvms.com": "ccook1963",
"libre-7@users.noreply.github.com": "libre-7",
"kristian@agrointel.no": "kristianvast",
"thomas.paquette@gmail.com": "RyTsYdUp",
"techxacm@gmail.com": "ProgramCaiCai",

View file

@ -100,7 +100,13 @@ class _StubAgent:
pass
def _run(agent):
def _run(
agent,
*,
final_response=None,
api_call_count=3,
turn_exit_reason="unknown",
):
messages = [
{"role": "user", "content": "do a thing"},
{
@ -114,8 +120,8 @@ def _run(agent):
]
return finalize_turn(
agent,
final_response=None, # forces the max-iterations summary path
api_call_count=3,
final_response=final_response,
api_call_count=api_call_count,
interrupted=False,
failed=False,
messages=messages,
@ -125,7 +131,7 @@ def _run(agent):
user_message="do a thing",
original_user_message="do a thing",
_should_review_memory=False,
_turn_exit_reason="unknown",
_turn_exit_reason=turn_exit_reason,
)
@ -162,4 +168,17 @@ def test_clean_turn_has_no_cleanup_errors_key():
agent = _StubAgent(raise_in=())
result = _run(agent)
assert result["final_response"] == "PARTIAL SUMMARY FROM MODEL"
assert result["completed"] is False
assert "cleanup_errors" not in result
def test_text_response_on_last_allowed_call_is_completed():
    """A text response supplied with ``api_call_count == max_iterations`` is
    returned as-is and reported as completed."""
    stub = _StubAgent(raise_in=())
    outcome = _run(
        stub,
        final_response="final report",
        api_call_count=stub.max_iterations,
        turn_exit_reason="text_response(finish_reason=stop)",
    )
    assert outcome["final_response"] == "final report"
    assert outcome["completed"] is True

View file

@ -103,3 +103,139 @@ def test_get_default_hermes_root_docker_layouts(tmp_path, monkeypatch):
# Docker profile layout: <custom>/profiles/<name> -> <custom>.
monkeypatch.setenv("HERMES_HOME", "/opt/data/profiles/coder")
assert hermes_constants.get_default_hermes_root() == Path("/opt/data")
# ---------------------------------------------------------------------------
# Per-job profile EXECUTION scoping (#32091 follow-up).
#
# The storage half of #32091 (above) moved every profile's jobs into one shared
# root store. But a job must still EXECUTE under its owning profile's
# environment (.env / config.yaml / credentials) — not whichever profile's
# ticker picks it up. These tests cover the execution-scoping half.
# ---------------------------------------------------------------------------
def _profile_env(tmp_path, monkeypatch, active="default"):
    """Build a root Hermes home containing a 'donna' profile and activate one.

    Creates ``<tmp>/hermes_home/cron`` and
    ``<tmp>/hermes_home/profiles/donna/cron``, patches
    ``hermes_constants._get_platform_default_hermes_home`` to return the root,
    and sets ``HERMES_HOME`` to the root when ``active == "default"``,
    otherwise to donna's home.

    Returns:
        ``(root, donna_home)`` as paths.
    """
    root = tmp_path / "hermes_home"
    donna_home = root / "profiles" / "donna"
    for home in (root, donna_home):
        (home / "cron").mkdir(parents=True)
    import hermes_constants

    monkeypatch.setattr(
        hermes_constants, "_get_platform_default_hermes_home", lambda: root
    )
    if active == "default":
        active_home = root
    else:
        active_home = donna_home
    monkeypatch.setenv("HERMES_HOME", str(active_home))
    return root, donna_home
def test_create_job_autocaptures_active_profile(tmp_path, monkeypatch):
    """With 'donna' as the active profile, ``create_job`` tags the job with
    profile 'donna' and stores it in the shared root jobs file, not in
    donna's profile-local cron directory."""
    root, donna_home = _profile_env(tmp_path, monkeypatch, active="donna")
    import cron.jobs as jobs

    importlib.reload(jobs)
    shared_store = root / "cron" / "jobs.json"
    profile_store = donna_home / "cron" / "jobs.json"
    try:
        created = jobs.create_job(prompt="audit", schedule="every 1h", name="a")
        # Profile is captured from the active (donna) session.
        assert created["profile"] == "donna"
        # The record lives in the shared root store only.
        assert jobs.JOBS_FILE.resolve() == shared_store.resolve()
        assert jobs.JOBS_FILE.exists()
        assert not profile_store.exists()
    finally:
        monkeypatch.undo()
        importlib.reload(jobs)
def test_create_job_explicit_profile_override(tmp_path, monkeypatch):
    """Passing ``profile="ops"`` to ``create_job`` takes precedence over the
    active (default) profile."""
    root, _donna_home = _profile_env(tmp_path, monkeypatch, active="default")
    ops_cron_dir = root / "profiles" / "ops" / "cron"
    ops_cron_dir.mkdir(parents=True)
    import cron.jobs as jobs

    importlib.reload(jobs)
    try:
        created = jobs.create_job(prompt="x", schedule="every 2h", profile="ops")
        assert created["profile"] == "ops"
    finally:
        monkeypatch.undo()
        importlib.reload(jobs)
def test_resolve_profile_home_maps_names(tmp_path, monkeypatch):
    """``resolve_profile_home`` maps 'default' and '' to the root home, a
    named profile to its own home, and an unknown profile to ``None``."""
    root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
    import cron.jobs as jobs

    importlib.reload(jobs)
    try:
        expected_homes = (
            ("default", root),
            ("", root),
            ("donna", donna_home),
        )
        for profile_name, home in expected_homes:
            assert jobs.resolve_profile_home(profile_name).resolve() == home.resolve()
        assert jobs.resolve_profile_home("ghost") is None
    finally:
        monkeypatch.undo()
        importlib.reload(jobs)
def test_normalize_backfills_legacy_profile_to_default(tmp_path, monkeypatch):
    """Normalizing a job record without a ``profile`` key yields 'default'."""
    import cron.jobs as jobs

    legacy_record = {
        "id": "l1",
        "name": "old",
        "prompt": "x",
        "schedule": {"kind": "interval", "minutes": 60},
    }
    normalized = jobs._normalize_job_record(legacy_record)
    assert normalized["profile"] == "default"
def test_run_job_scopes_execution_to_job_profile(tmp_path, monkeypatch):
    """The decisive test: a ticker running as the ROOT profile executes a
    job tagged profile='donna' with HERMES_HOME pointed at donna's home
    (both the env var and the in-process override), then restores the
    ticker's env afterward.

    The agent is replaced by a mock whose ``run_conversation`` records the
    environment it observes, so the assertions check what was visible
    *during* the job rather than after it.
    """
    from unittest.mock import MagicMock, patch
    # The process itself runs as the default (root) profile.
    root, donna_home = _profile_env(tmp_path, monkeypatch, active="default")
    (donna_home / "config.yaml").write_text("model:\n default: openrouter/test\n")
    import hermes_constants
    import cron.jobs as jobs
    import cron.scheduler as sched
    # Reload so both modules are re-evaluated under the patched environment.
    importlib.reload(jobs)
    importlib.reload(sched)
    captured = {}

    def fake_run_conversation(prompt, *a, **k):
        # Snapshot all three views of the active home while the job runs.
        captured["env"] = os.environ.get("HERMES_HOME")
        captured["override"] = hermes_constants.get_hermes_home_override()
        captured["resolved"] = str(hermes_constants.get_hermes_home())
        return {"final_response": "done", "completed": True, "failed": False,
                "turn_exit_reason": "text_response(finish_reason=stop)"}

    job = {"id": "j-donna", "name": "donna-audit", "prompt": "audit",
           "profile": "donna", "schedule": {"kind": "interval", "minutes": 60},
           "deliver": "local", "model": "openrouter/test"}
    # Remember the ticker's own HERMES_HOME to verify it is restored.
    before = os.environ.get("HERMES_HOME")
    try:
        fake_agent = MagicMock()
        fake_agent.run_conversation.side_effect = fake_run_conversation
        with patch("cron.scheduler._resolve_origin", return_value=None), \
             patch("dotenv.load_dotenv"), \
             patch("hermes_state.SessionDB", return_value=MagicMock()), \
             patch("hermes_cli.runtime_provider.resolve_runtime_provider",
                   return_value={"api_key": "k", "base_url": "https://x/v1",
                                 "provider": "openrouter", "api_mode": "chat_completions"}), \
             patch("run_agent.AIAgent", return_value=fake_agent):
            success, output, final, err = sched.run_job(job)
        assert success is True, (success, err)
        # During execution the job ran AS donna:
        assert captured["env"] == str(donna_home)
        assert captured["override"] == str(donna_home)
        assert captured["resolved"] == str(donna_home)
        # After the job, the ticker's HERMES_HOME is restored (no leak):
        assert os.environ.get("HERMES_HOME") == before
    finally:
        # Undo the patches first so the reloads below see the unpatched
        # environment again.
        monkeypatch.undo()
        importlib.reload(jobs)
        importlib.reload(sched)

View file

@ -1394,6 +1394,52 @@ class TestRunJobSessionPersistence:
assert error is None
assert final_response == "all good"
def test_run_job_delivers_max_iteration_fallback_summary(self, tmp_path):
    """Cron delivers a usable max-iteration fallback summary.

    The mocked agent reports ``completed=False`` with a
    ``max_iterations_reached`` exit reason but a non-empty final response.
    ``run_job`` must treat that as a successful run and return the report
    text rather than surfacing it as an error.
    """
    job = {
        "id": "summary-job",
        "name": "summary",
        "prompt": "finish the report",
    }
    runtime = {
        "api_key": "***",
        "base_url": "https://example.invalid/v1",
        "provider": "openrouter",
        "api_mode": "chat_completions",
    }
    exhausted_result = {
        "final_response": "final fallback report",
        "completed": False,
        "failed": False,
        "turn_exit_reason": "max_iterations_reached(60/60)",
    }
    agent = MagicMock()
    agent.run_conversation.return_value = exhausted_result
    with patch("cron.scheduler._hermes_home", tmp_path), \
         patch("cron.scheduler._resolve_origin", return_value=None), \
         patch("dotenv.load_dotenv"), \
         patch("hermes_state.SessionDB", return_value=MagicMock()), \
         patch(
             "hermes_cli.runtime_provider.resolve_runtime_provider",
             return_value=runtime,
         ), \
         patch("run_agent.AIAgent", return_value=agent):
        success, output, final_response, error = run_job(job)

    assert success is True
    assert error is None
    assert final_response == "final fallback report"
    assert "final fallback report" in output
    assert "(FAILED)" not in output
def test_tick_marks_empty_response_as_error(self, tmp_path):
"""When run_job returns success=True but final_response is empty,
tick() should mark the job as error so last_status != 'ok'.

View file

@ -0,0 +1,140 @@
"""Test Discord slash command sync respects the 100-command hard limit."""
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import sys
import pytest
from gateway.config import PlatformConfig
def _ensure_discord_mock():
    """Register stand-in ``discord`` modules in ``sys.modules`` when needed.

    Does nothing if a ``discord`` entry with a ``__file__`` attribute is
    already registered, or if some other non-``None`` ``discord`` entry is
    present. Otherwise registers ``MagicMock`` objects for ``discord``,
    ``discord.ext`` and ``discord.ext.commands``.
    """
    registered = sys.modules.get("discord")
    if "discord" in sys.modules and hasattr(registered, "__file__"):
        return
    if registered is not None:
        return
    stub = MagicMock()
    stub.Intents.default.return_value = MagicMock()
    sys.modules["discord"] = stub
    for submodule in ("discord.ext", "discord.ext.commands"):
        sys.modules[submodule] = MagicMock()
_ensure_discord_mock()
from plugins.platforms.discord.adapter import DiscordAdapter
class _FakeTreeCommand:
    """Bare-bones stand-in for a command-tree entry.

    Stores a ``name`` and a ``type`` and serializes both through ``to_dict``;
    the tree argument to ``to_dict`` is accepted and ignored.
    """

    def __init__(self, name: str, command_type: int = 1):
        self.name, self.type = name, command_type

    def to_dict(self, _tree):
        payload = {"name": self.name}
        payload["type"] = self.type
        return payload
@pytest.fixture
def adapter():
    """Provide a ``DiscordAdapter`` whose client and payload helpers are mocked.

    The client gets a mock command tree, an ``AsyncMock`` HTTP layer and a
    fixed application id; the inter-mutation sleep is an ``AsyncMock``; the
    canonicalize/patchable payload helpers pass their input straight through.
    """
    _ensure_discord_mock()
    instance = DiscordAdapter(PlatformConfig(enabled=True, token="fake-token"))

    # Mock the Discord client and tree
    instance._client = MagicMock()
    client = instance._client
    client.tree = MagicMock()
    client.http = AsyncMock()
    client.application_id = "test_app_id"

    instance._sleep_between_command_sync_mutations = AsyncMock()
    instance._existing_command_to_payload = MagicMock(
        side_effect=lambda cmd: {"name": cmd.name}
    )
    for passthrough in (
        "_canonicalize_app_command_payload",
        "_patchable_app_command_payload",
    ):
        setattr(instance, passthrough, MagicMock(side_effect=lambda p: p))
    return instance
@pytest.mark.asyncio
async def test_safe_sync_deletes_before_creating(adapter):
    """Sync must delete obsolete commands BEFORE creating new ones.

    Discord's 100-command limit is enforced when trying to upsert. If we
    have 100 commands on Discord, try to add 1 new one, and haven't deleted
    any yet, Discord rejects with error 30032.

    The fix: identify and delete obsolete commands first, then create/update.
    This ensures we never temporarily exceed 100 during the sync operation.

    This is a regression guard for the samuraiheart bug where sync would fail
    with error 30032 even though the registration code properly capped at 100.

    The mocked adapter comes from the ``adapter`` fixture in this module
    instead of repeating the same setup inline.
    """
    # Simulate having 100 commands on Discord, with 1 that's no longer desired
    # and 1 new command that should be created.
    #   Existing on Discord: cmd_0, cmd_1, ..., cmd_99 (100 total)
    #   Desired locally:     cmd_1, cmd_2, ..., cmd_99, cmd_new (100 total)
    # So: delete cmd_0 (1 deletion), create cmd_new (1 creation)
    existing_commands = [
        SimpleNamespace(id=f"id_{i}", name=f"cmd_{i}", type=1)
        for i in range(100)
    ]
    adapter._client.tree.fetch_commands = AsyncMock(return_value=existing_commands)
    adapter._client.tree.get_commands = MagicMock(
        return_value=[
            _FakeTreeCommand(name=f"cmd_{i}", command_type=1)
            for i in range(1, 100)
        ] + [_FakeTreeCommand(name="cmd_new", command_type=1)]
    )

    # Track the order of mutations
    mutation_log = []

    async def mock_delete(*args):
        mutation_log.append(("delete", args[-1]))

    async def mock_upsert(*args):
        mutation_log.append(("create", args[-1].get("name")))

    adapter._client.http.delete_global_command = mock_delete
    adapter._client.http.upsert_global_command = mock_upsert
    adapter._client.http.edit_global_command = AsyncMock()

    # Call sync
    await adapter._safe_sync_slash_commands()

    # Verify that:
    # 1. A deletion happened (cmd_0)
    # 2. It happened BEFORE any creation
    # 3. The creation of cmd_new happened AFTER deletion
    deletes = [m for m in mutation_log if m[0] == "delete"]
    creates = [m for m in mutation_log if m[0] == "create"]
    assert len(deletes) >= 1, "At least one command should be deleted"
    assert len(creates) >= 1, "At least one command should be created"

    # The key assertion: all deletions should come before all creations.
    # Find the index of the last delete and the first create.
    last_delete_idx = max(i for i, m in enumerate(mutation_log) if m[0] == "delete")
    first_create_idx = min(i for i, m in enumerate(mutation_log) if m[0] == "create")
    assert last_delete_idx < first_create_idx, (
        f"Deletions must happen before creations to avoid exceeding 100-command limit. "
        f"Last delete at index {last_delete_idx}, first create at index {first_create_idx}"
    )

View file

@ -967,6 +967,105 @@ class TestMediaDeliveryDefaultMode:
assert BasePlatformAdapter.validate_media_delivery_path(str(config_file)) is None
def test_denylist_blocks_google_token_default_mode(self, tmp_path, monkeypatch):
"""Integration credentials at the HERMES_HOME root (google_token.json)
must never be deliverable, even though they aren't the historically
enumerated .env/auth.json/config.yaml files. Regression for a
refreshed google_token.json being auto-attached to a Slack reply
(#50912).
"""
self._patch_roots(monkeypatch)
fake_home = tmp_path / "home"
hermes_dir = fake_home / ".hermes"
hermes_dir.mkdir(parents=True)
token = hermes_dir / "google_token.json"
token.write_text('{"access_token": "***", "refresh_token": "***"}')
monkeypatch.setenv("HOME", str(fake_home))
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
assert BasePlatformAdapter.validate_media_delivery_path(str(token)) is None
def test_denylist_blocks_google_token_even_when_freshly_refreshed(self, tmp_path, monkeypatch):
"""The exploit was that the Google integration rewrites
google_token.json every turn, bumping its mtime to ~now, so the
strict-mode recency window (trust_recent_files) kept re-trusting it
and it re-sent on every reply. An explicit denylist entry must win
over recency trust.
"""
self._patch_roots(monkeypatch) # zero cache allowlist, strict mode on
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
fake_home = tmp_path / "home"
hermes_dir = fake_home / ".hermes"
hermes_dir.mkdir(parents=True)
token = hermes_dir / "google_token.json"
token.write_text('{"access_token": "***"}') # mtime = now → "recent"
monkeypatch.setenv("HOME", str(fake_home))
monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_dir)
monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_dir)
assert BasePlatformAdapter.validate_media_delivery_path(str(token)) is None
def test_denylist_blocks_pairing_directory_contents(self, tmp_path, monkeypatch):
    """Reject anything stored under ``~/.hermes/pairing/``.

    Files in that directory are platform pairing tokens, i.e. credential
    material, and must not be deliverable.
    """
    self._patch_roots(monkeypatch)

    hermes_root = tmp_path / "home" / ".hermes"
    pairing_dir = hermes_root / "pairing"
    pairing_dir.mkdir(parents=True)
    approved = pairing_dir / "telegram-approved.json"
    approved.write_text('{"approved": ["123"]}')

    monkeypatch.setenv("HOME", str(hermes_root.parent))
    monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_root)
    monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_root)

    outcome = BasePlatformAdapter.validate_media_delivery_path(str(approved))
    assert outcome is None
def test_hermes_cache_still_delivers_under_denied_home(self, tmp_path, monkeypatch):
    """Cache artifacts keep delivering despite the credential denylist.

    The targeted denylist must not break legitimate cache deliveries: a
    generated artifact under the allowlisted cache root is matched before
    the denylist and still delivers.
    """
    hermes_root = tmp_path / "home" / ".hermes"
    documents_cache = hermes_root / "cache" / "documents"
    documents_cache.mkdir(parents=True)
    report = documents_cache / "report.pdf"
    report.write_bytes(b"%PDF-1.4")

    # Register the cache directory as an allowlisted root for this test.
    self._patch_roots(monkeypatch, documents_cache)
    monkeypatch.setenv("HOME", str(hermes_root.parent))
    monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_root)
    monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_root)

    expected = str(report.resolve())
    assert BasePlatformAdapter.validate_media_delivery_path(str(report)) == expected
def test_denylist_blocks_non_cache_file_under_hermes_home(self, tmp_path, monkeypatch):
    """A fresh non-credential file directly under ``~/.hermes`` still delivers.

    Such a file (not inside a cache subdirectory) remains deliverable via
    recency trust — the tree was NOT blanket-denied (per #32090/#34425).
    This guards against accidentally re-introducing the rejected
    whole-tree deny.

    NOTE(review): despite the ``denylist_blocks`` prefix in its name, this
    test asserts that the file IS delivered; consider renaming it.
    """
    self._patch_roots(monkeypatch)  # strict mode on
    for env_name, env_value in (
        ("HERMES_MEDIA_TRUST_RECENT_FILES", "1"),
        ("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600"),
    ):
        monkeypatch.setenv(env_name, env_value)

    hermes_root = tmp_path / "home" / ".hermes"
    hermes_root.mkdir(parents=True)
    report = hermes_root / "adhoc_report.pdf"
    report.write_bytes(b"%PDF-1.4")  # fresh mtime

    monkeypatch.setenv("HOME", str(hermes_root.parent))
    monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_root)
    monkeypatch.setattr("gateway.platforms.base._HERMES_ROOT", hermes_root)

    expected = str(report.resolve())
    assert BasePlatformAdapter.validate_media_delivery_path(str(report)) == expected
def test_strict_mode_envvar_restores_legacy_behavior(self, tmp_path, monkeypatch):
"""Setting HERMES_MEDIA_DELIVERY_STRICT=1 reactivates the older
allowlist+recency logic. A stale file outside the allowlist is

View file

@ -0,0 +1,459 @@
"""Regression tests for #31501 — prune stale Telegram DM topic bindings.
When a Telegram user deletes a DM topic in the client, the Bot API
responds to the gateway's next send with ``Thread not found``. The
adapter falls back to a plain send (no ``message_thread_id``), but
prior to this fix it left the corresponding row in
``telegram_dm_topic_bindings`` untouched.
``gateway.run._recover_telegram_topic_thread_id`` then walked the
user's bindings newest-first on every later inbound message and
cheerfully redirected them back to the deleted topic — tool
progress, approvals and replies all silently landed in the wrong
place until the operator manually ran ``DELETE`` on ``state.db``.
The fix has three pieces — these tests pin all three:

  1. ``SessionDB.delete_telegram_topic_binding`` — the targeted
     prune helper (new public API).
  2. ``TelegramAdapter._prune_stale_dm_topic_binding`` — the
     adapter glue that calls the helper from a send-fallback hot
     path without raising on cleanup failure.
  3. The two "Thread not found" call sites in the streaming send
     loop and the control-message helper now invoke (2) — we pin
     this with a source-level guard rather than spinning the full
     send pipeline.
"""
from __future__ import annotations

import inspect
from contextlib import closing
from types import SimpleNamespace

import pytest

from hermes_state import SessionDB
# ---------------------------------------------------------------------------
# SessionDB.delete_telegram_topic_binding
# ---------------------------------------------------------------------------
def _seed_binding(
db: SessionDB,
*,
chat_id: str = "5595856929",
thread_id: str = "15287",
user_id: str = "5595856929",
session_id: str = "sess-target",
) -> None:
db.create_session(
session_id=session_id,
source="telegram",
user_id=user_id,
)
db.bind_telegram_topic(
chat_id=chat_id,
thread_id=thread_id,
user_id=user_id,
session_key=f"agent:main:telegram:dm:{chat_id}:{thread_id}",
session_id=session_id,
)
class TestDeleteTelegramTopicBinding:
    """Pin the contract of ``SessionDB.delete_telegram_topic_binding``.

    Each test opens its database inside ``contextlib.closing`` so the
    connection is released even when an assertion fails part-way through
    (previously ``db.close()`` was only reached on success).
    """

    def test_removes_matching_row_and_returns_count(self, tmp_path):
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            _seed_binding(db, thread_id="15287")

            # Sanity check — binding present before prune.
            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            ) is not None

            removed = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            )

            assert removed == 1
            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            ) is None

    def test_does_not_touch_unrelated_bindings(self, tmp_path):
        # Critical for the fix: a chat with multiple topics must
        # only lose the one Telegram confirmed deleted, never the
        # rest. Otherwise the user's healthy topics also vanish
        # from recovery's view.
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            _seed_binding(db, thread_id="15287", session_id="sess-stale")
            _seed_binding(db, thread_id="15418", session_id="sess-fresh")

            removed = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            )

            assert removed == 1
            # Stale binding is gone; the fresh one survives.
            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            ) is None
            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15418",
            ) is not None

    def test_missing_row_returns_zero_silently(self, tmp_path):
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            _seed_binding(db, thread_id="15287")

            # Different thread_id — must not raise, just report 0.
            removed = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="99999",
            )

            assert removed == 0
            # Original binding still intact.
            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            ) is not None

    def test_pristine_database_with_no_topic_tables_is_silent_noop(self, tmp_path):
        # Fresh profile that has never run /topic — the topic-mode
        # tables don't exist yet. The send-fallback hot path can
        # still hit this code, so we must not crash.
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            # Confirm precondition: tables really aren't there.
            rows = db._conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' "
                "AND name LIKE 'telegram_dm%'"
            ).fetchall()
            tables = {row[0] for row in rows}
            assert "telegram_dm_topic_bindings" not in tables

            removed = db.delete_telegram_topic_binding(
                chat_id="any", thread_id="any",
            )

            assert removed == 0

    def test_idempotent_under_repeated_calls(self, tmp_path):
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            _seed_binding(db, thread_id="15287")

            first = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            )
            second = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            )

            assert first == 1
            assert second == 0  # already gone, no spurious "1"
class TestPruneClearsTopicModeWhenLastBindingGone:
    """Proactive cleanup (#31501 follow-up): pruning the chat's final
    binding must also flip ``telegram_dm_topic_mode.enabled`` to 0 so
    recovery fully stands down — covers the user who disabled topics in
    the Telegram client without ever running ``/topic off``.

    Each test opens its database inside ``contextlib.closing`` so the
    connection is released even when an assertion fails.
    """

    def test_clears_enabled_when_last_binding_pruned(self, tmp_path):
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            db.enable_telegram_topic_mode(
                chat_id="5595856929", user_id="5595856929",
            )
            _seed_binding(db, thread_id="15287")
            assert db.is_telegram_topic_mode_enabled(
                chat_id="5595856929", user_id="5595856929",
            ) is True

            removed = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            )

            assert removed == 1
            assert db.is_telegram_topic_mode_enabled(
                chat_id="5595856929", user_id="5595856929",
            ) is False

    def test_keeps_enabled_while_other_bindings_remain(self, tmp_path):
        # Deleting one of several topics must NOT disable topic mode —
        # the chat still has healthy lanes that recovery should serve.
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            db.enable_telegram_topic_mode(
                chat_id="5595856929", user_id="5595856929",
            )
            _seed_binding(db, thread_id="15287", session_id="sess-stale")
            _seed_binding(db, thread_id="15418", session_id="sess-fresh")

            db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            )

            assert db.is_telegram_topic_mode_enabled(
                chat_id="5595856929", user_id="5595856929",
            ) is True

    def test_noop_prune_leaves_enabled_untouched(self, tmp_path):
        # A prune that matches no row must not flip the flag — there's
        # still a live binding the (wrong) thread_id didn't match.
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            db.enable_telegram_topic_mode(
                chat_id="5595856929", user_id="5595856929",
            )
            _seed_binding(db, thread_id="15287")

            removed = db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="99999",
            )

            assert removed == 0
            assert db.is_telegram_topic_mode_enabled(
                chat_id="5595856929", user_id="5595856929",
            ) is True
# ---------------------------------------------------------------------------
# Adapter glue — _prune_stale_dm_topic_binding
# ---------------------------------------------------------------------------
def _bare_adapter(db: SessionDB | None = None):
    """Build a ``TelegramAdapter`` shell without running its ``__init__``.

    The adapter reaches the SessionDB through ``self._session_store._db``
    (set by GatewayRunner via ``set_session_store``), so a minimal
    stand-in exposing only that attribute is enough for the prune helper;
    the python-telegram-bot import graph is not needed. ``name`` is a
    property that delegates to ``platform.value.title()``, which is why
    ``platform`` is assigned instead of ``name``.

    When ``db`` is ``None`` the returned adapter has no ``_session_store``
    attribute at all.
    """
    from gateway.config import Platform
    from plugins.platforms.telegram.adapter import TelegramAdapter

    adapter = object.__new__(TelegramAdapter)
    adapter.platform = Platform.TELEGRAM
    if db is None:
        return adapter

    adapter._session_store = SimpleNamespace(_db=db)
    return adapter
class TestPruneStaleDmTopicBindingHelper:
    """Exercise ``TelegramAdapter._prune_stale_dm_topic_binding``.

    Tests that open a real ``SessionDB`` do so inside
    ``contextlib.closing`` so the connection is released even when an
    assertion fails.
    """

    def test_drops_binding_when_session_store_db_is_present(self, tmp_path):
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            _seed_binding(db, thread_id="15287")
            adapter = _bare_adapter(db)

            adapter._prune_stale_dm_topic_binding("5595856929", 15287)

            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            ) is None

    def test_silent_when_session_store_unavailable(self):
        # No ``_session_store`` attribute — the helper must not
        # explode (the streaming send path hits this in tests
        # that bypass the gateway runner).
        adapter = _bare_adapter()
        adapter._prune_stale_dm_topic_binding("123", "456")

    def test_silent_when_db_lacks_helper(self):
        # Old SessionDB without the new method (e.g. running
        # against an older state.db schema). Must be a no-op
        # rather than AttributeError.
        adapter = _bare_adapter()
        adapter._session_store = SimpleNamespace(
            _db=SimpleNamespace(),  # no methods at all
        )
        adapter._prune_stale_dm_topic_binding("123", "456")

    def test_swallows_db_exceptions_so_send_continues(self):
        class ExplodingDb:
            def delete_telegram_topic_binding(self, **_):
                raise RuntimeError("disk full or whatever")

        adapter = _bare_adapter()
        adapter._session_store = SimpleNamespace(_db=ExplodingDb())
        # The point of the helper is that a failed cleanup must
        # NEVER turn into a failed user-facing send. No exception
        # should escape.
        adapter._prune_stale_dm_topic_binding("123", "456")

    def test_skips_when_chat_or_thread_missing(self, tmp_path):
        # Defensive — control-message paths sometimes call us
        # with chat_id=None when kwargs lack the key. We must
        # not produce a spurious DELETE that matches every row
        # with a NULL chat_id.
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            _seed_binding(db, thread_id="15287")
            adapter = _bare_adapter(db)

            adapter._prune_stale_dm_topic_binding(None, "15287")
            adapter._prune_stale_dm_topic_binding("5595856929", None)

            # Still there — neither call generated a DELETE.
            assert db.get_telegram_topic_binding(
                chat_id="5595856929", thread_id="15287",
            ) is not None
# ---------------------------------------------------------------------------
# Source-level wiring guards — both fallback sites must call the helper
# ---------------------------------------------------------------------------
class TestThreadNotFoundFallbackSitesPruneBinding:
    """Source-level guards for the two ``Thread not found`` fallback sites.

    Both warning sites in the Telegram adapter must invoke
    ``_prune_stale_dm_topic_binding``. Inspecting the source keeps a
    future refactor from quietly dropping the cleanup wire and
    re-opening #31501.
    """

    def test_streaming_send_fallback_calls_prune(self):
        from plugins.platforms.telegram import adapter as telegram_mod

        send_source = inspect.getsource(telegram_mod.TelegramAdapter.send)

        # The second-failure branch (the one that flips
        # ``used_thread_fallback``) is anchored on its log line; the
        # prune helper must be invoked before the flag is flipped.
        anchor = send_source.find("retrying without message_thread_id")
        assert anchor != -1, (
            "Streaming send must keep its 'thread not found' "
            "fallback log line — the prune wiring is anchored "
            "next to it."
        )

        # A 600-character window is enough to cover the warning, the
        # prune call, and the ``used_thread_fallback = True``
        # assignment that follows.
        nearby = send_source[anchor:anchor + 600]
        assert "_prune_stale_dm_topic_binding" in nearby, (
            "Streaming send 'Thread not found' fallback must call "
            "_prune_stale_dm_topic_binding so the stale row in "
            "telegram_dm_topic_bindings doesn't keep redirecting "
            "future inbound messages to the deleted topic (#31501)."
        )

    def test_control_message_helper_calls_prune(self):
        from plugins.platforms.telegram import adapter as telegram_mod

        helper = telegram_mod.TelegramAdapter._send_message_with_thread_fallback
        helper_source = inspect.getsource(helper)

        # The helper has a single retry path; the prune call
        # must sit inside it, not in dead code outside the
        # ``if message_thread_id is not None and …`` guard.
        assert "_prune_stale_dm_topic_binding" in helper_source, (
            "_send_message_with_thread_fallback must call "
            "_prune_stale_dm_topic_binding when Telegram returns "
            "BadRequest('Thread not found') for a control message "
            "(#31501)."
        )

        # Belt-and-braces: the call must precede the retry
        # ``send_message`` so the prune happens whether or not
        # the retry itself succeeds.
        prune_at = helper_source.find("_prune_stale_dm_topic_binding")
        retry_at = helper_source.find("send_message(**retry_kwargs)")
        assert 0 <= prune_at < retry_at, (
            "_prune_stale_dm_topic_binding must run before the "
            "fallback send_message retry."
        )
# ---------------------------------------------------------------------------
# End-to-end semantic — prune + recovery returns None for deleted topic
# ---------------------------------------------------------------------------
class TestRecoveryAfterPrune:
    """The whole point of the fix: once a topic is pruned, the
    GatewayRunner's ``_recover_telegram_topic_thread_id`` must no
    longer steer future inbound messages to it.
    """

    def test_recovery_no_longer_returns_pruned_topic(self, tmp_path):
        # Two topic bindings for the same user, then prune the
        # most-recent one. ``_recover_telegram_topic_thread_id`` walks
        # bindings newest-first, so without the prune it would pick the
        # one we just removed.
        from gateway.config import GatewayConfig, Platform, PlatformConfig
        from gateway.run import GatewayRunner
        from gateway.session import SessionSource, build_session_key

        def dm_source(thread_id):
            # Every source in this test is the same DM; only the thread
            # id varies.
            return SessionSource(
                platform=Platform.TELEGRAM,
                user_id="5595856929",
                chat_id="5595856929",
                user_name="tester",
                chat_type="dm",
                thread_id=thread_id,
            )

        # ``closing`` guarantees the connection is released even when
        # one of the assertions below fails.
        with closing(SessionDB(db_path=tmp_path / "state.db")) as db:
            db.enable_telegram_topic_mode(
                chat_id="5595856929", user_id="5595856929",
            )
            for sid, thread in (("sess-A", "111"), ("sess-B", "222")):
                db.create_session(
                    session_id=sid, source="telegram",
                    user_id="5595856929",
                )
                db.bind_telegram_topic(
                    chat_id="5595856929",
                    thread_id=thread,
                    user_id="5595856929",
                    session_key=build_session_key(dm_source(thread)),
                    session_id=sid,
                )

            # Bypass GatewayRunner.__init__ and set only the attributes
            # this test assigns explicitly.
            runner = object.__new__(GatewayRunner)
            runner.config = GatewayConfig(
                platforms={
                    Platform.TELEGRAM: PlatformConfig(enabled=True, token="***"),
                }
            )
            runner._session_db = db
            runner._telegram_topic_mode_enabled = lambda _src: True

            # Sanity: before the prune, recovery picks "222" (newest).
            # Recovery only fires for a lobby-shaped inbound (omitted
            # message_thread_id or General topic "1"); a non-lobby
            # unknown thread is preserved as a brand-new topic. Use the
            # General topic id so the recovery walk actually runs.
            before = runner._recover_telegram_topic_thread_id(dm_source("1"))
            assert before == "222"

            # User deletes topic 222 in Telegram → adapter prunes.
            db.delete_telegram_topic_binding(
                chat_id="5595856929", thread_id="222",
            )

            # Now recovery falls back to topic 111 (the surviving
            # binding) instead of the dead one. This is the exact
            # behaviour change the bug report asks for.
            after = runner._recover_telegram_topic_thread_id(dm_source("1"))
            assert after == "111"

View file

@ -0,0 +1,66 @@
"""Regression test for TUI approval-prompt credential redaction (#48456).
Follow-up to #50767, which redacted the chat-platform and SSE/API approval
transports. The TUI JSON-RPC transport is the third egress: three
`register_gateway_notify` callbacks in `tui_gateway/server.py` emit the raw
`approval_data` (with an unredacted `command`) to the TUI client. They now
route through the module-level `_emit_approval_request` helper, which redacts
`payload["command"]` via the shared `gateway.run._redact_approval_command` seam
before emitting.
"""
import inspect
import pytest
class TestTuiApprovalEmitRedaction:
    """Checks for ``tui_gateway.server._emit_approval_request``.

    ``_emit`` is replaced with a recorder so each test can inspect what
    would have been sent to the TUI client.
    """

    def test_emit_approval_request_redacts_command_in_payload(self, monkeypatch):
        from tui_gateway import server as tui_server

        emitted = {}

        def record_emit(event, sid, payload=None):
            emitted.update({"event": event, "sid": sid, "payload": payload})

        monkeypatch.setattr(tui_server, "_emit", record_emit)

        raw = "curl -H 'Authorization: token ghp_01...6789' https://api.github.com"
        tui_server._emit_approval_request("sess-1", {"command": raw, "description": "x"})

        assert emitted["event"] == "approval.request"
        sent = emitted["payload"]
        # credential removed, non-command field + command structure preserved
        assert "ghp_01...6789" not in sent["command"]
        assert sent["description"] == "x"
        assert "github.com" in sent["command"]

    def test_emit_approval_request_handles_missing_command(self, monkeypatch):
        from tui_gateway import server as tui_server

        emitted = {}

        def record_payload(event, sid, payload=None):
            emitted.update({"payload": payload})

        monkeypatch.setattr(tui_server, "_emit", record_payload)

        # A payload without a ``command`` key passes through unchanged.
        tui_server._emit_approval_request("s", {"description": "no command here"})
        assert emitted["payload"] == {"description": "no command here"}

        # ``None`` is emitted as an empty payload.
        tui_server._emit_approval_request("s", None)
        assert emitted["payload"] == {}

    def test_no_raw_command_emit_in_approval_registrations(self):
        """Every register_gateway_notify approval callback must route through the
        redacting `_emit_approval_request` helper — no registration may emit the
        raw payload via `_emit("approval.request", ...)` directly. The ONLY
        allowed raw emit is inside the helper itself."""
        from tui_gateway import server as tui_server

        module_source = inspect.getsource(tui_server)

        raw_emits = module_source.count('_emit("approval.request"')
        assert raw_emits == 1, (
            f'expected exactly 1 raw _emit("approval.request") (inside the '
            f"redacting helper), found {raw_emits} — a registration may be "
            f"emitting the unredacted command"
        )
        assert "_emit_approval_request(sid, data)" in module_source, (
            "registration lambdas must route through _emit_approval_request"
        )

View file

@ -1,26 +1,27 @@
"""Tests for ``install_cua_driver`` upgrade semantics and architecture pre-check.
"""Tests for ``install_cua_driver`` upgrade semantics.
The cua-driver upstream installer always pulls the latest release tag, so
re-running it is the canonical upgrade path. ``install_cua_driver(upgrade=True)``
must:
* Be cross-platform run on macOS, Windows, and Linux. Only genuinely
unsupported platforms no-op silently on upgrade so ``hermes update`` can
call it unconditionally without warning those users.
* Choose the right installer per OS: ``install.sh`` via ``curl | bash`` on
macOS/Linux, ``install.ps1`` via PowerShell ``irm | iex`` on Windows.
* Be macOS-only no-op silently on Linux/Windows so ``hermes update`` can
call it unconditionally without warning every non-macOS user.
* Re-run the installer even when the binary is already on PATH (this is the
fix for the "we only pulled cua-driver once on enable" complaint).
* Preserve original ``upgrade=False`` behaviour for the toolset-enable flow:
skip if installed, install otherwise, warn on unsupported platforms.
* Pre-check architecture compatibility before downloading to avoid raw 404
errors when the upstream release lacks an asset for this OS+arch.
skip if installed, install otherwise, warn on non-macOS.
The pre-install arch probe that used to live alongside this function was
deleted (see top-of-file comment in tools_config.py) the upstream
installer has CUA_DRIVER_RS_BAKED_VERSION baked in by CD and errors
cleanly on missing-arch assets, and the upgrade path uses
``cua_driver_update_check()`` (which shells `cua-driver check-update
--json` against the already-installed binary).
"""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
from unittest.mock import patch
class TestInstallCuaDriverUpgrade:
@ -47,8 +48,6 @@ class TestInstallCuaDriverUpgrade:
patch.object(tools_config.shutil, "which",
side_effect=lambda n: "/usr/local/bin/" + n
if n in {"cua-driver", "curl"} else None), \
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
return_value=True), \
patch.object(tools_config, "_run_cua_driver_installer",
return_value=True) as runner, \
patch("subprocess.run"):
@ -63,8 +62,6 @@ class TestInstallCuaDriverUpgrade:
with patch("platform.system", return_value="Darwin"), \
patch.object(tools_config.shutil, "which",
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
return_value=True), \
patch.object(tools_config, "_run_cua_driver_installer",
return_value=True) as runner:
assert tools_config.install_cua_driver(upgrade=True) is True
@ -88,359 +85,75 @@ class TestInstallCuaDriverUpgrade:
with patch("platform.system", return_value="Darwin"), \
patch.object(tools_config.shutil, "which",
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
return_value=True), \
patch.object(tools_config, "_run_cua_driver_installer",
return_value=True) as runner:
assert tools_config.install_cua_driver(upgrade=False) is True
runner.assert_called_once()
class TestCheckCuaDriverAssetForArch:
def test_arm64_macos_always_returns_true(self):
class TestArchProbeRemoval:
"""Regression tests for the deletion of `_check_cua_driver_asset_for_arch`.
The old probe queried ``/releases/latest`` on trycua/cua and inspected
asset names. That was wrong in two ways:
1. cua-driver-rs releases are marked **prerelease** on every cut, so
``/releases/latest`` returns the Python ``cua-agent`` / ``cua-computer``
package instead a release with zero binary assets. The probe then
reported "no asset for $arch" on Linux x86_64, Windows, macOS Intel,
Linux arm64 every non-Apple-Silicon host.
2. Even with the right endpoint, it duplicated tag-resolution the upstream
installer already does correctly via ``CUA_DRIVER_RS_BAKED_VERSION``
(auto-baked by CD on every release).
The fix: stop probing. Trust the upstream installer for fresh installs
(it has the baked version + correct API fallback) and the
``cua-driver check-update --json`` MCP-binary native command for the
upgrade path.
"""
def test_probe_function_is_gone(self):
from hermes_cli import tools_config
assert not hasattr(tools_config, "_check_cua_driver_asset_for_arch")
assert not hasattr(tools_config, "_latest_cua_driver_rs_release")
# Apple Silicon assets are always published — short-circuits without
# a network probe.
with patch("platform.system", return_value="Darwin"), \
patch("platform.machine", return_value="arm64"):
assert tools_config._check_cua_driver_asset_for_arch() is True
def test_x86_64_with_asset_returns_true(self):
def test_fresh_install_does_not_call_github_api(self):
"""Pre-install no longer probes the GitHub API — the upstream
``install.sh`` resolves the tag from its baked CUA_DRIVER_RS_BAKED_VERSION
line. install.sh errors cleanly when the arch has no asset, so the
probe was duplicate gatekeeping.
"""
from hermes_cli import tools_config
releases = [{
"tag_name": "cua-driver-rs-v0.1.6",
"assets": [
{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"},
{"name": "cua-driver-rs-0.1.6-darwin-x86_64.tar.gz"},
],
}]
mock_resp = MagicMock()
mock_resp.read.return_value = json.dumps(releases).encode()
mock_resp.__enter__ = lambda s: s
mock_resp.__exit__ = MagicMock(return_value=False)
with patch("platform.system", return_value="Darwin"), \
patch("platform.machine", return_value="x86_64"), \
patch("urllib.request.urlopen", return_value=mock_resp):
assert tools_config._check_cua_driver_asset_for_arch() is True
def test_x86_64_without_asset_returns_false(self):
from hermes_cli import tools_config
releases = [{
"tag_name": "cua-driver-rs-v0.1.6",
"assets": [
{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"},
{"name": "cua-driver-rs.tar.gz"},
],
}]
mock_resp = MagicMock()
mock_resp.read.return_value = json.dumps(releases).encode()
mock_resp.__enter__ = lambda s: s
mock_resp.__exit__ = MagicMock(return_value=False)
with patch("platform.system", return_value="Darwin"), \
patch("platform.machine", return_value="x86_64"), \
patch("urllib.request.urlopen", return_value=mock_resp), \
patch.object(tools_config, "_print_warning") as warn, \
patch.object(tools_config, "_print_info"):
assert tools_config._check_cua_driver_asset_for_arch() is False
warn.assert_called_once()
assert "no Intel" in warn.call_args[0][0].lower() or "x86_64" in warn.call_args[0][0]
def test_x86_64_api_failure_returns_true(self):
"""Network failure should fail open — let the installer handle it."""
from hermes_cli import tools_config
with patch("platform.machine", return_value="x86_64"), \
patch("urllib.request.urlopen", side_effect=Exception("timeout")):
assert tools_config._check_cua_driver_asset_for_arch() is True
def test_fresh_install_x86_64_no_asset_skips_installer(self):
"""When the latest release has no Intel asset, skip the installer."""
from hermes_cli import tools_config
releases = [{
"tag_name": "cua-driver-rs-v0.1.6",
"assets": [{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"}],
}]
mock_resp = MagicMock()
mock_resp.read.return_value = json.dumps(releases).encode()
mock_resp.__enter__ = lambda s: s
mock_resp.__exit__ = MagicMock(return_value=False)
with patch("platform.system", return_value="Darwin"), \
patch.object(tools_config.shutil, "which",
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
patch("platform.machine", return_value="x86_64"), \
patch("urllib.request.urlopen", return_value=mock_resp), \
patch.object(tools_config, "_print_warning"), \
patch.object(tools_config, "_print_info"), \
patch.object(tools_config, "_run_cua_driver_installer") as runner:
assert tools_config.install_cua_driver(upgrade=False) is False
runner.assert_not_called()
patch("urllib.request.urlopen") as urlopen, \
patch.object(tools_config, "_run_cua_driver_installer",
return_value=True) as runner:
assert tools_config.install_cua_driver(upgrade=False) is True
runner.assert_called_once()
urlopen.assert_not_called()
def test_upgrade_x86_64_no_asset_returns_existing_status(self):
"""On upgrade with no Intel asset, return whether binary existed."""
def test_upgrade_with_binary_does_not_call_github_api_directly(self):
"""The upgrade path no longer hits GitHub from Python — it delegates
to the upstream ``install.sh`` (which has the baked release tag and
the proper API fallback). When cua-driver is already installed,
``cua_driver_update_check()`` (added in a separate change) further
short-circuits the network re-install via the binary's native
``check-update --json`` verb.
"""
from hermes_cli import tools_config
releases = [{
"tag_name": "cua-driver-rs-v0.1.6",
"assets": [{"name": "cua-driver-rs-0.1.6-darwin-arm64.tar.gz"}],
}]
mock_resp = MagicMock()
mock_resp.read.return_value = json.dumps(releases).encode()
mock_resp.__enter__ = lambda s: s
mock_resp.__exit__ = MagicMock(return_value=False)
# With binary installed — returns True (binary exists)
with patch("platform.system", return_value="Darwin"), \
patch.object(tools_config.shutil, "which",
side_effect=lambda n: "/usr/local/bin/" + n
if n in ("cua-driver", "curl") else None), \
patch("platform.machine", return_value="x86_64"), \
patch("urllib.request.urlopen", return_value=mock_resp), \
patch.object(tools_config, "_print_warning"), \
patch.object(tools_config, "_print_info"), \
patch.object(tools_config, "_run_cua_driver_installer") as runner:
assert tools_config.install_cua_driver(upgrade=True) is True
runner.assert_not_called()
# Without binary — returns False
with patch("platform.system", return_value="Darwin"), \
patch.object(tools_config.shutil, "which",
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
patch("platform.machine", return_value="x86_64"), \
patch("urllib.request.urlopen", return_value=mock_resp), \
patch.object(tools_config, "_print_warning"), \
patch.object(tools_config, "_print_info"), \
patch.object(tools_config, "_run_cua_driver_installer") as runner:
assert tools_config.install_cua_driver(upgrade=True) is False
runner.assert_not_called()
class TestInstallCuaDriverWindows:
    """Dispatch behaviour of ``install_cua_driver`` when the host reports Windows."""

    def test_fresh_install_runs_installer(self):
        """PowerShell resolves but cua-driver does not: the installer runs once."""
        from hermes_cli import tools_config

        def fake_which(name):
            # PowerShell present, cua-driver not yet installed.
            return r"C:\\Windows\\powershell.exe" if name == "powershell" else None

        with patch("platform.system", return_value="Windows"), \
             patch.object(tools_config.shutil, "which", side_effect=fake_which), \
             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
                          return_value=True), \
             patch("urllib.request.urlopen") as urlopen, \
             patch("subprocess.run"), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as installer:
            outcome = tools_config.install_cua_driver(upgrade=False)
            assert outcome is True
            installer.assert_called_once()

    def test_fresh_install_without_powershell_fails(self):
        """Nothing resolves on PATH: install is refused and the installer is untouched."""
        from hermes_cli import tools_config

        with patch("platform.system", return_value="Windows"), \
             patch.object(tools_config.shutil, "which", lambda n: None), \
             patch.object(tools_config, "_print_warning") as warn, \
             patch.object(tools_config, "_print_info"), \
             patch.object(tools_config, "_run_cua_driver_installer") as installer:
            outcome = tools_config.install_cua_driver(upgrade=False)
            assert outcome is False
            installer.assert_not_called()
            # The warning should name the missing fetch tool (powershell).
            warning_text = warn.call_args[0][0]
            assert "powershell" in warning_text.lower()

    def test_upgrade_with_binary_runs_installer(self):
        """Upgrade with both binaries resolvable runs the installer with verbose=False."""
        from hermes_cli import tools_config

        def fake_which(name):
            return r"C:\\bin\\" + name if name in {"cua-driver", "powershell"} else None

        with patch("platform.system", return_value="Windows"), \
             patch.object(tools_config.shutil, "which", side_effect=fake_which), \
             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
                          return_value=True), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as installer, \
             patch("subprocess.run"):
            outcome = tools_config.install_cua_driver(upgrade=True)
            assert outcome is True
            installer.assert_called_once()
            assert installer.call_args.kwargs.get("verbose") is False

    def test_installer_uses_powershell_irm_command(self):
        """_run_cua_driver_installer must shell out to PowerShell irm|iex."""
        from hermes_cli import tools_config

        finished = MagicMock(returncode=0)

        def fake_which(name):
            return r"C:\\bin\\" + name if name == "cua-driver" else None

        with patch("platform.system", return_value="Windows"), \
             patch.object(tools_config.shutil, "which", side_effect=fake_which), \
             patch("subprocess.run", return_value=finished) as run, \
             patch.object(tools_config, "_print_info"), \
             patch.object(tools_config, "_print_success"), \
             patch.object(tools_config, "_print_warning"):
            assert tools_config._run_cua_driver_installer() is True
            argv = run.call_args[0][0]
            # Argument list (shell=False), not a string.
            assert isinstance(argv, list)
            assert argv[0] == "powershell"
            assert run.call_args.kwargs.get("shell") is False
            command_line = " ".join(argv)
            assert "install.ps1" in command_line
            assert "iex" in command_line
class TestInstallCuaDriverLinux:
    """Dispatch behaviour of ``install_cua_driver`` on Linux hosts (alpha)."""

    def test_fresh_install_runs_installer(self):
        """curl resolves but cua-driver does not: the installer runs once."""
        from hermes_cli import tools_config

        def fake_which(name):
            return "/usr/bin/curl" if name == "curl" else None

        with patch("platform.system", return_value="Linux"), \
             patch.object(tools_config.shutil, "which", side_effect=fake_which), \
             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
                          return_value=True), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as installer:
            outcome = tools_config.install_cua_driver(upgrade=False)
            assert outcome is True
            installer.assert_called_once()

    def test_upgrade_with_binary_runs_installer(self):
        """Upgrade with cua-driver and curl both resolvable runs the installer once."""
        from hermes_cli import tools_config

        def fake_which(name):
            return "/usr/local/bin/" + name if name in {"cua-driver", "curl"} else None

        with patch("platform.system", return_value="Linux"), \
             patch.object(tools_config.shutil, "which", side_effect=fake_which), \
             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
                          return_value=True), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as installer, \
             patch("subprocess.run"):
            outcome = tools_config.install_cua_driver(upgrade=True)
            assert outcome is True
            installer.assert_called_once()

    def test_installer_uses_curl_bash_command(self):
        """_run_cua_driver_installer must shell out to curl | bash install.sh."""
        from hermes_cli import tools_config

        finished = MagicMock(returncode=0)

        def fake_which(name):
            return "/usr/local/bin/" + name if name == "cua-driver" else None

        with patch("platform.system", return_value="Linux"), \
             patch.object(tools_config.shutil, "which", side_effect=fake_which), \
             patch("subprocess.run", return_value=finished) as run, \
             patch.object(tools_config, "_print_info"), \
             patch.object(tools_config, "_print_success"), \
             patch.object(tools_config, "_print_warning"):
            assert tools_config._run_cua_driver_installer() is True
            shell_command = run.call_args[0][0]
            assert isinstance(shell_command, str)  # shell string on POSIX
            assert run.call_args.kwargs.get("shell") is True
            assert "install.sh" in shell_command
            assert "curl" in shell_command
class TestCheckCuaDriverAssetCrossPlatform:
    """_check_cua_driver_asset_for_arch recognizes Windows/Linux asset names."""

    @staticmethod
    def _response(releases):
        """Build a context-manager mock whose ``read()`` yields *releases* as JSON bytes."""
        resp = MagicMock()
        resp.read.return_value = json.dumps(releases).encode()
        resp.__enter__ = lambda s: s
        resp.__exit__ = MagicMock(return_value=False)
        return resp

    @staticmethod
    def _mock_release(asset_names):
        """Return a mocked /releases response holding one cua-driver-rs release."""
        # The probe lists /releases and picks the newest cua-driver-rs-v* tag,
        # so the mock returns a LIST of releases with that tag prefix.
        assets = [{"name": asset} for asset in asset_names]
        release = {"tag_name": "cua-driver-rs-v0.5.0", "assets": assets}
        return TestCheckCuaDriverAssetCrossPlatform._response([release])

    def test_windows_amd64_with_asset_returns_true(self):
        """Windows/AMD64 host with a windows-x86_64 asset published: probe passes."""
        from hermes_cli import tools_config

        listing = self._mock_release([
            "cua-driver-rs-0.5.0-windows-x86_64.zip",
            "cua-driver-rs-0.5.0-darwin-arm64.tar.gz",
        ])
        with patch("platform.system", return_value="Windows"), \
             patch("platform.machine", return_value="AMD64"), \
             patch("urllib.request.urlopen", return_value=listing):
            assert tools_config._check_cua_driver_asset_for_arch() is True

    def test_windows_arm64_without_asset_returns_false(self):
        """Windows/ARM64 host with only an x86_64 asset: probe fails and warns once."""
        from hermes_cli import tools_config

        listing = self._mock_release([
            "cua-driver-rs-0.5.0-windows-x86_64.zip",
        ])
        with patch("platform.system", return_value="Windows"), \
             patch("platform.machine", return_value="ARM64"), \
             patch("urllib.request.urlopen", return_value=listing), \
             patch.object(tools_config, "_print_warning") as warn, \
             patch.object(tools_config, "_print_info"):
            assert tools_config._check_cua_driver_asset_for_arch() is False
            warn.assert_called_once()
            warning_text = warn.call_args[0][0]
            assert "arm64" in warning_text.lower()

    def test_linux_x86_64_with_asset_returns_true(self):
        """Linux/x86_64 host with a linux-x86_64 asset published: probe passes."""
        from hermes_cli import tools_config

        listing = self._mock_release([
            "cua-driver-rs-0.5.0-linux-x86_64.tar.gz",
        ])
        with patch("platform.system", return_value="Linux"), \
             patch("platform.machine", return_value="x86_64"), \
             patch("urllib.request.urlopen", return_value=listing):
            assert tools_config._check_cua_driver_asset_for_arch() is True

    def test_linux_aarch64_with_asset_returns_true(self):
        """Linux/aarch64 host with a linux-arm64 asset published: probe passes."""
        from hermes_cli import tools_config

        listing = self._mock_release([
            "cua-driver-rs-0.5.0-linux-arm64.tar.gz",
        ])
        with patch("platform.system", return_value="Linux"), \
             patch("platform.machine", return_value="aarch64"), \
             patch("urllib.request.urlopen", return_value=listing):
            assert tools_config._check_cua_driver_asset_for_arch() is True

    def test_linux_aarch64_without_asset_returns_false(self):
        """Linux/aarch64 host with only an x86_64 asset: probe fails and warns once."""
        from hermes_cli import tools_config

        listing = self._mock_release([
            "cua-driver-rs-0.5.0-linux-x86_64.tar.gz",
        ])
        with patch("platform.system", return_value="Linux"), \
             patch("platform.machine", return_value="aarch64"), \
             patch("urllib.request.urlopen", return_value=listing), \
             patch.object(tools_config, "_print_warning") as warn, \
             patch.object(tools_config, "_print_info"):
            assert tools_config._check_cua_driver_asset_for_arch() is False
            warn.assert_called_once()

    def test_releases_latest_tag_ignored_picks_driver_rs_tag(self):
        """A non-driver tag at the head of the list must not gate the probe.

        Regression guard: the monorepo's newest release is often a Python
        component (agent-*, computer-*) with zero binary assets. The probe
        must skip past it to the newest cua-driver-rs-v* release.
        """
        from hermes_cli import tools_config

        driver_release = {
            "tag_name": "cua-driver-rs-v0.6.0",
            "assets": [{"name": "cua-driver-rs-0.6.0-linux-x86_64-binary.tar.gz"}],
        }
        listing = self._response([
            {"tag_name": "agent-v0.8.3", "assets": []},
            {"tag_name": "computer-v0.5.19", "assets": []},
            driver_release,
        ])
        with patch("platform.system", return_value="Linux"), \
             patch("platform.machine", return_value="x86_64"), \
             patch("urllib.request.urlopen", return_value=listing):
            assert tools_config._check_cua_driver_asset_for_arch() is True
# Probe deleted — no direct GitHub API call from Python.
urlopen.assert_not_called()

View file

@ -263,6 +263,29 @@ class TestWebServerEndpoints:
import hermes_cli.web_server as web_server
monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
# A docker install inside a container should be managed externally.
monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "docker")
assert web_server._dashboard_local_update_managed_externally() is True
def test_dashboard_update_capability_allows_git_in_container(self, monkeypatch):
    """A git checkout inside a container (e.g. bind-mounted in hermes-webui)
    should still offer dashboard updates — the checkout is self-managed."""
    import hermes_cli.web_server as web_server
    import hermes_constants

    # Container detected, but the install method is a git checkout.
    monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
    monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "git")

    managed_externally = web_server._dashboard_local_update_managed_externally()
    assert managed_externally is False
def test_dashboard_update_capability_blocks_pip_in_container(self, monkeypatch):
    """A pip install inside a container is still managed externally."""
    import hermes_cli.web_server as web_server
    import hermes_constants

    # Container detected and the install method is pip.
    monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
    monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "pip")

    managed_externally = web_server._dashboard_local_update_managed_externally()
    assert managed_externally is True
@ -1011,6 +1034,8 @@ class TestWebServerEndpoints:
spawned = True
raise AssertionError("docker update guard should not spawn hermes update")
# Bypass the managed-externally gate so we reach the docker install check.
monkeypatch.setattr(web_server, "_dashboard_local_update_managed_externally", lambda: False)
monkeypatch.setattr(web_server, "detect_install_method", lambda _root: "docker")
monkeypatch.setattr(web_server, "_spawn_hermes_action", fail_spawn)
web_server._ACTION_PROCS.pop("hermes-update", None)

View file

@ -0,0 +1,138 @@
"""Unit coverage for the background-review aux-model selector + routed digest.
Covers the two behaviors this change adds:
  * _resolve_review_runtime — auto/same-model → not routed (main model, warm
    cache); a configured different model → routed with resolved credentials.
  * _digest_history — compact replay used ONLY on the routed path (recent tail
    verbatim + a digest of older turns), preserving role alternation.
Pure-function / config-driven; no live model calls.
"""
from unittest.mock import patch
from agent import background_review as br
def _msg(role, content, tool_calls=None):
m = {"role": role, "content": content}
if tool_calls:
m["tool_calls"] = tool_calls
return m
# ---------------------------------------------------------------------------
# _resolve_review_runtime — the aux-model selector
# ---------------------------------------------------------------------------
class _FakeAgent:
def __init__(self, provider="openai-codex", model="gpt-5.5"):
self.provider = provider
self.model = model
def _current_main_runtime(self):
return {
"api_key": "parent-key",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_mode": "codex_app_server",
}
def test_routing_auto_inherits_parent_and_downgrades_codex_app_server():
    """provider="auto" keeps the parent's provider/model and is not routed."""
    review_cfg = {"provider": "auto", "model": ""}
    config = {"auxiliary": {"background_review": review_cfg}}
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config):
        runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is False
    assert runtime["provider"] == "openai-codex"
    assert runtime["model"] == "gpt-5.5"
    # downgraded so agent-loop tools dispatch
    assert runtime["api_mode"] == "codex_responses"
def test_routing_to_different_model_marks_routed_and_resolves_credentials():
    """A configured model that differs from the parent's is routed with resolved credentials."""
    review_cfg = {
        "provider": "openrouter",
        "model": "google/gemini-3-flash-preview",
    }
    config = {"auxiliary": {"background_review": review_cfg}}
    resolved_provider = {
        "provider": "openrouter",
        "api_key": "or-key",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
    }
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config), \
         patch("hermes_cli.runtime_provider.resolve_runtime_provider",
               return_value=resolved_provider):
        runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is True
    assert runtime["provider"] == "openrouter"
    assert runtime["model"] == "google/gemini-3-flash-preview"
    assert runtime["api_key"] == "or-key"
def test_routing_same_model_as_parent_is_not_routed():
    """Configuring the parent's own provider/model leaves the review un-routed."""
    provider, model = "openrouter", "anthropic/claude-opus-4.8"
    parent = _FakeAgent(provider=provider, model=model)
    config = {"auxiliary": {"background_review": {
        "provider": provider, "model": model,
    }}}

    with patch("hermes_cli.config.load_config", return_value=config):
        runtime = br._resolve_review_runtime(parent)

    # same model/provider → keep full-replay path
    assert runtime["routed"] is False
def test_routing_resolution_failure_falls_back_to_parent():
    """If resolving the configured provider raises, the parent runtime is used instead."""
    review_cfg = {
        "provider": "openrouter",
        "model": "google/gemini-3-flash-preview",
    }
    config = {"auxiliary": {"background_review": review_cfg}}
    parent = _FakeAgent()

    with patch("hermes_cli.config.load_config", return_value=config), \
         patch("hermes_cli.runtime_provider.resolve_runtime_provider",
               side_effect=RuntimeError("boom")):
        runtime = br._resolve_review_runtime(parent)

    assert runtime["routed"] is False
    assert runtime["provider"] == "openai-codex"
# ---------------------------------------------------------------------------
# _digest_history — routed-path compact replay
# ---------------------------------------------------------------------------
def test_digest_under_tail_returns_full():
    """A history shorter than the tail comes back equal to the input."""
    history = [_msg("user", "hi"), _msg("assistant", "hello")]
    digested = br._digest_history(history, tail=24)
    assert digested == history
def test_digest_collapses_old_keeps_tail_verbatim():
    """Older turns collapse into one digest message; the last ``tail`` stay verbatim."""
    # 60 alternating user/assistant exchanges (120 messages in total).
    history = [
        turn
        for i in range(60)
        for turn in (
            _msg("user", f"u{i} " + "x" * 50),
            _msg("assistant", f"a{i} " + "y" * 50),
        )
    ]

    digested = br._digest_history(history, tail=10)
    head = digested[0]

    # First message is the synthetic digest (user role → alternation preserved).
    assert head["role"] == "user"
    assert head["content"].startswith("[Earlier conversation digest")
    # Recent tail preserved verbatim.
    assert digested[-1] == history[-1]
    assert len(digested) == 11  # 1 digest + 10 tail
def test_digest_does_not_open_tail_on_a_tool_message():
    """The verbatim tail must never start on an orphaned ``tool`` message."""
    # 40 rounds of user → assistant tool call → tool result.
    history = [
        turn
        for _ in range(40)
        for turn in (
            _msg("user", "u" + "x" * 50),
            _msg("assistant", "", tool_calls=[
                {"function": {"name": "terminal", "arguments": "{}"}}]),
            {"role": "tool", "content": "result " + "w" * 50},
        )
    ]

    digested = br._digest_history(history, tail=2)

    # The verbatim tail (after the digest) must not begin on a bare tool message.
    first_tail_message = digested[1]
    assert first_tail_message["role"] != "tool"
def test_digest_records_tool_names_in_arc():
    """The digest text keeps the old user line and the names of the tools called."""
    tool_calls = [
        {"function": {"name": "skill_view", "arguments": "{}"}},
        {"function": {"name": "patch", "arguments": "{}"}},
    ]
    history = [
        _msg("user", "do the thing"),
        _msg("assistant", "", tool_calls=tool_calls),
    ]
    # Pad with 30 recent messages so the first two fall outside the tail.
    history.extend(_msg("user", f"tail{i}") for i in range(30))

    digest_text = br._digest_history(history, tail=10)[0]["content"]

    assert "USER: do the thing" in digest_text
    assert "tools: skill_view, patch" in digest_text

View file

@ -260,6 +260,52 @@ class TestShrinkImagePartsHelper:
assert seen["max_dimension"] == 2000
assert msgs[0]["content"][0]["image_url"]["url"] == shrunk
def test_anthropic_base64_image_source_rewritten(self, monkeypatch):
    """Anthropic-native image blocks are shrinkable after adapter conversion."""
    agent = _make_agent()
    _install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))

    # Keep only the base64 payload that follows the data-URL comma.
    original_data = _big_png_data_url(100).partition(",")[2]
    shrunk = "data:image/jpeg;base64," + "N" * 1000
    seen = {}

    def _fake_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
        # Record what the helper was asked for, then hand back the canned result.
        seen.update(mime_type=mime_type, max_dimension=max_dimension)
        return shrunk

    monkeypatch.setattr(
        "tools.vision_tools._resize_image_for_vision",
        _fake_resize,
        raising=False,
    )

    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": original_data,
        },
    }
    msgs = [{"role": "user", "content": [image_block]}]

    changed = agent._try_shrink_image_parts_in_messages(
        msgs,
        max_dimension=2000,
    )

    source = msgs[0]["content"][0]["source"]
    assert changed is True
    assert seen["mime_type"] == "image/png"
    assert seen["max_dimension"] == 2000
    assert source["type"] == "base64"
    assert source["media_type"] == "image/jpeg"
    assert source["data"] == "N" * 1000
def test_oversized_input_image_string_shape_rewritten(self, monkeypatch):
"""OpenAI Responses shape: {type: input_image, image_url: "data:..."}."""
agent = _make_agent()

View file

@ -2139,14 +2139,18 @@ class TestStructuredElementsConsumption:
# Markdown surface doesn't carry bounds — lossy by design.
assert cap.elements[0].bounds == (0, 0, 0, 0)
def test_vision_capture_uses_get_window_state_not_removed_screenshot_tool(self):
"""cua-driver 0.6.x returns vision screenshots from
get_window_state(capture_mode="vision"); the old standalone
screenshot tool is no longer available."""
def test_vision_capture_falls_back_to_get_window_state_when_screenshot_dropped(self):
"""cua-driver >=0.5.x dropped the standalone `screenshot` MCP tool and
folded full-window PNG capture into `get_window_state`. When the driver
no longer advertises `screenshot`, vision capture must route through
`get_window_state` (discarding the AX tree) and still return a PNG."""
from tools.computer_use.cua_backend import CuaDriverBackend
backend = CuaDriverBackend()
backend._session = MagicMock()
# Modern driver: capabilities discovered, `screenshot` not advertised.
backend._session._has_tool.return_value = False
backend._session.capabilities_discovered = True
windows_payload = {
"windows": [{
@ -2164,12 +2168,11 @@ class TestStructuredElementsConsumption:
return {"data": "", "images": [], "image_mime_types": [],
"structuredContent": windows_payload, "isError": False}
if name == "get_window_state":
assert args["capture_mode"] == "vision"
return {"data": "", "images": [png_b64],
"image_mime_types": ["image/png"],
"structuredContent": None, "isError": False}
if name == "screenshot":
raise AssertionError("vision capture must not call removed screenshot tool")
raise AssertionError("driver dropped screenshot; must not be called")
return {"data": "", "images": [], "image_mime_types": [],
"structuredContent": None, "isError": False}
@ -2182,6 +2185,8 @@ class TestStructuredElementsConsumption:
assert cap.image_mime_type == "image/png"
assert cap.width == 1
assert cap.height == 1
# Vision mode stays free of AX element noise.
assert cap.elements == []
def test_capture_app_screen_targets_desktop_window(self):
"""capture(app='screen') resolves to the OS shell/desktop window

View file

@ -0,0 +1,109 @@
"""Regression tests for profile-aware tilde expansion in file tools.
The bug (#48552): in-process file tools (write_file, read_file, patch,
search_files) resolved ``~`` via ``os.path.expanduser()``, which reads the
gateway process's ``HOME``. In profile mode (Docker, systemd, s6) the gateway
``HOME`` differs from the profile ``HOME`` that interactive sessions use, so
``~`` expanded to the wrong directory and file operations failed with
"no such file or directory".
The fix adds ``_expand_tilde()``, which delegates to
``hermes_constants.get_subprocess_home()`` — the same policy the terminal tool
uses for subprocess environments.
See: https://github.com/NousResearch/hermes-agent/issues/48552
"""
import os
from pathlib import Path
from unittest.mock import patch
import pytest
import tools.file_tools as ft
# ---------------------------------------------------------------------------
# _expand_tilde() unit tests
# ---------------------------------------------------------------------------
class TestExpandTilde:
    """Verify the _expand_tilde() helper resolves ~ to the profile home."""

    # Patch target and the profile home most cases make it return.
    _HOME_LOOKUP = "hermes_constants.get_subprocess_home"
    _PROFILE_HOME = "/opt/data/profiles/coder/home"

    def test_tilde_expands_to_profile_home(self):
        """When get_subprocess_home returns a value, ~/path uses it."""
        with patch(self._HOME_LOOKUP, return_value=self._PROFILE_HOME):
            expanded = ft._expand_tilde("~/scratch/file.txt")
        assert expanded == "/opt/data/profiles/coder/home/scratch/file.txt"

    def test_bare_tilde_expands_to_profile_home(self):
        """Bare ~ expands to the profile home."""
        with patch(self._HOME_LOOKUP, return_value=self._PROFILE_HOME):
            expanded = ft._expand_tilde("~")
        assert expanded == "/opt/data/profiles/coder/home"

    def test_falls_back_when_no_profile_home(self):
        """When get_subprocess_home returns None, use os.path.expanduser."""
        with patch(self._HOME_LOOKUP, return_value=None):
            expanded = ft._expand_tilde("~/Documents")
        assert expanded == os.path.expanduser("~/Documents")

    def test_other_user_tilde_not_overridden(self):
        """~user/path must NOT use the profile home — it's a different user."""
        with patch(self._HOME_LOOKUP, return_value=self._PROFILE_HOME):
            expanded = ft._expand_tilde("~root/file.txt")
        # Should use os.path.expanduser, not the profile home
        assert "/opt/data/profiles/coder/home" not in expanded

    def test_no_tilde_unchanged(self):
        """Paths without ~ are returned unchanged (modulo expanduser)."""
        with patch(self._HOME_LOOKUP, return_value=self._PROFILE_HOME):
            expanded = ft._expand_tilde("/etc/passwd")
        assert expanded == "/etc/passwd"

    def test_empty_path_unchanged(self):
        """Empty string returns empty."""
        with patch(self._HOME_LOOKUP, return_value=self._PROFILE_HOME):
            expanded = ft._expand_tilde("")
        assert expanded == ""
# ---------------------------------------------------------------------------
# Integration: _resolve_path_for_task uses profile home
# ---------------------------------------------------------------------------
class TestResolvePathUsesProfileHome:
    """Verify _resolve_path_for_task resolves ~ to the profile home."""

    @staticmethod
    def _two_homes(tmp_path, monkeypatch):
        """Create distinct profile/process homes and point ``HOME`` at the process one."""
        profile_home = tmp_path / "profile_home"
        profile_home.mkdir()
        process_home = tmp_path / "process_home"
        process_home.mkdir()
        monkeypatch.setenv("HOME", str(process_home))
        # No live tracked cwd for the task.
        monkeypatch.setattr(ft, "_get_live_tracking_cwd", lambda task_id="default": None)
        return profile_home, process_home

    def test_relative_tilde_resolves_to_profile_home(self, tmp_path, monkeypatch):
        """A ~/path argument resolves under the profile home, not process HOME."""
        profile_home, _process_home = self._two_homes(tmp_path, monkeypatch)

        with patch("hermes_constants.get_subprocess_home", return_value=str(profile_home)):
            resolved = ft._resolve_path_for_task("~/test_file.txt", task_id="test")

        resolved_text = str(resolved)
        assert resolved_text.startswith(str(profile_home))
        assert "process_home" not in resolved_text

    def test_absolute_tilde_in_workspace_root(self, tmp_path, monkeypatch):
        """A workspace root specified with ~ resolves to profile home."""
        profile_home, process_home = self._two_homes(tmp_path, monkeypatch)

        with patch("hermes_constants.get_subprocess_home", return_value=str(profile_home)):
            # _resolve_base_dir uses the workspace root from config; if it contains ~,
            # it should resolve to profile home
            resolved = ft._resolve_path_for_task("~/data/config.json", task_id="test")

        resolved_text = str(resolved)
        assert str(profile_home) in resolved_text
        assert str(process_home) not in resolved_text

View file

@ -107,6 +107,63 @@ def test_memory_gate_on_then_apply(hermes_home):
assert "approved entry" in store.user_entries[0]
def test_cli_memory_approve_without_live_agent_uses_fresh_store(hermes_home, capsys):
    """#46783: ``/memory approve`` from a context with no live agent (e.g. the
    Desktop GUI) passed ``memory_store=None`` into the shared handler, which
    returned "memory store unavailable" and applied nothing. The CLI handler must
    fall back to a freshly loaded on-disk store, like the gateway path does."""
    import json

    from hermes_cli.cli_commands_mixin import CLICommandsMixin
    from tools import write_approval as wa
    from tools.memory_tool import memory_tool, MemoryStore

    _set_approval("memory", True)

    # Stage one pending memory write through a store loaded from disk.
    staging = MemoryStore()
    staging.load_from_disk()
    staged = json.loads(
        memory_tool("add", "memory", "remember the launch date", store=staging)
    )
    assert staged.get("pending_id"), staged
    assert wa.pending_count("memory") == 1

    # Bare CLI handler with no live agent → store resolves to None pre-fix.
    cli = CLICommandsMixin.__new__(CLICommandsMixin)
    cli.agent = None
    cli._handle_memory_command("/memory approve all")

    printed = capsys.readouterr().out
    assert "memory store unavailable" not in printed, printed
    assert "Approved 1" in printed, printed
    assert wa.pending_count("memory") == 0

    # The approved write landed in a freshly loaded on-disk store (MEMORY.md).
    reloaded = MemoryStore()
    reloaded.load_from_disk()
    assert any("remember the launch date" in entry for entry in reloaded.memory_entries)
def test_load_on_disk_store_honors_configured_char_limits(hermes_home, monkeypatch):
    """load_on_disk_store() must read memory.memory_char_limit /
    user_char_limit from config so approvals applied without a live agent
    enforce the SAME caps as the live agent (agent_init.py). Falls back to
    defaults when config can't be loaded.
    """
    from tools.memory_tool import load_on_disk_store

    # Config override path: helper picks up the configured limits.
    configured = {"memory": {"memory_char_limit": 999, "user_char_limit": 444}}
    monkeypatch.setattr("hermes_cli.config.load_config", lambda: configured)
    store = load_on_disk_store()
    assert store.memory_char_limit == 999
    assert store.user_char_limit == 444

    # Failure path: config raises → defaults, never blows up.
    def _boom():
        raise RuntimeError("no config")

    monkeypatch.setattr("hermes_cli.config.load_config", _boom)
    fallback = load_on_disk_store()
    assert fallback.memory_char_limit == 2200
    assert fallback.user_char_limit == 1375
# ---------------------------------------------------------------------------
# Skill gate
# ---------------------------------------------------------------------------

View file

@ -746,6 +746,28 @@ class _CuaDriverSession:
return capability in self._capabilities.get(tool, set())
return any(capability in caps for caps in self._capabilities.values())
def _has_tool(self, name: str) -> bool:
"""Return True when ``tools/list`` advertised a tool by this name.
Used to route capture(): cua-driver dropped the standalone
``screenshot`` tool and folded full-window PNG capture into
``get_window_state`` (whose own description notes it "Also captures
a PNG screenshot of the specified window"). Older drivers that still
expose ``screenshot`` keep using it; newer ones fall through to
``get_window_state``.
Returns False when discovery hasn't populated the map yet — callers
treat that as "unknown" and probe defensively rather than trusting it.
"""
return name in self._capabilities
@property
def capabilities_discovered(self) -> bool:
    """Whether the per-tool capability map currently holds any entry.

    While this is False, ``_has_tool`` answers are not trustworthy (discovery
    failed or the session hasn't started) and capture() should probe
    defensively.
    """
    return True if self._capabilities else False
@property
def capability_version(self) -> str:
"""Driver-advertised capability vocabulary version (empty string
@ -848,6 +870,45 @@ def _extract_tool_result(mcp_result: Any) -> Dict[str, Any]:
}
def _image_from_tool_result(out: Dict[str, Any]) -> tuple[Optional[str], Optional[str]]:
"""Pull a (png_b64, mime_type) pair out of a flattened tool result.
cua-driver delivers window screenshots in two shapes depending on tool +
transport:
* As an MCP ``image`` content part surfaced by ``_extract_tool_result``
in ``out["images"]`` with a parallel ``image_mime_types`` entry. This
is what ``get_window_state`` emits over the stdio MCP transport.
* As a base64 field inside ``structuredContent``
``screenshot_png_b64`` (+ ``screenshot_mime_type``). This is what
``get_window_state`` returns when its structured payload carries the
image instead of a content part (newer driver builds; also the shape
seen via the ``cua-driver call`` CLI surface).
Checking both makes capture() robust to either delivery shape, so the
image never silently drops just because the driver moved it between the
content list and structuredContent. Returns ``(None, None)`` when neither
location carries an image.
"""
images = out.get("images") or []
if images and images[0]:
mimes = out.get("image_mime_types") or []
mime = mimes[0] if mimes and mimes[0] else None
return images[0], mime
structured = out.get("structuredContent") or {}
b64 = structured.get("screenshot_png_b64") or structured.get("png_b64")
if b64:
mime = (
structured.get("screenshot_mime_type")
or structured.get("mime_type")
or None
)
return b64, mime
return None, None
# ---------------------------------------------------------------------------
# The backend itself
# ---------------------------------------------------------------------------
@ -1062,28 +1123,61 @@ class CuaDriverBackend(ComputerUseBackend):
window_title = ""
if mode == "vision":
# Newer cua-driver releases no longer expose a standalone
# `screenshot` MCP tool. Request a screenshot-only capture via
# get_window_state instead; this keeps vision mode working while
# avoiding the AX walk used by som/ax captures.
sc_out = self._session.call_tool(
"get_window_state",
{
"pid": self._active_pid,
"window_id": self._active_window_id,
"capture_mode": "vision",
"session": self._session_id,
},
# Plain screenshot, no AX walk. cua-driver dropped the standalone
# `screenshot` tool (≥0.5.x) and folded full-window PNG capture
# into `get_window_state`. Route accordingly:
# * Driver advertises `screenshot` (older builds) → use it; it's
# the cheapest path (no AX tree walked server-side).
# * Otherwise (current drivers) → call `get_window_state` but
# DISCARD the AX tree/elements, returning only the PNG. Vision
# mode's whole contract is "just the pixels, no element noise",
# so we drop everything but the image.
# When capability discovery hasn't run (empty map), we don't trust
# a negative `_has_tool` answer — we still try `screenshot` first
# and fall back if the driver rejects it, so the path self-heals on
# any driver version.
use_screenshot = (
self._session._has_tool("screenshot")
or not self._session.capabilities_discovered
)
if sc_out["images"]:
png_b64 = sc_out["images"][0]
# Pick up the explicit mimeType cua-driver attaches to image
# parts (Surface 7). Empty string means the driver didn't
# carry one — callers will fall back to magic-byte sniffing.
mimes = sc_out.get("image_mime_types") or []
image_mime_type = mimes[0] if mimes and mimes[0] else None
sc_out: Optional[Dict[str, Any]] = None
if use_screenshot:
sc_out = self._session.call_tool(
"screenshot",
{
"window_id": self._active_window_id,
"format": "jpeg",
"quality": 85,
"session": self._session_id,
},
)
png_b64, image_mime_type = _image_from_tool_result(sc_out)
if not png_b64:
# Driver had no usable `screenshot` (e.g. "Unknown tool:
# screenshot" on ≥0.5.x, or an empty image part). Fall
# through to the get_window_state path below.
sc_out = None
if sc_out is None:
gws_out = self._session.call_tool(
"get_window_state",
{
"pid": self._active_pid,
"window_id": self._active_window_id,
"session": self._session_id,
},
)
png_b64, image_mime_type = _image_from_tool_result(gws_out)
# Still grab the window title — it's cheap and useful in the
# vision response — but deliberately leave `elements` empty so
# vision stays free of AX-tree noise.
text = gws_out["data"] if isinstance(gws_out["data"], str) else ""
_, tree = _split_tree_text(text)
wt = re.search(r'AXWindow\s+"([^"]+)"', tree)
if wt:
window_title = wt.group(1)
else:
# get_window_state: AX tree + optional screenshot.
# get_window_state: AX tree + screenshot.
gws_out = self._session.call_tool(
"get_window_state",
{
@ -1120,10 +1214,10 @@ class CuaDriverBackend(ComputerUseBackend):
if e.element_token
}
if gws_out["images"]:
png_b64 = gws_out["images"][0]
mimes = gws_out.get("image_mime_types") or []
image_mime_type = mimes[0] if mimes and mimes[0] else None
# Image may arrive as an MCP image part or inside
# structuredContent (screenshot_png_b64) depending on the driver
# build — _image_from_tool_result handles both.
png_b64, image_mime_type = _image_from_tool_result(gws_out)
# Extract window title from the AX tree first AXWindow line.
wt = re.search(r'AXWindow\s+"([^"]+)"', tree)

View file

@ -0,0 +1,189 @@
"""
Cross-platform Computer Use readiness + macOS permission helpers.
cua-driver runs on macOS, Windows, and Linux, but "ready to drive" means
something different on each:
  * macOS — explicit TCC grants (Accessibility + Screen Recording). cua-driver
    reports/requests them via ``permissions status`` / ``permissions grant``.
    The grants attach to cua-driver's OWN identity (``com.trycua.driver`` /
    the installed ``CuaDriver.app``), NOT Hermes — so no Hermes entitlement is
    involved, and ``grant`` launches CuaDriver via LaunchServices so the macOS
    dialog is attributed correctly.
  * Windows — no TCC toggles; the UIAccess worker (``cua-driver-uia.exe``) may
    trip a SmartScreen prompt on first run. Readiness == driver health.
  * Linux — assistive control via the X11/XWayland stack. Readiness == driver
    health.
The universal signal on every platform is ``cua-driver doctor --json`` (binary
integrity + platform support). ``computer_use_status`` folds that together with
the macOS permission detail into one payload for the desktop card, the
``hermes computer-use permissions`` CLI, and ``/api/tools/computer-use/status``.
"""
from __future__ import annotations
import json
import os
import shutil
import subprocess
import sys
from typing import Any, Dict, List, Optional
# Platforms with a cua-driver runtime backend (mirrors the toolset platform_gate).
_RUNTIME_PLATFORMS = frozenset({"darwin", "win32", "linux"})
_BOOLS = ("accessibility", "screen_recording", "screen_recording_capturable")
def _driver_cmd(override: Optional[str]) -> str:
    """Pick the cua-driver command name/path to look up.

    Resolution order:
      1. ``override`` when it is truthy (returned untouched).
      2. Whatever ``hermes_cli.tools_config._cua_driver_cmd()`` returns.
      3. If that import or call fails for any reason: the stripped
         ``HERMES_CUA_DRIVER_CMD`` environment variable, or the literal
         ``"cua-driver"`` when the variable is unset or blank.
    """
    if not override:
        try:
            from hermes_cli.tools_config import _cua_driver_cmd

            return _cua_driver_cmd()
        except Exception:
            # Best-effort fallback: the CLI config module is optional here.
            from_env = os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip()
            return from_env if from_env else "cua-driver"
    return override
def _child_env() -> Dict[str, str]:
    """Return the environment mapping to hand to cua-driver child processes.

    Delegates to ``tools.computer_use.cua_backend.cua_driver_child_env`` (which
    applies the Hermes telemetry opt-in policy). If that helper cannot be
    imported or raises, a plain copy of the current process environment is
    used instead.
    """
    try:
        from tools.computer_use.cua_backend import cua_driver_child_env

        env = cua_driver_child_env()
    except Exception:
        # Fall back to an unmodified snapshot of our own environment.
        env = os.environ.copy()
    return env
def _run(binary: str, *args: str, timeout: float) -> subprocess.CompletedProcess:
    """Execute ``binary`` with ``args`` and return the completed process.

    stdout/stderr are captured as text, stdin is connected to the null device
    so the child can never block waiting for input, and the child environment
    comes from ``_child_env()``. ``subprocess.run`` raises
    ``subprocess.TimeoutExpired`` when ``timeout`` seconds elapse; that and any
    spawn error are left for the caller to handle.
    """
    argv = [binary]
    argv.extend(args)
    return subprocess.run(
        argv,
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        env=_child_env(),
        timeout=timeout,
    )
def _json_out(binary: str, *args: str, timeout: float) -> Any:
    """Run ``binary args`` and decode its stdout as JSON.

    Returns ``None`` when the command printed nothing (after stripping
    whitespace); otherwise returns the decoded JSON value.

    Failures are NOT swallowed here: timeouts / spawn errors from ``_run`` and
    ``json.JSONDecodeError`` for malformed output propagate to the caller,
    which decides how to report them.
    """
    completed = _run(binary, *args, timeout=timeout)
    text = (completed.stdout or "").strip()
    if not text:
        return None
    return json.loads(text)
def _doctor(binary: str) -> Optional[Dict[str, Any]]:
    """``cua-driver doctor --json`` → ``{ok, checks:[{label,status,message}]}``.

    Returns ``None`` when the command fails, times out, prints malformed JSON,
    or prints something other than a JSON object. Otherwise returns a dict
    with:

    * ``ok`` — truthiness of the report's ``ok`` field.
    * ``checks`` — one ``{label, status, message}`` dict (all values coerced to
      ``str``) per object entry in the report's ``probes`` list. Non-object
      entries are skipped.
    """
    try:
        data = _json_out(binary, "doctor", "--json", timeout=12)
    except Exception:
        return None
    if not isinstance(data, dict):
        return None
    probes = data.get("probes")
    if not isinstance(probes, list):
        # ``dict.get`` only applies its default for a MISSING key; an explicit
        # ``"probes": null`` (or any scalar) would otherwise raise TypeError
        # during iteration and take the whole status call down with it.
        probes = []
    checks: List[Dict[str, str]] = [
        {
            "label": str(p.get("label", "")),
            "status": str(p.get("status", "")),
            "message": str(p.get("message", "")),
        }
        for p in probes
        if isinstance(p, dict)
    ]
    return {"ok": bool(data.get("ok")), "checks": checks}
def _mac_permissions(binary: str, out: Dict[str, Any]) -> None:
    """Merge ``cua-driver permissions status --json`` results into ``out``.

    Mutates ``out`` in place:

    * On timeout or any other failure, sets ``out["error"]`` to a descriptive
      message and leaves everything else untouched.
    * On success, copies each key listed in ``_BOOLS`` whose reported value is
      a real ``bool``, and copies ``source`` when it is a JSON object.

    A non-object payload (including empty output) changes nothing.
    """
    try:
        payload = _json_out(binary, "permissions", "status", "--json", timeout=10)
    except subprocess.TimeoutExpired:
        out["error"] = "cua-driver permissions status timed out"
        return
    except Exception as exc:  # spawn failure or malformed JSON
        out["error"] = f"cua-driver permissions status failed: {exc}"
        return
    if not isinstance(payload, dict):
        return
    for key in _BOOLS:
        flag = payload.get(key)
        # Only genuine booleans are trusted; anything else keeps the default.
        if isinstance(flag, bool):
            out[key] = flag
    origin = payload.get("source")
    if isinstance(origin, dict):
        out["source"] = origin
def computer_use_status(driver_cmd: Optional[str] = None) -> Dict[str, Any]:
    """Unified, OS-aware Computer Use readiness for the desktop card.

    ``ready`` is the single signal the UI keys off: on macOS it is True only
    when both the Accessibility and Screen Recording grants are reported True;
    elsewhere it is the doctor's ``ok`` flag (no TCC model). ``None`` means
    unknown (binary missing / probe failed). ``can_grant`` is macOS-only.

    Args:
        driver_cmd: Optional explicit driver command; resolved through
            ``_driver_cmd`` and then looked up on ``PATH`` with
            ``shutil.which``.

    Returns:
        A dict with ``platform``, ``platform_supported``, ``installed``,
        ``version``, ``ready``, ``can_grant``, ``checks``, ``source``,
        ``error`` and one entry per name in ``_BOOLS``.
    """
    platform_name = sys.platform
    resolved = shutil.which(_driver_cmd(driver_cmd))
    status: Dict[str, Any] = {
        "platform": platform_name,
        "platform_supported": platform_name in _RUNTIME_PLATFORMS,
        "installed": bool(resolved),
        "version": None,
        "ready": None,
        "can_grant": platform_name == "darwin",
        "checks": [],
        "source": None,
        "error": None,
        **dict.fromkeys(_BOOLS),
    }
    if not resolved:
        # Nothing to probe without a binary; everything stays "unknown".
        return status

    try:
        version_text = _run(resolved, "--version", timeout=5).stdout
        status["version"] = (version_text or "").strip() or None
    except Exception:
        # The version string is informational only; ignore probe failures.
        pass

    health = _doctor(resolved)
    if health is not None:
        status["checks"] = health["checks"]

    if platform_name != "darwin":
        # No TCC model off macOS — readiness is driver health.
        if health is not None:
            status["ready"] = health["ok"]
        return status

    _mac_permissions(resolved, status)
    if status["error"] is None:
        have_accessibility = status["accessibility"] is True
        have_screen_recording = status["screen_recording"] is True
        status["ready"] = have_accessibility and have_screen_recording
    return status
def request_permissions_grant(driver_cmd: Optional[str] = None) -> int:
    """Run ``cua-driver permissions grant`` (macOS) with inherited stdout/stderr.

    The driver launches CuaDriver via LaunchServices so the TCC dialog is
    attributed to ``com.trycua.driver``, then waits for the grant.

    Returns:
        The driver's exit code (0 ok); 2 if the binary is missing or the
        process could not be run; 64 on a non-macOS platform (which has no TCC
        permission model to grant); 130 if interrupted with Ctrl-C.
    """
    if sys.platform != "darwin":
        print("Computer Use permissions are a macOS concept; nothing to grant here.")
        return 64

    driver_path = shutil.which(_driver_cmd(driver_cmd))
    if not driver_path:
        print("cua-driver: not installed. Run: hermes computer-use install")
        return 2

    banner = (
        "Requesting Accessibility + Screen Recording for CuaDriver.\n"
        "macOS will show a dialog attributed to CuaDriver (com.trycua.driver) — "
        "approve it, then return here."
    )
    print(banner)
    try:
        # Output is deliberately NOT captured so the user sees driver progress.
        finished = subprocess.run(
            [driver_path, "permissions", "grant"],
            env=_child_env(),
            stdin=subprocess.DEVNULL,
        )
        return int(finished.returncode)
    except KeyboardInterrupt:  # pragma: no cover - interactive
        return 130
    except Exception as exc:  # pragma: no cover - defensive
        print(f"cua-driver permissions grant failed: {exc}", file=sys.stderr)
        return 2

View file

@ -539,6 +539,7 @@ def cronjob(
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
no_agent: Optional[bool] = None,
profile: Optional[str] = None,
task_id: str = None,
) -> str:
"""Unified cron job management tool."""
@ -605,6 +606,7 @@ def cronjob(
enabled_toolsets=enabled_toolsets or None,
workdir=_normalize_optional_job_value(workdir),
no_agent=_no_agent,
profile=_normalize_optional_job_value(profile),
)
_notify_provider_jobs_changed_safe()
return json.dumps(

View file

@ -23,6 +23,29 @@ logger = logging.getLogger(__name__)
_EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
def _expand_tilde(path: str) -> str:
    """Expand a leading ``~`` using the effective profile home when available.

    In-process file tools share the gateway process's HOME, which may differ
    from the profile-specific HOME that interactive CLI sessions use. This
    mirrors ``hermes_constants.get_subprocess_home()`` so that ``~`` resolves
    consistently regardless of whether the tool runs interactively or inside a
    gateway-driven cron job (#48552).

    Args:
        path: The raw path. Empty values and paths without ``~`` are returned
            unchanged.

    Returns:
        ``path`` with ``~`` / ``~/...`` rooted at the profile home when one is
        available; otherwise whatever ``os.path.expanduser`` yields (which also
        covers the ``~user`` form).
    """
    if not path or "~" not in path:
        return path
    try:
        from hermes_constants import get_subprocess_home

        home = get_subprocess_home()
    except Exception:
        # Helper unavailable or failing: defer entirely to the process HOME.
        home = None
    if home and (path == "~" or path.startswith("~/")):
        if path == "~":
            return home
        # ``os.path.join`` throws away ``home`` if the next component is
        # absolute, so "~//etc/passwd" would become "/etc/passwd". Strip the
        # extra leading separators so the result always stays under ``home``,
        # matching what ``os.path.expanduser`` does for the same input.
        separators = os.sep + (os.altsep or "")
        return os.path.join(home, path[2:].lstrip(separators))
    return os.path.expanduser(path)
# ---------------------------------------------------------------------------
# Read-size guard: cap the character count returned to the model.
# We're model-agnostic so we can't count tokens; characters are a safe proxy.
@ -107,7 +130,7 @@ def _sentinel_free_abs_cwd(raw: str | None) -> str | None:
raw = str(raw or "").strip()
if raw.lower() in _TERMINAL_CWD_SENTINELS:
return None
expanded = os.path.expanduser(raw)
expanded = _expand_tilde(raw)
if not os.path.isabs(expanded):
return None
return expanded
@ -222,7 +245,7 @@ def _resolve_base_dir(task_id: str = "default") -> Path:
"""
root = _authoritative_workspace_root(task_id)
if root:
base = Path(root).expanduser()
base = Path(_expand_tilde(root))
else:
base = Path(os.getcwd())
if not base.is_absolute():
@ -239,7 +262,7 @@ def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
See :func:`_resolve_base_dir` for how the base is chosen. Absolute input
paths are returned resolved-but-unanchored.
"""
p = Path(filepath).expanduser()
p = Path(_expand_tilde(filepath))
if p.is_absolute():
return p.resolve()
return (_resolve_base_dir(task_id) / p).resolve()
@ -261,12 +284,12 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa
(no ``cd`` run yet) is warned on the very first write.
"""
try:
if Path(filepath).expanduser().is_absolute():
if Path(_expand_tilde(filepath)).is_absolute():
return None
workspace_root = _authoritative_workspace_root(task_id)
if not workspace_root:
return None # No authoritative workspace root to compare against.
root = Path(workspace_root).expanduser().resolve()
root = Path(_expand_tilde(workspace_root)).resolve()
# Is `resolved` inside `root`?
try:
resolved.relative_to(root)
@ -285,7 +308,7 @@ def _path_resolution_warning(filepath: str, resolved: Path, task_id: str = "defa
def _is_blocked_device_path(path: str) -> bool:
"""Return True for concrete device/fd paths that can hang reads."""
normalized = os.path.normpath(os.path.expanduser(path))
normalized = os.path.normpath(_expand_tilde(path))
if normalized in _BLOCKED_DEVICE_PATHS:
return True
# /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
@ -309,7 +332,7 @@ def _is_blocked_device(filepath: str, base_dir: str | Path | None = None) -> boo
they resolve to terminal-specific paths. Then check each symlink hop before
the final resolved path so aliases to devices cannot bypass the guard.
"""
expanded = os.path.expanduser(filepath)
expanded = _expand_tilde(filepath)
if base_dir is not None and not os.path.isabs(expanded):
expanded = os.path.join(os.fspath(base_dir), expanded)
normalized = os.path.normpath(expanded)
@ -365,7 +388,7 @@ def _get_hermes_config_resolved() -> str | None:
_hermes_config_resolved = str(get_config_path().resolve())
except Exception:
try:
_hermes_config_resolved = str(Path("~/.hermes/config.yaml").expanduser().resolve())
_hermes_config_resolved = str(Path(_expand_tilde("~/.hermes/config.yaml")).resolve())
except Exception:
_hermes_config_resolved = None
return _hermes_config_resolved
@ -377,7 +400,7 @@ def _check_sensitive_path(filepath: str, task_id: str = "default") -> str | None
resolved = str(_resolve_path_for_task(filepath, task_id))
except (OSError, ValueError):
resolved = filepath
normalized = os.path.normpath(os.path.expanduser(filepath))
normalized = os.path.normpath(_expand_tilde(filepath))
_err = (
f"Refusing to write to sensitive system path: {filepath}\n"
"Use the terminal tool with sudo if you need to modify system files."

View file

@ -1184,11 +1184,13 @@ IMAGE_GENERATE_SCHEMA = {
"`reference_image_urls` for style/composition references; omit both "
"for text-to-image. The underlying backend (FAL, OpenAI, xAI, etc.) "
"and model are user-configured and not selectable by the agent. "
"Returns either a URL or an absolute file path in the `image` field; "
"display it with markdown ![description](url-or-path) and the gateway "
"will deliver it. When the active terminal backend has a different "
"filesystem, successful local-file results may also include "
"`agent_visible_image` for follow-up terminal/file operations."
"Returns the result in the `image` field — either a URL or an absolute "
"file path. To show it to the user, reference that path/URL in your "
"response using the file-delivery convention for the current platform "
"(your platform guidance describes how files are delivered here). When "
"the active terminal backend has a different filesystem, successful "
"local-file results may also include `agent_visible_image` for "
"follow-up terminal/file operations."
),
"parameters": {
"type": "object",

View file

@ -731,6 +731,38 @@ class MemoryStore:
raise RuntimeError(f"Failed to write memory file {path}: {e}")
def load_on_disk_store() -> "MemoryStore":
    """Build a fresh on-disk :class:`MemoryStore`, honoring configured char limits.

    Intended for contexts that have no live agent (the messaging gateway, the
    Desktop GUI, the bare CLI ``/memory`` handler) but still need to read or
    apply approved memory writes. The user's ``memory.memory_char_limit`` /
    ``memory.user_char_limit`` overrides are read from config so an approval
    applied without a live agent enforces the SAME caps as one applied with
    one.

    If the config cannot be loaded or a value cannot be converted to ``int``,
    the built-in defaults (2200 / 1375) are kept for whatever was not already
    read, so a missing or unreadable config never makes this raise.
    """
    limits = {"memory_char_limit": 2200, "user_char_limit": 1375}
    try:
        from hermes_cli.config import load_config

        memory_section = (load_config() or {}).get("memory", {}) or {}
        # Processed in order: a bad second value still keeps a good first one.
        for name in ("memory_char_limit", "user_char_limit"):
            limits[name] = int(memory_section.get(name, limits[name]))
    except Exception:
        pass  # config optional — fall back to defaults rather than break /memory

    store = MemoryStore(**limits)
    store.load_from_disk()
    return store
def _apply_write_gate(action: str, target: str, content: Optional[str],
old_text: Optional[str]) -> Optional[str]:
"""Evaluate the memory write gate. Returns a JSON tool-result string when

View file

@ -419,9 +419,11 @@ _GENERIC_DESCRIPTION = (
"endpoint. The backend and model family are user-configured via "
"`hermes tools` → Video Generation; the agent does not pick them. "
"Long-running generations may take 30 seconds to several minutes — "
"the call blocks until the video is ready. Returns either an HTTP "
"URL or an absolute file path in the `video` field; display it with "
"markdown ![description](url-or-path) and the gateway will deliver it."
"the call blocks until the video is ready. Returns the result in the "
"`video` field — either an HTTP URL or an absolute file path. To show "
"it to the user, reference that path/URL in your response using the "
"file-delivery convention for the current platform (your platform "
"guidance describes how files are delivered here)."
)

View file

@ -806,6 +806,21 @@ def _emit(event: str, sid: str, payload: dict | None = None):
write_json({"jsonrpc": "2.0", "method": "event", "params": params})
def _emit_approval_request(sid: str, data: dict | None) -> None:
    """Send an ``approval.request`` event to the TUI client with the command redacted.

    The approval payload is built from the RAW command string, so a
    credential-shaped value Tirith flagged would otherwise be echoed verbatim
    to the TUI client (#48456 — third egress transport alongside the chat
    platforms and the SSE/API stream fixed in #50767). The shared gateway
    redaction seam is reused so all approval transports redact consistently.

    ``data`` is shallow-copied before the ``command`` entry is rewritten, so
    the caller's dict is never mutated. A missing or empty ``data`` is sent as
    an empty payload.
    """
    outgoing = dict(data) if data else {}
    if "command" in outgoing:
        # Imported lazily, only when there is a command to redact.
        from gateway.run import _redact_approval_command

        outgoing["command"] = _redact_approval_command(outgoing.get("command"))
    _emit("approval.request", sid, outgoing)
def _status_update(sid: str, kind: str, text: str | None = None):
body = (text if text is not None else kind).strip()
if not body:
@ -1040,7 +1055,7 @@ def _start_agent_build(sid: str, session: dict) -> None:
)
register_gateway_notify(
key, lambda data: _emit("approval.request", sid, data)
key, lambda data: _emit_approval_request(sid, data)
)
notify_registered = True
load_permanent_allowlist()
@ -2554,7 +2569,7 @@ def _sync_session_key_after_compress(
try:
register_gateway_notify(
new_session_id,
lambda data: _emit("approval.request", sid, data),
lambda data: _emit_approval_request(sid, data),
)
except Exception:
pass
@ -3916,7 +3931,7 @@ def _init_session(
try:
from tools.approval import register_gateway_notify, load_permanent_allowlist
register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
register_gateway_notify(key, lambda data: _emit_approval_request(sid, data))
load_permanent_allowlist()
except Exception:
pass

View file

@ -270,6 +270,31 @@ display:
> writes to your memory/skill stores, are unaffected by this setting. Set it
> per-platform via `display.platforms.<platform>.memory_notifications`.
## Running the review on a cheaper model (`auxiliary.background_review`)
The review runs on your **main chat model** by default, replaying the
conversation. That transcript is already warm in the prompt cache, so the
replay costs only cheap cache reads. If your main model is expensive, you can
run the review on a cheaper model instead:
```yaml
auxiliary:
  background_review:
    provider: openrouter
    model: google/gemini-3-flash-preview   # auto (default) = main chat model
```
When you point it at a model **different** from your main one, the review runs
there for substantially lower cost (~35× in benchmarks). Because a different
model can't reuse your main model's prompt cache anyway, the fork automatically
replays a compact **digest** of the conversation (recent turns verbatim + a
summary of older ones) rather than the full transcript — minimizing what it
writes to the new cache. Capture quality holds up: in testing, memory capture
was identical and skill capture near-identical to the main-model review.
Leave it at `auto` (or set it to your main model) and nothing changes — the
review keeps running on the main model with the full warm-cache replay.
## Controlling skill writes (`skills.write_approval`)
Skills use the same on/off gate, but the review UX differs because a