diff --git a/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg b/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg new file mode 100644 index 00000000000..2f3529648e7 Binary files /dev/null and b/.github/pr-screenshots/telegram-overflow/topic-final-response-clipped.jpg differ diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index daffc025d9b..742af145380 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -679,15 +679,28 @@ def recover_with_credential_pool( # long-running TUI sessions stuck on stale tokens until the user # exited and reopened. is_entitlement = agent._is_entitlement_failure(error_context, status_code) + _auth_haystack = " ".join( + str(error_context.get(k) or "").lower() + for k in ("message", "reason", "code", "error") + if isinstance(error_context, dict) + ) + if ( + not is_entitlement + and status_code == 403 + and "oauth authentication is currently not allowed for this organization" in _auth_haystack + ): + is_entitlement = True + if ( + not is_entitlement + and status_code == 403 + and (agent.provider or "") == "anthropic" + and getattr(agent, "api_mode", "") == "anthropic_messages" + ): + is_entitlement = True if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth": - _disambiguator_haystack = " ".join( - str(error_context.get(k) or "").lower() - for k in ("message", "reason", "code", "error") - if isinstance(error_context, dict) - ) _is_xai_auth_failure = ( - "[wke=unauthenticated:" in _disambiguator_haystack - or "oauth2 access token could not be validated" in _disambiguator_haystack + "[wke=unauthenticated:" in _auth_haystack + or "oauth2 access token could not be validated" in _auth_haystack ) if not _is_xai_auth_failure: is_entitlement = True diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index e64bc54bc90..8476ef67f57 100644 --- a/agent/anthropic_adapter.py +++ 
b/agent/anthropic_adapter.py @@ -1571,6 +1571,15 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]: if ptype == "input_text": block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")} + elif ptype == "text": + # A stored Anthropic text block. Rebuild from whitelisted fields only — + # SDK response text blocks carry output-only siblings (parsed_output, + # citations=None) that the Messages INPUT schema rejects with HTTP 400 + # "Extra inputs are not permitted". Do NOT dict(part) it verbatim. + block = {"type": "text", "text": part.get("text", "")} + cits = part.get("citations") + if isinstance(cits, list) and cits: + block["citations"] = cits elif ptype in {"image_url", "input_image"}: image_value = part.get("image_url", {}) url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "") @@ -1685,6 +1694,58 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]: return out +def _sanitize_replay_block(b: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """Strip output-only fields from a stored Anthropic content block so it is + valid as REQUEST input on replay. + + The SDK response objects carry output-only attributes that the Messages + *input* schema forbids ("Extra inputs are not permitted"): text blocks get + ``parsed_output``/``citations`` (when null), tool_use blocks get ``caller``, + etc. ``normalize_response`` captured blocks verbatim via ``_to_plain_data``, + so these leak back as input on the next turn → HTTP 400. + + Whitelist per type (NOT a blacklist) so future SDK output-only fields can't + reintroduce the bug. Returns a clean block, or None to drop it. + """ + if not isinstance(b, dict): + return None + btype = b.get("type") + if btype == "text": + out: Dict[str, Any] = {"type": "text", "text": b.get("text", "")} + # citations is input-valid ONLY when it's a non-empty list; the SDK + # emits citations=None on responses, which the input schema rejects. 
+ cits = b.get("citations") + if isinstance(cits, list) and cits: + out["citations"] = cits + if isinstance(b.get("cache_control"), dict): + out["cache_control"] = b["cache_control"] + return out + if btype == "thinking": + out = {"type": "thinking", "thinking": b.get("thinking", "")} + if b.get("signature"): + out["signature"] = b["signature"] + return out + if btype == "redacted_thinking": + # Only valid with its data payload; drop if missing. + return {"type": "redacted_thinking", "data": b["data"]} if b.get("data") else None + if btype == "tool_use": + out = { + "type": "tool_use", + "id": _sanitize_tool_id(b.get("id", "")), + "name": b.get("name", ""), + "input": b.get("input", {}), + } + if isinstance(b.get("cache_control"), dict): + out["cache_control"] = b["cache_control"] + return out + if btype == "image": + src = b.get("source") + return {"type": "image", "source": src} if isinstance(src, dict) else None + # Unknown/unsupported block type on the input path — drop rather than risk + # another "Extra inputs are not permitted". + return None + + def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]: """Convert an assistant message to Anthropic content blocks. @@ -1692,6 +1753,55 @@ def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]: reasoning_content injection for Kimi/DeepSeek endpoints. """ content = m.get("content", "") + # Anthropic interleaved-thinking fast path: when this turn carries a + # verbatim, order-preserving block list (set by normalize_response only + # for turns that interleave SIGNED thinking with tool_use), replay it. + # Each block is run through _sanitize_replay_block to strip output-only + # SDK fields (parsed_output, caller, citations=None, …) that the Messages + # INPUT schema forbids — replaying them verbatim caused HTTP 400 "Extra + # inputs are not permitted" (text.parsed_output). 
Block ORDER is preserved + # (the reason this channel exists); only forbidden sibling fields are + # dropped, leaving thinking signatures and tool_use id/name/input intact. + ordered_blocks = m.get("anthropic_content_blocks") + if isinstance(ordered_blocks, list) and ordered_blocks: + # Re-source each tool_use input from the stored tool_calls map rather + # than the captured block. The ordered-blocks list captures tool_use + # input from the RAW API response (normalize_response), which is NOT + # credential-redacted; tool_calls[].function.arguments IS redacted at + # storage time (build_assistant_message, #19798). Replaying the raw + # block input would resurrect a secret the model inlined into a tool + # call (e.g. terminal(command="curl -H 'Authorization: Bearer sk-...'") + # onto the wire, even though the same value is redacted everywhere else + # in history. Keying by sanitized tool id preserves interleave order + # (the reason this channel exists) while swapping in the redacted + # input. Adapted from #36071 (replay-time tool-input re-sourcing). + redacted_input_by_id: Dict[str, Any] = {} + for tc in m.get("tool_calls", []) or []: + if not isinstance(tc, dict): + continue + fn = tc.get("function", {}) or {} + raw_args = fn.get("arguments", "{}") + try: + parsed_args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args + except (json.JSONDecodeError, ValueError): + parsed_args = {} + redacted_input_by_id[_sanitize_tool_id(tc.get("id", ""))] = parsed_args + replayed: List[Dict[str, Any]] = [] + for b in ordered_blocks: + clean = _sanitize_replay_block(b) + if clean is None: + continue + if clean.get("type") == "tool_use": + # Override raw (un-redacted) input with the redacted copy when + # we have one for this id; fall back to the sanitized block + # input only if the tool_call is missing (shape mismatch). 
+ redacted = redacted_input_by_id.get(clean.get("id", "")) + if redacted is not None: + clean["input"] = redacted + replayed.append(clean) + if replayed: + return {"role": "assistant", "content": replayed} + blocks = _extract_preserved_thinking_blocks(m) if content: if isinstance(content, list): diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py index 12c7afb8c18..e3abba8436f 100644 --- a/agent/bedrock_adapter.py +++ b/agent/bedrock_adapter.py @@ -208,6 +208,41 @@ def is_stale_connection_error(exc: BaseException) -> bool: return False +def is_streaming_access_denied_error(exc: BaseException) -> bool: + """Return True when AWS denied the ``bedrock:InvokeModelWithResponseStream`` action. + + IAM policies scoped to ``bedrock:InvokeModel`` only (a common least-privilege + setup) reject ``converse_stream()`` with an ``AccessDeniedException`` whose + message names the streaming action, e.g.:: + + User: arn:aws:iam::123456789012:user/x is not authorized to perform: + bedrock:InvokeModelWithResponseStream on resource: ... + + This is permanent for the session — retrying the stream can never succeed — + so callers should flip to the non-streaming ``converse()`` path (which maps + to ``bedrock:InvokeModel``) instead of burning retries. + + Detection is deliberately message-based: boto3 surfaces this as a + ``ClientError`` with ``Error.Code == "AccessDeniedException"``, and the + AnthropicBedrock SDK wraps the same AWS response in its own exception + types, but both preserve the action name in the message. + """ + msg = str(exc).lower() + if "invokemodelwithresponsestream" not in msg: + return False + # ClientError with an explicit access-denied code is the canonical form. 
+ try: + from botocore.exceptions import ClientError + except ImportError: # pragma: no cover — botocore always present with boto3 + ClientError = None # type: ignore[assignment] + if ClientError is not None and isinstance(exc, ClientError): + code = (getattr(exc, "response", None) or {}).get("Error", {}).get("Code", "") + return code in ("AccessDeniedException", "UnauthorizedException") + # Wrapped forms (e.g. AnthropicBedrock SDK PermissionDeniedError) — match + # on the authorization-failure phrasing AWS uses. + return "not authorized" in msg or "accessdenied" in msg + + # --------------------------------------------------------------------------- # AWS credential detection # --------------------------------------------------------------------------- @@ -1003,6 +1038,16 @@ def call_converse_stream( try: response = client.converse_stream(**kwargs) except Exception as exc: + if is_streaming_access_denied_error(exc): + # IAM allows bedrock:InvokeModel but not + # InvokeModelWithResponseStream — permanent for this session. + # Fall back to the non-streaming converse() path. 
+ logger.info( + "bedrock: converse_stream denied by IAM on (region=%s, model=%s) — " + "falling back to non-streaming converse().", + region, model, + ) + return normalize_converse_response(client.converse(**kwargs)) if is_stale_connection_error(exc): logger.warning( "bedrock: stale-connection error on converse_stream(region=%s, " diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index ce066d55640..1ee1702b45e 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -952,6 +952,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic if preserved: msg["reasoning_details"] = preserved + # Anthropic interleaved-thinking replay: when a turn interleaves signed + # thinking blocks with tool_use, the parallel reasoning_details + + # tool_calls fields lose the cross-type ordering, and reconstruction + # front-loads thinking — reordering signed blocks and triggering HTTP 400 + # ("thinking ... blocks in the latest assistant message cannot be + # modified"). Carry the verbatim ordered block list so the adapter can + # replay the latest assistant message unchanged. See + # agent/transports/anthropic.py and agent/anthropic_adapter.py. + ordered_blocks = getattr(assistant_message, "anthropic_content_blocks", None) + if ordered_blocks: + msg["anthropic_content_blocks"] = ordered_blocks + # Codex Responses API: preserve encrypted reasoning items for # multi-turn continuity. These get replayed as input on the next turn. 
codex_items = getattr(assistant_message, "codex_reasoning_items", None) @@ -1603,6 +1615,8 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= _get_bedrock_runtime_client, invalidate_runtime_client, is_stale_connection_error, + is_streaming_access_denied_error, + normalize_converse_response, stream_converse_with_callbacks, ) region = api_kwargs.pop("__bedrock_region__", "us-east-1") @@ -1611,6 +1625,29 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= try: raw_response = client.converse_stream(**api_kwargs) except Exception as _bedrock_exc: + # IAM policies scoped to bedrock:InvokeModel only (no + # InvokeModelWithResponseStream) reject converse_stream() + # with AccessDeniedException. That denial is permanent for + # the session — fall back to the non-streaming converse() + # inline (it maps to bedrock:InvokeModel) and disable + # streaming for subsequent calls so we don't re-fail every + # turn. + if is_streaming_access_denied_error(_bedrock_exc): + agent._disable_streaming = True + agent._safe_print( + "\n⚠ AWS IAM denied bedrock:InvokeModelWithResponseStream — " + "falling back to non-streaming InvokeModel.\n" + " Grant that action to restore streaming output.\n" + ) + logger.info( + "bedrock: converse_stream denied by IAM (%s) — " + "using non-streaming converse() for this session.", + type(_bedrock_exc).__name__, + ) + result["response"] = normalize_converse_response( + client.converse(**api_kwargs) + ) + return # Evict the cached client on stale-connection failures # so the outer retry loop builds a fresh client/pool. if is_stale_connection_error(_bedrock_exc): @@ -1698,6 +1735,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= # poll loop uses this to detect stale connections that keep receiving # SSE keep-alive pings but no actual data. 
last_chunk_time = {"t": time.time()} + # Stale-stream patience, shared between the httpx socket read timeout + # (built in ``_call_chat_completions`` below) and the stale-stream detector + # (computed further down, before the worker thread starts). Initialized + # here so the read-timeout builder can floor itself at the stale value and + # never fire before the detector. ``None`` until the detector value is + # resolved, so the builder degrades to its plain default if it ever runs + # first. + _stream_stale_timeout = None def _fire_first_delta(): if not first_delta_fired["done"] and on_first_delta: @@ -1734,6 +1779,26 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= "Local provider detected (%s) — stream read timeout raised to %.0fs", agent.base_url, _stream_read_timeout, ) + elif ( + _stream_read_timeout == 120.0 + and _stream_stale_timeout is not None + and _stream_stale_timeout != float("inf") + and _stream_stale_timeout > _stream_read_timeout + ): + # Cloud reasoning models (e.g. Opus) routinely pause mid-stream + # for minutes during extended thinking. The stale-stream + # detector is deliberately scaled up to tolerate this (180–300s, + # see the stale-timeout block below), but the raw httpx socket + # read timeout defaulted to a flat 120s and fired *first* — + # tearing down a healthy reasoning stream before the stale + # detector (which owns retry + diagnostics) could act. Keep the + # socket read timeout in step with the detector so it no longer + # preempts it. + _stream_read_timeout = _stream_stale_timeout + logger.debug( + "Cloud reasoning stream — read timeout raised to %.0fs to " + "match stale-stream detector", _stream_read_timeout, + ) # Cap connect/pool at 60s even when provider timeout is higher. # connect/pool cover TCP handshake, not model inference. 
_conn_cap = min(_base_timeout, 60.0) if _provider_timeout_cfg is not None else 30.0 @@ -2384,9 +2449,34 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= "stream" in _err_lower and "not supported" in _err_lower ) - if _is_stream_unsupported: + # AWS Bedrock (AnthropicBedrock SDK path): IAM policies + # with bedrock:InvokeModel but not + # InvokeModelWithResponseStream reject messages.stream() + # with a permission error naming the streaming action. + # Permanent for the session — flip to non-streaming + # (messages.create() maps to bedrock:InvokeModel). + _is_bedrock_stream_denied = False + if ( + not _is_stream_unsupported + and "invokemodelwithresponsestream" in _err_lower + ): + # Cheap message pre-check before importing the + # adapter — bedrock_adapter triggers a lazy boto3 + # install at import time, which must not run for + # unrelated providers' stream errors. + from agent.bedrock_adapter import ( + is_streaming_access_denied_error, + ) + _is_bedrock_stream_denied = ( + is_streaming_access_denied_error(e) + ) + if _is_stream_unsupported or _is_bedrock_stream_denied: agent._disable_streaming = True agent._safe_print( + "\n⚠ AWS IAM denied bedrock:InvokeModelWithResponseStream. " + "Switching to non-streaming.\n" + " Grant that action to restore streaming output.\n" + if _is_bedrock_stream_denied else "\n⚠ Streaming is not supported for this " "model/provider. Switching to non-streaming.\n" " To avoid this delay, set display.streaming: false " diff --git a/agent/coding_context.py b/agent/coding_context.py new file mode 100644 index 00000000000..f0ed3296a03 --- /dev/null +++ b/agent/coding_context.py @@ -0,0 +1,700 @@ +"""Coding-context awareness — base Hermes, every interactive surface. + +When the user runs Hermes inside a code workspace (CLI, TUI, desktop app, or an +editor over ACP), Hermes shifts into a **coding posture**. 
This module is the +single place that decides whether we're in that posture and what it implies, +so the rest of the codebase never re-derives "are we coding?" on its own. + +Architecture — one seam, many consumers +---------------------------------------- +The posture is modelled as a frozen :class:`RuntimeMode` selected from a small +:class:`ContextProfile` registry (today: ``coding`` and ``general``). A profile +is *data* — it declares the toolset to collapse to, the operating brief to +inject, and hints for other domains (model routing, memory, subagents). Every +domain reads the same resolved object instead of probing git/config itself: + + * **System prompt** — ``RuntimeMode.system_blocks()`` → the operating brief + + a live git/workspace snapshot (``agent/system_prompt.py``). + * **Toolset** — ``RuntimeMode.toolset_selection()`` → the ``coding`` toolset + plus the user's enabled MCP servers (``cli.py`` / ``tui_gateway``). Only + under the opt-in ``focus`` mode: the default posture is prompt-only and + never touches the user's configured toolsets (toolsets like messaging / + smart-home / music are off-by-default anyway, and someone who explicitly + enabled image-gen or Spotify shouldn't lose it for being in a git repo). + * **Delegation** — subagents inherit the parent's toolset and run through the + same prompt builder, so the coding posture propagates to children for free. + * **Model / memory / compression** — declared on the profile + (``model_hint``, ``memory_policy``) as the extension seam; consumers read + ``mode.profile`` rather than re-deciding. + +Cache safety +------------ +The mode is resolved **once** and is immutable. The workspace snapshot is built +once at prompt-build time and baked into the *stable* system-prompt tier — never +re-probed per turn (that would shatter the prompt cache). Branch and dirty state +drift mid-session, so the brief tells the model to re-check with ``git`` before +acting on the snapshot. 
A ``/coding`` flip therefore only takes effect next +session (deferred), the same contract as ``/skills install`` vs ``--now``. + +Activation (config ``agent.coding_context``): + + * ``auto`` (default) — posture (brief + snapshot) on an interactive coding + surface sitting in a code workspace (git repo or recognised project root). + Prompt-only; toolsets untouched. + * ``focus`` — like ``auto``, but additionally collapses the toolset to the + ``coding`` set + enabled MCP servers. Explicit opt-in for a lean schema. + * ``on`` — force the posture anywhere (incl. non-workspaces). Prompt-only. + * ``off`` — disable entirely. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Optional + +logger = logging.getLogger("hermes.coding_context") + +CODING_TOOLSET = "coding" + +# Surfaces where a coding posture makes sense under ``auto``. Messaging +# platforms (telegram, discord, slack, …) are intentionally absent — a chat bot +# in a group is not pair-programming. +INTERACTIVE_CODING_PLATFORMS = {"cli", "tui", "acp", "desktop", ""} + +# Project-root signals that mark a directory as a code workspace even when it +# isn't (yet) a git repo. Cheap filename checks — no parsing. +_PROJECT_MARKERS = ( + "pyproject.toml", "setup.py", "setup.cfg", "requirements.txt", + "package.json", "tsconfig.json", "deno.json", + "Cargo.toml", "go.mod", "pom.xml", "build.gradle", "build.gradle.kts", + "Gemfile", "composer.json", "mix.exs", "pubspec.yaml", + "CMakeLists.txt", "Makefile", "Dockerfile", + "AGENTS.md", "CLAUDE.md", ".cursorrules", +) + +# Agent-instruction files surfaced separately from manifests in the snapshot. +_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules") + +# Lockfile → package manager, checked in priority order. 
+_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv")) +_JS_LOCKFILES = ( + ("pnpm-lock.yaml", "pnpm"), ("bun.lockb", "bun"), ("bun.lock", "bun"), + ("yarn.lock", "yarn"), ("package-lock.json", "npm"), +) + +# package.json scripts / Makefile targets worth surfacing as verify commands. +_VERIFY_TARGETS = ("test", "tests", "lint", "typecheck", "check", "build", "fmt", "format") +_MAX_VERIFY_COMMANDS = 8 +_MAX_FACT_FILE_BYTES = 256 * 1024 + +_GIT_TIMEOUT = 2.5 + + +# Per-model edit-format steering. Matching the edit tool format to how a model +# was trained reduces mistakes and wasted reasoning (OpenAI/Codex handle +# patch-style diffs best; Anthropic models — and most open-weight coding +# models, whose RL scaffolds use str_replace-style editors — do best with +# string-replacement). Our `patch` tool exposes both: mode="patch" (V4A +# multi-file) and mode="replace" (find-and-swap). We nudge each family toward +# its native format. Unknown families get nothing (the brief's neutral wording +# stands). Substrings match the model id; aligned with TOOL_USE_ENFORCEMENT_MODELS. +_EDIT_FORMAT_GUIDANCE: dict[str, tuple[tuple[str, ...], str]] = { + "patch": ( + ("gpt", "codex"), + "- Edit format: author new files with `write_file`; for edits to " + "existing code prefer `patch` with `mode='patch'` (V4A multi-file diff) " + "for structured or multi-file changes — it's the diff format you handle " + "most reliably. Use `mode='replace'` for a single small swap.", + ), + "replace": ( + ("claude", "sonnet", "opus", "haiku", + "gemini", "gemma", "deepseek", "qwen", "kimi", "glm", "grok", + "hermes", "llama", "mistral", "devstral", "minimax"), + "- Edit format: author new files with `write_file`; for edits to " + "existing code prefer `patch` in `mode='replace'` — match a unique " + "snippet and swap it. 
Reach for `mode='patch'` (V4A) only when an edit " + "genuinely spans several files at once.", + ), +} + + +def _model_family(model: Optional[str]) -> Optional[str]: + """Classify a model id into an edit-format family key, or ``None``. + + Used to steer the coding posture toward the edit tool format a model was + trained on. Family-agnostic by design: an unrecognised model gets ``None`` + and the operating brief's neutral edit wording applies. + """ + if not model: + return None + lowered = model.lower() + for family, (needles, _line) in _EDIT_FORMAT_GUIDANCE.items(): + if any(n in lowered for n in needles): + return family + return None + + +def _edit_format_line(model: Optional[str]) -> str: + """The edit-format guidance line for this model's family (``""`` if none).""" + family = _model_family(model) + if family is None: + return "" + return _EDIT_FORMAT_GUIDANCE[family][1] + + +# Operating brief for the coding posture. Tool names referenced here (read_file, +# search_files, patch, write_file, terminal, todo) are in the coding toolset and +# in _HERMES_CORE_TOOLS, so they're present on every surface this fires on. +CODING_AGENT_GUIDANCE = ( + "You are a coding agent pairing with the user inside their codebase. " + "Operate like a careful senior engineer.\n" + "\n" + "Gather context first:\n" + "- Read the relevant files with `read_file` and locate code with " + "`search_files` before changing anything. Trace a symbol to its definition " + "and usages rather than guessing its shape.\n" + "- Batch independent lookups: when several reads/searches don't depend on " + "each other, issue them together in one turn instead of one at a time.\n" + "- Never invent files, symbols, APIs, or imports. If you haven't seen it in " + "the repo, go look. 
Don't assume a library is available — check the project " + "manifest (pyproject.toml / package.json / Cargo.toml / go.mod) and how " + "neighbouring files import it.\n" + "\n" + "Make changes through the tools, not the chat:\n" + "- Edit with `patch`/`write_file`. Do NOT print code blocks to the user as " + "a substitute for editing — apply the change, then summarise it. Only show " + "code when the user explicitly asks to see it.\n" + "- Match the project's existing style and conventions; AGENTS.md / " + "CLAUDE.md / .cursorrules already in context win over your defaults. Touch " + "only what the task needs — no drive-by refactors, renames, or reformatting " + "— and add any imports/dependencies your code requires.\n" + "- If an edit fails to apply, re-read the file to get the current exact " + "contents before retrying — don't repeat a stale patch. If the same region " + "fails twice, rewrite the enclosing function or file with `write_file` " + "instead of attempting a third patch.\n" + "\n" + "Verify, and know when to stop:\n" + "- Use `terminal` for git, builds, tests, and inspection. Run the relevant " + "tests/linter/build and confirm they pass before claiming the work is done.\n" + "- Fix root causes, not symptoms: when you find a bug, check sibling call " + "paths for the same flaw and fix the class, not just the reported site.\n" + "- When fixing linter/type errors on a file, stop after about three " + "attempts on the same file and ask the user rather than looping.\n" + "- Track multi-step work with `todo`. Reference code as `path:line` instead " + "of pasting whole files.\n" + "\n" + "Respect the user's repo: don't commit, push, or rewrite history unless " + "asked, and never read, print, or commit secrets — leave `.env` and " + "credential files alone unless the user explicitly asks. The Workspace " + "block below is a snapshot from session start — re-run `git status`/" + "`git branch` before relying on it. 
Be concise: lead with the change or " + "answer, not a preamble." +) + + +# ── Context profiles (declarative posture definitions) ────────────────────── + + +@dataclass(frozen=True) +class ContextProfile: + """A named operating posture. Pure data — consumers read these fields. + + ``toolset`` — collapse to this toolset (+ enabled MCP) when no explicit + selection is pinned; ``None`` keeps the platform default. + ``guidance`` — operating brief injected into the stable system prompt; + ``""`` injects nothing. + ``model_hint`` — routing preference key for smart model routing + (extension seam; not yet consumed by the router). + ``memory_policy``— memory namespace/weighting hint (extension seam). + ``hidden_skill_categories`` — skill categories pruned from the system-prompt + skill index while this posture is active. Discovery-only: + nothing is disabled — ``skills_list`` still returns the + full catalog and ``skill_view`` loads anything. Deny-list + semantics so unknown/custom categories stay visible. + """ + + name: str + toolset: Optional[str] = None + guidance: str = "" + model_hint: Optional[str] = None + memory_policy: str = "default" + hidden_skill_categories: tuple[str, ...] = () + + +# Skill categories that are clearly not part of a coding workflow. Hidden from +# the prompt's skill index in the coding posture (deny-list — anything not +# listed here, incl. custom user categories, stays visible). Coding-adjacent +# categories (devops, github, mcp, data-science, diagramming, research, +# security, …) are intentionally absent. 
+_NON_CODING_SKILL_CATEGORIES = ( + "apple", "communication", "cooking", "creative", "email", "finance", + "gaming", "gifs", "health", "media", "music", "note-taking", + "productivity", "shopping", "smart-home", "social-media", "travel", + "yuanbao", +) + + +GENERAL_PROFILE = ContextProfile(name="general") +CODING_PROFILE = ContextProfile( + name="coding", + toolset=CODING_TOOLSET, + guidance=CODING_AGENT_GUIDANCE, + model_hint="coding", + memory_policy="project", + hidden_skill_categories=_NON_CODING_SKILL_CATEGORIES, +) + +_PROFILES: dict[str, ContextProfile] = { + GENERAL_PROFILE.name: GENERAL_PROFILE, + CODING_PROFILE.name: CODING_PROFILE, +} + + +def get_profile(name: str) -> ContextProfile: + """Return a registered profile, falling back to ``general``.""" + return _PROFILES.get(name, GENERAL_PROFILE) + + +# ── Helpers ───────────────────────────────────────────────────────────────── + + +def _coding_mode(config: Optional[dict[str, Any]]) -> str: + """Return the normalized ``agent.coding_context`` mode (auto/focus/on/off).""" + if config is None: + try: + from hermes_cli.config import load_config + + config = load_config() + except Exception: + config = {} + raw = ((config or {}).get("agent", {}) or {}).get("coding_context", "auto") + mode = str(raw).strip().lower() + if mode in {"focus", "strict", "lean"}: + return "focus" + if mode in {"on", "true", "yes", "1", "always"}: + return "on" + if mode in {"off", "false", "no", "0", "never"}: + return "off" + return "auto" + + +def _resolve_cwd(cwd: Optional[str | Path]) -> Path: + if cwd: + return Path(cwd).expanduser() + try: + from agent.runtime_cwd import resolve_agent_cwd + + return resolve_agent_cwd() + except Exception: + return Path(os.getcwd()) + + +def _git_root(cwd: Path) -> Optional[Path]: + current = cwd.resolve() + for parent in [current, *current.parents]: + if (parent / ".git").exists(): + return parent + return None + + +def _home() -> Optional[Path]: + try: + return Path.home().resolve() + except 
(OSError, RuntimeError): + return None + + +def _marker_root(cwd: Path) -> Optional[Path]: + """Nearest ancestor that looks like a project root, or ``None``. + + Walks up at most a few levels so a manifest in the workspace root counts + even when the user is in a subdirectory. ``$HOME`` itself is skipped — a + Makefile or AGENTS.md sitting in the home directory is global user config, + not a project-root signal. + """ + current = cwd.resolve() + home = _home() + for depth, parent in enumerate([current, *current.parents]): + if depth > 6: + break + if parent == home: + continue + for marker in _PROJECT_MARKERS: + if (parent / marker).exists(): + return parent + return None + + +def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str: + """Resolve which profile applies. + + ``auto``/``focus``: coding when the surface is interactive AND the cwd is a + code workspace (a git repo or a recognised project root). ``on``: always + coding. ``off``: always general. + + A git repo rooted at ``$HOME`` (the dotfiles pattern) is NOT a workspace + signal — without the guard, every session anywhere under a dotfiles-managed + home directory would silently flip to the coding posture. + + Detection is intentionally not memoized: it's a handful of ``stat`` calls, + and callers resolve the mode once per session anyway. Caching here would + risk a stale posture if a long-lived process (gateway/TUI) serves sessions + from different working directories. 
+ """ + if mode == "off": + return GENERAL_PROFILE.name + if mode == "on": + return CODING_PROFILE.name + if platform and platform.strip().lower() not in INTERACTIVE_CODING_PLATFORMS: + return GENERAL_PROFILE.name + cwd = Path(cwd_str) + git_root = _git_root(cwd) + if git_root is not None and git_root == _home(): + git_root = None # dotfiles repo at $HOME — not a code workspace + if git_root is not None or _marker_root(cwd) is not None: + return CODING_PROFILE.name + return GENERAL_PROFILE.name + + +# ── RuntimeMode (the seam) ────────────────────────────────────────────────── + + +@dataclass(frozen=True) +class RuntimeMode: + """The resolved operating posture for a session. Immutable by construction. + + Built once via :func:`resolve_runtime_mode` and consumed by every domain + that cares about the coding/general distinction. Never mutate or re-resolve + mid-session — that would break the prompt cache. + """ + + profile: ContextProfile + surface: str + cwd: Path + # The normalized ``agent.coding_context`` mode this posture was resolved + # under (auto/focus/on/off). Toolset collapse is gated on ``focus``. + config_mode: str = "auto" + # The model id this session runs (e.g. "anthropic/claude-opus-4.8"). Used + # only to steer edit-format guidance toward the model's family — see + # ``_edit_format_line``. Fixed for the session, so cache-safe. + model: Optional[str] = None + + @property + def kind(self) -> str: + return self.profile.name + + @property + def is_coding(self) -> bool: + return self.profile.name == CODING_PROFILE.name + + def toolset_selection(self, config: Optional[dict[str, Any]] = None) -> Optional[list[str]]: + """Toolset list for this posture, or ``None`` to keep the platform default. + + Non-``None`` only under the opt-in ``focus`` mode. 
The default posture + is prompt-only: most strippable toolsets are off-by-default anyway, and + a user who explicitly enabled one (image-gen for frontend/game assets, + messaging for build notifications, …) keeps it while coding. + + Callers apply this only when the user hasn't pinned an explicit + selection (``--toolsets``, ``HERMES_TUI_TOOLSETS``, …); they never + override a pin. Returns the profile's toolset plus enabled MCP servers. + """ + if self.config_mode != "focus": + return None + if self.profile.toolset is None: + return None + return [self.profile.toolset, *_enabled_mcp_servers(config)] + + def system_blocks(self) -> list[str]: + """Stable system-prompt blocks for this posture (brief + workspace). + + The operating brief carries a model-family edit-format nudge appended + to it (one cached string, not a separate block) so the model is steered + toward the `patch` mode it handles best — see ``_edit_format_line``. + """ + if not self.is_coding: + return [] + blocks: list[str] = [] + if self.profile.guidance: + brief = self.profile.guidance + edit_line = _edit_format_line(self.model) + if edit_line: + brief = f"{brief}\n{edit_line}" + blocks.append(brief) + workspace = build_coding_workspace_block(self.cwd) + if workspace: + blocks.append(workspace) + return blocks + + def hidden_skill_categories(self) -> frozenset[str]: + """Skill categories to prune from the prompt's skill index (may be empty).""" + return frozenset(self.profile.hidden_skill_categories) + + +def resolve_runtime_mode( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, + model: Optional[str] = None, +) -> RuntimeMode: + """Resolve the operating posture once. Cheap — a handful of ``stat`` calls. + + This is the single entry point every domain should call. The returned + object is immutable and safe to cache for the session. 
Detection itself is + intentionally *not* memoized (see ``_detect_profile_name``) so a long-lived + process can't pin a stale posture; callers resolve once per session and + hold the result. ``model`` is recorded only to steer edit-format guidance; + it never affects detection. + """ + resolved_cwd = _resolve_cwd(cwd) + mode = _coding_mode(config) + name = _detect_profile_name( + mode, (platform or "").strip().lower(), str(resolved_cwd) + ) + return RuntimeMode( + profile=get_profile(name), + surface=platform or "", + cwd=resolved_cwd, + config_mode=mode, + model=model, + ) + + +# ── Back-compat surface (thin wrappers over RuntimeMode) ──────────────────── + + +def is_coding_context( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, +) -> bool: + """Whether Hermes should operate in its coding posture right now.""" + return resolve_runtime_mode(platform=platform, cwd=cwd, config=config).is_coding + + +def coding_selection( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, +) -> Optional[list[str]]: + """Toolset selection for the coding posture. + + ``None`` unless the user opted into ``focus`` mode AND the posture is + active — the default coding posture never overrides configured toolsets. + """ + return resolve_runtime_mode( + platform=platform, cwd=cwd, config=config + ).toolset_selection(config) + + +def coding_system_blocks( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, + model: Optional[str] = None, +) -> list[str]: + """Stable system-prompt blocks for the current posture (empty when general). + + ``model`` steers the brief's edit-format nudge toward the model's family. 
+ """ + return resolve_runtime_mode( + platform=platform, cwd=cwd, config=config, model=model + ).system_blocks() + + +def coding_hidden_skill_categories( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, +) -> frozenset[str]: + """Skill categories the active posture prunes from the prompt's skill index. + + Empty outside the coding posture. Discovery-only: hidden skills remain + loadable via ``skills_list`` / ``skill_view``. + """ + return resolve_runtime_mode( + platform=platform, cwd=cwd, config=config + ).hidden_skill_categories() + + +def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]: + """Names of MCP servers the user has enabled — kept in the coding posture. + + MCP servers (figma, browser, tophat, …) are explicitly configured and part + of the coding workflow, not noise to strip. + """ + try: + from hermes_cli.config import read_raw_config + from hermes_cli.tools_config import _parse_enabled_flag + + servers = read_raw_config().get("mcp_servers") or {} + return [ + str(name) + for name, cfg in servers.items() + if isinstance(cfg, dict) + and _parse_enabled_flag(cfg.get("enabled", True), default=True) + ] + except Exception: + return [] + + +# ── git/workspace probe ───────────────────────────────────────────────────── + + +def _git(cwd: Path, *args: str) -> str: + try: + out = subprocess.run( + ["git", "-C", str(cwd), *args], + capture_output=True, + text=True, + timeout=_GIT_TIMEOUT, + ) + except (OSError, subprocess.SubprocessError): + return "" + return out.stdout.strip() if out.returncode == 0 else "" + + +def _parse_status(porcelain: str) -> tuple[dict[str, str], dict[str, int]]: + """Parse ``git status --porcelain=2 --branch`` into branch + counts.""" + branch: dict[str, str] = {} + counts = {"staged": 0, "modified": 0, "untracked": 0, "conflicts": 0} + for line in porcelain.splitlines(): + if line.startswith("# branch.head"): + branch["head"] = 
line.split(maxsplit=2)[-1] + elif line.startswith("# branch.upstream"): + branch["upstream"] = line.split(maxsplit=2)[-1] + elif line.startswith("# branch.ab"): + parts = line.split() + branch["ahead"], branch["behind"] = parts[2].lstrip("+"), parts[3].lstrip("-") + elif line.startswith(("1 ", "2 ")): + xy = line.split(maxsplit=2)[1] + if xy[0] != ".": + counts["staged"] += 1 + if xy[1] != ".": + counts["modified"] += 1 + elif line.startswith("u "): + counts["conflicts"] += 1 + elif line.startswith("? "): + counts["untracked"] += 1 + return branch, counts + + +def _read_small(path: Path) -> str: + """Read a small text file, or ``""`` — never raises, never reads huge files.""" + try: + if not path.is_file() or path.stat().st_size > _MAX_FACT_FILE_BYTES: + return "" + return path.read_text(encoding="utf-8", errors="replace") + except OSError: + return "" + + +def _project_facts(root: Path) -> list[str]: + """Detected project facts for the workspace snapshot. + + The point is to hand the model its *verify loop* up front — which manifest, + which package manager, and the exact test/lint/build commands — instead of + making it rediscover them every session. Cheap: stat calls plus reads of a + couple of small files; built once at prompt-build time (cache-safe). 
+ """ + facts: list[str] = [] + + manifests = [m for m in _PROJECT_MARKERS if m not in _CONTEXT_FILES and (root / m).is_file()] + package_managers = [ + pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file() + ] + if manifests: + line = f"- Project: {', '.join(manifests[:6])}" + if package_managers: + line += f" ({'/'.join(dict.fromkeys(package_managers))})" + facts.append(line) + + verify: list[str] = [] + if (root / "scripts" / "run_tests.sh").is_file(): + verify.append("scripts/run_tests.sh") + if (root / "package.json").is_file(): + try: + scripts = json.loads(_read_small(root / "package.json") or "{}").get("scripts") or {} + except (json.JSONDecodeError, AttributeError): + scripts = {} + js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm") + verify.extend(f"{js_pm} run {name}" for name in _VERIFY_TARGETS if name in scripts) + if (root / "pytest.ini").is_file() or "[tool.pytest" in _read_small(root / "pyproject.toml"): + verify.append("pytest") + makefile = _read_small(root / "Makefile") + if makefile: + verify.extend( + f"make {name}" for name in _VERIFY_TARGETS + if re.search(rf"^{re.escape(name)}\s*:", makefile, re.MULTILINE) + ) + if verify: + deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS] + facts.append(f"- Verify: {'; '.join(deduped)}") + + context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()] + if context_files: + facts.append(f"- Context files: {', '.join(context_files)}") + + return facts + + +def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str: + """Workspace snapshot for the system prompt (empty outside a workspace). + + Git state (branch/status/commits) when the cwd is in a repo, plus detected + project facts (manifest, package manager, verify commands, context files) + — so marker-only (non-git) projects still get a snapshot. 
+ """ + resolved = _resolve_cwd(cwd) + git_root = _git_root(resolved) + root = git_root or _marker_root(resolved) + if root is None: + return "" + + lines = ["Workspace (snapshot at session start — re-check with `git` before acting on it):"] + lines.append(f"- Root: {root}") + + if git_root is not None: + branch, counts = _parse_status(_git(root, "status", "--porcelain=2", "--branch")) + head = branch.get("head", "") + if head and head != "(detached)": + line = f"- Branch: {head}" + if branch.get("upstream"): + line += f" \u2192 {branch['upstream']}" + ahead, behind = branch.get("ahead", "0"), branch.get("behind", "0") + if ahead != "0" or behind != "0": + line += f" (ahead {ahead}, behind {behind})" + lines.append(line) + elif head == "(detached)": + lines.append("- Branch: (detached HEAD)") + + # Linked worktree: the per-worktree git dir differs from the shared common dir. + git_dir, common_dir = _git(root, "rev-parse", "--git-dir"), _git(root, "rev-parse", "--git-common-dir") + if git_dir and common_dir and Path(git_dir).resolve() != Path(common_dir).resolve(): + main_tree = Path(common_dir).resolve().parent + lines.append(f"- Worktree: linked (primary tree at {main_tree})") + + dirty = [f"{n} {label}" for label, n in ( + ("staged", counts["staged"]), ("modified", counts["modified"]), + ("untracked", counts["untracked"]), ("conflicts", counts["conflicts"]), + ) if n] + lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}") + + recent = _git(root, "log", "-3", "--pretty=%h %s") + if recent: + lines.append("- Recent commits:") + lines.extend(f" {c}" for c in recent.splitlines()) + + lines.extend(_project_facts(root)) + return "\n".join(lines) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 73bed6b0670..8850b7fd565 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2221,30 +2221,54 @@ def run_conversation( print(f"{agent.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"") 
print(f"{agent.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"") - # ── Thinking block signature recovery ───────────────── + # Thinking block signature recovery. + # # Anthropic signs thinking blocks against the full turn - # content. Any upstream mutation (context compression, + # content. Any upstream mutation (context compression, # session truncation, message merging) invalidates the - # signature → HTTP 400. Recovery: strip reasoning_details - # from all messages so the next retry sends no thinking - # blocks at all. One-shot — don't retry infinitely. + # signature and the API replies HTTP 400 ("invalid + # signature" or "cannot be modified"). Recovery strips + # ``reasoning_details`` so the retry sends no thinking + # blocks at all. One-shot per outer loop. + # + # The strip targets ``api_messages``, which is the + # API-call-time list that ``_build_api_kwargs`` consumes + # on every retry. ``api_messages`` was populated once at + # the start of the turn from shallow copies of + # ``messages``, so mutating it does not touch the + # canonical store. The previous implementation popped + # ``reasoning_details`` from ``messages`` instead, which + # had two problems: ``api_messages`` carried its own + # reference to the field through the shallow copy, so the + # retry's wire payload still included thinking blocks and + # the recovery never reached the API; and the mutation + # persisted into ``state.db`` through any subsequent + # ``_persist_session`` call, permanently corrupting the + # conversation. Future turns would replay the stripped + # state, hit the same 400, and the agent would terminate + # with ``max_retries_exhausted``, often spawning + # cascading compaction-ended sessions chained off the + # corrupted parent. 
if ( classified.reason == FailoverReason.thinking_signature and not _retry.thinking_sig_retry_attempted ): _retry.thinking_sig_retry_attempted = True - for _m in messages: - if isinstance(_m, dict): + _api_stripped = 0 + for _m in api_messages: + if isinstance(_m, dict) and "reasoning_details" in _m: _m.pop("reasoning_details", None) + _api_stripped += 1 agent._vprint( - f"{agent.log_prefix}⚠️ Thinking block signature invalid — " - f"stripped all thinking blocks, retrying...", + f"{agent.log_prefix}⚠️ Thinking block signature invalid, " + f"stripped reasoning_details from api_messages for retry...", force=True, ) logger.warning( "%sThinking block signature recovery: stripped " - "reasoning_details from %d messages", - agent.log_prefix, len(messages), + "reasoning_details from %d api_messages " + "(canonical messages unchanged)", + agent.log_prefix, _api_stripped, ) continue diff --git a/agent/display.py b/agent/display.py index 8514279888e..84c8509faed 100644 --- a/agent/display.py +++ b/agent/display.py @@ -858,6 +858,20 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str] return False, "" +def _used_free_parallel(result: str | None) -> bool: + """True when a web result came from Parallel's free Search MCP. + + Only the keyless Parallel path tags its result with ``provider="parallel"``; + the paid REST path and every other provider omit it. Used to label the tool + line "Parallel search" / "Parallel fetch" exactly when the free MCP served + the call. 
+ """ + if not isinstance(result, str) or '"provider"' not in result: + return False + data = safe_json_loads(result) + return isinstance(data, dict) and str(data.get("provider", "")).lower() == "parallel" + + def get_cute_tool_message( tool_name: str, args: dict, duration: float, result: str | None = None, ) -> str: @@ -895,15 +909,17 @@ def get_cute_tool_message( return f"{line}{failure_suffix}" if tool_name == "web_search": - return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}") + verb = "Parallel search" if _used_free_parallel(result) else "search" + return _wrap(f"┊ 🔍 {verb:<9} {_trunc(args.get('query', ''), 42)} {dur}") if tool_name == "web_extract": + verb = "Parallel fetch" if _used_free_parallel(result) else "fetch" urls = args.get("urls", []) if urls: url = urls[0] if isinstance(urls, list) else str(urls) domain = url.replace("https://", "").replace("http://", "").split("/")[0] extra = f" +{len(urls)-1}" if len(urls) > 1 else "" - return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}") - return _wrap(f"┊ 📄 fetch pages {dur}") + return _wrap(f"┊ 📄 {verb:<9} {_trunc(domain, 35)}{extra} {dur}") + return _wrap(f"┊ 📄 {verb:<9} pages {dur}") if tool_name == "terminal": return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}") if tool_name == "process": diff --git a/agent/error_classifier.py b/agent/error_classifier.py index a2045b5f8cd..c39c24a6a5d 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -549,14 +549,32 @@ def classify_api_error( should_fallback=True, ) - # Anthropic thinking block signature invalid (400). + # Anthropic thinking block recovery (400). Two distinct failure modes, + # same recovery (strip all reasoning_details and retry without thinking + # blocks — see the thinking_signature handler in conversation_loop.py): + # 1. 
Signature mismatch: a thinking block is signed against the full + # turn content; any upstream mutation (context compression, session + # truncation, message merging) invalidates the signature. + # Pattern: "signature" + "thinking". + # 2. Frozen-block mutation: Anthropic rejects any change to the + # thinking/redacted_thinking blocks in the *latest* assistant + # message — "`thinking` or `redacted_thinking` blocks in the latest + # assistant message cannot be modified. These blocks must remain as + # they were in the original response." This carries no "signature" + # token, so the original pattern missed it and the turn hard-aborted + # as a non-retryable client error instead of self-healing. + # Pattern: "thinking" + ("cannot be modified" | "must remain as they were"). # Don't gate on provider — OpenRouter proxies Anthropic errors, so the # provider may be "openrouter" even though the error is Anthropic-specific. - # The message pattern ("signature" + "thinking") is unique enough. + # The combined patterns are unique enough. if ( status_code == 400 - and "signature" in error_msg and "thinking" in error_msg + and ( + "signature" in error_msg + or "cannot be modified" in error_msg + or "must remain as they were" in error_msg + ) ): return _result( FailoverReason.thinking_signature, diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b9c8638ddbc..4868f054c10 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -1118,11 +1118,12 @@ def _skill_should_show( def build_skills_system_prompt( available_tools: "set[str] | None" = None, available_toolsets: "set[str] | None" = None, + hidden_categories: "frozenset[str] | None" = None, ) -> str: """Build a compact skill index for the system prompt. Two-layer cache: - 1. In-process LRU dict keyed by (skills_dir, tools, toolsets) + 1. In-process LRU dict keyed by (skills_dir, tools, toolsets, hidden) 2. 
Disk snapshot (``.skills_prompt_snapshot.json``) validated by mtime/size manifest — survives process restarts @@ -1132,6 +1133,12 @@ def build_skills_system_prompt( scanned alongside the local ``~/.hermes/skills/`` directory. External dirs are read-only — they appear in the index but new skills are always created in the local dir. Local skills take precedence when names collide. + + ``hidden_categories`` (e.g. from the coding posture — see + agent/coding_context.py) prunes whole categories from the rendered index. + Discovery-only: the snapshot stores everything, ``skills_list`` / + ``skill_view`` still reach every skill, and a footer note tells the model + the full catalog exists. """ skills_dir = get_skills_dir() external_dirs = get_all_skills_dirs()[1:] # skip local (index 0) @@ -1156,6 +1163,7 @@ def build_skills_system_prompt( tuple(sorted(str(ts) for ts in (available_toolsets or set()))), _platform_hint, tuple(sorted(disabled)), + tuple(sorted(hidden_categories or ())), ) with _SKILLS_PROMPT_CACHE_LOCK: cached = _SKILLS_PROMPT_CACHE.get(cache_key) @@ -1289,6 +1297,26 @@ def build_skills_system_prompt( except Exception as e: logger.debug("Could not read external skill description %s: %s", desc_file, e) + # Posture-driven category pruning (e.g. non-coding skills while pairing on + # code). Match on the top-level category segment so nested categories + # ("social-media/twitter") are pruned with their parent. + hidden_note = "" + if hidden_categories: + before = sum(len(v) for v in skills_by_category.values()) + skills_by_category = { + cat: entries + for cat, entries in skills_by_category.items() + if cat.split("/", 1)[0] not in hidden_categories + } + pruned = before - sum(len(v) for v in skills_by_category.values()) + if pruned: + hidden_note = ( + f"\n(Note: {pruned} skill(s) in categories unrelated to the " + "current coding context are not listed here. 
The full catalog " + "is available via skills_list if the user asks for something " + "outside this list.)" + ) + if not skills_by_category: result = "" else: @@ -1337,6 +1365,7 @@ def build_skills_system_prompt( "\n" "\n" "Only proceed without loading a skill if genuinely none are relevant to the task." + + hidden_note ) # ── Store in LRU cache ──────────────────────────────────────────── diff --git a/agent/system_prompt.py b/agent/system_prompt.py index 4038716df48..0c6da6c2243 100644 --- a/agent/system_prompt.py +++ b/agent/system_prompt.py @@ -191,9 +191,21 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) ) if toolset } + # Coding posture prunes non-coding skill categories from the index + # (discovery-only — skills_list/skill_view still reach everything). + _hidden_cats = frozenset() + try: + from agent.coding_context import coding_hidden_skill_categories + + _hidden_cats = coding_hidden_skill_categories( + platform=agent.platform, cwd=resolve_context_cwd() + ) + except Exception: + _hidden_cats = frozenset() skills_prompt = _r.build_skills_system_prompt( available_tools=agent.valid_tool_names, available_toolsets=avail_toolsets, + hidden_categories=_hidden_cats or None, ) else: skills_prompt = "" @@ -221,6 +233,26 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) if _env_hints: stable_parts.append(_env_hints) + # Coding posture (base Hermes, any interactive coding surface in a code + # workspace — see agent/coding_context.py). The operating brief + the live + # git/workspace snapshot are built once here and cached for the session; + # the snapshot is never re-probed per turn (that would break the prompt + # cache), so the brief tells the model to re-check git before relying on it. 
+ if agent.valid_tool_names: + try: + from agent.coding_context import coding_system_blocks + + stable_parts.extend( + coding_system_blocks( + platform=agent.platform, + cwd=resolve_context_cwd(), + model=agent.model, + ) + ) + except Exception: + # Coding-context probing must never block prompt build. + pass + # Local Python toolchain probe — names python/pip/uv/PEP-668 state when # something is non-default so the model can pick the right install # strategy without discovering by failure. Emits a single line; emits diff --git a/agent/tool_executor.py b/agent/tool_executor.py index cd24b63f393..144a2929782 100644 --- a/agent/tool_executor.py +++ b/agent/tool_executor.py @@ -417,7 +417,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe # ── Logging / callbacks ────────────────────────────────────────── tool_names_str = ", ".join(name for _, name, _, _, _, _ in parsed_calls) - if not agent.quiet_mode: + if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off": print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}") for i, (tc, name, args, middleware_trace, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1): args_str = json.dumps(args, ensure_ascii=False) @@ -702,7 +702,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe if agent._should_emit_quiet_tool_messages(): cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result) agent._safe_print(f" {cute_msg}") - elif getattr(agent, "tool_progress_mode", "all") != "off": + elif not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off": _preview_str = _multimodal_text_summary(function_result) if agent.verbose_logging: print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s") @@ -866,7 +866,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe elif function_name == "skill_manage": agent._iters_since_skill = 0 - if not 
agent.quiet_mode: + if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off": args_str = json.dumps(function_args, ensure_ascii=False) if agent.verbose_logging: print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})") @@ -1384,7 +1384,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe # entire batch. The model sees it on the next API iteration. agent._apply_pending_steer_to_tool_results(messages, 1) - if not agent.quiet_mode: + if not agent.quiet_mode and getattr(agent, "tool_progress_mode", "all") != "off": if agent.verbose_logging: print(f" ✅ Tool {i} completed in {tool_duration:.2f}s") print(agent._wrap_verbose("Result: ", function_result)) diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py index d77ae63ef32..3a209f2d753 100644 --- a/agent/transports/anthropic.py +++ b/agent/transports/anthropic.py @@ -84,7 +84,7 @@ class AnthropicTransport(ProviderTransport): to OpenAI finish_reason, and collects reasoning_details in provider_data. """ import json - from agent.anthropic_adapter import _to_plain_data + from agent.anthropic_adapter import _to_plain_data, _sanitize_replay_block from agent.transports.types import ToolCall strip_tool_prefix = kwargs.get("strip_tool_prefix", False) @@ -94,14 +94,40 @@ class AnthropicTransport(ProviderTransport): reasoning_parts = [] reasoning_details = [] tool_calls = [] + # Verbatim, order-preserving copy of every content block in the turn. + # Anthropic signs each thinking block against the turn content that + # PRECEDES it at its position; when a turn interleaves thinking and + # tool_use (adaptive/interleaved thinking, Claude 4.6+), the parallel + # reasoning_details + tool_calls lists below lose that cross-type + # ordering. Replaying the latest assistant message in the wrong order + # invalidates the signatures -> HTTP 400 "thinking ... blocks in the + # latest assistant message cannot be modified". 
Preserve the exact + # block sequence here so the adapter can replay it unchanged. See + # tests/agent/test_anthropic_thinking_block_order.py. + ordered_blocks = [] for block in response.content: + block_dict = _to_plain_data(block) + clean_block = None + if isinstance(block_dict, dict): + # Sanitize at capture so output-only SDK fields (parsed_output, + # caller, citations=None, …) never persist to state.db and leak + # back as request input on replay → HTTP 400 "Extra inputs are + # not permitted". Defence-in-depth with the replay-side sanitize. + clean_block = _sanitize_replay_block(block_dict) + if clean_block is not None: + ordered_blocks.append(clean_block) if block.type == "text": text_parts.append(block.text) - elif block.type == "thinking": - reasoning_parts.append(block.thinking) - block_dict = _to_plain_data(block) - if isinstance(block_dict, dict): + elif block.type in ("thinking", "redacted_thinking"): + if block.type == "thinking": + reasoning_parts.append(block.thinking) + # Use the sanitized block (clean_block) for reasoning_details too, + # since _extract_preserved_thinking_blocks replays these on the + # non-ordered path. Falls back to raw only if sanitize dropped it. + if isinstance(clean_block, dict): + reasoning_details.append(clean_block) + elif isinstance(block_dict, dict): reasoning_details.append(block_dict) elif block.type == "tool_use": name = block.name @@ -130,6 +156,23 @@ class AnthropicTransport(ProviderTransport): provider_data = {} if reasoning_details: provider_data["reasoning_details"] = reasoning_details + # Only worth carrying the ordered-blocks channel when the turn + # actually interleaves signed thinking with tool_use — that's the + # only shape the parallel lists reconstruct incorrectly. A turn that + # is purely text, or thinking-then-tools with a single leading + # thinking block, replays correctly without it. 
+ _has_signed_thinking = any( + isinstance(b, dict) + and b.get("type") in ("thinking", "redacted_thinking") + and (b.get("signature") or b.get("data")) + for b in ordered_blocks + ) + _has_tool_use = any( + isinstance(b, dict) and b.get("type") == "tool_use" + for b in ordered_blocks + ) + if _has_signed_thinking and _has_tool_use: + provider_data["anthropic_content_blocks"] = ordered_blocks return NormalizedResponse( content="\n".join(text_parts) if text_parts else None, diff --git a/agent/transports/types.py b/agent/transports/types.py index 2deb157535b..6ad20f2376d 100644 --- a/agent/transports/types.py +++ b/agent/transports/types.py @@ -121,6 +121,18 @@ class NormalizedResponse: pd = self.provider_data or {} return pd.get("reasoning_details") + @property + def anthropic_content_blocks(self): + """Verbatim, order-preserving Anthropic content blocks for a turn. + + Present only when an Anthropic turn interleaves signed thinking with + tool_use — the one shape the parallel reasoning_details + tool_calls + lists reconstruct in the wrong order, invalidating thinking-block + signatures on replay. See agent/transports/anthropic.py. + """ + pd = self.provider_data or {} + return pd.get("anthropic_content_blocks") + @property def codex_reasoning_items(self): pd = self.provider_data or {} diff --git a/apps/desktop/electron/fs-read-dir.cjs b/apps/desktop/electron/fs-read-dir.cjs new file mode 100644 index 00000000000..52d182ad567 --- /dev/null +++ b/apps/desktop/electron/fs-read-dir.cjs @@ -0,0 +1,109 @@ +'use strict' + +const fs = require('node:fs') +const path = require('node:path') +const { resolveDirectoryForIpc } = require('./hardening.cjs') + +const FS_READDIR_STAT_CONCURRENCY = 16 + +// Always-hidden noise (covers non-git projects too; gitignore catches many of +// these, but the project tree should keep the same hygiene without one). 
+const FS_READDIR_HIDDEN = new Set([ + '.git', + '.hg', + '.svn', + '.cache', + '.next', + '.turbo', + '.venv', + '__pycache__', + 'build', + 'dist', + 'node_modules', + 'target', + 'venv' +]) + +function direntIsDirectory(dirent) { + return typeof dirent.isDirectory === 'function' && dirent.isDirectory() +} + +function direntIsFile(dirent) { + return typeof dirent.isFile === 'function' && dirent.isFile() +} + +function direntIsSymbolicLink(dirent) { + return typeof dirent.isSymbolicLink === 'function' && dirent.isSymbolicLink() +} + +function shouldStatDirent(dirent) { + if (direntIsDirectory(dirent)) return false + + return direntIsSymbolicLink(dirent) || !direntIsFile(dirent) +} + +async function entryForDirent(dirent, resolved, fsImpl) { + const fullPath = path.join(resolved, dirent.name) + let isDirectory = direntIsDirectory(dirent) + + if (!isDirectory && shouldStatDirent(dirent)) { + try { + isDirectory = (await fsImpl.promises.stat(fullPath)).isDirectory() + } catch { + isDirectory = false + } + } + + return { name: dirent.name, path: fullPath, isDirectory } +} + +async function mapWithStatConcurrency(items, mapper) { + const results = new Array(items.length) + let nextIndex = 0 + + async function runWorker() { + while (nextIndex < items.length) { + const index = nextIndex + nextIndex += 1 + results[index] = await mapper(items[index]) + } + } + + const workerCount = Math.min(FS_READDIR_STAT_CONCURRENCY, items.length) + const workers = Array.from({ length: workerCount }, () => runWorker()) + await Promise.all(workers) + + return results +} + +async function readDirForIpc(dirPath, options = {}) { + const fsImpl = options.fs || fs + let resolved + + try { + ;({ resolvedPath: resolved } = await resolveDirectoryForIpc(dirPath, { + fs: fsImpl, + purpose: 'Directory read' + })) + } catch (error) { + return { entries: [], error: error?.code || 'read-error' } + } + + try { + const dirents = await fsImpl.promises.readdir(resolved, { withFileTypes: true }) + const 
visibleDirents = dirents.filter(dirent => !FS_READDIR_HIDDEN.has(dirent.name)) + const entries = await mapWithStatConcurrency(visibleDirents, dirent => + entryForDirent(dirent, resolved, fsImpl) + ) + + entries.sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name)) + + return { entries } + } catch (error) { + return { entries: [], error: error?.code || 'read-error' } + } +} + +module.exports = { + readDirForIpc +} diff --git a/apps/desktop/electron/fs-read-dir.test.cjs b/apps/desktop/electron/fs-read-dir.test.cjs new file mode 100644 index 00000000000..42e80af3489 --- /dev/null +++ b/apps/desktop/electron/fs-read-dir.test.cjs @@ -0,0 +1,364 @@ +'use strict' + +const assert = require('node:assert/strict') +const fs = require('node:fs') +const os = require('node:os') +const path = require('node:path') +const test = require('node:test') +const { pathToFileURL } = require('node:url') + +const { readDirForIpc } = require('./fs-read-dir.cjs') + +function mkTmpDir() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-fs-read-dir-')) +} + +function fakeDirent(name, flags = {}) { + return { + name, + isDirectory: () => Boolean(flags.directory), + isFile: () => Boolean(flags.file), + isSymbolicLink: () => Boolean(flags.symlink) + } +} + +test('readDirForIpc hides noisy directories and files from the project tree', async () => { + const root = mkTmpDir() + + try { + fs.mkdirSync(path.join(root, 'node_modules')) + fs.mkdirSync(path.join(root, 'src')) + fs.writeFileSync(path.join(root, 'target'), 'hidden file') + fs.writeFileSync(path.join(root, 'README.md'), 'visible file') + + const result = await readDirForIpc(root) + + assert.equal(result.error, undefined) + assert.deepEqual( + result.entries.map(entry => entry.name), + ['src', 'README.md'] + ) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + } +}) + +test('readDirForIpc filters a hidden basename whether it is a file or directory', async () => { + const dirRoot 
= mkTmpDir() + const fileRoot = mkTmpDir() + + try { + fs.mkdirSync(path.join(dirRoot, 'node_modules')) + fs.writeFileSync(path.join(dirRoot, 'visible.txt'), 'visible') + fs.writeFileSync(path.join(fileRoot, 'node_modules'), 'hidden file') + fs.writeFileSync(path.join(fileRoot, 'visible.txt'), 'visible') + + assert.deepEqual( + (await readDirForIpc(dirRoot)).entries.map(entry => entry.name), + ['visible.txt'] + ) + assert.deepEqual( + (await readDirForIpc(fileRoot)).entries.map(entry => entry.name), + ['visible.txt'] + ) + } finally { + fs.rmSync(dirRoot, { recursive: true, force: true }) + fs.rmSync(fileRoot, { recursive: true, force: true }) + } +}) + +test('readDirForIpc returns directories before files and sorts by name within groups', async () => { + const root = mkTmpDir() + + try { + fs.writeFileSync(path.join(root, 'z.txt'), 'z') + fs.mkdirSync(path.join(root, 'src')) + fs.writeFileSync(path.join(root, 'a.txt'), 'a') + fs.mkdirSync(path.join(root, 'lib')) + + const result = await readDirForIpc(root) + + assert.equal(result.error, undefined) + assert.deepEqual( + result.entries.map(entry => entry.name), + ['lib', 'src', 'a.txt', 'z.txt'] + ) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + } +}) + +test('readDirForIpc accepts file URLs for directories', async () => { + const root = mkTmpDir() + + try { + fs.mkdirSync(path.join(root, 'src')) + fs.writeFileSync(path.join(root, 'README.md'), 'visible file') + + const result = await readDirForIpc(pathToFileURL(root).toString()) + + assert.equal(result.error, undefined) + assert.deepEqual( + result.entries.map(entry => entry.name), + ['src', 'README.md'] + ) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + } +}) + +test('readDirForIpc returns invalid-path for blank or non-string input', async () => { + let readdirCalls = 0 + const fsImpl = { + promises: { + readdir: async () => { + readdirCalls += 1 + return [] + } + } + } + + assert.deepEqual(await readDirForIpc('', { fs: 
fsImpl }), { entries: [], error: 'invalid-path' }) + assert.deepEqual(await readDirForIpc(' ', { fs: fsImpl }), { entries: [], error: 'invalid-path' }) + assert.deepEqual(await readDirForIpc(null, { fs: fsImpl }), { entries: [], error: 'invalid-path' }) + assert.equal(readdirCalls, 0) +}) + +test('readDirForIpc rejects Windows device paths before readdir', async () => { + let readdirCalls = 0 + const fsImpl = { + promises: { + readdir: async () => { + readdirCalls += 1 + return [] + } + } + } + + assert.deepEqual(await readDirForIpc('\\\\?\\C:\\secret', { fs: fsImpl }), { + entries: [], + error: 'device-path' + }) + assert.equal(readdirCalls, 0) +}) + +test('readDirForIpc returns filesystem error codes instead of throwing', async () => { + const root = mkTmpDir() + + try { + const result = await readDirForIpc(path.join(root, 'missing')) + + assert.deepEqual(result, { entries: [], error: 'ENOENT' }) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + } +}) + +test('readDirForIpc marks a symlink to a directory as a directory', async t => { + const root = mkTmpDir() + + try { + fs.mkdirSync(path.join(root, 'actual-dir')) + + try { + fs.symlinkSync(path.join(root, 'actual-dir'), path.join(root, 'linked-dir'), 'dir') + } catch (error) { + if (error?.code === 'EPERM' || error?.code === 'EACCES') { + t.skip(`symlink creation is not permitted on this platform (${error.code})`) + + return + } + + throw error + } + + const result = await readDirForIpc(root) + const linked = result.entries.find(entry => entry.name === 'linked-dir') + + assert.equal(result.error, undefined) + assert.equal(linked?.isDirectory, true) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + } +}) + +test('readDirForIpc marks a Windows junction to a directory as a directory', async t => { + if (process.platform !== 'win32') { + t.skip('junctions are a Windows-specific symlink type') + + return + } + + const root = mkTmpDir() + + try { + fs.mkdirSync(path.join(root, 
'actual-dir')) + + try { + fs.symlinkSync(path.join(root, 'actual-dir'), path.join(root, 'junction-dir'), 'junction') + } catch (error) { + if (error?.code === 'EPERM' || error?.code === 'EACCES') { + t.skip(`junction creation is not permitted on this platform (${error.code})`) + + return + } + + throw error + } + + const result = await readDirForIpc(root) + const junction = result.entries.find(entry => entry.name === 'junction-dir') + + assert.equal(result.error, undefined) + assert.equal(junction?.isDirectory, true) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + } +}) + +test('readDirForIpc allows expanding symlink or junction directories outside the project root', async t => { + const root = mkTmpDir() + const outside = mkTmpDir() + + try { + fs.writeFileSync(path.join(outside, 'outside.txt'), 'ok') + + const linkPath = path.join(root, 'outside-link') + try { + fs.symlinkSync(outside, linkPath, process.platform === 'win32' ? 'junction' : 'dir') + } catch (error) { + if (error?.code === 'EPERM' || error?.code === 'EACCES') { + t.skip(`directory symlink creation is not permitted on this platform (${error.code})`) + + return + } + + throw error + } + + const result = await readDirForIpc(linkPath) + + assert.equal(result.error, undefined) + assert.deepEqual(result.entries, [ + { name: 'outside.txt', path: path.join(linkPath, 'outside.txt'), isDirectory: false } + ]) + } finally { + fs.rmSync(root, { recursive: true, force: true }) + fs.rmSync(outside, { recursive: true, force: true }) + } +}) + +test('readDirForIpc stats symbolic links and unknown entries without dropping the whole listing', async () => { + const input = path.join('virtual-root') + const resolved = path.resolve(input) + const statCalls = [] + const fsImpl = { + promises: { + readdir: async () => [ + fakeDirent('unknown-entry'), + fakeDirent('linked-dir', { symlink: true }), + fakeDirent('broken-link', { symlink: true }), + fakeDirent('plain.txt', { file: true }) + ], + stat: 
async fullPath => { + if (fullPath === resolved) { + return { isDirectory: () => true } + } + + statCalls.push(fullPath) + if (fullPath.endsWith(`${path.sep}linked-dir`)) { + return { isDirectory: () => true } + } + throw Object.assign(new Error('gone'), { code: 'ENOENT' }) + } + } + } + + const result = await readDirForIpc(input, { fs: fsImpl }) + + assert.equal(result.error, undefined) + assert.deepEqual( + statCalls.sort(), + [path.join(resolved, 'broken-link'), path.join(resolved, 'linked-dir'), path.join(resolved, 'unknown-entry')].sort() + ) + assert.deepEqual(result.entries, [ + { name: 'linked-dir', path: path.join(resolved, 'linked-dir'), isDirectory: true }, + { name: 'broken-link', path: path.join(resolved, 'broken-link'), isDirectory: false }, + { name: 'plain.txt', path: path.join(resolved, 'plain.txt'), isDirectory: false }, + { name: 'unknown-entry', path: path.join(resolved, 'unknown-entry'), isDirectory: false } + ]) +}) + +test('readDirForIpc bounds concurrent stats while preserving complete sorted output', async () => { + const input = path.join('virtual-root') + const resolved = path.resolve(input) + const names = Array.from({ length: 105 }, (_, index) => `entry-${String(104 - index).padStart(3, '0')}`) + const failedName = 'entry-100' + const directoryNames = new Set(names.filter((_, index) => index % 10 === 4)) + const successfulDirectoryNames = new Set([...directoryNames].filter(name => name !== failedName)) + const statCalls = [] + let active = 0 + let peak = 0 + let releaseStats + let markFirstStatStarted + const statsReleased = new Promise(resolve => { + releaseStats = resolve + }) + const firstStatStarted = new Promise(resolve => { + markFirstStatStarted = resolve + }) + const fsImpl = { + promises: { + readdir: async () => [ + fakeDirent('node_modules', { symlink: true }), + ...names.map((name, index) => fakeDirent(name, { symlink: index % 2 === 0 })) + ], + stat: async fullPath => { + if (fullPath === resolved) { + return { isDirectory: 
() => true } + } + + statCalls.push(fullPath) + active += 1 + peak = Math.max(peak, active) + markFirstStatStarted() + await statsReleased + active -= 1 + + const name = path.basename(fullPath) + if (name === failedName) { + throw Object.assign(new Error('gone'), { code: 'ENOENT' }) + } + + return { isDirectory: () => successfulDirectoryNames.has(name) } + } + } + } + + const resultPromise = readDirForIpc(input, { fs: fsImpl }) + await firstStatStarted + await new Promise(resolve => setImmediate(resolve)) + releaseStats() + const result = await resultPromise + + const expectedNames = [ + ...names.filter(name => successfulDirectoryNames.has(name)).sort(), + ...names.filter(name => !successfulDirectoryNames.has(name)).sort() + ] + + assert.equal(result.error, undefined) + assert.equal(result.entries.length, names.length) + assert.equal(statCalls.length, names.length) + assert.equal(statCalls.some(fullPath => fullPath.endsWith(`${path.sep}node_modules`)), false) + assert.ok(peak > 1, `expected concurrent stats, observed peak ${peak}`) + assert.ok(peak <= 16, `expected at most 16 concurrent stats, observed peak ${peak}`) + assert.deepEqual( + result.entries.map(entry => entry.name), + expectedNames + ) + assert.equal(result.entries.find(entry => entry.name === failedName)?.isDirectory, false) + assert.equal( + result.entries.filter(entry => entry.isDirectory).length, + successfulDirectoryNames.size + ) +}) diff --git a/apps/desktop/electron/git-root.cjs b/apps/desktop/electron/git-root.cjs new file mode 100644 index 00000000000..593d3531ebc --- /dev/null +++ b/apps/desktop/electron/git-root.cjs @@ -0,0 +1,54 @@ +'use strict' + +const fs = require('node:fs') +const path = require('node:path') +const { resolveRequestedPathForIpc } = require('./hardening.cjs') + +function findGitRoot(start, fsImpl = fs) { + let dir = start + + for (let i = 0; i < 50; i += 1) { + try { + if (fsImpl.existsSync(path.join(dir, '.git'))) { + return dir + } + } catch { + return null + } + + 
const parent = path.dirname(dir) + + if (parent === dir) { + return null + } + + dir = parent + } + + return null +} + +async function gitRootForIpc(startPath, options = {}) { + const fsImpl = options.fs || fs + let resolved + + try { + resolved = resolveRequestedPathForIpc(startPath, { purpose: 'Git root' }) + } catch { + return null + } + + try { + const stat = await fsImpl.promises.stat(resolved) + const start = stat.isDirectory() ? resolved : path.dirname(resolved) + + return findGitRoot(start, fsImpl) + } catch { + return findGitRoot(resolved, fsImpl) + } +} + +module.exports = { + findGitRoot, + gitRootForIpc +} diff --git a/apps/desktop/electron/git-root.test.cjs b/apps/desktop/electron/git-root.test.cjs new file mode 100644 index 00000000000..ba649b259f3 --- /dev/null +++ b/apps/desktop/electron/git-root.test.cjs @@ -0,0 +1,40 @@ +'use strict' + +const assert = require('node:assert/strict') +const fs = require('node:fs') +const os = require('node:os') +const path = require('node:path') +const test = require('node:test') +const { pathToFileURL } = require('node:url') + +const { gitRootForIpc } = require('./git-root.cjs') + +function mkTmpDir() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-git-root-')) +} + +test('gitRootForIpc returns null for invalid and device paths', async () => { + assert.equal(await gitRootForIpc(''), null) + assert.equal(await gitRootForIpc(' '), null) + assert.equal(await gitRootForIpc(null), null) + assert.equal(await gitRootForIpc('\\\\?\\C:\\secret'), null) + assert.equal(await gitRootForIpc('file:///%E0%A4%A'), null) +}) + +test('gitRootForIpc resolves directories files missing descendants and file URLs', async t => { + const root = mkTmpDir() + t.after(() => fs.rmSync(root, { recursive: true, force: true })) + + const gitDir = path.join(root, '.git') + const srcDir = path.join(root, 'src') + const filePath = path.join(srcDir, 'index.ts') + fs.mkdirSync(gitDir) + fs.mkdirSync(srcDir) + fs.writeFileSync(filePath, 'export 
{}\n', 'utf8') + + assert.equal(await gitRootForIpc(root), root) + assert.equal(await gitRootForIpc(srcDir), root) + assert.equal(await gitRootForIpc(filePath), root) + assert.equal(await gitRootForIpc(pathToFileURL(filePath).toString()), root) + assert.equal(await gitRootForIpc(path.join(srcDir, 'missing.ts')), root) +}) diff --git a/apps/desktop/electron/hardening.cjs b/apps/desktop/electron/hardening.cjs index 4ffdea051b5..812dc3f77c7 100644 --- a/apps/desktop/electron/hardening.cjs +++ b/apps/desktop/electron/hardening.cjs @@ -106,71 +106,155 @@ function sensitiveFileBlockReason(filePath) { return null } -function resolveRequestedFilePath(filePath, baseDir = process.cwd(), purpose = 'File read') { - const raw = String(filePath || '').trim() +function ipcPathError(code, message) { + const error = new Error(message) + error.code = code + return error +} + +function rejectUnsafePathSyntax(filePath, purpose = 'File read') { + if (typeof filePath !== 'string') { + throw ipcPathError('invalid-path', `${purpose} failed: file path is required.`) + } + + const raw = filePath.trim() if (!raw) { - throw new Error(`${purpose} failed: file path is required.`) + throw ipcPathError('invalid-path', `${purpose} failed: file path is required.`) } if (raw.includes('\0')) { - throw new Error(`${purpose} failed: file path is invalid.`) + throw ipcPathError('invalid-path', `${purpose} failed: file path is invalid.`) } + const normalized = raw.replace(/\\/g, '/').toLowerCase() + if ( + normalized.startsWith('//?/') || + normalized.startsWith('//./') || + normalized.startsWith('globalroot/device/') || + normalized.includes('/globalroot/device/') + ) { + throw ipcPathError('device-path', `${purpose} blocked: Windows device paths are not allowed.`) + } + + return raw +} + +function resolveRequestedPathForIpc(filePath, options = {}) { + const purpose = String(options.purpose || 'File read') + const raw = rejectUnsafePathSyntax(filePath, purpose) + if (/^file:/i.test(raw)) { + let 
resolvedPath try { - return fileURLToPath(raw) + const parsed = new URL(raw) + if (parsed.protocol !== 'file:') { + throw new Error('not a file URL') + } + resolvedPath = fileURLToPath(parsed) } catch { - throw new Error(`${purpose} failed: file URL is invalid.`) + throw ipcPathError('invalid-path', `${purpose} failed: file URL is invalid.`) } + + rejectUnsafePathSyntax(resolvedPath, purpose) + return path.resolve(resolvedPath) } - const resolvedBase = path.resolve(String(baseDir || process.cwd())) - return path.resolve(resolvedBase, raw) + const baseInput = typeof options.baseDir === 'string' && options.baseDir.trim() ? options.baseDir : process.cwd() + const safeBaseInput = rejectUnsafePathSyntax(baseInput, purpose) + const resolvedBase = path.resolve(safeBaseInput) + rejectUnsafePathSyntax(resolvedBase, purpose) + const resolvedPath = path.resolve(resolvedBase, raw) + rejectUnsafePathSyntax(resolvedPath, purpose) + + return resolvedPath +} + +async function statForIpc(fsImpl, resolvedPath, purpose, typeLabel) { + try { + return await fsImpl.promises.stat(resolvedPath) + } catch (error) { + const code = error && typeof error === 'object' ? error.code : '' + if (code === 'ENOENT' || code === 'ENOTDIR') { + throw ipcPathError(code || 'ENOENT', `${purpose} failed: ${typeLabel} does not exist.`) + } + throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? error.message : String(error)}`) + } +} + +async function realpathForIpc(fsImpl, resolvedPath, purpose) { + if (typeof fsImpl.promises.realpath !== 'function') { + return resolvedPath + } + + try { + const realPath = await fsImpl.promises.realpath(resolvedPath) + rejectUnsafePathSyntax(realPath, purpose) + return realPath + } catch (error) { + const code = error && typeof error === 'object' ? error.code : '' + throw ipcPathError(code || 'read-error', `${purpose} failed: ${error instanceof Error ? 
error.message : String(error)}`) + } +} + +function rejectSensitiveFilePath(filePath, purpose) { + const blockReason = sensitiveFileBlockReason(filePath) + if (blockReason) { + throw ipcPathError('sensitive-file', `${purpose} blocked for sensitive file: ${blockReason}`) + } +} + +async function resolveDirectoryForIpc(dirPath, options = {}) { + const purpose = String(options.purpose || 'Directory read') + const fsImpl = options.fs || fs + const resolvedPath = resolveRequestedPathForIpc(dirPath, { baseDir: options.baseDir, purpose }) + const stat = await statForIpc(fsImpl, resolvedPath, purpose, 'directory') + + if (!stat.isDirectory()) { + throw ipcPathError('ENOTDIR', `${purpose} failed: path is not a directory.`) + } + + const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose) + + return { realPath, resolvedPath, stat } } async function resolveReadableFileForIpc(filePath, options = {}) { const purpose = String(options.purpose || 'File read') - const resolvedPath = resolveRequestedFilePath(filePath, options.baseDir, purpose) + const fsImpl = options.fs || fs + const resolvedPath = resolveRequestedPathForIpc(filePath, { baseDir: options.baseDir, purpose }) if (options.blockSensitive !== false) { - const blockReason = sensitiveFileBlockReason(resolvedPath) - if (blockReason) { - throw new Error(`${purpose} blocked for sensitive file: ${blockReason}`) - } + rejectSensitiveFilePath(resolvedPath, purpose) } - let stat - try { - stat = await fs.promises.stat(resolvedPath) - } catch (error) { - const code = error && typeof error === 'object' ? error.code : '' - if (code === 'ENOENT' || code === 'ENOTDIR') { - throw new Error(`${purpose} failed: file does not exist.`) - } - throw new Error(`${purpose} failed: ${error instanceof Error ? 
error.message : String(error)}`) - } + const stat = await statForIpc(fsImpl, resolvedPath, purpose, 'file') if (stat.isDirectory()) { - throw new Error(`${purpose} failed: path points to a directory.`) + throw ipcPathError('EISDIR', `${purpose} failed: path points to a directory.`) } if (!stat.isFile()) { - throw new Error(`${purpose} failed: only regular files can be read.`) + throw ipcPathError('EINVAL', `${purpose} failed: only regular files can be read.`) + } + + const realPath = await realpathForIpc(fsImpl, resolvedPath, purpose) + if (options.blockSensitive !== false) { + rejectSensitiveFilePath(realPath, purpose) } const maxBytes = Number.isFinite(options.maxBytes) && Number(options.maxBytes) > 0 ? Number(options.maxBytes) : null if (maxBytes && stat.size > maxBytes) { - throw new Error(`${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`) + throw ipcPathError('EFBIG', `${purpose} failed: file is too large (${stat.size} bytes; limit ${maxBytes} bytes).`) } try { - await fs.promises.access(resolvedPath, fs.constants.R_OK) + await fsImpl.promises.access(resolvedPath, fs.constants.R_OK) } catch { - throw new Error(`${purpose} failed: file is not readable.`) + throw ipcPathError('EACCES', `${purpose} failed: file is not readable.`) } - return { resolvedPath, stat } + return { realPath, resolvedPath, stat } } module.exports = { @@ -178,7 +262,10 @@ module.exports = { DEFAULT_FETCH_TIMEOUT_MS, TEXT_PREVIEW_SOURCE_MAX_BYTES, encryptDesktopSecret, + rejectUnsafePathSyntax, + resolveDirectoryForIpc, resolveReadableFileForIpc, + resolveRequestedPathForIpc, resolveTimeoutMs, sensitiveFileBlockReason } diff --git a/apps/desktop/electron/hardening.test.cjs b/apps/desktop/electron/hardening.test.cjs index 865da8fe797..a52ee27c830 100644 --- a/apps/desktop/electron/hardening.test.cjs +++ b/apps/desktop/electron/hardening.test.cjs @@ -8,11 +8,20 @@ const { pathToFileURL } = require('node:url') const { DEFAULT_FETCH_TIMEOUT_MS, 
encryptDesktopSecret, + resolveDirectoryForIpc, resolveReadableFileForIpc, + resolveRequestedPathForIpc, resolveTimeoutMs, sensitiveFileBlockReason } = require('./hardening.cjs') +async function rejectsWithCode(promise, code) { + await assert.rejects(promise, error => { + assert.equal(error?.code, code) + return true + }) +} + test('resolveTimeoutMs falls back to defaults and accepts overrides', () => { assert.equal(resolveTimeoutMs(undefined), DEFAULT_FETCH_TIMEOUT_MS) assert.equal(resolveTimeoutMs(0), DEFAULT_FETCH_TIMEOUT_MS) @@ -51,6 +60,52 @@ test('sensitiveFileBlockReason blocks obvious secret file patterns', () => { assert.match(String(sensitiveFileBlockReason('/tmp/server-cert.pem')), /\.pem/) }) +test('path helpers reject blank non-string NUL and Windows device syntax', async () => { + await rejectsWithCode(resolveReadableFileForIpc('', { purpose: 'File preview' }), 'invalid-path') + await rejectsWithCode(resolveReadableFileForIpc(' ', { purpose: 'File preview' }), 'invalid-path') + await rejectsWithCode(resolveReadableFileForIpc(null, { purpose: 'File preview' }), 'invalid-path') + await rejectsWithCode(resolveReadableFileForIpc(`safe${String.fromCharCode(0)}name.txt`), 'invalid-path') + + const devicePaths = [ + '\\\\?\\C:\\secret.txt', + '\\\\.\\C:\\secret.txt', + '\\\\?\\UNC\\server\\share\\secret.txt', + 'GLOBALROOT/Device/HarddiskVolumeShadowCopy1/secret.txt' + ] + + for (const devicePath of devicePaths) { + assert.throws( + () => resolveRequestedPathForIpc(devicePath, { purpose: 'File preview' }), + error => { + assert.equal(error?.code, 'device-path') + return true + } + ) + await rejectsWithCode(resolveReadableFileForIpc(devicePath, { purpose: 'File preview' }), 'device-path') + } + + assert.throws( + () => resolveRequestedPathForIpc('file:///%E0%A4%A', { purpose: 'File preview' }), + error => { + assert.equal(error?.code, 'invalid-path') + return true + } + ) + await rejectsWithCode(resolveReadableFileForIpc('file:///%E0%A4%A', { purpose: 'File 
preview' }), 'invalid-path') +}) + +test('resolveRequestedPathForIpc resolves relative paths from the trimmed base directory', () => { + const baseDir = path.join(os.tmpdir(), 'hermes-desktop-base') + + assert.equal( + resolveRequestedPathForIpc('notes.txt', { + baseDir: ` ${baseDir} `, + purpose: 'File preview' + }), + path.resolve(baseDir, 'notes.txt') + ) +}) + test('resolveReadableFileForIpc validates existence type size and sensitivity', async t => { const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-hardening-')) t.after(() => fs.rmSync(tempDir, { recursive: true, force: true })) @@ -71,6 +126,13 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity', }) assert.equal(fromFileUrl.resolvedPath, textPath) + const spacedPath = path.join(tempDir, 'notes with spaces.txt') + fs.writeFileSync(spacedPath, 'space ok', 'utf8') + const fromSpacedFileUrl = await resolveReadableFileForIpc(pathToFileURL(spacedPath).toString(), { + purpose: 'File preview' + }) + assert.equal(fromSpacedFileUrl.resolvedPath, spacedPath) + await assert.rejects( resolveReadableFileForIpc('missing.txt', { baseDir: tempDir, @@ -114,3 +176,91 @@ test('resolveReadableFileForIpc validates existence type size and sensitivity', }) assert.equal(envTemplate.resolvedPath, envTemplatePath) }) + +test('resolveReadableFileForIpc blocks common sensitive files', async t => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-sensitive-')) + t.after(() => fs.rmSync(tempDir, { recursive: true, force: true })) + + const sshDir = path.join(tempDir, '.ssh') + fs.mkdirSync(sshDir) + + const blockedFiles = [ + path.join(tempDir, '.env'), + path.join(tempDir, '.npmrc'), + path.join(sshDir, 'id_ed25519'), + path.join(tempDir, 'cert.pem'), + path.join(tempDir, 'cert.p12'), + path.join(tempDir, 'cert.pfx') + ] + + for (const filePath of blockedFiles) { + fs.writeFileSync(filePath, 'secret', 'utf8') + await 
rejectsWithCode(resolveReadableFileForIpc(filePath, { purpose: 'File preview' }), 'sensitive-file') + } + + const allowed = path.join(tempDir, '.env.example') + fs.writeFileSync(allowed, 'EXAMPLE_TOKEN=value', 'utf8') + assert.equal((await resolveReadableFileForIpc(allowed, { purpose: 'File preview' })).resolvedPath, allowed) +}) + +test('resolveReadableFileForIpc blocks symlinks whose realpath is sensitive', async t => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-realpath-')) + t.after(() => fs.rmSync(tempDir, { recursive: true, force: true })) + + const envPath = path.join(tempDir, '.env') + const linkPath = path.join(tempDir, 'safe-name.txt') + fs.writeFileSync(envPath, 'SECRET_TOKEN=123', 'utf8') + + try { + fs.symlinkSync(envPath, linkPath, 'file') + } catch (error) { + if (error?.code === 'EPERM' || error?.code === 'EACCES') { + t.skip(`symlink creation is not permitted on this platform (${error.code})`) + return + } + throw error + } + + await rejectsWithCode(resolveReadableFileForIpc(linkPath, { purpose: 'File preview' }), 'sensitive-file') +}) + +test('resolveDirectoryForIpc accepts directories and rejects invalid directory targets', async t => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-dir-')) + t.after(() => fs.rmSync(tempDir, { recursive: true, force: true })) + + const directory = path.join(tempDir, 'project') + const filePath = path.join(tempDir, 'file.txt') + fs.mkdirSync(directory) + fs.writeFileSync(filePath, 'not a directory', 'utf8') + + const resolved = await resolveDirectoryForIpc(directory) + assert.equal(resolved.resolvedPath, directory) + assert.equal(resolved.stat.isDirectory(), true) + + await rejectsWithCode(resolveDirectoryForIpc(filePath), 'ENOTDIR') + await rejectsWithCode(resolveDirectoryForIpc(path.join(tempDir, 'missing')), 'ENOENT') + await rejectsWithCode(resolveDirectoryForIpc('\\\\?\\C:\\secret'), 'device-path') +}) + +test('resolveDirectoryForIpc accepts directory 
symlinks or junctions', async t => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'hermes-desktop-dir-link-')) + t.after(() => fs.rmSync(tempDir, { recursive: true, force: true })) + + const directory = path.join(tempDir, 'actual-project') + const linkPath = path.join(tempDir, 'linked-project') + fs.mkdirSync(directory) + + try { + fs.symlinkSync(directory, linkPath, process.platform === 'win32' ? 'junction' : 'dir') + } catch (error) { + if (error?.code === 'EPERM' || error?.code === 'EACCES') { + t.skip(`directory symlink creation is not permitted on this platform (${error.code})`) + return + } + throw error + } + + const resolved = await resolveDirectoryForIpc(linkPath) + assert.equal(resolved.resolvedPath, linkPath) + assert.equal(resolved.stat.isDirectory(), true) +}) diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index 5e128421a83..9abfc216e56 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -22,7 +22,7 @@ const http = require('node:http') const https = require('node:https') const net = require('node:net') const path = require('node:path') -const { fileURLToPath, pathToFileURL } = require('node:url') +const { pathToFileURL } = require('node:url') const { execFileSync, spawn } = require('node:child_process') const { detectRemoteDisplay, isWindowsBinaryPathInWsl, isWslEnvironment } = require('./bootstrap-platform.cjs') const { runBootstrap } = require('./bootstrap-runner.cjs') @@ -31,6 +31,12 @@ const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs') const { probeGatewayWebSocket } = require('./gateway-ws-probe.cjs') const { serializeJsonBody, setJsonRequestHeaders } = require('./oauth-net-request.cjs') const { fetchMarketplaceThemes, searchMarketplaceThemes } = require('./vscode-marketplace.cjs') +const { readDirForIpc } = require('./fs-read-dir.cjs') +const { gitRootForIpc } = require('./git-root.cjs') +const { + OFFICIAL_REPO_HTTPS_URL, + isOfficialSshRemote 
+} = require('./update-remote.cjs') const { buildPosixCleanupScript, buildWindowsCleanupScript, @@ -61,6 +67,7 @@ const { TEXT_PREVIEW_SOURCE_MAX_BYTES, encryptDesktopSecret: encryptDesktopSecretStrict, resolveReadableFileForIpc, + resolveRequestedPathForIpc, resolveTimeoutMs } = require('./hardening.cjs') @@ -726,7 +733,7 @@ function openExternalUrl(rawUrl) { if (parsed.protocol === 'file:') { let localPath try { - localPath = fileURLToPath(parsed.toString()) + localPath = resolveRequestedPathForIpc(parsed.toString(), { purpose: 'Open external file' }) } catch { return false } @@ -1312,6 +1319,11 @@ function runGit(args, options = {}) { const firstLine = text => (text || '').split('\n').find(Boolean) || '' +async function getOriginUrl(updateRoot) { + const origin = await runGit(['remote', 'get-url', 'origin'], { cwd: updateRoot }) + return origin.code === 0 ? origin.stdout.trim() : '' +} + function emitUpdateProgress(payload) { const merged = { stage: 'idle', message: '', percent: null, error: null, ...payload, at: Date.now() } rememberLog(`[updates] ${merged.stage}: ${merged.message || merged.error || ''}`) @@ -1331,7 +1343,9 @@ async function resolveHealedBranch(updateRoot, branch) { return branch || 'main' } - const probe = await runGit(['ls-remote', '--exit-code', '--heads', 'origin', branch], { cwd: updateRoot }) + const originUrl = await getOriginUrl(updateRoot) + const remote = isOfficialSshRemote(originUrl) ? 
OFFICIAL_REPO_HTTPS_URL : 'origin' + const probe = await runGit(['ls-remote', '--exit-code', '--heads', remote, branch], { cwd: updateRoot }) if (probe.code !== 2) { return branch } @@ -1359,6 +1373,40 @@ async function checkUpdates() { } branch = await resolveHealedBranch(updateRoot, branch) + const originUrl = await getOriginUrl(updateRoot) + if (isOfficialSshRemote(originUrl)) { + const git = args => runGit(args, { cwd: updateRoot }).then(r => r.stdout.trim()) + const [currentSha, target, dirtyStr, currentBranch] = await Promise.all([ + git(['rev-parse', 'HEAD']), + runGit(['ls-remote', OFFICIAL_REPO_HTTPS_URL, `refs/heads/${branch}`], { cwd: updateRoot }), + git(['status', '--porcelain']), + git(['rev-parse', '--abbrev-ref', 'HEAD']) + ]) + const targetSha = firstLine(target.stdout).split(/\s+/)[0] || '' + if (target.code !== 0 || !targetSha) { + return { + supported: true, + branch, + error: 'fetch-failed', + message: firstLine(target.stderr) || 'git ls-remote failed.', + hermesRoot: updateRoot, + fetchedAt: Date.now() + } + } + return { + supported: true, + branch, + currentBranch, + behind: currentSha && currentSha === targetSha ? 0 : 1, + currentSha, + targetSha, + commits: [], + dirty: dirtyStr.length > 0, + hermesRoot: updateRoot, + fetchedAt: Date.now() + } + } + const fetched = await runGit(['fetch', '--quiet', 'origin', branch], { cwd: updateRoot }) if (fetched.code !== 0) { return { @@ -2833,10 +2881,10 @@ async function resourceBufferFromUrl(rawUrl) { const buffer = match[2] ? 
Buffer.from(encoded, 'base64') : Buffer.from(decodeURIComponent(encoded), 'utf8') return { buffer, mimeType } } - if (rawUrl.startsWith('file:')) { - const filePath = fileURLToPath(rawUrl) - const buffer = await fs.promises.readFile(filePath) - return { buffer, mimeType: mimeTypeForPath(filePath) } + if (/^file:/i.test(rawUrl)) { + const { resolvedPath } = await resolveReadableFileForIpc(rawUrl, { purpose: 'Image file' }) + const buffer = await fs.promises.readFile(resolvedPath) + return { buffer, mimeType: mimeTypeForPath(resolvedPath) } } const parsed = new URL(rawUrl) @@ -2914,11 +2962,13 @@ function expandUserPath(filePath) { return value } -function previewFileTarget(rawTarget, baseDir) { +async function previewFileTarget(rawTarget, baseDir) { const raw = String(rawTarget || '').trim() const base = baseDir ? path.resolve(expandUserPath(baseDir)) : resolveHermesCwd() - const filePath = raw.startsWith('file:') ? fileURLToPath(raw) : path.resolve(base, expandUserPath(raw)) - let resolved = filePath + let resolved = resolveRequestedPathForIpc(/^file:/i.test(raw) ? 
raw : expandUserPath(raw), { + baseDir: base, + purpose: 'Preview target' + }) if (directoryExists(resolved)) { resolved = path.join(resolved, 'index.html') @@ -2929,6 +2979,8 @@ function previewFileTarget(rawTarget, baseDir) { return null } + ;({ resolvedPath: resolved } = await resolveReadableFileForIpc(resolved, { purpose: 'Preview target' })) + const mimeType = mimeTypeForPath(resolved) const metadata = previewFileMetadata(resolved, mimeType) const isHtml = PREVIEW_HTML_EXTENSIONS.has(ext) @@ -2974,7 +3026,7 @@ function previewUrlTarget(rawTarget) { } } -function normalizePreviewTarget(rawTarget, baseDir) { +async function normalizePreviewTarget(rawTarget, baseDir) { const raw = String(rawTarget || '').trim() if (!raw) { @@ -2986,20 +3038,15 @@ function normalizePreviewTarget(rawTarget, baseDir) { return previewUrlTarget(raw) } - return previewFileTarget(raw, baseDir) + return await previewFileTarget(raw, baseDir) } catch { return null } } -function filePathFromPreviewUrl(rawUrl) { - const filePath = fileURLToPath(String(rawUrl || '')) - - if (!fileExists(filePath)) { - throw new Error('Preview file is not readable') - } - - return filePath +async function filePathFromPreviewUrl(rawUrl) { + const { resolvedPath } = await resolveReadableFileForIpc(String(rawUrl || ''), { purpose: 'Preview file' }) + return resolvedPath } function sendPreviewFileChanged(payload) { @@ -3009,8 +3056,8 @@ function sendPreviewFileChanged(payload) { webContents.send('hermes:preview-file-changed', payload) } -function watchPreviewFile(rawUrl) { - const filePath = filePathFromPreviewUrl(rawUrl) +async function watchPreviewFile(rawUrl) { + const filePath = await filePathFromPreviewUrl(rawUrl) const watchDir = path.dirname(filePath) const targetName = path.basename(filePath) const id = crypto.randomBytes(12).toString('base64url') @@ -5542,48 +5589,6 @@ ipcMain.handle('hermes:logs:reveal', async () => { ipcMain.handle('hermes:logs:recent', async () => ({ path: DESKTOP_LOG_PATH, lines: 
hermesLog.slice(-200) })) -// Always-hidden noise (covers non-git projects too — gitignore would catch -// these anyway when present, but we want the same hygiene without one). -const FS_READDIR_HIDDEN = new Set([ - '.git', - '.hg', - '.svn', - '.cache', - '.next', - '.turbo', - '.venv', - '__pycache__', - 'build', - 'dist', - 'node_modules', - 'target', - 'venv' -]) - -function findGitRoot(start) { - let dir = start - - for (let i = 0; i < 50; i += 1) { - try { - if (fs.existsSync(path.join(dir, '.git'))) { - return dir - } - } catch { - return null - } - - const parent = path.dirname(dir) - - if (parent === dir) { - return null - } - - dir = parent - } - - return null -} - function isExecutableFile(filePath) { if (!filePath || !path.isAbsolute(filePath)) { return false @@ -5766,46 +5771,9 @@ function disposeTerminalSession(id) { return true } -ipcMain.handle('hermes:fs:readDir', async (_event, dirPath) => { - const resolved = path.resolve(String(dirPath || '')) +ipcMain.handle('hermes:fs:readDir', async (_event, dirPath) => readDirForIpc(dirPath)) - if (!resolved) { - return { entries: [], error: 'invalid-path' } - } - - try { - const dirents = await fs.promises.readdir(resolved, { withFileTypes: true }) - - const entries = dirents - .filter(d => { - if (FS_READDIR_HIDDEN.has(d.name)) { - return false - } - - return true - }) - .map(d => ({ name: d.name, path: path.join(resolved, d.name), isDirectory: d.isDirectory() })) - .sort((a, b) => Number(b.isDirectory) - Number(a.isDirectory) || a.name.localeCompare(b.name)) - - return { entries } - } catch (error) { - return { entries: [], error: error?.code || 'read-error' } - } -}) - -ipcMain.handle('hermes:fs:gitRoot', async (_event, startPath) => { - const input = String(startPath || '') - const resolved = input.startsWith('file:') ? fileURLToPath(input) : path.resolve(input) - - try { - const stat = await fs.promises.stat(resolved) - const start = stat.isDirectory() ? 
resolved : path.dirname(resolved) - - return findGitRoot(start) - } catch { - return findGitRoot(resolved) - } -}) +ipcMain.handle('hermes:fs:gitRoot', async (_event, startPath) => gitRootForIpc(startPath)) ipcMain.handle('hermes:terminal:start', async (event, payload = {}) => { if (!nodePty) { diff --git a/apps/desktop/electron/update-remote.cjs b/apps/desktop/electron/update-remote.cjs new file mode 100644 index 00000000000..3cb432d1b1e --- /dev/null +++ b/apps/desktop/electron/update-remote.cjs @@ -0,0 +1,56 @@ +/** + * Pure helpers for choosing a remote URL during passive update checks. + * + * A public install can end up with `origin=git@github.com:NousResearch/hermes-agent.git`. + * If the user's GitHub SSH key is FIDO2/passkey-backed, a background `git fetch + * origin` triggers an unexplained hardware-touch prompt. For passive checks + * against the official repo we substitute the public HTTPS `ls-remote` path, + * which needs no auth and cannot prompt. Active update/apply flows are left + * unchanged. + * + * Extracted from main.cjs so the security-critical remote detection is unit + * testable without booting Electron (main.cjs requires('electron') at load). + */ + +const OFFICIAL_REPO_HTTPS_URL = 'https://github.com/NousResearch/hermes-agent.git' +const OFFICIAL_REPO_CANONICAL = 'github.com/nousresearch/hermes-agent' + +// Normalize common GitHub remote URL forms to `host/owner/repo` (lowercased, +// no trailing slash, no .git suffix) so SSH and HTTPS forms of the same repo +// compare equal. 
+function canonicalGitHubRemote(url) { + if (!url) return '' + let value = String(url).trim() + if (value.startsWith('git@github.com:')) { + value = `github.com/${value.slice('git@github.com:'.length)}` + } else if (value.startsWith('ssh://git@github.com/')) { + value = `github.com/${value.slice('ssh://git@github.com/'.length)}` + } else { + try { + const parsed = new URL(value) + if (parsed.hostname && parsed.pathname) value = `${parsed.hostname}${parsed.pathname}` + } catch { + // Leave non-URL forms unchanged. + } + } + value = value.trim().replace(/\/+$/, '') + if (value.endsWith('.git')) value = value.slice(0, -4) + return value.toLowerCase() +} + +function isSshRemote(url) { + const value = String(url || '').trim().toLowerCase() + return value.startsWith('git@') || value.startsWith('ssh://') +} + +function isOfficialSshRemote(url) { + return isSshRemote(url) && canonicalGitHubRemote(url) === OFFICIAL_REPO_CANONICAL +} + +module.exports = { + OFFICIAL_REPO_HTTPS_URL, + OFFICIAL_REPO_CANONICAL, + canonicalGitHubRemote, + isSshRemote, + isOfficialSshRemote +} diff --git a/apps/desktop/electron/update-remote.test.cjs b/apps/desktop/electron/update-remote.test.cjs new file mode 100644 index 00000000000..0dfba970138 --- /dev/null +++ b/apps/desktop/electron/update-remote.test.cjs @@ -0,0 +1,78 @@ +/** + * Tests for electron/update-remote.cjs — the remote-detection helpers that + * keep passive update checks off the SSH origin for official installs. + * + * Run with: node --test electron/update-remote.test.cjs + * (Wired into npm test:desktop:platforms in package.json.) + * + * Why this matters: a public install can carry + * origin=git@github.com:NousResearch/hermes-agent.git. A background + * `git fetch origin` then authenticates over SSH and, with a FIDO2/passkey + * key, triggers an unexplained hardware-touch prompt. 
isOfficialSshRemote + * must reliably recognize the official SSH remote (in every URL form, + * case-insensitively) so the caller can swap in the anonymous HTTPS path — + * while NOT misclassifying forks, other hosts, or the HTTPS remote (which + * never prompts and should keep the normal fetch path). + */ + +const test = require('node:test') +const assert = require('node:assert/strict') + +const { + OFFICIAL_REPO_HTTPS_URL, + OFFICIAL_REPO_CANONICAL, + canonicalGitHubRemote, + isSshRemote, + isOfficialSshRemote +} = require('./update-remote.cjs') + +test('canonicalGitHubRemote normalizes SSH and HTTPS forms to the same value', () => { + assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL) + assert.equal(canonicalGitHubRemote('git@github.com:NousResearch/hermes-agent'), OFFICIAL_REPO_CANONICAL) + assert.equal(canonicalGitHubRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL) + assert.equal(canonicalGitHubRemote('https://github.com/NousResearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL) + // Case-insensitive: a lowercased owner still canonicalizes to the same repo. + assert.equal(canonicalGitHubRemote('git@github.com:nousresearch/hermes-agent.git'), OFFICIAL_REPO_CANONICAL) + // Trailing slashes are stripped.
+ assert.equal(canonicalGitHubRemote('https://github.com/NousResearch/hermes-agent/'), OFFICIAL_REPO_CANONICAL) +}) + +test('canonicalGitHubRemote is empty for falsy input', () => { + assert.equal(canonicalGitHubRemote(''), '') + assert.equal(canonicalGitHubRemote(null), '') + assert.equal(canonicalGitHubRemote(undefined), '') +}) + +test('isSshRemote detects scp-like and ssh:// forms only', () => { + assert.equal(isSshRemote('git@github.com:NousResearch/hermes-agent.git'), true) + assert.equal(isSshRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), true) + assert.equal(isSshRemote('https://github.com/NousResearch/hermes-agent.git'), false) + assert.equal(isSshRemote(''), false) + assert.equal(isSshRemote(null), false) +}) + +test('isOfficialSshRemote is true only for the official repo over SSH', () => { + assert.equal(isOfficialSshRemote('git@github.com:NousResearch/hermes-agent.git'), true) + assert.equal(isOfficialSshRemote('git@github.com:NousResearch/hermes-agent'), true) + assert.equal(isOfficialSshRemote('ssh://git@github.com/NousResearch/hermes-agent.git'), true) + // Case-insensitive owner/repo match. + assert.equal(isOfficialSshRemote('git@github.com:nousresearch/hermes-agent.git'), true) +}) + +test('isOfficialSshRemote does NOT match forks, other hosts, or HTTPS', () => { + // A fork over SSH belongs to the user — fetching it is their own remote, + // not the official upstream, so the SSH-avoidance swap must not apply. + assert.equal(isOfficialSshRemote('git@github.com:someuser/hermes-agent.git'), false) + // Same repo name on a different host is not the official repo. + assert.equal(isOfficialSshRemote('git@gitlab.com:NousResearch/hermes-agent.git'), false) + // HTTPS to the official repo never prompts for SSH/FIDO2, so it keeps the + // normal fetch path — must not be flagged as an official SSH remote. 
+ assert.equal(isOfficialSshRemote('https://github.com/NousResearch/hermes-agent.git'), false) + assert.equal(isOfficialSshRemote(''), false) + assert.equal(isOfficialSshRemote(null), false) +}) + +test('OFFICIAL_REPO_HTTPS_URL canonicalizes to OFFICIAL_REPO_CANONICAL', () => { + // Invariant: the URL we substitute in must be the same repo we detect. + assert.equal(canonicalGitHubRemote(OFFICIAL_REPO_HTTPS_URL), OFFICIAL_REPO_CANONICAL) +}) diff --git a/apps/desktop/package.json b/apps/desktop/package.json index e373fc78825..d03bd7cd0ad 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -35,7 +35,7 @@ "test:desktop:nsis": "node scripts/test-desktop.mjs nsis", "test:desktop:existing": "node scripts/test-desktop.mjs existing", "test:desktop:fresh": "node scripts/test-desktop.mjs fresh", - "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/windows-child-process.test.cjs", + "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs electron/bootstrap-runner.test.cjs electron/connection-config.test.cjs electron/gateway-ws-probe.test.cjs electron/oauth-net-request.test.cjs electron/desktop-uninstall.test.cjs electron/session-windows.test.cjs electron/workspace-cwd.test.cjs electron/fs-read-dir.test.cjs electron/git-root.test.cjs electron/windows-child-process.test.cjs electron/update-remote.test.cjs", "typecheck": "tsc -p . 
--noEmit", "lint": "eslint src/ electron/", "lint:fix": "eslint src/ electron/ --fix", diff --git a/apps/desktop/src/app/chat/composer/completion-drawer.tsx b/apps/desktop/src/app/chat/composer/completion-drawer.tsx index 8b23c54f879..d7738cb82a7 100644 --- a/apps/desktop/src/app/chat/composer/completion-drawer.tsx +++ b/apps/desktop/src/app/chat/composer/completion-drawer.tsx @@ -3,32 +3,25 @@ import { ComposerPrimitive } from '@assistant-ui/react' import type { ReactNode } from 'react' export const COMPLETION_DRAWER_CLASS = [ - 'absolute bottom-[calc(100%+0.25rem)] left-0 z-50', - 'w-60 max-w-[calc(100vw-2rem)]', - 'max-h-[min(23rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain', - 'rounded-lg border border-(--ui-stroke-secondary)', - 'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_96%,transparent)]', - 'p-1 text-xs text-popover-foreground shadow-md', + 'absolute bottom-[calc(100%+0.375rem)] left-0 z-50', + 'w-80 max-w-[calc(100vw-2rem)]', + 'max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain', + 'rounded-xl border border-(--ui-stroke-secondary)', + 'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_97%,transparent)]', + 'p-1 text-xs text-popover-foreground shadow-lg', 'backdrop-blur-md' ].join(' ') export const COMPLETION_DRAWER_BELOW_CLASS = [ - 'absolute left-0 top-[calc(100%+0.25rem)] z-50', - 'w-60 max-w-[calc(100vw-2rem)]', - 'max-h-[min(23rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain', - 'rounded-lg border border-(--ui-stroke-secondary)', - 'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_96%,transparent)]', - 'p-1 text-xs text-popover-foreground shadow-md', + 'absolute left-0 top-[calc(100%+0.375rem)] z-50', + 'w-80 max-w-[calc(100vw-2rem)]', + 'max-h-[min(22rem,calc(100vh-8rem))] overflow-y-auto overscroll-contain', + 'rounded-xl border border-(--ui-stroke-secondary)', + 'bg-[color-mix(in_srgb,var(--ui-bg-elevated)_97%,transparent)]', + 'p-1 text-xs text-popover-foreground shadow-lg', 'backdrop-blur-md' ].join(' ') -export 
const COMPLETION_DRAWER_ROW_CLASS = [ - 'relative flex cursor-default select-none items-center gap-2 rounded-md px-2 py-1', - 'w-full min-w-0 text-left text-xs outline-hidden transition-colors', - 'hover:bg-(--ui-bg-tertiary)', - 'data-[highlighted]:bg-(--ui-bg-tertiary) data-[highlighted]:text-foreground' -].join(' ') - export function ComposerCompletionDrawer({ adapter, ariaLabel, diff --git a/apps/desktop/src/app/chat/composer/hooks/use-live-completion-adapter.ts b/apps/desktop/src/app/chat/composer/hooks/use-live-completion-adapter.ts index fbeca7d59ee..6da699b602a 100644 --- a/apps/desktop/src/app/chat/composer/hooks/use-live-completion-adapter.ts +++ b/apps/desktop/src/app/chat/composer/hooks/use-live-completion-adapter.ts @@ -5,6 +5,13 @@ export interface CompletionEntry { text: string display?: unknown meta?: unknown + /** Optional section label (e.g. "Commands", "Skills"). The popover renders a + * header whenever this changes between consecutive items, so the fetcher must + * emit entries already grouped contiguously. */ + group?: string + /** Optional completion-action id. When set, picking the item runs that action + * (e.g. opening an overlay) instead of inserting a chip + waiting for submit. 
*/ + action?: string } export interface CompletionPayload { diff --git a/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts b/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts index f3344158097..b0bac82825c 100644 --- a/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts +++ b/apps/desktop/src/app/chat/composer/hooks/use-slash-completions.ts @@ -2,12 +2,17 @@ import type { Unstable_TriggerAdapter, Unstable_TriggerItem } from '@assistant-u import { useCallback } from 'react' import type { HermesGateway } from '@/hermes' +import { sessionTitle } from '@/lib/chat-runtime' import { type CommandsCatalogLike, + desktopSkinSlashCompletions, desktopSlashDescription, + type DesktopThemeCommandOption, filterDesktopCommandsCatalog, + isDesktopSlashExtensionCommand, isDesktopSlashSuggestion } from '@/lib/desktop-slash-commands' +import { $sessions } from '@/store/session' import type { CompletionEntry, CompletionPayload } from './use-live-completion-adapter' import { useLiveCompletionAdapter } from './use-live-completion-adapter' @@ -16,7 +21,10 @@ interface SlashItemMetadata extends Record { command: string display: string meta: string + group: string rawText: string + /** Completion-action id; empty for ordinary insert-a-chip completions. */ + action: string } function textValue(value: unknown, fallback = ''): string { @@ -38,12 +46,21 @@ function commandText(value: string): string { return value.startsWith('/') ? value : `/${value}` } +/** How many recent sessions to surface inline before the "Browse all…" entry. */ +const SESSION_INLINE_LIMIT = 7 + /** Live `/` completions backed by the gateway's `complete.slash` RPC. 
*/ -export function useSlashCompletions(options: { gateway: HermesGateway | null }): { +export function useSlashCompletions(options: { + gateway: HermesGateway | null + /** Desktop theme list — `/skin` is owned client-side, so its arg completions + * come from here, not the backend (whose skin list is CLI/TUI-only). */ + skinThemes?: DesktopThemeCommandOption[] + activeSkin?: string +}): { adapter: Unstable_TriggerAdapter loading: boolean } { - const { gateway } = options + const { gateway, skinThemes, activeSkin } = options const enabled = Boolean(gateway) const fetcher = useCallback( @@ -54,34 +71,136 @@ export function useSlashCompletions(options: { gateway: HermesGateway | null }): const text = `/${query}` + // The desktop owns /skin entirely (client-side theme context). Surface its + // theme list inside this single popover instead of a bespoke one, and skip + // the backend skin completions (which describe CLI/TUI skins that don't + // apply here). Matches once we're past `/skin ` into the arg stage. + const skinArg = /^\/skin\s+(.*)$/is.exec(text) + + if (skinArg && skinThemes) { + const items = desktopSkinSlashCompletions(skinThemes, activeSkin ?? '', skinArg[1] ?? '').map(entry => ({ + text: entry.text, + display: entry.display, + meta: entry.meta, + group: 'Themes' + })) + + return { items, query } + } + + // /resume (and its aliases) completes recent sessions inline — the same + // client-side list the picker overlay shows — instead of the backend + // (whose /resume opens an interactive TUI picker we can't render here). + const sessionArg = /^\/(?:resume|sessions|switch)\s+(.*)$/is.exec(text) + + if (sessionArg) { + const needle = (sessionArg[1] ?? '').trim().toLowerCase() + + const matches = ( + needle + ? $sessions.get().filter( + session => + sessionTitle(session).toLowerCase().includes(needle) || + (session.preview ?? 
'').toLowerCase().includes(needle) || + session.id.toLowerCase().includes(needle) + ) + : $sessions.get() + ).slice(0, SESSION_INLINE_LIMIT) + + const items: CompletionEntry[] = matches.map(session => ({ + text: `/resume ${session.id}`, + display: sessionTitle(session), + meta: (session.preview ?? '').trim(), + group: 'Sessions' + })) + + // Trailing "more" affordance (Cursor-style): picking it opens the full + // session picker overlay directly. `text` stays a bare `/resume` so that + // submitting it (Enter) still opens the overlay if the action is skipped. + items.push({ + text: '/resume', + display: 'Browse all sessions…', + meta: '', + group: 'Sessions', + action: 'session-picker' + }) + + return { items, query } + } + try { if (!query) { const catalog = filterDesktopCommandsCatalog(await gateway.request('commands.catalog')) - const items = (catalog.pairs ?? []).map(([command, meta]) => ({ - text: command, - display: command, - meta - })) + // Prefer the categorized layout so the popover renders section headers + // (Session, Tools & Skills, ...). Fall back to the flat list when the + // backend didn't categorize. + const sections = catalog.categories?.length + ? catalog.categories + : [{ name: '', pairs: catalog.pairs ?? [] }] + + const items = sections.flatMap(section => + section.pairs.map(([command, meta]) => ({ + text: command, + display: command, + group: section.name || undefined, + meta + })) + ) return { items, query } } - const result = await gateway.request<{ items?: CompletionEntry[] }>('complete.slash', { text }) + const result = await gateway.request<{ items?: CompletionEntry[]; replace_from?: number }>( + 'complete.slash', + { text } + ) - const items = (result.items ?? []) - .filter(item => isDesktopSlashSuggestion(item.text)) + // Arg-completion items (replace_from > 1) carry just the arg stub — + // e.g. complete.slash returns `{text: "alice"}` for `/personality alic` + // with replace_from = 14. 
Rewrite those entries so the popover inserts + // the full `/personality alice` token instead of stranding `/alice`. + const replaceFrom = typeof result.replace_from === 'number' ? result.replace_from : 1 + const isArgCompletion = replaceFrom > 1 + const prefix = isArgCompletion ? text.slice(0, replaceFrom) : '' + + const decorated = (result.items ?? []) + .map(item => { + if (!isArgCompletion) { + return item + } + + const argText = typeof item.text === 'string' ? item.text : '' + + return { ...item, text: `${prefix}${argText}` } + }) + .filter(item => isArgCompletion || isDesktopSlashSuggestion(item.text)) .map(item => ({ ...item, - meta: desktopSlashDescription(item.text, textValue(item.meta)) + // Arg suggestions (e.g. `/handoff `) live under one + // header; otherwise split skills out from built-in commands. + group: isArgCompletion ? 'Options' : isDesktopSlashExtensionCommand(item.text) ? 'Skills' : 'Commands', + // Arg items carry their own meta (the personality/toolset/platform + // blurb). Only command rows get the registry description — looking + // one up for `/personality none` would clobber it with the parent + // command's text. + meta: isArgCompletion ? textValue(item.meta) : desktopSlashDescription(item.text, textValue(item.meta)) })) + // Keep each group contiguous so headers render once: Commands before + // Skills (stable within a group, preserving backend relevance order). + const groupOrder = ['Commands', 'Skills', 'Options'] + + const items = isArgCompletion + ? 
decorated + : [...decorated].sort((a, b) => groupOrder.indexOf(a.group) - groupOrder.indexOf(b.group)) + return { items, query } } catch { return { items: [], query } } }, - [gateway] + [gateway, skinThemes, activeSkin] ) const toItem = useCallback((entry: CompletionEntry, index: number): Unstable_TriggerItem => { @@ -93,6 +212,8 @@ export function useSlashCompletions(options: { gateway: HermesGateway | null }): command, display, meta, + group: textValue(entry.group), + action: textValue(entry.action), // Provide rawText so hermesDirectiveFormatter.serialize uses the // direct-insertion path instead of the legacy @type:id fallback. // Without this, the item.id (which includes a "|index" suffix for diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx index d8b06a68d37..c18313a7386 100644 --- a/apps/desktop/src/app/chat/composer/index.tsx +++ b/apps/desktop/src/app/chat/composer/index.tsx @@ -13,17 +13,25 @@ import { useState } from 'react' -import { hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text' +import { hermesDirectiveFormatter, type SlashChipKind } from '@/components/assistant-ui/directive-text' import { Button } from '@/components/ui/button' import { useMediaQuery } from '@/hooks/use-media-query' import { useResizeObserver } from '@/hooks/use-resize-observer' import { useI18n } from '@/i18n' import { chatMessageText } from '@/lib/chat-messages' import { SLASH_COMMAND_RE } from '@/lib/chat-runtime' +import { desktopSlashCommandTakesArgs } from '@/lib/desktop-slash-commands' import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images' import { triggerHaptic } from '@/lib/haptics' import { cn } from '@/lib/utils' -import { $composerAttachments, clearComposerAttachments, type ComposerAttachment } from '@/store/composer' +import { + $composerAttachments, + clearComposerAttachments, + clearSessionDraft, + type ComposerAttachment, + stashSessionDraft, + takeSessionDraft +} from 
'@/store/composer' import { browseBackward, browseForward, @@ -40,8 +48,9 @@ import { shouldAutoDrainOnSettle, updateQueuedPrompt } from '@/store/composer-queue' -import { $gatewayState, $messages } from '@/store/session' +import { $gatewayState, $messages, setSessionPickerOpen } from '@/store/session' import { $threadScrolledUp } from '@/store/thread-scroll' +import { useTheme } from '@/themes' import { extractDroppedFiles, HERMES_PATHS_MIME, partitionDroppedFiles } from '../hooks/use-composer-actions' @@ -74,9 +83,9 @@ import { placeCaretEnd, refChipElement, renderComposerContents, - RICH_INPUT_SLOT + RICH_INPUT_SLOT, + slashChipElement } from './rich-editor' -import { SkinSlashPopover } from './skin-slash-popover' import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils' import { ComposerTriggerPopover } from './trigger-popover' import type { ChatBarProps } from './types' @@ -95,6 +104,30 @@ const COMPOSER_FADE_BACKGROUND = const pickPlaceholder = (pool: readonly string[]) => pool[Math.floor(Math.random() * pool.length)] +/** Completion items can carry an `action` (set in use-slash-completions) that + * runs a side effect on pick instead of inserting a chip — e.g. the session + * picker's "Browse all…" entry opens the overlay. Table-driven so new action + * items are a registry row, not a composer branch. */ +const COMPLETION_ACTIONS: Record void> = { + 'session-picker': () => setSessionPickerOpen(true) +} + +/** Map a picked `/` completion to its pill accent. Driven by the completion + * group set in use-slash-completions (Skills / Themes / Commands|Options). 
*/ +function slashChipKindForItem(item: Unstable_TriggerItem): SlashChipKind { + const group = (item.metadata as { group?: unknown } | undefined)?.group + + if (group === 'Skills') { + return 'skill' + } + + if (group === 'Themes') { + return 'theme' + } + + return 'command' +} + interface QueueEditState { attachments: ComposerAttachment[] draft: string @@ -104,6 +137,10 @@ interface QueueEditState { const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a })) +// Quiet period after the last keystroke before persisting the draft; +// unmount/pagehide flushes bypass it. +const DRAFT_PERSIST_DEBOUNCE_MS = 400 + export function ChatBar({ busy, cwd, @@ -145,6 +182,9 @@ export function ChatBar({ const editorRef = useRef(null) const draftRef = useRef(draft) const previousBusyRef = useRef(busy) + const pendingDraftPersistRef = useRef<{ scope: string | null; text: string } | null>(null) + const activeQueueSessionKeyRef = useRef(activeQueueSessionKey) + activeQueueSessionKeyRef.current = activeQueueSessionKey const drainingQueueRef = useRef(false) const urlInputRef = useRef(null) @@ -156,14 +196,17 @@ export function ChatBar({ const [dragActive, setDragActive] = useState(false) const [queueEdit, setQueueEdit] = useState(null) const [focusRequestId, setFocusRequestId] = useState(0) + const queueEditRef = useRef(queueEdit) + queueEditRef.current = queueEdit const dragDepthRef = useRef(0) const composingRef = useRef(false) // true during IME composition (CJK input) const lastSpokenIdRef = useRef(null) const narrow = useMediaQuery('(max-width: 30rem)') + const { availableThemes, themeName } = useTheme() const at = useAtCompletions({ gateway: gateway ?? null, sessionId: sessionId ?? null, cwd: cwd ?? null }) - const slash = useSlashCompletions({ gateway: gateway ?? null }) + const slash = useSlashCompletions({ activeSkin: themeName, gateway: gateway ?? 
null, skinThemes: availableThemes }) const stacked = expanded || narrow || tight const trimmedDraft = draft.trim() @@ -171,10 +214,12 @@ export function ChatBar({ const canSubmit = busy || hasComposerPayload const editingQueuedPrompt = queueEdit ? (queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null) : null const busyAction = busy && hasComposerPayload ? 'queue' : 'stop' + // Steer only makes sense mid-turn, text-only (the gateway can't carry images // into a tool result) and never for a slash command (those execute inline). const canSteer = busy && !!onSteer && attachments.length === 0 && trimmedDraft.length > 0 && !SLASH_COMMAND_RE.test(trimmedDraft) + const showHelpHint = draft === '?' const { t } = useI18n() @@ -462,12 +507,6 @@ export function ChatBar({ }) }, []) - const selectSkinSlashCommand = (command: string) => { - draftRef.current = command - aui.composer().setText(command) - requestMainFocus() - } - const handlePaste = (event: ClipboardEvent) => { const imageBlobs = extractClipboardImageBlobs(event.clipboardData) @@ -620,16 +659,50 @@ export function ChatBar({ return } + // Action items (e.g. "Browse all sessions…") run a side effect instead of + // inserting a chip: strip the typed trigger token, then fire the action. + const completionAction = (item.metadata as { action?: unknown } | undefined)?.action + const runAction = typeof completionAction === 'string' ? COMPLETION_ACTIONS[completionAction] : undefined + + if (runAction) { + const current = composerPlainText(editor) + const prefix = current.slice(0, Math.max(0, current.length - trigger.tokenLength)) + + renderComposerContents(editor, prefix) + placeCaretEnd(editor) + draftRef.current = composerPlainText(editor) + aui.composer().setText(draftRef.current) + closeTrigger() + runAction() + requestMainFocus() + + return + } + const serialized = hermesDirectiveFormatter.serialize(item) const starter = serialized.endsWith(':') + + // Picking a bare arg-taking command (e.g. 
`/personality`) shouldn't commit + // it — expand to its options step so the popover shows the inline list, just + // as typing `/personality ` by hand would. A serialized value with a space is + // already an arg pick (`/personality alice`), so it commits normally. + const command = (item.metadata as { command?: string } | undefined)?.command ?? '' + + const expandsToArgs = + trigger.kind === '/' && !serialized.includes(' ') && desktopSlashCommandTakesArgs(command) + const text = starter || serialized.endsWith(' ') ? serialized : `${serialized} ` const directive = !starter && serialized.match(/^@([^:]+):(.+)$/) + // No pill while expanding — the bare command stays plain text until an arg + // is picked, at which point a single pill is emitted for the full command. + const slashKind = !expandsToArgs && trigger.kind === '/' ? slashChipKindForItem(item) : null + const keepTriggerOpen = starter || expandsToArgs const finish = () => { draftRef.current = composerPlainText(editor) aui.composer().setText(draftRef.current) requestMainFocus() - starter ? window.setTimeout(refreshTrigger, 0) : closeTrigger() + keepTriggerOpen ? window.setTimeout(refreshTrigger, 0) : closeTrigger() } const sel = window.getSelection() @@ -639,7 +712,20 @@ export function ChatBar({ if (!sel || !range || node?.nodeType !== Node.TEXT_NODE || offset < trigger.tokenLength) { const current = composerPlainText(editor) - renderComposerContents(editor, `${current.slice(0, Math.max(0, current.length - trigger.tokenLength))}${text}`) + const prefix = current.slice(0, Math.max(0, current.length - trigger.tokenLength)) + + if (slashKind) { + // Two-step arg picks (e.g. `/handoff` pill already inserted, now picking + // the platform) land here because the caret sits past a contenteditable + // chip. Rebuild the prefix and re-emit a single pill for the full command. 
+ renderComposerContents(editor, prefix) + editor.append(slashChipElement(serialized, slashKind), document.createTextNode(' ')) + placeCaretEnd(editor) + + return finish() + } + + renderComposerContents(editor, `${prefix}${text}`) placeCaretEnd(editor) return finish() @@ -650,8 +736,13 @@ export function ChatBar({ replaceRange.setEnd(node, offset) replaceRange.deleteContents() - if (directive) { - const chip = refChipElement(directive[1], directive[2]) + const chip = slashKind + ? slashChipElement(serialized, slashKind) + : directive + ? refChipElement(directive[1], directive[2]) + : null + + if (chip) { const space = document.createTextNode(' ') const fragment = document.createDocumentFragment() fragment.append(chip, space) @@ -1022,6 +1113,69 @@ export function ChatBar({ } } + const stashAt = ( + scope: string | null, + text = draftRef.current, + attachments = $composerAttachments.get() + ) => stashSessionDraft(scope, text, attachments) + + // Per-thread draft swap — the composer's only session coupling. Lifecycle + // never clears composer state; this effect alone stashes on leave, restores + // on enter. Keyed writes are idempotent, so no skip-sentinel. + useEffect(() => { + const { attachments, text } = takeSessionDraft(activeQueueSessionKey) + loadIntoComposer(text, attachments) + + return () => { + const editing = queueEditRef.current + + if (editing?.sessionKey === activeQueueSessionKey) { + stashAt(activeQueueSessionKey, editing.draft, editing.attachments) + } else if (!isBrowsingHistory(sessionId)) { + stashAt(activeQueueSessionKey) + } + } + }, [activeQueueSessionKey]) // eslint-disable-line react-hooks/exhaustive-deps + + // Debounced stash into the active scope. Skipped while browsing history or + // editing a queued prompt — recalled text must not clobber the real draft. 
+ useEffect(() => { + if (isBrowsingHistory(sessionId) || queueEdit) { + return + } + + pendingDraftPersistRef.current = { scope: activeQueueSessionKey, text: draft } + + const handle = window.setTimeout(() => { + pendingDraftPersistRef.current = null + stashAt(activeQueueSessionKey, draft) + }, DRAFT_PERSIST_DEBOUNCE_MS) + + return () => window.clearTimeout(handle) + }, [activeQueueSessionKey, draft, queueEdit, sessionId]) + + // pagehide is load-bearing: React skips effect cleanups on reload, so Cmd+R + // inside the debounce window would drop trailing keystrokes without this. + useEffect(() => { + const flushPendingDraftPersist = () => { + const pending = pendingDraftPersistRef.current + + if (!pending) { + return + } + + pendingDraftPersistRef.current = null + stashAt(pending.scope, pending.text) + } + + window.addEventListener('pagehide', flushPendingDraftPersist) + + return () => { + window.removeEventListener('pagehide', flushPendingDraftPersist) + flushPendingDraftPersist() + } + }, []) + const beginQueuedEdit = (entry: QueuedPromptEntry) => { if (!activeQueueSessionKey || queueEdit) { return @@ -1224,20 +1378,38 @@ export function ChatBar({ } }, [busy, drainNextQueued, queuedPrompts.length]) - // Clean up queue edit when its target disappears (session swap or external delete). + // Queue-edit cleanup: on session swap the scope effect already stashed the + // edit snapshot; only restore into the composer when still on the same scope. 
useEffect(() => { if (!queueEdit) { return } - if (queueEdit.sessionKey === activeQueueSessionKey && editingQueuedPrompt) { - return + if (queueEdit.sessionKey === activeQueueSessionKey) { + if (editingQueuedPrompt) { + return + } + + loadIntoComposer(queueEdit.draft, queueEdit.attachments) } - loadIntoComposer(queueEdit.draft, queueEdit.attachments) setQueueEdit(null) }, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps + const dispatchSubmit = (text: string, attachments?: ComposerAttachment[]) => { + const submittedScope = activeQueueSessionKeyRef.current + const submittedAttachments = attachments ?? [] + + const restore = () => { + loadIntoComposer(text, submittedAttachments) + stashAt(activeQueueSessionKeyRef.current, text, submittedAttachments) + } + + void Promise.resolve(attachments ? onSubmit(text, { attachments }) : onSubmit(text)) + .then(accepted => void (accepted === false ? restore() : clearSessionDraft(submittedScope))) + .catch(restore) + } + const submitDraft = () => { // Source the text from the DOM editor, not React state. The AUI composer // state (`draft`) and the derived `hasComposerPayload` lag the DOM by a @@ -1248,8 +1420,10 @@ export function ChatBar({ // input event; refresh it from the editor once more to also cover an // in-flight keystroke that hasn't fired its input event yet. const editor = editorRef.current + if (editor) { const domText = composerPlainText(editor) + if (domText !== draftRef.current) { draftRef.current = domText aui.composer().setText(domText) @@ -1270,10 +1444,9 @@ export function ChatBar({ // /send directives). Queuing them would make every slash command wait // for the current turn to finish, which is how the TUI never behaves. 
if (!attachments.length && SLASH_COMMAND_RE.test(text.trim())) { - const submitted = text triggerHaptic('submit') clearDraft() - void onSubmit(submitted) + dispatchSubmit(text) } else if (payloadPresent) { queueCurrentDraft() } else { @@ -1285,12 +1458,12 @@ export function ChatBar({ } else if (!payloadPresent && queuedPrompts.length > 0) { void drainNextQueued() } else if (payloadPresent) { - const submitted = text + const submittedAttachments = cloneAttachments(attachments) triggerHaptic('submit') resetBrowseState(sessionId) clearDraft() clearComposerAttachments() - void onSubmit(submitted, { attachments }) + dispatchSubmit(text, submittedAttachments) } focusInput() @@ -1515,7 +1688,6 @@ export function ChatBar({ onPick={replaceTriggerWithChip} /> )} - {activeQueueSessionKey && queuedPrompts.length > 0 && ( // Out of flow so the queue never inflates the composer's measured // height (that drives thread bottom padding → chat resizes on diff --git a/apps/desktop/src/app/chat/composer/rich-editor.ts b/apps/desktop/src/app/chat/composer/rich-editor.ts index 38ab85d0f35..ea6382f9abd 100644 --- a/apps/desktop/src/app/chat/composer/rich-editor.ts +++ b/apps/desktop/src/app/chat/composer/rich-editor.ts @@ -10,7 +10,10 @@ import { DIRECTIVE_CHIP_CLASS, directiveIconElement, directiveIconSvg, - formatRefValue + formatRefValue, + slashChipClass, + type SlashChipKind, + slashIconElement } from '@/components/assistant-ui/directive-text' export const RICH_INPUT_SLOT = 'composer-rich-input' @@ -77,6 +80,24 @@ export function refChipElement(kind: string, rawValue: string, displayLabel?: st return chip } +/** A non-editable pill for a picked slash command (`/skin nous`, `/tropes`). + * `data-ref-text` carries the literal command so `composerPlainText` round-trips + * it back to the exact text that gets submitted. 
*/ +export function slashChipElement(command: string, kind: SlashChipKind, label?: string) { + const chip = document.createElement('span') + const text = document.createElement('span') + + chip.contentEditable = 'false' + chip.dataset.refText = command + chip.dataset.slashKind = kind + chip.className = slashChipClass(kind) + text.className = 'truncate' + text.textContent = label || command + chip.append(slashIconElement(kind), text) + + return chip +} + function appendTextWithBreaks(target: DocumentFragment | HTMLElement, text: string) { const lines = text.split('\n') diff --git a/apps/desktop/src/app/chat/composer/skin-slash-popover.tsx b/apps/desktop/src/app/chat/composer/skin-slash-popover.tsx deleted file mode 100644 index 2bfc27e51ad..00000000000 --- a/apps/desktop/src/app/chat/composer/skin-slash-popover.tsx +++ /dev/null @@ -1,61 +0,0 @@ -import { useI18n } from '@/i18n' -import { desktopSkinSlashCompletions } from '@/lib/desktop-slash-commands' -import { triggerHaptic } from '@/lib/haptics' -import { useTheme } from '@/themes/context' - -import { COMPLETION_DRAWER_CLASS, COMPLETION_DRAWER_ROW_CLASS, CompletionDrawerEmpty } from './completion-drawer' - -interface SkinSlashPopoverProps { - draft: string - onSelect: (command: string) => void -} - -export function SkinSlashPopover({ draft, onSelect }: SkinSlashPopoverProps) { - const { t } = useI18n() - const c = t.composer - const { availableThemes, themeName } = useTheme() - const match = draft.match(/^\/skin\s+(\S*)$/i) - - if (!match) { - return null - } - - const items = desktopSkinSlashCompletions(availableThemes, themeName, match[1] ?? '') - - return ( -
-
- {items.length === 0 ? ( - - {c.themeTryPre} - /skin list - {c.themeTryPost} - - ) : ( - items.map(item => ( - - )) - )} -
-
- ) -} diff --git a/apps/desktop/src/app/chat/composer/text-utils.test.ts b/apps/desktop/src/app/chat/composer/text-utils.test.ts index 5ef677f4d0f..f80e6db4385 100644 --- a/apps/desktop/src/app/chat/composer/text-utils.test.ts +++ b/apps/desktop/src/app/chat/composer/text-utils.test.ts @@ -22,6 +22,33 @@ describe('detectTrigger', () => { it('returns null for plain text', () => { expect(detectTrigger('hello there')).toBeNull() }) + + it('keeps the slash trigger live while typing args', () => { + expect(detectTrigger('/personality ')).toEqual({ + kind: '/', + query: 'personality ', + tokenLength: 13 + }) + expect(detectTrigger('/personality alic')).toEqual({ + kind: '/', + query: 'personality alic', + tokenLength: 17 + }) + expect(detectTrigger('/tools enable foo')).toEqual({ + kind: '/', + query: 'tools enable foo', + tokenLength: 17 + }) + }) + + it('does not treat file-style paths as slash triggers', () => { + expect(detectTrigger('src/foo/bar')).toBeNull() + expect(detectTrigger('/path/to/file')).toBeNull() + }) + + it('still anchors at-mention triggers strictly at the token edge', () => { + expect(detectTrigger('@file:path with space')).toBeNull() + }) }) describe('extractClipboardImageBlobs', () => { diff --git a/apps/desktop/src/app/chat/composer/text-utils.ts b/apps/desktop/src/app/chat/composer/text-utils.ts index e9a8fb6aaee..4535d6963c3 100644 --- a/apps/desktop/src/app/chat/composer/text-utils.ts +++ b/apps/desktop/src/app/chat/composer/text-utils.ts @@ -6,7 +6,13 @@ export interface TriggerState { tokenLength: number } -const TRIGGER_RE = /(?:^|[\s])([@/])([^\s@/]*)$/ +// `@` triggers stop at the first whitespace — `@file:path` and `@diff` are +// single tokens. `/` triggers keep going so the popover stays live while the +// user types args (`/personality alic` → arg completer suggests `alice`). +// Restricting the slash command name to `[a-zA-Z][\w-]*` avoids matching file +// paths like `src/foo/bar`. 
+const AT_TRIGGER_RE = /(?:^|[\s])(@)([^\s@/]*)$/ +const SLASH_TRIGGER_RE = /(?:^|[\s])(\/)((?:[a-zA-Z][\w-]*(?:\s+\S*)*)?)$/ /** Stable key for paste dedupe — `items` and `files` often mirror the same image as different objects. */ export function blobDedupeKey(blob: Blob): string { @@ -97,11 +103,17 @@ export function textBeforeCaret(editor: HTMLDivElement): string | null { } export function detectTrigger(textBefore: string): TriggerState | null { - const match = TRIGGER_RE.exec(textBefore) + const slash = SLASH_TRIGGER_RE.exec(textBefore) - if (!match) { - return null + if (slash) { + return { kind: '/', query: slash[2], tokenLength: 1 + slash[2].length } } - return { kind: match[1] as '@' | '/', query: match[2], tokenLength: 1 + match[2].length } + const at = AT_TRIGGER_RE.exec(textBefore) + + if (at) { + return { kind: '@', query: at[2], tokenLength: 1 + at[2].length } + } + + return null } diff --git a/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx b/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx index 9acc43f7f19..3aefbfee0a5 100644 --- a/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx +++ b/apps/desktop/src/app/chat/composer/trigger-popover.test.tsx @@ -34,9 +34,17 @@ describe('ComposerTriggerPopover i18n', () => { }) it('renders localized loading copy for slash commands', () => { - const { container } = renderPopover('/', true) + renderPopover('/', true) + // While loading the popover shows only the spinner + loading copy — the + // `/help` empty-state hint is reserved for the resolved (not-loading) state. 
expect(screen.getByText('查找中…')).toBeTruthy() + }) + + it('renders the slash empty-state hint when not loading', () => { + const { container } = renderPopover('/') + + expect(screen.getByText('没有匹配项。')).toBeTruthy() expect(container.textContent).toContain('/help') }) }) diff --git a/apps/desktop/src/app/chat/composer/trigger-popover.tsx b/apps/desktop/src/app/chat/composer/trigger-popover.tsx index a09190dd6b3..dffa1ae7745 100644 --- a/apps/desktop/src/app/chat/composer/trigger-popover.tsx +++ b/apps/desktop/src/app/chat/composer/trigger-popover.tsx @@ -1,5 +1,7 @@ import type { Unstable_TriggerItem } from '@assistant-ui/core' +import { Fragment } from 'react' +import { BrailleSpinner } from '@/components/ui/braille-spinner' import { Codicon } from '@/components/ui/codicon' import { useI18n } from '@/i18n' import { cn } from '@/lib/utils' @@ -7,7 +9,6 @@ import { cn } from '@/lib/utils' import { COMPLETION_DRAWER_BELOW_CLASS, COMPLETION_DRAWER_CLASS, - COMPLETION_DRAWER_ROW_CLASS, CompletionDrawerEmpty } from './completion-drawer' @@ -23,11 +24,7 @@ const AT_ICON_BY_TYPE: Record = { url: 'globe' } -function completionIcon(kind: '@' | '/', item: Unstable_TriggerItem) { - if (kind === '/') { - return 'terminal' - } - +function atIcon(item: Unstable_TriggerItem) { const meta = item.metadata as { rawText?: string } | undefined const raw = meta?.rawText || item.label @@ -42,6 +39,18 @@ function completionIcon(kind: '@' | '/', item: Unstable_TriggerItem) { return AT_ICON_BY_TYPE[item.type] || AT_ICON_BY_TYPE.simple } +interface RowMeta { + display?: string + group?: string + meta?: string +} + +const ROW_BASE_CLASS = [ + 'relative flex w-full cursor-default select-none rounded-md px-2 py-1 text-left', + 'outline-hidden transition-colors hover:bg-(--ui-bg-tertiary)', + 'data-[highlighted]:bg-(--ui-bg-tertiary) data-[highlighted]:text-foreground' +].join(' ') + interface ComposerTriggerPopoverProps { activeIndex: number items: readonly Unstable_TriggerItem[] @@ -63,6 +72,9 
@@ export function ComposerTriggerPopover({ }: ComposerTriggerPopoverProps) { const { t } = useI18n() const copy = t.composer + const isSlash = kind === '/' + + let lastGroup: string | undefined return (
{items.length === 0 ? ( - - {kind === '@' ? ( - <> - {copy.lookupTry} @file: {copy.lookupOr}{' '} - @folder:. - - ) : ( - <> - {copy.lookupTry} /help. - - )} - + loading ? ( +
+ + {copy.lookupLoading} +
+ ) : ( + + {kind === '@' ? ( + <> + {copy.lookupTry} @file: {copy.lookupOr}{' '} + @folder:. + + ) : ( + <> + {copy.lookupTry} /help. + + )} + + ) ) : ( items.map((item, index) => { - const meta = item.metadata as { display?: string; meta?: string } | undefined - const display = meta?.display ?? (kind === '/' ? `/${item.label}` : item.label) + const meta = item.metadata as RowMeta | undefined + const display = meta?.display ?? (isSlash ? `/${item.label}` : item.label) const description = meta?.meta || item.description + const group = meta?.group?.trim() + const showHeader = isSlash && Boolean(group) && group !== lastGroup + const isFirstHeader = lastGroup === undefined + lastGroup = group || lastGroup + const active = index === activeIndex return ( - + + ) }) )} diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx index ab4f3f0eb0e..0da26639544 100644 --- a/apps/desktop/src/app/desktop-controller.tsx +++ b/apps/desktop/src/app/desktop-controller.tsx @@ -98,6 +98,7 @@ import { RightSidebarPane } from './right-sidebar' import { $terminalTakeover } from './right-sidebar/store' import { PersistentTerminal, TerminalSlot } from './right-sidebar/terminal/persistent' import { CRON_ROUTE, NEW_CHAT_ROUTE, routeSessionId, sessionRoute, SETTINGS_ROUTE } from './routes' +import { SessionPickerOverlay } from './session-picker-overlay' import { SessionSwitcher } from './session-switcher' import { useContextSuggestions } from './session/hooks/use-context-suggestions' import { useCwdActions } from './session/hooks/use-cwd-actions' @@ -694,6 +695,7 @@ export function DesktopController() { handleSkinCommand, refreshSessions, requestGateway, + resumeStoredSession: resumeSession, selectedStoredSessionIdRef, startFreshSessionDraft, sttEnabled, @@ -829,6 +831,7 @@ export function DesktopController() { /> )} + diff --git a/apps/desktop/src/app/right-sidebar/files/ipc.test.ts b/apps/desktop/src/app/right-sidebar/files/ipc.test.ts new file 
mode 100644 index 00000000000..bcaddad55b5 --- /dev/null +++ b/apps/desktop/src/app/right-sidebar/files/ipc.test.ts @@ -0,0 +1,100 @@ +/// + +import { Buffer } from 'node:buffer' + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +import type { HermesReadDirEntry, HermesReadDirResult } from '@/global' + +import { clearProjectDirCache, readProjectDir } from './ipc' + +const readDir = vi.fn<(path: string) => Promise>() +const readFileDataUrl = vi.fn<(path: string) => Promise>() +const gitRoot = vi.fn<(path: string) => Promise>() + +function ok(entries: HermesReadDirEntry[]): HermesReadDirResult { + return { entries } +} + +function dataUrl(text: string) { + return `data:text/plain;base64,${Buffer.from(text, 'utf8').toString('base64')}` +} + +function installBridge() { + ;( + window as unknown as { + hermesDesktop: { + gitRoot: typeof gitRoot + readDir: typeof readDir + readFileDataUrl: typeof readFileDataUrl + } + } + ).hermesDesktop = { gitRoot, readDir, readFileDataUrl } +} + +describe('readProjectDir', () => { + beforeEach(() => { + clearProjectDirCache() + readDir.mockReset() + readFileDataUrl.mockReset() + gitRoot.mockReset() + installBridge() + }) + + afterEach(() => { + clearProjectDirCache() + delete (window as unknown as { hermesDesktop?: unknown }).hermesDesktop + }) + + it('returns no-bridge when the desktop bridge is unavailable', async () => { + delete (window as unknown as { hermesDesktop?: unknown }).hermesDesktop + + await expect(readProjectDir('/repo')).resolves.toEqual({ entries: [], error: 'no-bridge' }) + }) + + it('filters gitignored entries when readDir returns Windows-style paths', async () => { + gitRoot.mockResolvedValue('C:\\repo') + readDir.mockImplementation(async path => { + if (path === 'C:\\repo\\src') { + return ok([ + { name: 'debug.log', path: 'C:\\repo\\src\\debug.log', isDirectory: false }, + { name: '临时.txt', path: 'C:\\repo\\src\\临时.txt', isDirectory: false }, + { name: 'keep.ts', path: 
'C:\\repo\\src\\keep.ts', isDirectory: false } + ]) + } + + if (path === 'C:/repo') { + return ok([{ name: '.gitignore', path: 'C:/repo/.gitignore', isDirectory: false }]) + } + + if (path === 'C:/repo/src') { + return ok([]) + } + + return ok([]) + }) + readFileDataUrl.mockResolvedValue(dataUrl('# Unicode 路径规则\nsrc/*.log\nsrc/临时.txt\n')) + + const result = await readProjectDir('C:\\repo\\src', 'C:\\repo') + + expect(result.entries.map(entry => entry.name)).toEqual(['keep.ts']) + expect(gitRoot).toHaveBeenCalledWith('C:/repo') + expect(readFileDataUrl).toHaveBeenCalledWith('C:/repo/.gitignore') + }) + + it('does not fetch .gitignore contents when listings do not contain .gitignore', async () => { + gitRoot.mockResolvedValue('/repo') + readDir.mockImplementation(async path => { + if (path === '/repo/src') { + return ok([{ name: 'debug.log', path: '/repo/src/debug.log', isDirectory: false }]) + } + + return ok([]) + }) + + const result = await readProjectDir('/repo/src', '/repo') + + expect(result.entries.map(entry => entry.name)).toEqual(['debug.log']) + expect(readFileDataUrl).not.toHaveBeenCalled() + }) +}) diff --git a/apps/desktop/src/app/right-sidebar/files/ipc.ts b/apps/desktop/src/app/right-sidebar/files/ipc.ts index 843ebe761cd..078f0baab1e 100644 --- a/apps/desktop/src/app/right-sidebar/files/ipc.ts +++ b/apps/desktop/src/app/right-sidebar/files/ipc.ts @@ -27,7 +27,7 @@ function decodeDataUrl(dataUrl: string) { } function clean(path: string) { - return path.replace(/\/+$/, '') || '/' + return path.replace(/\\/g, '/').replace(/\/+$/, '') || '/' } /** Strict POSIX-style relative path; null if `child` is not inside `root`. 
*/ diff --git a/apps/desktop/src/app/right-sidebar/files/tree.tsx b/apps/desktop/src/app/right-sidebar/files/tree.tsx index 6421581ca8c..49cd72a8d27 100644 --- a/apps/desktop/src/app/right-sidebar/files/tree.tsx +++ b/apps/desktop/src/app/right-sidebar/files/tree.tsx @@ -145,7 +145,8 @@ function ProjectTreeRow({ } const isFolder = node.data.isDirectory - const isPlaceholder = node.data.id.endsWith('::__loading__') + const isPlaceholder = Boolean(node.data.placeholder) + const isErrorPlaceholder = node.data.placeholder === 'error' return (
} - {isPlaceholder ? ( + {isPlaceholder && !isErrorPlaceholder ? ( + ) : isErrorPlaceholder ? ( + ) : isFolder ? ( ) : ( diff --git a/apps/desktop/src/app/right-sidebar/files/use-project-tree.test.ts b/apps/desktop/src/app/right-sidebar/files/use-project-tree.test.ts index a0ecd409f4a..d1c0018bf2e 100644 --- a/apps/desktop/src/app/right-sidebar/files/use-project-tree.test.ts +++ b/apps/desktop/src/app/right-sidebar/files/use-project-tree.test.ts @@ -106,7 +106,7 @@ describe('useProjectTree', () => { expect(readDir).toHaveBeenCalledTimes(1) }) - it('captures per-folder error code and leaves the folder expandable but empty', async () => { + it('captures per-folder error code and shows an error placeholder child', async () => { readDir.mockResolvedValueOnce(ok([{ name: 'priv', path: '/p/priv', isDirectory: true }])) readDir.mockResolvedValueOnce({ entries: [], error: 'EACCES' }) @@ -119,7 +119,14 @@ describe('useProjectTree', () => { }) expect(result.current.data[0].error).toBe('EACCES') - expect(result.current.data[0].children).toEqual([]) + expect(result.current.data[0].children).toEqual([ + { + id: '/p/priv::__error__', + isDirectory: false, + name: 'Unable to read (EACCES)', + placeholder: 'error' + } + ]) }) it('dedupes concurrent loadChildren calls for the same id', async () => { diff --git a/apps/desktop/src/app/right-sidebar/files/use-project-tree.ts b/apps/desktop/src/app/right-sidebar/files/use-project-tree.ts index 23fb5efe2dc..3e022c19fd3 100644 --- a/apps/desktop/src/app/right-sidebar/files/use-project-tree.ts +++ b/apps/desktop/src/app/right-sidebar/files/use-project-tree.ts @@ -14,11 +14,14 @@ export interface TreeNode { children?: TreeNode[] /** True while a readDir for this folder is in flight. */ loading?: boolean + /** Synthetic loading/error rows are not real filesystem entries. */ + placeholder?: 'error' | 'loading' /** Last error code from readDir (e.g. EACCES). Cleared on next successful load. 
*/ error?: string } const PLACEHOLDER_ID = '__loading__' +const ERROR_PLACEHOLDER_ID = '__error__' function makeNode(path: string, name: string, isDirectory: boolean): TreeNode { return { id: path, isDirectory, name } @@ -43,7 +46,16 @@ function patchNode(nodes: TreeNode[] | undefined | null, id: string, patch: (n: } function placeholderChild(parentId: string): TreeNode { - return { id: `${parentId}::${PLACEHOLDER_ID}`, isDirectory: false, name: 'Loading…' } + return { id: `${parentId}::${PLACEHOLDER_ID}`, isDirectory: false, name: 'Loading…', placeholder: 'loading' } +} + +function errorChild(parentId: string, error: string | undefined): TreeNode { + return { + id: `${parentId}::${ERROR_PLACEHOLDER_ID}`, + isDirectory: false, + name: `Unable to read (${error || 'read-error'})`, + placeholder: 'error' + } } export interface UseProjectTreeResult { @@ -227,7 +239,7 @@ export function useProjectTree(cwd: string): UseProjectTreeResult { ...n, loading: false, error: error || undefined, - children: error ? [] : entries.map(e => makeNode(e.path, e.name, e.isDirectory)) + children: error ? [errorChild(n.id, error)] : entries.map(e => makeNode(e.path, e.name, e.isDirectory)) })) } }) diff --git a/apps/desktop/src/app/session-picker-overlay.tsx b/apps/desktop/src/app/session-picker-overlay.tsx new file mode 100644 index 00000000000..65344fcac26 --- /dev/null +++ b/apps/desktop/src/app/session-picker-overlay.tsx @@ -0,0 +1,32 @@ +import { useStore } from '@nanostores/react' + +import { SessionPickerDialog } from '@/components/session-picker' +import { $gatewayState, $selectedStoredSessionId, $sessionPickerOpen, setSessionPickerOpen } from '@/store/session' + +interface SessionPickerOverlayProps { + onResume: (storedSessionId: string) => void +} + +/** + * Mounts the session picker that `/resume` (and `/sessions`, `/switch`) opens — + * the desktop equivalent of the TUI's sessions overlay. Resuming runs through + * the same `resumeSession` path the sidebar uses. 
+ */ +export function SessionPickerOverlay({ onResume }: SessionPickerOverlayProps) { + const open = useStore($sessionPickerOpen) + const gatewayOpen = useStore($gatewayState) === 'open' + const activeStoredSessionId = useStore($selectedStoredSessionId) + + if (!gatewayOpen) { + return null + } + + return ( + + ) +} diff --git a/apps/desktop/src/app/session/hooks/use-message-stream.ts b/apps/desktop/src/app/session/hooks/use-message-stream.ts index 75ff43b5ee8..86244c84386 100644 --- a/apps/desktop/src/app/session/hooks/use-message-stream.ts +++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts @@ -64,6 +64,67 @@ interface QueuedStreamDeltas { reasoning: string } +type SessionRuntimeStatePatch = Partial< + Pick< + ClientSessionState, + | 'branch' + | 'cwd' + | 'fast' + | 'model' + | 'personality' + | 'provider' + | 'reasoningEffort' + | 'serviceTier' + | 'yolo' + > +> + +function sessionInfoStatePatch(payload: GatewayEventPayload | undefined): SessionRuntimeStatePatch { + const patch: SessionRuntimeStatePatch = {} + + if (typeof payload?.model === 'string') { + patch.model = payload.model || '' + } + + if (typeof payload?.provider === 'string') { + patch.provider = payload.provider || '' + } + + if (typeof payload?.cwd === 'string') { + patch.cwd = payload.cwd + } + + if (typeof payload?.branch === 'string') { + patch.branch = payload.branch + } + + if (typeof payload?.personality === 'string') { + patch.personality = normalizePersonalityValue(payload.personality) + } + + if (typeof payload?.reasoning_effort === 'string') { + patch.reasoningEffort = payload.reasoning_effort + } + + if (typeof payload?.service_tier === 'string') { + patch.serviceTier = payload.service_tier + } + + if (typeof payload?.fast === 'boolean') { + patch.fast = payload.fast + } + + if (typeof payload?.yolo === 'boolean') { + patch.yolo = payload.yolo + } + + return patch +} + +function hasSessionInfoStatePatch(patch: SessionRuntimeStatePatch): boolean { + return 
Object.keys(patch).length > 0 +} + // Minimum gap between two assistant-text flushes during a stream. Was 16ms // (rAF only), which at typical LLM token rates of ~30-80 tok/sec meant every // token got its own React commit + Streamdown markdown re-parse, scaling @@ -628,36 +689,27 @@ export function useMessageStream({ // Apply session-scoped fields when the event targets the active // session, OR when it's a global broadcast and we have no session. const apply = explicitSid ? isActiveEvent : !activeSessionIdRef.current + const statePatch = sessionInfoStatePatch(payload) + const hasStatePatch = hasSessionInfoStatePatch(statePatch) const modelChanged = typeof payload?.model === 'string' const providerChanged = typeof payload?.provider === 'string' const runningChanged = typeof payload?.running === 'boolean' if (apply) { - const runtimeInfo: Partial< - Pick< - ClientSessionState, - 'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo' - > - > = {} - if (modelChanged) { setCurrentModel(payload!.model || '') - runtimeInfo.model = payload!.model || '' } if (providerChanged) { setCurrentProvider(payload!.provider || '') - runtimeInfo.provider = payload!.provider || '' } if (typeof payload?.cwd === 'string') { setCurrentCwd(payload.cwd) - runtimeInfo.cwd = payload.cwd } if (typeof payload?.branch === 'string') { setCurrentBranch(payload.branch) - runtimeInfo.branch = payload.branch } if (typeof payload?.personality === 'string') { @@ -666,28 +718,31 @@ export function useMessageStream({ if (typeof payload?.reasoning_effort === 'string') { setCurrentReasoningEffort(payload.reasoning_effort) - runtimeInfo.reasoningEffort = payload.reasoning_effort } if (typeof payload?.service_tier === 'string') { setCurrentServiceTier(payload.service_tier) - runtimeInfo.serviceTier = payload.service_tier } if (typeof payload?.fast === 'boolean') { setCurrentFastMode(payload.fast) - runtimeInfo.fast = payload.fast } if (typeof payload?.yolo === 
'boolean') { setYoloActive(payload.yolo) - runtimeInfo.yolo = payload.yolo } + } - if (sessionId && Object.keys(runtimeInfo).length > 0) { - updateSessionState(sessionId, state => ({ ...state, ...runtimeInfo })) - } + if (sessionId && hasStatePatch) { + updateSessionState(sessionId, state => ({ + ...state, + ...statePatch, + branch: statePatch.branch ?? state.branch, + cwd: statePatch.cwd ?? state.cwd + })) + } + if (apply) { if (runningChanged && sessionId) { updateSessionState(sessionId, state => { const busy = Boolean(payload!.running) diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.test.tsx b/apps/desktop/src/app/session/hooks/use-prompt-actions.test.tsx index 96af1e8400e..e7dfe9d7da5 100644 --- a/apps/desktop/src/app/session/hooks/use-prompt-actions.test.tsx +++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.test.tsx @@ -1,6 +1,6 @@ import { cleanup, render, waitFor } from '@testing-library/react' import type { MutableRefObject } from 'react' -import { useEffect } from 'react' +import { useEffect, useRef } from 'react' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { $composerAttachments, type ComposerAttachment } from '@/store/composer' @@ -42,6 +42,7 @@ function sessionInfo(overrides: Partial = {}): SessionInfo { } interface HarnessHandle { + cancelRun: () => Promise steerPrompt: (text: string) => Promise submitText: ( text: string, @@ -55,6 +56,7 @@ function Harness({ onSeedState, refreshSessions, requestGateway, + resumeStoredSession, storedSessionId }: { busyRef?: MutableRefObject @@ -62,6 +64,7 @@ function Harness({ onSeedState?: (state: Record) => void refreshSessions: () => Promise requestGateway: (method: string, params?: Record) => Promise + resumeStoredSession?: (storedSessionId: string) => Promise | void storedSessionId?: null | string }) { const activeSessionIdRef: MutableRefObject = { current: RUNTIME_SESSION_ID } @@ -69,6 +72,12 @@ function Harness({ current: storedSessionId === 
undefined ? RUNTIME_SESSION_ID : storedSessionId } const localBusyRef = busyRef ?? { current: false } + const stateRef = useRef({ + messages: [], + busy: false, + awaitingResponse: false, + interrupted: true + } as never) const actions = usePromptActions({ activeSessionId: RUNTIME_SESSION_ID, @@ -79,17 +88,14 @@ function Harness({ handleSkinCommand: () => '', refreshSessions, requestGateway, + resumeStoredSession: resumeStoredSession ?? (() => undefined), selectedStoredSessionIdRef, startFreshSessionDraft: () => undefined, sttEnabled: false, updateSessionState: (_sessionId, updater) => { // Seed with interrupted:true so we can prove a fresh submit clears it. - const next = updater({ - messages: [], - busy: false, - awaitingResponse: false, - interrupted: true - } as never) as unknown as Record + const next = updater(stateRef.current) as unknown as Record + stateRef.current = next as never onSeedState?.(next) return next as never @@ -97,8 +103,12 @@ function Harness({ }) useEffect(() => { - onReady({ steerPrompt: actions.steerPrompt, submitText: actions.submitText }) - }, [actions.steerPrompt, actions.submitText, onReady]) + onReady({ + cancelRun: actions.cancelRun, + steerPrompt: actions.steerPrompt, + submitText: actions.submitText + }) + }, [actions.cancelRun, actions.steerPrompt, actions.submitText, onReady]) return null } @@ -190,6 +200,68 @@ describe('usePromptActions /title', () => { }) }) +describe('usePromptActions desktop slash pickers', () => { + beforeEach(() => { + setSessions(() => [sessionInfo({ id: '20260610_120000_abcdef', title: 'Loaded session' })]) + }) + + afterEach(() => { + cleanup() + vi.useRealTimers() + vi.restoreAllMocks() + }) + + it('resumes an exact session id even when it is not in the loaded sidebar cache', async () => { + const resumeStoredSession = vi.fn(async () => undefined) + const requestGateway = vi.fn(async () => ({}) as never) + + let handle: HarnessHandle | null = null + render( + (handle = h)} + refreshSessions={async () => 
undefined} + requestGateway={requestGateway} + resumeStoredSession={resumeStoredSession} + /> + ) + + await handle!.submitText('/resume 20260610_130000_123abc') + + expect(resumeStoredSession).toHaveBeenCalledWith('20260610_130000_123abc') + expect(requestGateway).not.toHaveBeenCalledWith('slash.exec', expect.anything()) + }) + + it('marks a timed-out handoff as failed so the next attempt can retry', async () => { + vi.useFakeTimers() + const calls: { method: string; params?: Record }[] = [] + const requestGateway = vi.fn(async (method: string, params?: Record) => { + calls.push({ method, params }) + + if (method === 'handoff.state') { + return { state: 'pending' } as never + } + + return {} as never + }) + + let handle: HarnessHandle | null = null + render( (handle = h)} refreshSessions={async () => undefined} requestGateway={requestGateway} />) + + const result = handle!.submitText('/handoff telegram') + await vi.advanceTimersByTimeAsync(61_000) + await result + + expect(calls.some(call => call.method === 'handoff.request')).toBe(true) + expect(calls).toContainEqual({ + method: 'handoff.fail', + params: { + error: expect.stringContaining('Timed out'), + session_id: RUNTIME_SESSION_ID + } + }) + }) +}) + describe('usePromptActions submit / queue drain semantics', () => { afterEach(() => { cleanup() @@ -562,6 +634,43 @@ describe('usePromptActions sleep/wake session recovery', () => { expect(calls[2]?.params).toEqual({ session_id: RECOVERED_SESSION_ID, text: 'message after wake' }) }) + it('resumes the stored session and retries once when session.interrupt reports "session not found"', async () => { + const calls: { method: string; params?: Record }[] = [] + let interruptAttempts = 0 + const requestGateway = vi.fn(async (method: string, params?: Record) => { + calls.push({ method, params }) + if (method === 'session.interrupt') { + interruptAttempts += 1 + if (interruptAttempts === 1) { + throw new Error('session not found') + } + return {} as never + } + if (method 
=== 'session.resume') { + return { session_id: RECOVERED_SESSION_ID } as never + } + return {} as never + }) + + let handle: HarnessHandle | null = null + render( + (handle = h)} + refreshSessions={async () => undefined} + requestGateway={requestGateway} + storedSessionId={STORED_SESSION_ID} + /> + ) + await waitFor(() => expect(handle).not.toBeNull()) + + await handle!.cancelRun() + + expect(calls.map(c => c.method)).toEqual(['session.interrupt', 'session.resume', 'session.interrupt']) + expect(calls[0]?.params).toEqual({ session_id: RUNTIME_SESSION_ID }) + expect(calls[1]?.params).toEqual({ session_id: STORED_SESSION_ID }) + expect(calls[2]?.params).toEqual({ session_id: RECOVERED_SESSION_ID }) + }) + it('surfaces the original error (no resume) when the failure is not "session not found"', async () => { const calls: string[] = [] const states: Record[] = [] @@ -751,4 +860,3 @@ describe('uploadComposerAttachment remote read failures', () => { ).rejects.toThrow('ENOENT: no such file') }) }) - diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts index 167f0d3224f..b09d86ffd10 100644 --- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts @@ -4,20 +4,24 @@ import { type MutableRefObject, useCallback, useEffect, useRef } from 'react' import { getProfiles, transcribeAudio } from '@/hermes' import { translateNow, type Translations, useI18n } from '@/i18n' +import { stripAnsi } from '@/lib/ansi' import { branchGroupForUser, type ChatMessage, chatMessageText, textPart } from '@/lib/chat-messages' import { optimisticAttachmentRef, parseCommandDispatch, parseSlashCommand, pathLabel, + sessionTitle, SLASH_COMMAND_RE } from '@/lib/chat-runtime' import { type CommandsCatalogLike, + type DesktopActionId, + type DesktopPickerId, desktopSlashUnavailableMessage, filterDesktopCommandsCatalog, isDesktopSlashCommand, - isModelPickerCommand 
+ resolveDesktopCommand } from '@/lib/desktop-slash-commands' import { triggerHaptic } from '@/lib/haptics' import { setMutableRef } from '@/lib/mutable-ref' @@ -38,11 +42,13 @@ import { $busy, $connection, $messages, + $sessions, $yoloActive, setAwaitingResponse, setBusy, setMessages, setModelPickerOpen, + setSessionPickerOpen, setSessions, setYoloActive } from '@/store/session' @@ -50,12 +56,30 @@ import { import type { ClientSessionState, FileAttachResponse, + HandoffFailResponse, + HandoffRequestResponse, + HandoffStateResponse, ImageAttachResponse, SessionSteerResponse, SessionTitleResponse, SlashExecResponse } from '../../types' +interface HandoffResult { + ok: boolean + error?: string +} + +function delay(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)) +} + +function isSessionIdCandidate(value: string): boolean { + const trimmed = value.trim() + + return /^\d{8}_\d{6}_[A-Fa-f0-9]{6}$/.test(trimmed) || /^[A-Fa-f0-9]{32}$/.test(trimmed) +} + function blobToDataUrl(blob: Blob): Promise { return new Promise((resolve, reject) => { const reader = new FileReader() @@ -84,6 +108,12 @@ function inlineErrorMessage(error: unknown, fallback: string): string { return (raw.match(/Error invoking remote method '[^']+': Error: (.+)$/)?.[1] ?? raw).replace(/^Error:\s*/, '').trim() } +function isSessionNotFoundError(error: unknown): boolean { + const message = error instanceof Error ? 
error.message : String(error) + + return /session not found/i.test(message) +} + function base64FromDataUrl(dataUrl: string): string { const comma = dataUrl.indexOf(',') @@ -245,6 +275,7 @@ interface PromptActionsOptions { handleSkinCommand: (arg: string) => string refreshSessions: () => Promise requestGateway: (method: string, params?: Record) => Promise + resumeStoredSession: (storedSessionId: string) => Promise | void selectedStoredSessionIdRef: MutableRefObject startFreshSessionDraft: () => void sttEnabled: boolean @@ -260,6 +291,15 @@ interface SubmitTextOptions { fromQueue?: boolean } +/** Everything a slash handler needs about the invocation it's serving. */ +interface SlashActionCtx { + arg: string + command: string + name: string + recordInput: boolean + sessionHint?: string +} + function renderCommandsCatalog(catalog: CommandsCatalogLike, copy: Translations['desktop']): string { const desktopCatalog = filterDesktopCommandsCatalog(catalog) @@ -310,6 +350,7 @@ export function usePromptActions({ handleSkinCommand, refreshSessions, requestGateway, + resumeStoredSession, selectedStoredSessionIdRef, startFreshSessionDraft, sttEnabled, @@ -320,7 +361,11 @@ export function usePromptActions({ const appendSessionTextMessage = useCallback( (sessionId: string, role: ChatMessage['role'], text: string) => { - const body = text.trim() + // Strip ANSI: slash-command output from the backend worker carries SGR + // color codes (e.g. "Unknown command" in red). The ESC byte is invisible + // in the chat panel, so without this the `[1;31m…[0m` payload leaks as + // literal text. + const body = stripAnsi(text).trim() if (!body) { return @@ -622,9 +667,7 @@ export function usePromptActions({ try { await requestGateway('prompt.submit', { session_id: sessionId, text }) } catch (firstErr) { - const firstMsg = firstErr instanceof Error ? 
firstErr.message : String(firstErr) - - if (/session not found/i.test(firstMsg) && selectedStoredSessionIdRef.current) { + if (isSessionNotFoundError(firstErr) && selectedStoredSessionIdRef.current) { // Re-register the session in the gateway and get a fresh live ID. const resumed = await requestGateway<{ session_id: string }>('session.resume', { session_id: selectedStoredSessionIdRef.current @@ -696,230 +739,124 @@ export function usePromptActions({ ] ) + // Queue a handoff of this session to a messaging platform and watch it to + // a terminal state. We only write the request through the gateway; the + // separate `hermes gateway` process performs the actual transfer, so we + // poll `handoff.state` (mirror of the CLI's block-poll) for the result. + const handoffSession = useCallback( + async ( + platform: string, + options?: { onProgress?: (state: string) => void; sessionId?: string } + ): Promise => { + const sid = options?.sessionId || activeSessionIdRef.current + + if (!sid) { + return { error: copy.sessionUnavailable, ok: false } + } + + const target = platform.trim().toLowerCase() + + if (!target) { + return { error: copy.handoff.failed(''), ok: false } + } + + try { + options?.onProgress?.('pending') + await requestGateway('handoff.request', { + platform: target, + session_id: sid + }) + } catch (err) { + return { error: inlineErrorMessage(err, copy.handoff.failed(target)), ok: false } + } + + const deadline = Date.now() + 60_000 + let lastState = 'pending' + + while (Date.now() < deadline) { + await delay(800) + + let record: HandoffStateResponse + + try { + record = await requestGateway('handoff.state', { session_id: sid }) + } catch { + continue + } + + const state = record.state || 'pending' + + if (state !== lastState) { + options?.onProgress?.(state) + lastState = state + } + + if (state === 'completed') { + appendSessionTextMessage(sid, 'system', copy.handoff.systemNote(target)) + notify({ kind: 'success', message: copy.handoff.success(target) }) + 
+ return { ok: true } + } + + if (state === 'failed') { + return { error: record.error || copy.handoff.failed(target), ok: false } + } + } + + const cleanup = await requestGateway('handoff.fail', { + error: copy.handoff.timedOut, + session_id: sid + }).catch(() => null) + + if (cleanup?.state === 'completed') { + appendSessionTextMessage(sid, 'system', copy.handoff.systemNote(target)) + notify({ kind: 'success', message: copy.handoff.success(target) }) + + return { ok: true } + } + + return { error: copy.handoff.timedOut, ok: false } + }, + [activeSessionIdRef, appendSessionTextMessage, copy, requestGateway] + ) + const executeSlashCommand = useCallback( async (rawCommand: string, options?: { sessionId?: string; recordInput?: boolean }) => { - const runSlash = async (commandText: string, sessionHint?: string, recordInput = true): Promise => { - const command = commandText.trim() - const { name, arg } = parseSlashCommand(command) - const normalizedName = name.toLowerCase() + const ensureSessionId = async (sessionHint?: string) => + sessionHint || activeSessionIdRef.current || (await createBackendSessionForSend()) - if (!name) { - const sessionId = sessionHint || activeSessionIdRef.current || (await createBackendSessionForSend()) - - if (sessionId) { - appendSessionTextMessage(sessionId, 'system', copy.emptySlashCommand) - } - - return - } - - if (normalizedName === 'new' || normalizedName === 'reset') { - startFreshSessionDraft() - - return - } - - if (normalizedName === 'branch' || normalizedName === 'fork') { - await branchCurrentSession() - - return - } - - // /yolo maps to the status-bar YOLO control — a per-session approval - // bypass, same scope as the TUI's Shift+Tab. With no session yet we arm - // it locally; the session-create path applies it on the first message. 
- if (normalizedName === 'yolo') { - const sid = sessionHint || activeSessionIdRef.current - const next = !$yoloActive.get() - - if (!sid) { - setYoloActive(next) - notify({ kind: 'success', message: next ? copy.yoloArmed : copy.yoloOff }) - - return - } - - try { - const active = await setSessionYolo(requestGateway, sid, next) - appendSessionTextMessage(sid, 'system', copy.yoloSystem(active)) - } catch { - notify({ kind: 'error', title: copy.yoloTitle, message: copy.yoloToggleFailed }) - } - - return - } - - // /model opens the desktop model picker overlay — the same full - // provider+model picker reachable from the status-bar model button — - // instead of the headless prompt_toolkit modal the slash worker can't - // render. With explicit args (`/model [--provider ...]`) run the - // switch directly through slash.exec so power users can still type it. - if (isModelPickerCommand(`/${normalizedName}`)) { - if (!arg.trim()) { - setModelPickerOpen(true) - - return - } - - const sid = sessionHint || activeSessionIdRef.current || (await createBackendSessionForSend()) - - if (!sid) { - notify({ kind: 'error', title: 'Session unavailable', message: 'Could not create a new session' }) - - return - } - - try { - const result = await requestGateway('slash.exec', { - session_id: sid, - command: command.replace(/^\/+/, '') - }) - - const body = result?.output || `/${name}: model switched` - appendSessionTextMessage( - sid, - 'system', - recordInput ? slashStatusText(command, body) : body - ) - } catch (err) { - appendSessionTextMessage( - sid, - 'system', - `error: ${err instanceof Error ? err.message : String(err)}` - ) - } - - return - } - - if (normalizedName === 'skin' && !sessionHint && !activeSessionIdRef.current) { - notify({ kind: 'success', message: handleSkinCommand(arg) }) - - return - } - - // /profile selects which profile new chats open in — no app relaunch. 
- // A profile is per-session now, so an existing thread can't change its - // profile mid-stream; `/profile ` instead points the next new chat - // (and the current empty draft) at that profile's backend. - if (normalizedName === 'profile') { - const target = arg.trim() - const current = normalizeProfileKey($activeGatewayProfile.get()) - - if (!target) { - notify({ - kind: 'success', - message: copy.profileStatus(current) - }) - - return - } - - try { - const { profiles } = await getProfiles() - const match = profiles.find(profile => profile.name === target) - - if (!match) { - notify({ - kind: 'error', - title: copy.unknownProfile, - message: copy.noProfileNamed(target, profiles.map(profile => profile.name).join(', ')) - }) - - return - } - - const key = normalizeProfileKey(match.name) - - $newChatProfile.set(key) - // Swap the live gateway now so an empty draft sends into this - // profile immediately; an existing thread keeps its own profile. - await ensureGatewayProfile(key) - notify({ kind: 'success', message: copy.newChatsProfile(match.name) }) - } catch (err) { - notifyError(err, copy.setProfileFailed) - } - - return - } - - const sessionId = sessionHint || activeSessionIdRef.current || (await createBackendSessionForSend()) + // Resolve the target session plus a writer for inline slash output, or + // notify + return null when none can be created. Folds the ensure / bail / + // build-renderSlashOutput boilerplate every exec-style handler repeats. 
+ const withSlashOutput = async ( + ctx: SlashActionCtx + ): Promise<{ render: (text: string) => void; sessionId: string } | null> => { + const sessionId = await ensureSessionId(ctx.sessionHint) if (!sessionId) { - notify({ - kind: 'error', - title: copy.sessionUnavailable, - message: copy.createSessionFailed - }) + notify({ kind: 'error', title: copy.sessionUnavailable, message: copy.createSessionFailed }) + return null + } + + const render = (text: string) => + appendSessionTextMessage(sessionId, 'system', ctx.recordInput ? slashStatusText(ctx.command, text) : text) + + return { render, sessionId } + } + + // `exec` commands (and unknown skill / quick commands the backend owns) + // run on the gateway and render their text output inline. This is the only + // path that talks to slash.exec / command.dispatch. + async function runExec(ctx: SlashActionCtx): Promise { + const { arg, command, name } = ctx + const resolved = await withSlashOutput(ctx) + + if (!resolved) { return } - const renderSlashOutput = (text: string) => - appendSessionTextMessage(sessionId, 'system', recordInput ? slashStatusText(command, text) : text) - - // /title renames the session. Route through the gateway's - // `session.title` RPC — the same path the TUI uses — NOT the REST - // renameSession endpoint and NOT the slash worker. - // - // Why not the slash worker: it's a separate HermesCLI subprocess whose - // SQLite write to the shared state.db can silently fail (notably on - // Windows), and it never refreshes the sidebar. - // - // Why not REST renameSession: `sessionId` here is the *runtime* session - // id returned by session.create — it is NOT the stored DB `sessions.id`, - // and session.create deliberately does not persist a DB row until the - // first turn. The REST PATCH endpoint resolves against the sessions - // table, so a runtime id (or a brand-new, not-yet-persisted session) - // 404s with "Session not found" on every platform. See #38508 / #38576. 
- // - // session.title maps the runtime id to the in-memory session, writes - // through the gateway's own DB connection, and QUEUES the title - // (`pending: true`) when the row isn't persisted yet — so it works for a - // fresh chat too. refreshSessions() then pulls the authoritative title - // back into the sidebar. A bare `/title` (no arg) still falls through to - // the worker to display the current title. - if (normalizedName === 'title' && arg) { - try { - const result = await requestGateway('session.title', { - session_id: sessionId, - title: arg - }) - - const finalTitle = (result?.title || arg).trim() - const queued = result?.pending === true - - setSessions(prev => prev.map(s => (s.id === sessionId ? { ...s, title: finalTitle || null } : s))) - await refreshSessions().catch(() => undefined) - renderSlashOutput( - finalTitle - ? `Session title set: ${finalTitle}${queued ? ' (queued while session initializes)' : ''}` - : 'Session title cleared.' - ) - } catch (err) { - renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`) - } - - return - } - - if (normalizedName === 'skin') { - renderSlashOutput(handleSkinCommand(arg)) - - return - } - - if (name === 'help' || name === 'commands') { - try { - const catalog = await requestGateway('commands.catalog', { session_id: sessionId }) - - renderSlashOutput(renderCommandsCatalog(catalog, copy)) - } catch (err) { - renderSlashOutput(`error: ${err instanceof Error ? 
err.message : String(err)}`) - } - - return - } + const { render: renderSlashOutput, sessionId } = resolved if (!isDesktopSlashCommand(name)) { renderSlashOutput(desktopSlashUnavailableMessage(name) || `/${name} is not available in the desktop app.`) @@ -943,11 +880,7 @@ export function usePromptActions({ try { const dispatch = parseCommandDispatch( - await requestGateway('command.dispatch', { - session_id: sessionId, - name, - arg - }) + await requestGateway('command.dispatch', { session_id: sessionId, name, arg }) ) if (!dispatch) { @@ -994,6 +927,261 @@ export function usePromptActions({ } } + // One handler per `action` command. Adding a desktop-native command is a + // registry row in desktop-slash-commands.ts plus an entry here — never a + // new branch in a dispatch ladder. + const actionHandlers: Record Promise> = { + new: async () => { + startFreshSessionDraft() + }, + branch: async () => { + await branchCurrentSession() + }, + // /yolo maps to the status-bar YOLO control — a per-session approval + // bypass, same scope as the TUI's Shift+Tab. With no session yet we arm + // it locally; the session-create path applies it on the first message. + yolo: async ({ sessionHint }) => { + const sid = sessionHint || activeSessionIdRef.current + const next = !$yoloActive.get() + + if (!sid) { + setYoloActive(next) + notify({ kind: 'success', message: next ? copy.yoloArmed : copy.yoloOff }) + + return + } + + try { + const active = await setSessionYolo(requestGateway, sid, next) + appendSessionTextMessage(sid, 'system', copy.yoloSystem(active)) + } catch { + notify({ kind: 'error', title: copy.yoloTitle, message: copy.yoloToggleFailed }) + } + }, + // /handoff hands this session to a messaging platform. The platform is + // completed inline in the slash popover (backend _handoff_completions), + // so there is no overlay: `/handoff ` runs the desktop's own + // handoff RPC. cli_only on the backend, so it must not reach slash.exec. 
+ handoff: async ({ arg, command, recordInput, sessionHint }) => { + const platform = arg.trim() + + if (!platform) { + notify({ kind: 'success', message: copy.handoff.pickPlatform }) + + return + } + + const sid = sessionHint || activeSessionIdRef.current + + if (!sid) { + notify({ kind: 'error', title: copy.sessionUnavailable, message: copy.createSessionFailed }) + + return + } + + const result = await handoffSession(platform, { sessionId: sid }) + + if (!result.ok && result.error) { + appendSessionTextMessage(sid, 'system', recordInput ? slashStatusText(command, result.error) : result.error) + } + }, + // /profile selects which profile new chats open in — no app relaunch. + // A profile is per-session now, so an existing thread can't change its + // profile mid-stream; `/profile ` points the next new chat (and + // the current empty draft) at that profile's backend. + profile: async ({ arg }) => { + const target = arg.trim() + const current = normalizeProfileKey($activeGatewayProfile.get()) + + if (!target) { + notify({ kind: 'success', message: copy.profileStatus(current) }) + + return + } + + try { + const { profiles } = await getProfiles() + const match = profiles.find(profile => profile.name === target) + + if (!match) { + notify({ + kind: 'error', + title: copy.unknownProfile, + message: copy.noProfileNamed(target, profiles.map(profile => profile.name).join(', ')) + }) + + return + } + + const key = normalizeProfileKey(match.name) + + $newChatProfile.set(key) + await ensureGatewayProfile(key) + notify({ kind: 'success', message: copy.newChatsProfile(match.name) }) + } catch (err) { + notifyError(err, copy.setProfileFailed) + } + }, + skin: async ({ arg, command, recordInput, sessionHint }) => { + const sid = sessionHint || activeSessionIdRef.current + const message = handleSkinCommand(arg) + + // No session to print into yet — surface it as a toast instead of + // spinning up a backend session just to change the theme. 
+ if (!sid) { + notify({ kind: 'success', message }) + + return + } + + appendSessionTextMessage(sid, 'system', recordInput ? slashStatusText(command, message) : message) + }, + // /title renames via the gateway's session.title RPC — the same + // path the TUI uses, NOT REST renameSession (which 404s on runtime ids) + // nor the slash worker (whose DB write can silently fail). Bare /title + // shows the current title, which the worker owns, so delegate to exec. + title: async ctx => { + if (!ctx.arg) { + await runExec(ctx) + + return + } + + const resolved = await withSlashOutput(ctx) + + if (!resolved) { + return + } + + const { render: renderSlashOutput, sessionId } = resolved + const { arg } = ctx + + try { + const result = await requestGateway('session.title', { + session_id: sessionId, + title: arg + }) + + const finalTitle = (result?.title || arg).trim() + const queued = result?.pending === true + + setSessions(prev => prev.map(s => (s.id === sessionId ? { ...s, title: finalTitle || null } : s))) + await refreshSessions().catch(() => undefined) + renderSlashOutput( + finalTitle + ? `Session title set: ${finalTitle}${queued ? ' (queued while session initializes)' : ''}` + : 'Session title cleared.' + ) + } catch (err) { + renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`) + } + }, + help: async ctx => { + const resolved = await withSlashOutput(ctx) + + if (!resolved) { + return + } + + const { render: renderSlashOutput, sessionId } = resolved + + try { + const catalog = await requestGateway('commands.catalog', { session_id: sessionId }) + + renderSlashOutput(renderCommandsCatalog(catalog, copy)) + } catch (err) { + renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`) + } + } + } + + // Picker commands open a desktop overlay; a typed arg is resolved by that + // picker so the command never dead-ends or falls through to the backend. 
+ const openPicker = async (pickerId: DesktopPickerId, ctx: SlashActionCtx): Promise => { + if (pickerId === 'model') { + if (!ctx.arg.trim()) { + setModelPickerOpen(true) + + return + } + + // Power users can still type `/model ` — run it on the backend. + await runExec(ctx) + + return + } + + // session picker — /resume, /sessions, /switch + const query = ctx.arg.trim() + + if (!query) { + setSessionPickerOpen(true) + + return + } + + const sessions = $sessions.get() + const lower = query.toLowerCase() + + const match = + sessions.find(session => session.id === query) || + sessions.find(session => sessionTitle(session).toLowerCase().includes(lower)) || + sessions.find(session => (session.preview ?? '').toLowerCase().includes(lower)) + + if (!match) { + if (isSessionIdCandidate(query)) { + await resumeStoredSession(query) + + return + } + + notify({ kind: 'error', message: copy.resumeFailed }) + + return + } + + await resumeStoredSession(match.id) + } + + // The whole dispatcher: resolve the command's desktop surface, then act on + // its kind. No per-command ladder — behavior lives in the registry. 
+ async function runSlash(commandText: string, sessionHint?: string, recordInput = true): Promise { + const command = commandText.trim() + const { name, arg } = parseSlashCommand(command) + + if (!name) { + const sessionId = await ensureSessionId(sessionHint) + + if (sessionId) { + appendSessionTextMessage(sessionId, 'system', copy.emptySlashCommand) + } + + return + } + + const ctx: SlashActionCtx = { arg, command, name, recordInput, sessionHint } + const surface = resolveDesktopCommand(`/${name}`)?.surface + + switch (surface?.kind) { + case 'unavailable': { + const resolved = await withSlashOutput(ctx) + resolved?.render(desktopSlashUnavailableMessage(name) || `/${name} is not available in the desktop app.`) + + return + } + + case 'picker': + return openPicker(surface.picker, ctx) + + case 'action': + return actionHandlers[surface.action](ctx) + + default: + // exec spec, or an unknown skill / quick command the backend owns. + return runExec(ctx) + } + } + await runSlash(rawCommand, options?.sessionId, options?.recordInput ?? 
true) }, [ @@ -1004,8 +1192,10 @@ export function usePromptActions({ copy, createBackendSessionForSend, handleSkinCommand, + handoffSession, refreshSessions, requestGateway, + resumeStoredSession, startFreshSessionDraft, submitPromptText ] @@ -1087,11 +1277,39 @@ export function usePromptActions({ try { await requestGateway('session.interrupt', { session_id: sessionId }) } catch (err) { + let stopError = err + + if (isSessionNotFoundError(err) && selectedStoredSessionIdRef.current) { + try { + const resumed = await requestGateway<{ session_id: string }>('session.resume', { + session_id: selectedStoredSessionIdRef.current + }) + const recoveredId = resumed?.session_id + + if (recoveredId) { + activeSessionIdRef.current = recoveredId + await requestGateway('session.interrupt', { session_id: recoveredId }) + + return + } + } catch (resumeErr) { + stopError = resumeErr + } + } + setMutableRef(busyRef, false) setBusy(false) - notifyError(err, copy.stopFailed) + notifyError(stopError, copy.stopFailed) } - }, [activeSessionId, activeSessionIdRef, busyRef, copy.stopFailed, requestGateway, updateSessionState]) + }, [ + activeSessionId, + activeSessionIdRef, + busyRef, + copy.stopFailed, + requestGateway, + selectedStoredSessionIdRef, + updateSessionState + ]) // Steer = nudge the live turn without interrupting: the gateway appends the // text to the next tool result so the model reads it on its next iteration @@ -1314,6 +1532,7 @@ export function usePromptActions({ cancelRun, editMessage, handleThreadMessagesChange, + handoffSession, reloadFromMessage, steerPrompt, submitText, diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index 51ee90924ae..9980c90809d 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -8,7 +8,6 @@ import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChat import { 
normalizePersonalityValue } from '@/lib/chat-runtime' import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images' import { setSessionYolo } from '@/lib/yolo-session' -import { clearComposerAttachments, clearComposerDraft } from '@/store/composer' import { clearQueuedPrompts } from '@/store/composer-queue' import { $pinnedSessionIds } from '@/store/layout' import { clearNotifications, notify, notifyError } from '@/store/notifications' @@ -19,7 +18,6 @@ import { $messages, $sessions, $yoloActive, - workspaceCwdForNewSession, sessionPinId, setActiveSessionId, setAwaitingResponse, @@ -41,10 +39,11 @@ import { setSessionStartedAt, setSessionsTotal, setTurnStartedAt, - setYoloActive + setYoloActive, + workspaceCwdForNewSession } from '@/store/session' import { reportBackendContract } from '@/store/updates' -import type { SessionCreateResponse, SessionInfo, SessionResumeResponse, UsageStats } from '@/types/hermes' +import type { SessionCreateResponse, SessionInfo, SessionResumeResponse, SessionRuntimeInfo, UsageStats } from '@/types/hermes' import { NEW_CHAT_ROUTE, sessionRoute, SETTINGS_ROUTE } from '../../routes' import type { ClientSessionState, SidebarNavItem } from '../../types' @@ -210,16 +209,27 @@ function patchSessionWorkspace(sessionId: string, cwd: string | undefined) { setSessions(prev => prev.map(session => (session.id === sessionId ? 
{ ...session, cwd } : session))) } -function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Partial< - Pick -> | null { +type SessionRuntimeStatePatch = Partial< + Pick< + ClientSessionState, + | 'branch' + | 'cwd' + | 'fast' + | 'model' + | 'personality' + | 'provider' + | 'reasoningEffort' + | 'serviceTier' + | 'yolo' + > +> + +function applyRuntimeInfo(info: SessionRuntimeInfo | undefined): SessionRuntimeStatePatch | null { if (!info) { return null } - const sessionState: Partial< - Pick - > = {} + const sessionState: SessionRuntimeStatePatch = {} reportBackendContract(info.desktop_contract) @@ -227,12 +237,12 @@ function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Part requestDesktopOnboarding(info.credential_warning) } - if (info.model) { + if (typeof info.model === 'string') { setCurrentModel(info.model) sessionState.model = info.model } - if (info.provider) { + if (typeof info.provider === 'string') { setCurrentProvider(info.provider) sessionState.provider = info.provider } @@ -248,7 +258,9 @@ function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Part } if (typeof info.personality === 'string') { - setCurrentPersonality(normalizePersonalityValue(info.personality)) + const personality = normalizePersonalityValue(info.personality) + setCurrentPersonality(personality) + sessionState.personality = personality } if (typeof info.reasoning_effort === 'string') { @@ -278,6 +290,16 @@ function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Part return sessionState } +function applyStoredSessionPreviewRuntimeInfo(stored: { model?: null | string } | undefined) { + setCurrentModel(stored?.model || '') + setCurrentProvider('') + setCurrentReasoningEffort('') + setCurrentServiceTier('') + setCurrentFastMode(false) + setYoloActive(false) + setCurrentPersonality('') +} + export function useSessionActions({ activeSessionId, activeSessionIdRef, @@ -329,8 +351,7 @@ export function 
useSessionActions({ setYoloActive(false) setCurrentCwd(workspaceCwdForNewSession()) setCurrentBranch('') - clearComposerDraft() - clearComposerAttachments() + // Never clear the composer here — ChatBar's per-thread draft swap owns it. setFreshDraftReady(true) }, [activeSessionIdRef, busyRef, navigate, selectedStoredSessionIdRef] @@ -352,11 +373,13 @@ export function useSessionActions({ // Pass the owning profile so a new chat under a non-launch profile (global // remote mode) builds its agent + persists against THAT profile's home/db. const newChatProfile = $newChatProfile.get() + const created = await requestGateway('session.create', { cols: 96, ...(cwd && { cwd }), ...(newChatProfile ? { profile: newChatProfile } : {}) }) + const stored = created.stored_session_id ?? null if ( @@ -465,18 +488,29 @@ export function useSessionActions({ const cachedState = cachedRuntimeId && sessionStateByRuntimeIdRef.current.get(cachedRuntimeId) if (cachedRuntimeId && cachedState) { + const stored = $sessions.get().find(session => session.id === storedSessionId) + const cachedViewState = + !cachedState.model && stored?.model != null + ? 
{ + ...cachedState, + model: stored.model || '' + } + : cachedState + + if (cachedViewState !== cachedState) { + sessionStateByRuntimeIdRef.current.set(cachedRuntimeId, cachedViewState) + } + setFreshDraftReady(false) clearNotifications() setSelectedStoredSessionId(storedSessionId) selectedStoredSessionIdRef.current = storedSessionId setActiveSessionId(cachedRuntimeId) activeSessionIdRef.current = cachedRuntimeId - syncSessionStateToView(cachedRuntimeId, cachedState) - setCurrentCwd(cachedState.cwd) - setCurrentBranch(cachedState.branch) + syncSessionStateToView(cachedRuntimeId, cachedViewState) + setCurrentCwd(cachedViewState.cwd) + setCurrentBranch(cachedViewState.branch) setSessionStartedAt(Date.now()) - clearComposerDraft() - clearComposerAttachments() try { const usage = await requestGateway('session.usage', { session_id: cachedRuntimeId }) @@ -516,6 +550,7 @@ export function useSessionActions({ selectedStoredSessionIdRef.current = storedSessionId setSessionStartedAt(Date.now()) const stored = $sessions.get().find(session => session.id === storedSessionId) + applyStoredSessionPreviewRuntimeInfo(stored) if (stored) { setCurrentUsage(current => ({ @@ -606,8 +641,6 @@ export function useSessionActions({ }), storedSessionId ) - clearComposerDraft() - clearComposerAttachments() } catch (err) { if (!isCurrentResume()) { return @@ -730,8 +763,6 @@ export function useSessionActions({ selectedStoredSessionIdRef.current = routedSessionId navigate(sessionRoute(routedSessionId)) - clearComposerDraft() - clearComposerAttachments() const runtimeInfo = applyRuntimeInfo(branched.info) patchSessionWorkspace(routedSessionId, runtimeInfo?.cwd) @@ -872,6 +903,12 @@ export function useSessionActions({ try { await setSessionArchived(storedSessionId, true, archived?.profile) + // A sidebar refresh can race the optimistic removal while the PATCH is + // in flight and briefly reinsert the still-unarchived backend row. 
Win + // that race after the mutation succeeds so right-click → Archive does + // not appear to do nothing until the next full refresh. + setSessions(prev => prev.filter(s => s.id !== storedSessionId)) + $pinnedSessionIds.set($pinnedSessionIds.get().filter(id => id !== storedSessionId && id !== archivedPinId)) notify({ durationMs: 2_000, kind: 'success', message: copy.archived }) } catch (err) { if (archived) { diff --git a/apps/desktop/src/app/session/hooks/use-session-state-cache.test.tsx b/apps/desktop/src/app/session/hooks/use-session-state-cache.test.tsx index e865205d828..e2a97358273 100644 --- a/apps/desktop/src/app/session/hooks/use-session-state-cache.test.tsx +++ b/apps/desktop/src/app/session/hooks/use-session-state-cache.test.tsx @@ -2,7 +2,20 @@ import { act, cleanup, render } from '@testing-library/react' import type { MutableRefObject } from 'react' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' -import { $turnStartedAt, setTurnStartedAt } from '@/store/session' +import { + $currentFastMode, + $currentModel, + $currentProvider, + $currentReasoningEffort, + $currentServiceTier, + $turnStartedAt, + setCurrentFastMode, + setCurrentModel, + setCurrentProvider, + setCurrentReasoningEffort, + setCurrentServiceTier, + setTurnStartedAt +} from '@/store/session' import { useSessionStateCache } from './use-session-state-cache' @@ -46,12 +59,22 @@ describe('useSessionStateCache — per-session turn timer', () => { return null as unknown as number }) setTurnStartedAt(null) + setCurrentModel('') + setCurrentProvider('') + setCurrentReasoningEffort('') + setCurrentServiceTier('') + setCurrentFastMode(false) }) afterEach(() => { cleanup() vi.restoreAllMocks() setTurnStartedAt(null) + setCurrentModel('') + setCurrentProvider('') + setCurrentReasoningEffort('') + setCurrentServiceTier('') + setCurrentFastMode(false) }) it("keeps a background session's running turn clock and never mirrors it to the view", () => { @@ -115,4 +138,78 @@ 
describe('useSessionStateCache — per-session turn timer', () => { }) expect($turnStartedAt.get()).toBeNull() }) + + it('mirrors the focused session model metadata when switching from a cached session', () => { + let cache!: Cache + const { rerender } = render( + (cache = c)} selectedStoredSessionId="fg-stored" /> + ) + + act(() => { + cache.updateSessionState( + 'bg-runtime', + state => ({ + ...state, + fast: true, + model: 'anthropic/claude-opus-4.8', + provider: 'anthropic', + reasoningEffort: 'high', + serviceTier: 'priority' + }), + 'bg-stored' + ) + }) + + // Background metadata is cached but must not bleed into the visible statusbar. + expect($currentModel.get()).toBe('') + expect($currentReasoningEffort.get()).toBe('') + expect($currentFastMode.get()).toBe(false) + + rerender( (cache = c)} selectedStoredSessionId="bg-stored" />) + + const bgState = cache.sessionStateByRuntimeIdRef.current.get('bg-runtime') + expect(bgState).toBeTruthy() + + act(() => { + cache.syncSessionStateToView('bg-runtime', bgState!) 
+ }) + + expect($currentModel.get()).toBe('anthropic/claude-opus-4.8') + expect($currentProvider.get()).toBe('anthropic') + expect($currentReasoningEffort.get()).toBe('high') + expect($currentServiceTier.get()).toBe('priority') + expect($currentFastMode.get()).toBe(true) + }) + + it('clears stale model metadata when the newly focused session has no cached value', () => { + setCurrentModel('previous-model') + setCurrentProvider('previous-provider') + setCurrentReasoningEffort('high') + setCurrentServiceTier('priority') + setCurrentFastMode(true) + + let cache!: Cache + const { rerender } = render( + (cache = c)} selectedStoredSessionId="fg-stored" /> + ) + + act(() => { + cache.updateSessionState('bg-runtime', state => ({ ...state }), 'bg-stored') + }) + + rerender( (cache = c)} selectedStoredSessionId="bg-stored" />) + + const bgState = cache.sessionStateByRuntimeIdRef.current.get('bg-runtime') + expect(bgState).toBeTruthy() + + act(() => { + cache.syncSessionStateToView('bg-runtime', bgState!) + }) + + expect($currentModel.get()).toBe('') + expect($currentProvider.get()).toBe('') + expect($currentReasoningEffort.get()).toBe('') + expect($currentServiceTier.get()).toBe('') + expect($currentFastMode.get()).toBe(false) + }) }) diff --git a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts index 72930561bae..a08eb1f16c9 100644 --- a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts +++ b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts @@ -11,6 +11,7 @@ import { noteSessionActivity, setCurrentFastMode, setCurrentModel, + setCurrentPersonality, setCurrentProvider, setCurrentReasoningEffort, setCurrentServiceTier, @@ -53,6 +54,16 @@ interface SessionStateCacheOptions { setMessages: (messages: ChatMessage[]) => void } +function syncRuntimeMetadataToView(state: ClientSessionState) { + setCurrentModel(state.model ?? '') + setCurrentProvider(state.provider ?? 
'') + setCurrentReasoningEffort(state.reasoningEffort ?? '') + setCurrentServiceTier(state.serviceTier ?? '') + setCurrentFastMode(state.fast ?? false) + setYoloActive(state.yolo ?? false) + setCurrentPersonality(state.personality ?? '') +} + export function useSessionStateCache({ activeSessionId, busyRef, @@ -137,12 +148,7 @@ export function useSessionStateCache({ setMessages(nextMessages) } - setCurrentModel(pending.state.model) - setCurrentProvider(pending.state.provider) - setCurrentReasoningEffort(pending.state.reasoningEffort) - setCurrentServiceTier(pending.state.serviceTier) - setCurrentFastMode(pending.state.fast) - setYoloActive(pending.state.yolo) + syncRuntimeMetadataToView(pending.state) setBusy(pending.state.busy) setMutableRef(busyRef, pending.state.busy) setAwaitingResponse(pending.state.awaitingResponse) @@ -167,6 +173,7 @@ export function useSessionStateCache({ return } + syncRuntimeMetadataToView(state) pendingViewStateRef.current = { sessionId, state } // Terminal / attention transitions (turn finished, error, or the agent is diff --git a/apps/desktop/src/app/types.ts b/apps/desktop/src/app/types.ts index 672beb9a089..5082b70406d 100644 --- a/apps/desktop/src/app/types.ts +++ b/apps/desktop/src/app/types.ts @@ -61,6 +61,26 @@ export interface SessionTitleResponse { session_key?: string } +export interface HandoffRequestResponse { + queued?: boolean + session_key?: string + platform?: string + // Human-readable home channel name for the destination platform. 
+ home_name?: string +} + +export interface HandoffStateResponse { + // '' | 'pending' | 'running' | 'completed' | 'failed' + state?: string + platform?: string + error?: string +} + +export interface HandoffFailResponse { + failed?: boolean + state?: string +} + export interface ExecCommandDispatchResponse { type: 'exec' | 'plugin' output?: string @@ -109,6 +129,7 @@ export interface ClientSessionState { serviceTier: string fast: boolean yolo: boolean + personality: string busy: boolean awaitingResponse: boolean streamId: string | null diff --git a/apps/desktop/src/components/assistant-ui/directive-text.tsx b/apps/desktop/src/components/assistant-ui/directive-text.tsx index 79f772d450f..b870913b012 100644 --- a/apps/desktop/src/components/assistant-ui/directive-text.tsx +++ b/apps/desktop/src/components/assistant-ui/directive-text.tsx @@ -63,7 +63,7 @@ export function directiveIconSvg(type: string) { return `${inner}` } -export function directiveIconElement(type: string) { +function iconElementFromPaths(paths: string[]) { const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg') svg.setAttribute('class', 'size-3 shrink-0 opacity-80') svg.setAttribute('fill', 'none') @@ -74,7 +74,7 @@ export function directiveIconElement(type: string) { svg.setAttribute('viewBox', '0 0 24 24') svg.setAttribute('xmlns', 'http://www.w3.org/2000/svg') - for (const d of iconPathsFor(type)) { + for (const d of paths) { const path = document.createElementNS('http://www.w3.org/2000/svg', 'path') path.setAttribute('d', d) svg.append(path) @@ -83,6 +83,46 @@ export function directiveIconElement(type: string) { return svg } +export function directiveIconElement(type: string) { + return iconElementFromPaths(iconPathsFor(type)) +} + +/** Per-type slash-command pill styling. The composer inserts these chips when a + * command is picked; the kind drives a theme-aware accent so commands, skills, + * and themes read distinctly (Cursor-style). 
*/ +export type SlashChipKind = 'command' | 'skill' | 'theme' + +const SLASH_ICON_PATHS: Record = { + command: ['M5 7l5 5l-5 5', 'M12 19l7 0'], + skill: ['M13 3l0 7l6 0l-8 11l0 -7l-6 0l8 -11'], + theme: [ + 'M3 21v-4a4 4 0 1 1 4 4h-4', + 'M21 3a16 16 0 0 0 -12.8 10.2', + 'M21 3a16 16 0 0 1 -10.2 12.8', + 'M10.6 9a9 9 0 0 1 4.4 4.4' + ] +} + +const SLASH_CHIP_VARIANT: Record = { + command: + 'bg-[color-mix(in_srgb,var(--ui-accent)_14%,transparent)] text-[color-mix(in_srgb,var(--ui-accent)_82%,var(--foreground))]', + skill: + 'bg-[color-mix(in_srgb,var(--ui-warm)_18%,transparent)] text-[color-mix(in_srgb,var(--ui-warm)_82%,var(--foreground))]', + theme: + 'bg-[color-mix(in_srgb,var(--ui-accent-secondary)_16%,transparent)] text-[color-mix(in_srgb,var(--ui-accent-secondary)_82%,var(--foreground))]' +} + +export const SLASH_CHIP_BASE_CLASS = + 'mx-0.5 inline-flex max-w-64 items-center gap-1 rounded px-1.5 py-0.5 align-middle text-[0.86em] font-medium leading-none' + +export function slashChipClass(kind: SlashChipKind): string { + return `${SLASH_CHIP_BASE_CLASS} ${SLASH_CHIP_VARIANT[kind]}` +} + +export function slashIconElement(kind: SlashChipKind) { + return iconElementFromPaths(SLASH_ICON_PATHS[kind]) +} + const DirectiveIcon: FC<{ type: string }> = ({ type }) => ( { const slashStatus = text.match(SLASH_STATUS_RE) if (slashStatus?.groups) { + const output = slashStatus.groups.output.trim() + // Single-line status (e.g. "model → x") reads best centered inline; padded + // multiline output (catalogs, usage tables) needs left-aligned, wider room + // or the column alignment breaks. + const multiline = output.includes('\n') + return ( {slashStatus.groups.command} - · - + {multiline ? 
( + + ) : ( + <> + · + + + )} ) } + const multiline = text.includes('\n') + return ( diff --git a/apps/desktop/src/components/session-picker.tsx b/apps/desktop/src/components/session-picker.tsx new file mode 100644 index 00000000000..048fa32a208 --- /dev/null +++ b/apps/desktop/src/components/session-picker.tsx @@ -0,0 +1,108 @@ +import { useQuery } from '@tanstack/react-query' +import { Dialog as DialogPrimitive } from 'radix-ui' +import { useEffect, useMemo, useState } from 'react' + +import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from '@/components/ui/command' +import { listSessions } from '@/hermes' +import { useI18n } from '@/i18n' +import { sessionTitle } from '@/lib/chat-runtime' +import { Check, MessageCircle } from '@/lib/icons' +import { cn } from '@/lib/utils' + +interface SessionPickerDialogProps { + /** Stored id of the session currently open, so it can be flagged in the list. */ + activeStoredSessionId?: string | null + onOpenChange: (open: boolean) => void + onResume: (storedSessionId: string) => void + open: boolean +} + +/** + * Desktop equivalent of the TUI's sessions overlay (`/resume`, `/sessions`, + * `/switch`): a focused, type-to-filter list of recent sessions that resumes + * the picked one. Mirrors the command palette's cmdk surface but scoped to + * sessions only, so `/resume` feels first-class instead of falling through to + * the headless slash worker (which can't render the picker). + */ +export function SessionPickerDialog({ + activeStoredSessionId, + onOpenChange, + onResume, + open +}: SessionPickerDialogProps) { + const { t } = useI18n() + const [search, setSearch] = useState('') + + const sessionsQuery = useQuery({ + enabled: open, + queryFn: () => listSessions(200, 1, 'exclude'), + queryKey: ['session-picker', 'sessions'] + }) + + useEffect(() => { + if (!open) { + setSearch('') + } + }, [open]) + + const sessions = useMemo(() => sessionsQuery.data?.sessions ?? 
[], [sessionsQuery.data]) + + return ( + + + + + {t.commandCenter.sections.sessions} + + + + {t.commandCenter.noResults} + + {sessions.map(session => { + const title = sessionTitle(session) + const preview = session.preview?.trim() + + return ( + { + onResume(session.id) + onOpenChange(false) + }} + value={`${title} ${preview ?? ''} ${session.id}`} + > + + + {title} + {preview ? ( + {preview} + ) : null} + + + + ) + })} + + + + + + + ) +} diff --git a/apps/desktop/src/i18n/en.ts b/apps/desktop/src/i18n/en.ts index 5aaf090d7e6..5a18d47efa9 100644 --- a/apps/desktop/src/i18n/en.ts +++ b/apps/desktop/src/i18n/en.ts @@ -1778,7 +1778,14 @@ export const en: Translations = { clipboard: 'Clipboard', noClipboardImage: 'No image found in clipboard', clipboardPasteFailed: 'Clipboard paste failed', - dropFiles: 'Drop files' + dropFiles: 'Drop files', + handoff: { + pickPlatform: 'Choose a destination', + success: platform => `Handed off to ${platform}. Resume here anytime.`, + systemNote: platform => `↻ Handed off to ${platform} — resume here anytime.`, + failed: error => `Handoff failed: ${error}`, + timedOut: 'Timed out waiting for the gateway. Is `hermes gateway` running?' + } }, errors: { diff --git a/apps/desktop/src/i18n/ja.ts b/apps/desktop/src/i18n/ja.ts index 956788067ed..36634a6c025 100644 --- a/apps/desktop/src/i18n/ja.ts +++ b/apps/desktop/src/i18n/ja.ts @@ -1914,7 +1914,14 @@ export const ja = defineLocale({ clipboard: 'クリップボード', noClipboardImage: 'クリップボードに画像が見つかりません', clipboardPasteFailed: 'クリップボードからの貼り付けに失敗しました', - dropFiles: 'ファイルをドロップ' + dropFiles: 'ファイルをドロップ', + handoff: { + pickPlatform: '送信先を選択', + success: platform => `${platform} に引き継ぎました。いつでもここで再開できます。`, + systemNote: platform => `↻ ${platform} に引き継ぎました — いつでもここで再開できます。`, + failed: error => `引き継ぎに失敗しました: ${error}`, + timedOut: 'ゲートウェイの待機がタイムアウトしました。`hermes gateway` は起動していますか?' 
+ } }, errors: { diff --git a/apps/desktop/src/i18n/types.ts b/apps/desktop/src/i18n/types.ts index 77424e426ac..7a10e5f3d1c 100644 --- a/apps/desktop/src/i18n/types.ts +++ b/apps/desktop/src/i18n/types.ts @@ -1437,6 +1437,13 @@ export interface Translations { noClipboardImage: string clipboardPasteFailed: string dropFiles: string + handoff: { + pickPlatform: string + success: (platform: string) => string + systemNote: (platform: string) => string + failed: (error: string) => string + timedOut: string + } } errors: { diff --git a/apps/desktop/src/i18n/zh-hant.ts b/apps/desktop/src/i18n/zh-hant.ts index 9f045c4d022..830dc475134 100644 --- a/apps/desktop/src/i18n/zh-hant.ts +++ b/apps/desktop/src/i18n/zh-hant.ts @@ -1873,7 +1873,14 @@ export const zhHant = defineLocale({ clipboard: '剪貼簿', noClipboardImage: '剪貼簿中沒有圖片', clipboardPasteFailed: '剪貼簿貼上失敗', - dropFiles: '拖曳檔案' + dropFiles: '拖曳檔案', + handoff: { + pickPlatform: '選擇目標平台', + success: platform => `已移交到 ${platform}。隨時可在此處恢復。`, + systemNote: platform => `↻ 已移交到 ${platform} — 隨時可在此處恢復。`, + failed: error => `移交失敗:${error}`, + timedOut: '等待閘道逾時。`hermes gateway` 是否正在執行?' + } }, errors: { diff --git a/apps/desktop/src/i18n/zh.ts b/apps/desktop/src/i18n/zh.ts index f6b119a2777..dbad00cf5d1 100644 --- a/apps/desktop/src/i18n/zh.ts +++ b/apps/desktop/src/i18n/zh.ts @@ -1956,7 +1956,14 @@ export const zh: Translations = { clipboard: '剪贴板', noClipboardImage: '剪贴板中没有图片', clipboardPasteFailed: '粘贴剪贴板失败', - dropFiles: '拖放文件' + dropFiles: '拖放文件', + handoff: { + pickPlatform: '选择目标平台', + success: platform => `已移交到 ${platform}。随时可在此处恢复。`, + systemNote: platform => `↻ 已移交到 ${platform} — 随时可在此处恢复。`, + failed: error => `移交失败:${error}`, + timedOut: '等待网关超时。`hermes gateway` 是否正在运行?' 
+ } }, errors: { diff --git a/apps/desktop/src/lib/ansi.ts b/apps/desktop/src/lib/ansi.ts index f30987ec605..c7770e8b777 100644 --- a/apps/desktop/src/lib/ansi.ts +++ b/apps/desktop/src/lib/ansi.ts @@ -173,3 +173,14 @@ export function hasAnsiCodes(input: string): boolean { // eslint-disable-next-line no-control-regex return /\x1b\[/.test(input) } + +/** Remove all ANSI escape sequences, returning plain text. Use when output is + * rendered as text (e.g. chat system messages) rather than styled segments — + * otherwise the ESC byte is invisible and the `[1;31m…` payload leaks through. */ +export function stripAnsi(input: string): string { + if (!input) { + return input + } + + return input.replace(OTHER_ESCAPE_RE, '').replace(CSI_RE, '') +} diff --git a/apps/desktop/src/lib/chat-runtime.ts b/apps/desktop/src/lib/chat-runtime.ts index 68beb83a043..ac5273a2236 100644 --- a/apps/desktop/src/lib/chat-runtime.ts +++ b/apps/desktop/src/lib/chat-runtime.ts @@ -46,6 +46,7 @@ export function createClientSessionState( serviceTier: '', fast: false, yolo: false, + personality: '', busy: false, awaitingResponse: false, streamId: null, diff --git a/apps/desktop/src/lib/desktop-slash-commands.test.ts b/apps/desktop/src/lib/desktop-slash-commands.test.ts index de0e72ec28b..d37738173ce 100644 --- a/apps/desktop/src/lib/desktop-slash-commands.test.ts +++ b/apps/desktop/src/lib/desktop-slash-commands.test.ts @@ -7,7 +7,9 @@ import { filterDesktopCommandsCatalog, isDesktopSlashCommand, isDesktopSlashSuggestion, - isModelPickerCommand + isModelPickerCommand, + isPickerCommand, + resolveDesktopCommand } from './desktop-slash-commands' describe('desktop slash command curation', () => { @@ -38,6 +40,18 @@ describe('desktop slash command curation', () => { expect(isDesktopSlashSuggestion('/curator')).toBe(false) }) + it('surfaces /tools, /save, and /personality on the desktop', () => { + expect(isDesktopSlashSuggestion('/tools')).toBe(true) + 
expect(isDesktopSlashSuggestion('/save')).toBe(true) + expect(isDesktopSlashSuggestion('/personality')).toBe(true) + expect(isDesktopSlashCommand('/tools')).toBe(true) + expect(isDesktopSlashCommand('/save')).toBe(true) + expect(isDesktopSlashCommand('/personality')).toBe(true) + expect(desktopSlashUnavailableMessage('/tools')).toBeNull() + expect(desktopSlashUnavailableMessage('/save')).toBeNull() + expect(desktopSlashUnavailableMessage('/personality')).toBeNull() + }) + it('allows aliases to execute without cluttering the popover', () => { expect(isDesktopSlashSuggestion('/reset')).toBe(false) expect(isDesktopSlashCommand('/reset')).toBe(true) @@ -74,6 +88,24 @@ describe('desktop slash command curation', () => { ['/new', 'Start a new desktop chat'], ['/ship-it', 'Run release checklist'] ]) + // skill_count is recomputed from the filtered output (only /ship-it is an + // extension command — /new is a built-in) so the /help footer matches what + // the user actually sees rather than echoing the unfiltered backend total. 
+ expect(filtered.skill_count).toBe(1) + }) + + it('recomputes skill_count to reflect only extensions surfaced on desktop', () => { + const filtered = filterDesktopCommandsCatalog({ + pairs: [ + ['/new', 'Start a new session'], + ['/clear', 'Clear terminal screen'], + ['/gif-search', 'Search for a gif'], + ['/ship-it', 'Run release checklist'] + ], + skill_count: 12 + }) + + expect(filtered.pairs?.map(([cmd]) => cmd)).toEqual(['/new', '/gif-search', '/ship-it']) expect(filtered.skill_count).toBe(2) }) @@ -123,4 +155,26 @@ describe('desktop slash command curation', () => { expect(isModelPickerCommand('/new')).toBe(false) expect(isModelPickerCommand('/skills')).toBe(false) }) + + it('gives /resume (and its aliases) a first-class session picker surface', () => { + expect(isPickerCommand('/resume', 'session')).toBe(true) + expect(isPickerCommand('/sessions', 'session')).toBe(true) + expect(isPickerCommand('/switch', 'session')).toBe(true) + // Unlike /model, /resume shows in the popover; its aliases stay hidden. + expect(isDesktopSlashSuggestion('/resume')).toBe(true) + expect(isDesktopSlashSuggestion('/sessions')).toBe(false) + expect(isDesktopSlashCommand('/switch')).toBe(true) + // The session picker is distinct from the model picker. + expect(isModelPickerCommand('/resume')).toBe(false) + }) + + it('resolves commands and aliases to their declared surface', () => { + expect(resolveDesktopCommand('/new')?.surface).toEqual({ kind: 'action', action: 'new' }) + expect(resolveDesktopCommand('/reset')?.surface).toEqual({ kind: 'action', action: 'new' }) + expect(resolveDesktopCommand('/resume')?.surface).toEqual({ kind: 'picker', picker: 'session' }) + expect(resolveDesktopCommand('/usage')?.surface).toEqual({ kind: 'exec' }) + expect(resolveDesktopCommand('/clear')?.surface).toEqual({ kind: 'unavailable', reason: 'terminal' }) + // Skill / quick commands aren't in the registry. 
+ expect(resolveDesktopCommand('/gif-search')).toBeNull() + }) }) diff --git a/apps/desktop/src/lib/desktop-slash-commands.ts b/apps/desktop/src/lib/desktop-slash-commands.ts index e373ac94317..d898a6c83f1 100644 --- a/apps/desktop/src/lib/desktop-slash-commands.ts +++ b/apps/desktop/src/lib/desktop-slash-commands.ts @@ -22,110 +22,161 @@ export interface DesktopThemeCommandOption { name: string } -const DESKTOP_COMMAND_META = [ - ['/agents', 'Show active desktop sessions and running tasks'], - ['/background', 'Run a prompt in the background'], - ['/branch', 'Branch the latest message into a new chat'], - ['/compress', 'Compress this conversation context'], - ['/debug', 'Create a debug report'], - ['/goal', 'Manage the standing goal for this session'], - ['/help', 'Show desktop slash commands'], - ['/new', 'Start a new desktop chat'], - ['/profile', 'Switch the active Hermes profile'], - ['/queue', 'Queue a prompt for the next turn'], - ['/resume', 'Resume a saved session'], - ['/retry', 'Retry the last user message'], - ['/rollback', 'List or restore filesystem checkpoints'], - ['/skin', 'Switch desktop theme or cycle to the next one'], - ['/status', 'Show current session status'], - ['/steer', 'Steer the current run after the next tool call'], - ['/stop', 'Stop running background processes'], - ['/title', 'Rename the current session'], - ['/undo', 'Remove the last user/assistant exchange'], - ['/usage', 'Show token usage for this session'], - ['/version', 'Show Hermes Agent version'], - ['/yolo', 'Toggle YOLO — auto-approve dangerous commands'] -] as const +/** + * Local client action a command resolves to. Each id maps to exactly one + * handler in the dispatcher (`use-prompt-actions`), so adding a command never + * means adding a branch to a switch ladder — you add a row here + a handler + * keyed by the id. 
+ */ +export type DesktopActionId = + | 'branch' + | 'handoff' + | 'help' + | 'new' + | 'profile' + | 'skin' + | 'title' + | 'yolo' -const DESKTOP_COMMANDS: ReadonlySet = new Set(DESKTOP_COMMAND_META.map(([command]) => command)) +/** A command fulfilled by opening a desktop overlay picker. */ +export type DesktopPickerId = 'model' | 'session' -const DESKTOP_ALIASES = new Map([ - ['/bg', '/background'], - ['/btw', '/background'], - ['/fork', '/branch'], - ['/q', '/queue'], - ['/reload_mcp', '/reload-mcp'], - ['/reload_skills', '/reload-skills'], - ['/reset', '/new'], - ['/tasks', '/agents'] -]) +/** Why a known Hermes command has no desktop UI surface. */ +export type DesktopUnavailableReason = 'advanced' | 'messaging' | 'settings' | 'terminal' -const DESKTOP_COMMAND_DESCRIPTIONS: ReadonlyMap = new Map(DESKTOP_COMMAND_META) +/** + * How the desktop fulfils a command. This is the single discriminator the + * dispatcher, popover, pills, and pickers all read — no parallel block-lists. + * + * - `action` → handled by a local client handler (new chat, branch, …) + * - `picker` → opens an overlay (`/model`, `/resume`); a typed arg is + * resolved by that picker instead of falling through + * - `exec` → runs on the backend via slash.exec / command.dispatch and + * renders its text output inline + * - `unavailable`→ a known command with genuinely no desktop UI (terminal-only, + * messaging-only, …); shows a reason instead of executing + */ +export type DesktopCommandSurface = + | { kind: 'action'; action: DesktopActionId } + | { kind: 'picker'; picker: DesktopPickerId } + | { kind: 'exec' } + | { kind: 'unavailable'; reason: DesktopUnavailableReason } -const PICKER_OWNED_COMMANDS = new Set(['/model']) +export interface DesktopCommandSpec { + /** Canonical command, leading slash included (e.g. `/resume`). */ + name: string + /** Popover/help label; omitted for unavailable commands (never surfaced). 
*/ + description?: string + aliases?: string[] + surface: DesktopCommandSurface + /** + * Hide from the slash popover / completions while still letting it execute. + * Used for picker commands reachable from chrome (the model picker lives on + * the status bar), so the popover doesn't dead-end on inline completion. + */ + hidden?: boolean + /** + * The command has an inline options "screen" (theme / personality / session / + * platform / toolset list). Picking the bare command in the popover expands to + * that argument step instead of committing — mirroring typing `/ ` by hand. + */ + args?: boolean +} -const TERMINAL_ONLY_COMMANDS = new Set([ - '/browser', - '/busy', - '/clear', - '/commands', - '/compact', - '/config', - '/copy', - '/cron', - '/details', - '/exit', - '/footer', - '/gateway', - '/gquota', - '/history', - '/image', - '/indicator', - '/logs', - '/mouse', - '/paste', - '/platforms', - '/plugins', - '/quit', - '/redraw', - '/reload', - '/restart', - '/save', - '/sb', - '/set-home', - '/sethome', - '/snap', - '/snapshot', - '/statusbar', - '/toolsets', - '/tools', - '/update', - '/verbose' -]) +const exec = (): DesktopCommandSurface => ({ kind: 'exec' }) +const action = (id: DesktopActionId): DesktopCommandSurface => ({ kind: 'action', action: id }) +const picker = (id: DesktopPickerId): DesktopCommandSurface => ({ kind: 'picker', picker: id }) +const unavailable = (reason: DesktopUnavailableReason): DesktopCommandSurface => ({ kind: 'unavailable', reason }) -const MESSAGING_ONLY_COMMANDS = new Set(['/approve', '/deny']) +/** + * THE source of truth for desktop slash commands. Everything below — execution + * gating, popover suggestions, catalog filtering, pill grouping, and the + * dispatcher's behavior — derives from this one table. 
+ */ +const DESKTOP_COMMAND_SPECS: readonly DesktopCommandSpec[] = [ + // Local client actions + { name: '/new', description: 'Start a new desktop chat', aliases: ['/reset'], surface: action('new') }, + { name: '/branch', description: 'Branch the latest message into a new chat', aliases: ['/fork'], surface: action('branch') }, + { name: '/yolo', description: 'Toggle YOLO — auto-approve dangerous commands', surface: action('yolo') }, + { name: '/handoff', description: 'Hand off this session to a messaging platform', surface: action('handoff'), args: true }, + { name: '/profile', description: 'Switch the active Hermes profile', surface: action('profile') }, + { name: '/skin', description: 'Switch desktop theme or cycle to the next one', surface: action('skin'), args: true }, + { name: '/title', description: 'Rename the current session', surface: action('title') }, + { name: '/help', description: 'Show desktop slash commands', aliases: ['/commands'], surface: action('help') }, -const SETTINGS_OWNED_COMMANDS = new Set(['/skills']) + // Overlay pickers + { name: '/model', description: 'Switch the model for this session', surface: picker('model'), hidden: true }, + { + name: '/resume', + description: 'Resume a saved session', + aliases: ['/sessions', '/switch'], + surface: picker('session'), + args: true + }, -const ADVANCED_COMMANDS = new Set([ - '/curator', - '/fast', - '/insights', - '/kanban', - '/personality', - '/reasoning', - '/reload-mcp', - '/reload-skills', - '/voice' -]) + // Backend-executed commands that render useful inline output + { name: '/agents', description: 'Show active desktop sessions and running tasks', aliases: ['/tasks'], surface: exec() }, + { name: '/background', description: 'Run a prompt in the background', aliases: ['/bg', '/btw'], surface: exec() }, + { name: '/compress', description: 'Compress this conversation context', surface: exec() }, + { name: '/debug', description: 'Create a debug report', surface: exec() }, + { name: '/goal', 
description: 'Manage the standing goal for this session', surface: exec() }, + { name: '/personality', description: 'Switch personality for this session', surface: exec(), args: true }, + { name: '/queue', description: 'Queue a prompt for the next turn', aliases: ['/q'], surface: exec() }, + { name: '/retry', description: 'Retry the last user message', surface: exec() }, + { name: '/rollback', description: 'List or restore filesystem checkpoints', surface: exec() }, + { name: '/save', description: 'Save the current transcript to JSON', surface: exec() }, + { name: '/status', description: 'Show current session status', surface: exec() }, + { name: '/steer', description: 'Steer the current run after the next tool call', surface: exec() }, + { name: '/stop', description: 'Stop running background processes', surface: exec() }, + { name: '/tools', description: 'List or toggle tools available to the agent', surface: exec(), args: true }, + { name: '/undo', description: 'Remove the last user/assistant exchange', surface: exec() }, + { name: '/usage', description: 'Show token usage for this session', surface: exec() }, + { name: '/version', description: 'Show Hermes Agent version', surface: exec() }, -const BLOCKED_COMMANDS = new Set([ - ...PICKER_OWNED_COMMANDS, - ...TERMINAL_ONLY_COMMANDS, - ...MESSAGING_ONLY_COMMANDS, - ...SETTINGS_OWNED_COMMANDS, - ...ADVANCED_COMMANDS -]) + // No desktop surface, but carry an alias (underscore spelling variants). + { name: '/reload-mcp', aliases: ['/reload_mcp'], surface: unavailable('advanced') }, + { name: '/reload-skills', aliases: ['/reload_skills'], surface: unavailable('advanced') } +] + +// Known commands with no desktop surface (and no alias) — a flat name list +// per reason beats 40 identical object literals. 
+const NO_DESKTOP_SURFACE: Record = { + terminal: [ + '/browser', '/busy', '/clear', '/compact', '/config', '/copy', '/cron', '/details', + '/exit', '/footer', '/gateway', '/gquota', '/history', '/image', '/indicator', '/logs', + '/mouse', '/paste', '/platforms', '/plugins', '/quit', '/redraw', '/reload', '/restart', + '/sb', '/set-home', '/sethome', '/snap', '/snapshot', '/statusbar', '/toolsets', '/update', '/verbose' + ], + messaging: ['/approve', '/deny'], + settings: ['/skills'], + advanced: ['/curator', '/fast', '/insights', '/kanban', '/reasoning', '/voice'] +} + +const ALL_SPECS: readonly DesktopCommandSpec[] = [ + ...DESKTOP_COMMAND_SPECS, + ...(Object.entries(NO_DESKTOP_SURFACE) as [DesktopUnavailableReason, readonly string[]][]).flatMap( + ([reason, names]) => names.map(name => ({ name, surface: unavailable(reason) })) + ) +] + +const SPEC_BY_NAME = new Map(ALL_SPECS.map(spec => [spec.name, spec])) + +const ALIAS_TO_CANONICAL = new Map( + ALL_SPECS.flatMap(spec => (spec.aliases ?? []).map(alias => [alias, spec.name] as const)) +) + +const UNAVAILABLE_MESSAGE: Record string> = { + advanced: command => + `${command} is not shown in the desktop slash palette. 
Use the relevant desktop control or terminal interface instead.`, + messaging: command => `${command} is only used from messaging platforms.`, + settings: command => `${command} is managed from the desktop sidebar.`, + terminal: command => `${command} is only available in the terminal interface.` +} + +const PICKER_UNAVAILABLE_MESSAGE: Record string> = { + model: command => `${command} uses the desktop model picker instead of a slash command.`, + session: command => `${command} uses the desktop session picker instead of a slash command.` +} function normalizeCommand(command: string): string { const trimmed = command.trim() @@ -137,27 +188,25 @@ function normalizeCommand(command: string): string { export function canonicalDesktopSlashCommand(command: string): string { const normalized = normalizeCommand(command) - return DESKTOP_ALIASES.get(normalized) || normalized + return ALIAS_TO_CANONICAL.get(normalized) || normalized } -export function isDesktopSlashCommand(command: string): boolean { +/** Resolve a command (or alias) to its desktop spec, or null for unknown/extension commands. */ +export function resolveDesktopCommand(command: string): DesktopCommandSpec | null { + return SPEC_BY_NAME.get(canonicalDesktopSlashCommand(command)) ?? null +} + +function isKnownHermesSlashCommand(command: string): boolean { const normalized = normalizeCommand(command) - const canonical = canonicalDesktopSlashCommand(normalized) - if (BLOCKED_COMMANDS.has(normalized) || BLOCKED_COMMANDS.has(canonical)) { - return false - } - - return DESKTOP_COMMANDS.has(canonical) || !isKnownHermesSlashCommand(normalized) + return SPEC_BY_NAME.has(normalized) || ALIAS_TO_CANONICAL.has(normalized) } /** * An "extension" command is anything the backend surfaces that is NOT one of * Hermes' built-in slash commands — i.e. skill commands (`/gif-search`, * `/codex`, …) and user-defined quick commands. 
These are user-activated, so - * they should appear in the desktop slash palette even though they aren't in - * the curated `DESKTOP_COMMANDS` allow-list. This mirrors the predicate in - * `isDesktopSlashCommand` that already lets them EXECUTE when typed. + * they appear in the desktop slash palette and execute when typed. */ export function isDesktopSlashExtensionCommand(command: string): boolean { const normalized = normalizeCommand(command) @@ -169,63 +218,85 @@ export function isDesktopSlashExtensionCommand(command: string): boolean { return !isKnownHermesSlashCommand(normalized) } -export function isDesktopSlashSuggestion(command: string): boolean { - const normalized = normalizeCommand(command) - const canonical = canonicalDesktopSlashCommand(normalized) +/** Gates execution: true unless the command is a known no-desktop-surface command. */ +export function isDesktopSlashCommand(command: string): boolean { + const spec = resolveDesktopCommand(command) - // Surface skill / quick commands (extensions the backend provides) alongside - // the curated built-ins. Built-in aliases stay hidden so the popover isn't - // cluttered with duplicates. - if (isDesktopSlashExtensionCommand(normalized)) { - return true + if (spec) { + return spec.surface.kind !== 'unavailable' } - return DESKTOP_COMMANDS.has(canonical) && !DESKTOP_ALIASES.has(normalized) + return isDesktopSlashExtensionCommand(command) +} + +/** Gates discovery in the popover/completions. */ +export function isDesktopSlashSuggestion(command: string): boolean { + const normalized = normalizeCommand(command) + + // Aliases stay hidden so the popover isn't cluttered with duplicates. + if (ALIAS_TO_CANONICAL.has(normalized)) { + return false + } + + const spec = SPEC_BY_NAME.get(normalized) + + if (spec) { + return spec.surface.kind !== 'unavailable' && !spec.hidden + } + + // Skill / quick commands the backend provides. 
+ return isDesktopSlashExtensionCommand(normalized) } /** - * True for commands the desktop fulfils by opening the model picker overlay - * (e.g. `/model`) rather than executing a slash command. The caller opens the - * picker UI instead of printing the "uses the desktop model picker" notice. + * True for commands the desktop fulfils by opening an overlay picker + * (`/model`, `/resume`/`/sessions`/`/switch`). Optionally pin to one picker. */ -export function isModelPickerCommand(command: string): boolean { - const normalized = normalizeCommand(command) - const canonical = canonicalDesktopSlashCommand(normalized) +export function isPickerCommand(command: string, picker?: DesktopPickerId): boolean { + const surface = resolveDesktopCommand(command)?.surface - return PICKER_OWNED_COMMANDS.has(canonical) + if (surface?.kind !== 'picker') { + return false + } + + return picker ? surface.picker === picker : true +} + +/** Back-compat shim for the model picker check. */ +export function isModelPickerCommand(command: string): boolean { + return isPickerCommand(command, 'model') } export function desktopSlashUnavailableMessage(command: string): string | null { - const normalized = normalizeCommand(command) - const canonical = canonicalDesktopSlashCommand(normalized) + const canonical = canonicalDesktopSlashCommand(command) + const surface = SPEC_BY_NAME.get(canonical)?.surface - if (PICKER_OWNED_COMMANDS.has(canonical)) { - return `/${canonical.slice(1)} uses the desktop model picker instead of a slash command.` + if (!surface) { + return null } - if (SETTINGS_OWNED_COMMANDS.has(canonical)) { - return `/${canonical.slice(1)} is managed from the desktop sidebar.` + if (surface.kind === 'unavailable') { + return UNAVAILABLE_MESSAGE[surface.reason](canonical) } - if (MESSAGING_ONLY_COMMANDS.has(canonical)) { - return `/${canonical.slice(1)} is only used from messaging platforms.` - } - - if (ADVANCED_COMMANDS.has(canonical)) { - return `/${canonical.slice(1)} is not shown in 
the desktop slash palette. Use the relevant desktop control or terminal interface instead.` - } - - if (TERMINAL_ONLY_COMMANDS.has(normalized) || TERMINAL_ONLY_COMMANDS.has(canonical)) { - return `/${canonical.slice(1)} is only available in the terminal interface.` + if (surface.kind === 'picker') { + return PICKER_UNAVAILABLE_MESSAGE[surface.picker](canonical) } return null } export function desktopSlashDescription(command: string, fallback = ''): string { - const canonical = canonicalDesktopSlashCommand(command) + return SPEC_BY_NAME.get(canonicalDesktopSlashCommand(command))?.description || fallback +} - return DESKTOP_COMMAND_DESCRIPTIONS.get(canonical) || fallback +/** + * True when picking the bare command should expand to its inline argument + * options (theme / personality / session / platform / toolset) rather than + * committing immediately. Lets the popover act as a two-step picker. + */ +export function desktopSlashCommandTakesArgs(command: string): boolean { + return resolveDesktopCommand(command)?.args ?? false } export function desktopSkinSlashCompletions( @@ -274,13 +345,36 @@ export function filterDesktopCommandsCatalog(catalog: CommandsCatalogLike): Comm ?.filter(([command]) => isDesktopSlashSuggestion(command)) .map(([command, description]) => [command, desktopSlashDescription(command, description)] as [string, string]) + // Recount skill commands from the filtered output so /help's footer reflects + // what the user actually sees. Backend's skill_count includes commands the + // desktop hides (terminal-only, picker-owned, advanced), producing a footer + // like "60 skill commands available" while only ~29 appear in the list. + const filteredCommands = new Set() + + for (const section of categories ?? []) { + for (const [command] of section.pairs) { + filteredCommands.add(canonicalDesktopSlashCommand(command)) + } + } + + for (const [command] of pairs ?? 
[]) { + filteredCommands.add(canonicalDesktopSlashCommand(command)) + } + + let skillCount = 0 + + for (const command of filteredCommands) { + if (isDesktopSlashExtensionCommand(command)) { + skillCount += 1 + } + } + + const hasSkillCount = catalog.skill_count !== undefined || skillCount > 0 + return { ...catalog, ...(categories ? { categories } : {}), - ...(pairs ? { pairs } : {}) + ...(pairs ? { pairs } : {}), + ...(hasSkillCount ? { skill_count: skillCount } : {}) } } - -function isKnownHermesSlashCommand(command: string): boolean { - return DESKTOP_COMMANDS.has(command) || DESKTOP_ALIASES.has(command) || BLOCKED_COMMANDS.has(command) -} diff --git a/apps/desktop/src/store/composer.test.ts b/apps/desktop/src/store/composer.test.ts index 83f0a3feb96..08bbb391c95 100644 --- a/apps/desktop/src/store/composer.test.ts +++ b/apps/desktop/src/store/composer.test.ts @@ -3,8 +3,12 @@ import { afterEach, describe, expect, it } from 'vitest' import { $composerAttachments, addComposerAttachment, + clearSessionDraft, type ComposerAttachment, removeComposerAttachment, + SESSION_DRAFTS_STORAGE_KEY, + stashSessionDraft, + takeSessionDraft, updateComposerAttachment } from './composer' @@ -41,3 +45,62 @@ describe('updateComposerAttachment', () => { expect($composerAttachments.get()).toHaveLength(0) }) }) + +describe('session drafts', () => { + afterEach(() => { + for (const scope of ['session-a', 'session-b', null]) { + clearSessionDraft(scope) + } + + window.localStorage.clear() + }) + + it('keeps drafts isolated per session scope', () => { + stashSessionDraft('session-a', 'draft a', []) + stashSessionDraft('session-b', 'draft b', [attachment({ id: 'image:b', kind: 'image' })]) + + expect(takeSessionDraft('session-a')).toEqual({ attachments: [], text: 'draft a' }) + expect(takeSessionDraft('session-b').text).toBe('draft b') + expect(takeSessionDraft('session-b').attachments.map(a => a.id)).toEqual(['image:b']) + }) + + it('scopes the unsaved new-session draft separately from 
real sessions', () => { + stashSessionDraft(null, 'new chat draft', []) + stashSessionDraft('session-a', 'session draft', []) + + expect(takeSessionDraft(null).text).toBe('new chat draft') + expect(takeSessionDraft(undefined).text).toBe('new chat draft') + expect(takeSessionDraft('session-a').text).toBe('session draft') + }) + + it('persists draft text (not attachments) to localStorage', () => { + stashSessionDraft('session-a', 'survives reload', [attachment({ id: 'file:a' })]) + + const persisted = JSON.parse(window.localStorage.getItem(SESSION_DRAFTS_STORAGE_KEY) ?? '{}') as Record + + expect(persisted['session-a']).toBe('survives reload') + }) + + it('evicts empty drafts instead of leaving stale entries behind', () => { + stashSessionDraft('session-a', 'saved', []) + stashSessionDraft('session-a', ' ', []) + + expect(takeSessionDraft('session-a')).toEqual({ attachments: [], text: '' }) + }) + + it('clears a stashed draft after an accepted submit', () => { + stashSessionDraft('session-a', 'sent prompt', [attachment({ id: 'file:a' })]) + clearSessionDraft('session-a') + + expect(takeSessionDraft('session-a')).toEqual({ attachments: [], text: '' }) + }) + + it('returns clones so callers cannot mutate the stash', () => { + stashSessionDraft('session-a', 'draft', [attachment({ id: 'file:a' })]) + + const taken = takeSessionDraft('session-a') + taken.attachments[0]!.label = 'mutated' + + expect(takeSessionDraft('session-a').attachments[0]?.label).toBe('doc.pdf') + }) +}) diff --git a/apps/desktop/src/store/composer.ts b/apps/desktop/src/store/composer.ts index 6b2b58ccb8d..c40cf867735 100644 --- a/apps/desktop/src/store/composer.ts +++ b/apps/desktop/src/store/composer.ts @@ -21,6 +21,84 @@ export const $composerDraft = atom('') export const $composerAttachments = atom([]) export const $composerTerminalSelections = atom>({}) +// Per-thread draft stash for the decoupled composer. Session lifecycle never +// touches this — only ChatBar's scope swap reads/writes it. 
Text mirrors to +// localStorage; attachments are memory-only (blobs, upload state). +export const SESSION_DRAFTS_STORAGE_KEY = 'hermes:composer-drafts:v3' + +const NEW_SESSION_DRAFT_KEY = '__new__' +const MAX_PERSISTED_DRAFTS = 50 +const EMPTY_SESSION_DRAFT: SessionDraft = { attachments: [], text: '' } + +export interface SessionDraft { + attachments: ComposerAttachment[] + text: string +} + +const draftKey = (scope: string | null | undefined) => scope?.trim() || NEW_SESSION_DRAFT_KEY + +const cloneDraft = (draft: SessionDraft): SessionDraft => ({ + attachments: draft.attachments.map(attachment => ({ ...attachment })), + text: draft.text +}) + +function loadPersistedDraftTexts(): [string, SessionDraft][] { + try { + const raw = window.localStorage.getItem(SESSION_DRAFTS_STORAGE_KEY) + + if (!raw) { + return [] + } + + return Object.entries(JSON.parse(raw) as Record).map(([key, text]) => [ + key, + { attachments: [], text } + ]) + } catch { + return [] + } +} + +const draftsBySession = new Map(loadPersistedDraftTexts()) + +function persistDraftTexts() { + try { + const entries = [...draftsBySession] + .filter(([, draft]) => draft.text) + .slice(-MAX_PERSISTED_DRAFTS) + .map(([key, draft]) => [key, draft.text] as const) + + if (entries.length === 0) { + window.localStorage.removeItem(SESSION_DRAFTS_STORAGE_KEY) + } else { + window.localStorage.setItem(SESSION_DRAFTS_STORAGE_KEY, JSON.stringify(Object.fromEntries(entries))) + } + } catch { + // Best-effort only — quota/private-mode must never break typing. + } +} + +export function stashSessionDraft(scope: string | null | undefined, text: string, attachments: ComposerAttachment[]) { + const key = draftKey(scope) + + // Delete-then-set keeps MRU order for MAX_PERSISTED_DRAFTS eviction. 
+ draftsBySession.delete(key) + + if (text.trim() || attachments.length > 0) { + draftsBySession.set(key, cloneDraft({ attachments, text })) + } + + persistDraftTexts() +} + +export function takeSessionDraft(scope: string | null | undefined): SessionDraft { + const stashed = draftsBySession.get(draftKey(scope)) + + return stashed ? cloneDraft(stashed) : EMPTY_SESSION_DRAFT +} + +export const clearSessionDraft = (scope: string | null | undefined) => stashSessionDraft(scope, '', []) + export function setComposerDraft(value: string) { $composerDraft.set(value) } diff --git a/apps/desktop/src/store/session.test.ts b/apps/desktop/src/store/session.test.ts index deb4833868f..79fefdccd8e 100644 --- a/apps/desktop/src/store/session.test.ts +++ b/apps/desktop/src/store/session.test.ts @@ -133,13 +133,52 @@ describe('mergeSessionPage', () => { it('keeps a pinned session matched by its lineage root after compression', () => { // The pin is stored on the lineage-root id, but the loaded row surfaces // under its live compression tip. Matching on _lineage_root_id keeps it. - const previous = [session({ id: 'tip', _lineage_root_id: 'root' })] - const incoming = [session({ id: 'other' })] + const previous = [session({ id: 'tip', _lineage_root_id: 'root' })] as SessionInfo[] + const incoming = [session({ id: 'other' })] as SessionInfo[] const merged = mergeSessionPage(previous, incoming, ['root']) expect(merged.map(s => s.id)).toEqual(['tip', 'other']) }) + + it('evicts an old compression tip when the incoming page has the new tip from the same lineage', () => { + // Repro of #43483: after auto-compression rotates the tip (#4 → #5), + // the sidebar showed both the old tip and the new tip as separate rows. + // The old tip must be evicted because its lineage key matches the incoming + // new tip's lineage key. 
+ const previous = [ + session({ id: 'tip-4', _lineage_root_id: 'root' }), + session({ id: 'other' }), + ] as SessionInfo[] + const incoming = [ + session({ id: 'tip-5', _lineage_root_id: 'root' }), + ] as SessionInfo[] + + // 'tip-4' is in the keep set (e.g. it was the active/working session), + // but should still be evicted because the incoming page carries the same + // lineage under a new tip id. + const merged = mergeSessionPage(previous, incoming, ['tip-4']) + + expect(merged.map(s => s.id)).toEqual(['tip-5']) + // The new tip comes from the server payload. + expect(merged.find(s => s.id === 'tip-5')?._lineage_root_id).toBe('root') + }) + + it('preserves an unrelated pinned session even when lineage dedup is active', () => { + // Regression guard: lineage dedup must not accidentally evict sessions + // from a different lineage that happen to be in the keep set. + const previous = [ + session({ id: 'a-old', _lineage_root_id: 'lineage-a' }), + session({ id: 'b', _lineage_root_id: 'lineage-b' }), + ] as SessionInfo[] + const incoming = [ + session({ id: 'a-new', _lineage_root_id: 'lineage-a' }), + ] as SessionInfo[] + + const merged = mergeSessionPage(previous, incoming, ['b']) + + expect(merged.map(s => s.id)).toEqual(['b', 'a-new']) + }) }) describe('workspaceCwdForNewSession', () => { diff --git a/apps/desktop/src/store/session.ts b/apps/desktop/src/store/session.ts index 6df96946bf1..ed28b92cb88 100644 --- a/apps/desktop/src/store/session.ts +++ b/apps/desktop/src/store/session.ts @@ -125,10 +125,18 @@ export function mergeSessionPage( } const incomingIds = new Set(incoming.map(session => session.id)) + // Deduplicate by compression lineage: when auto-compression rotates the tip + // id (old #4 → new #5), the incoming page carries the new tip but the + // previous list still holds the old one. Without lineage-level dedup both + // rows survive as separate sidebar entries (fixes #43483). 
+ const incomingLineageKeys = new Set( + incoming.map(session => session._lineage_root_id ?? session.id) + ) const survivors = previous.filter( session => !incomingIds.has(session.id) && + !incomingLineageKeys.has(session._lineage_root_id ?? session.id) && (keep.has(session.id) || (session._lineage_root_id != null && keep.has(session._lineage_root_id))) ) @@ -200,6 +208,7 @@ export const $availablePersonalities = atom([]) export const $introSeed = atom(0) export const $contextSuggestions = atom([]) export const $modelPickerOpen = atom(false) +export const $sessionPickerOpen = atom(false) export const setConnection = (next: Updater) => updateAtom($connection, next) export const setGatewayState = (next: Updater) => updateAtom($gatewayState, next) @@ -249,6 +258,7 @@ export const setAvailablePersonalities = (next: Updater) => updateAtom export const setIntroSeed = (next: Updater) => updateAtom($introSeed, next) export const setContextSuggestions = (next: Updater) => updateAtom($contextSuggestions, next) export const setModelPickerOpen = (next: Updater) => updateAtom($modelPickerOpen, next) +export const setSessionPickerOpen = (next: Updater) => updateAtom($sessionPickerOpen, next) // Watchdog tracking — when does a "working" session count as stuck? // Long-running tool calls (LLM inference, long shell commands, web fetches) diff --git a/cli.py b/cli.py index 641c200ad3d..86875bcb60c 100644 --- a/cli.py +++ b/cli.py @@ -3426,6 +3426,7 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): # frozen when the agent thread completes, displayed in the status bar. 
self._prompt_start_time: Optional[float] = None # time.time() when turn started self._prompt_duration: float = 0.0 # frozen duration of last completed turn + self._last_turn_finished_at: Optional[float] = None # time.time() when the last agent loop finished # Initialize SQLite session store early so /title works before first message self._session_db = None try: @@ -3812,6 +3813,19 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): emoji = "⏱" if live else "⏲" return f"{emoji} {time_str}" + @staticmethod + def _format_idle_since(last_finished_at: Optional[float], turn_live: bool) -> str: + """Format time since the last final agent response for the status bar. + + Returns an empty string while a turn is live (the per-prompt elapsed + timer covers that case) or before the first turn has completed. + Compact read-out: ``✓ 42s`` / ``✓ 3m`` / ``✓ 1h 12m``. + """ + if turn_live or last_finished_at is None: + return "" + idle = max(0.0, time.time() - last_finished_at) + return f"✓ {format_duration_compact(idle)}" + def _get_status_bar_snapshot(self) -> Dict[str, Any]: # Prefer the agent's model name — it updates on fallback. 
# self.model reflects the originally configured model and never @@ -3835,6 +3849,10 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): getattr(self, "_prompt_duration", 0.0), live=getattr(self, "_prompt_start_time", None) is not None, ), + "idle_since": self._format_idle_since( + getattr(self, "_last_turn_finished_at", None), + turn_live=getattr(self, "_prompt_start_time", None) is not None, + ), "context_tokens": 0, "context_length": None, "context_percent": None, @@ -4146,6 +4164,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): prompt_elapsed = snapshot.get("prompt_elapsed") if prompt_elapsed: parts.append(prompt_elapsed) + idle_since = snapshot.get("idle_since") + if idle_since: + parts.append(idle_since) if yolo_active: parts.append("⚠ YOLO") return self._trim_status_bar_text(" │ ".join(parts), width) @@ -4247,6 +4268,11 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): if prompt_elapsed: frags.append(("class:status-bar-dim", " │ ")) frags.append(("class:status-bar-dim", prompt_elapsed)) + # Position 8: idle time since the last final agent response + idle_since = snapshot.get("idle_since") + if idle_since: + frags.append(("class:status-bar-dim", " │ ")) + frags.append(("class:status-bar-dim", idle_since)) if yolo_active: frags.append(("class:status-bar-dim", " │ ")) frags.append(("class:status-bar-yolo", "⚠ YOLO")) @@ -5552,6 +5578,15 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): f"{_escape(desc)} [dim]({skill_count} skills)[/]" ) + quick_commands = self.config.get("quick_commands", {}) + if quick_commands: + _cprint(f"\n ⚡ {_BOLD}Quick Commands{_RST} ({len(quick_commands)} configured):") + for name, qcmd in sorted(quick_commands.items()): + desc = qcmd.get("description", qcmd.get("type", "")) + ChatConsole().print( + f" [bold {_accent_hex()}]{('/' + name):<22}[/] [dim]-[/] {_escape(desc)}" + ) + _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") _cprint(f" {_DIM}Multi-line: Alt+Enter for a new 
line{_RST}") _cprint(f" {_DIM}Draft editor: Ctrl+G (Alt+G in VSCode/Cursor){_RST}") @@ -5821,6 +5856,35 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): except Exception: pass + def _discard_session_if_empty(self, session_id: Optional[str]) -> bool: + """Drop a just-ended session row when it never gained content. + + Starting the CLI and immediately quitting (or rotating with /new, + /clear) used to leave an empty untitled row behind that clutters + ``/resume`` and ``hermes sessions list``. Delegates the + check-and-delete to ``SessionDB.delete_session_if_empty``, which + only removes rows with no messages, no title, and no child + sessions. Ported from google-gemini/gemini-cli#27770. + """ + if not self._session_db or not session_id: + return False + # In-memory transcript is authoritative: if this CLI object holds + # conversation messages (flushed to the DB or not), the session is + # not empty. Protects against pruning a real conversation whose DB + # flush failed or hasn't happened yet. + if getattr(self, "conversation_history", None): + return False + try: + from hermes_constants import get_hermes_home as _ghh + return self._session_db.delete_session_if_empty( + session_id, sessions_dir=_ghh() / "sessions" + ) + except Exception: + logger.debug( + "Could not prune empty session %s", session_id, exc_info=True + ) + return False + def new_session(self, silent=False, title=None): """Start a fresh session with a new session ID and cleared agent state.""" if self.agent and self.conversation_history: @@ -5837,6 +5901,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): self._session_db.end_session(old_session_id, "new_session") except Exception: pass + # Don't let immediately-rotated empty sessions pile up in + # /resume and `hermes sessions list` (gemini-cli#27770 port). 
+ self._discard_session_if_empty(old_session_id) self.session_start = datetime.now() timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S") @@ -10121,6 +10188,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): if self._prompt_start_time is not None: self._prompt_duration = max(0.0, time.time() - self._prompt_start_time) self._prompt_start_time = None + # Record when this agent loop finished so the status bar can show + # idle time since the last final response. + self._last_turn_finished_at = time.time() # Proactively clean up async clients whose event loop is dead. # The agent thread may have created AsyncOpenAI clients bound @@ -13074,6 +13144,15 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): self._session_db.end_session(self.agent.session_id, "cli_close") except (Exception, KeyboardInterrupt) as e: logger.debug("Could not close session in DB: %s", e) + # Started-and-immediately-quit sessions never gained content; + # drop the empty row so /resume and `hermes sessions list` + # stay clean (gemini-cli#27770 port). No-op for resumed or + # titled sessions and anything with messages or children. + if not getattr(self, '_delete_session_on_exit', False): + try: + self._discard_session_if_empty(self.agent.session_id) + except (Exception, KeyboardInterrupt) as e: + logger.debug("Could not prune empty session: %s", e) # /exit --delete: also remove the current session's transcripts # and SQLite history. Ported from google-gemini/gemini-cli#19332. if getattr(self, '_delete_session_on_exit', False): @@ -13336,9 +13415,21 @@ def main( else: toolsets_list.append(str(t)) else: - # Use the shared resolver so MCP servers are included at runtime - from hermes_cli.tools_config import _get_platform_tools - toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli")) + # Coding posture (base Hermes): with no explicit --toolsets, collapse + # to the coding toolset (+ enabled MCP servers) when sitting in a code + # workspace. See agent/coding_context.py. 
+ _coding = None + try: + from agent.coding_context import coding_selection + _coding = coding_selection(platform="cli", config=CLI_CONFIG) + except Exception: + _coding = None + if _coding is not None: + toolsets_list = _coding + else: + # Use the shared resolver so MCP servers are included at runtime + from hermes_cli.tools_config import _get_platform_tools + toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli")) parsed_skills = _parse_skills_argument(skills) diff --git a/cron/jobs.py b/cron/jobs.py index 866dacc41df..52d9367ff84 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -150,9 +150,6 @@ def _normalize_job_record(job: Dict[str, Any]) -> Dict[str, Any]: state = "scheduled" if normalized.get("enabled", True) else "paused" normalized["state"] = state - profile = _coerce_job_text(normalized.get("profile")).strip() - normalized["profile"] = profile or None - return normalized @@ -523,30 +520,6 @@ def _normalize_workdir(workdir: Optional[str]) -> Optional[str]: return str(resolved) -def _normalize_profile(profile: Optional[str]) -> Optional[str]: - """Normalize and validate an optional cron job profile name. - - Empty / None disables per-job profile selection. Otherwise the profile name - is canonicalized with the same rules as ``hermes -p`` and must refer to an - existing profile at create/update time. ``default`` is the built-in root - profile and is always valid. - """ - if profile is None: - return None - raw = str(profile).strip() - if not raw: - return None - - from hermes_cli.profiles import normalize_profile_name, resolve_profile_env - - normalized = normalize_profile_name(raw) - # resolve_profile_env validates the canonical name and checks that named - # profiles exist. Store only the stable profile id, not the filesystem path, - # so profile directories can move with the Hermes root. 
- resolve_profile_env(normalized) - return normalized - - def create_job( prompt: Optional[str], schedule: str, @@ -563,7 +536,6 @@ def create_job( context_from: Optional[Union[str, List[str]]] = None, enabled_toolsets: Optional[List[str]] = None, workdir: Optional[str] = None, - profile: Optional[str] = None, no_agent: bool = False, ) -> Dict[str, Any]: """ @@ -605,11 +577,6 @@ def create_job( With ``no_agent=True``, ``workdir`` is still applied as the script's cwd so relative paths inside the script behave predictably. - profile: Optional Hermes profile name. When set, the job runs with - that profile's HERMES_HOME so profile-specific config, - credentials, scripts, skills, and memory paths resolve - consistently. ``default`` selects the root profile; empty / - None preserves the scheduler's existing behaviour. no_agent: When True, skip the agent entirely — run ``script`` on schedule and deliver its stdout directly. Empty stdout = silent (no delivery). Requires ``script`` to be set. Ideal for classic @@ -647,7 +614,6 @@ def create_job( normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None normalized_toolsets = normalized_toolsets or None normalized_workdir = _normalize_workdir(workdir) - normalized_profile = _normalize_profile(profile) normalized_no_agent = bool(no_agent) # no_agent jobs are meaningless without a script — the script IS the job. @@ -702,7 +668,6 @@ def create_job( "origin": origin, # Tracks where job was created for "origin" delivery "enabled_toolsets": normalized_toolsets, "workdir": normalized_workdir, - "profile": normalized_profile, } jobs = load_jobs() @@ -792,15 +757,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]] else: updates["workdir"] = _normalize_workdir(_wd) - # Validate / normalize profile if present in updates. Empty string or - # None both mean "clear the field" (restore old behaviour). 
- if "profile" in updates: - _profile = updates["profile"] - if _profile is None or _profile == "" or _profile is False: - updates["profile"] = None - else: - updates["profile"] = _normalize_profile(_profile) - updated = _apply_skill_fields({**job, **updates}) schedule_changed = "schedule" in updates diff --git a/cron/scheduler.py b/cron/scheduler.py index e48952cfa7c..35906996619 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -19,7 +19,6 @@ import shutil import subprocess import sys import threading -from contextlib import contextmanager # fcntl is Unix-only; on Windows use msvcrt for file locking try: @@ -167,7 +166,7 @@ _parallel_pool_max_workers: Optional[int] = None _running_job_ids: set = set() _running_lock = threading.Lock() -# Sequential (env/context-mutating) cron jobs — workdir/profile jobs that touch +# Sequential (env-mutating) cron jobs — workdir jobs that touch # process-global runtime state — must run one at a time, but must NOT block the # ticker thread. A persistent single-thread executor preserves ordering across # ticks while keeping dispatch fire-and-forget, the same as the parallel pool. @@ -191,10 +190,10 @@ def _get_parallel_pool(max_workers: Optional[int]) -> concurrent.futures.ThreadP def _get_sequential_pool() -> concurrent.futures.ThreadPoolExecutor: """Return (or create) the persistent single-thread sequential pool. - A single worker guarantees env/context-mutating jobs never overlap, even + A single worker guarantees env-mutating jobs never overlap, even across ticks: a job queued by a newer tick waits for the previous tick's - sequential jobs to finish rather than corrupting their os.environ / - profile state. + sequential jobs to finish rather than corrupting their os.environ + state. 
""" global _sequential_pool if _sequential_pool is None: @@ -236,71 +235,6 @@ def _get_lock_paths() -> tuple[Path, Path]: return lock_dir, lock_dir / ".tick.lock" -@contextmanager -def _job_profile_context(job_id: str, profile: Optional[str]): - """Temporarily run a job under a specific Hermes profile. - - Cron jobs are stored and scheduled by the profile running the scheduler, but - an individual job can opt into a different runtime profile. While active, - the scheduler's test/override hook and a context-local Hermes home override - both point at the resolved profile directory so _get_hermes_home(), - .env/config loading, script resolution, AIAgent construction, and downstream - get_hermes_home() callers agree on the same home. - - Some existing provider/config paths still load profile .env values through - os.environ, so profile jobs also snapshot and restore the process - environment on exit. tick() runs profile jobs sequentially to keep that - temporary mutation isolated from other scheduled jobs. 
- """ - raw_profile = str(profile or "").strip() - if not raw_profile: - yield None - return - - global _hermes_home - prior_override = _hermes_home - env_snapshot = os.environ.copy() - - from hermes_cli.profiles import normalize_profile_name, resolve_profile_env - from hermes_constants import reset_hermes_home_override, set_hermes_home_override - - normalized_profile = normalize_profile_name(raw_profile) - try: - profile_home = Path(resolve_profile_env(normalized_profile)).resolve() - except (FileNotFoundError, ValueError) as exc: - logger.warning( - "Job '%s': configured profile %r no longer valid (%s) — " - "falling back to scheduler default", - job_id, raw_profile, exc, - ) - yield None - return - - override_token = None - try: - override_token = set_hermes_home_override(profile_home) - _hermes_home = profile_home - logger.info( - "Job '%s': using Hermes profile '%s' (%s)", - job_id, - normalized_profile, - profile_home, - ) - yield normalized_profile - finally: - _hermes_home = prior_override - if override_token is not None: - reset_hermes_home_override(override_token) - # Delta-based restore: remove added keys, restore changed keys. - # Avoids a brief window where other threads see an empty env. - added = set(os.environ.keys()) - set(env_snapshot.keys()) - for k in added: - os.environ.pop(k, None) - for k, v in env_snapshot.items(): - if os.environ.get(k) != v: - os.environ[k] = v - - def _resolve_origin(job: dict) -> Optional[dict]: """Extract origin info from a job, preserving any extra routing metadata. 
@@ -1033,17 +967,6 @@ def _run_job_script(script_path: str) -> tuple[bool, str]: else: argv = [sys.executable, str(path)] - run_env = os.environ.copy() - run_env["HERMES_HOME"] = str(_get_hermes_home()) - try: - from hermes_constants import get_subprocess_home - - profile_home = get_subprocess_home() - if profile_home: - run_env["HOME"] = profile_home - except Exception: - pass - try: popen_kwargs = {"creationflags": windows_hide_flags()} if sys.platform == "win32" else {} result = subprocess.run( @@ -1052,7 +975,6 @@ def _run_job_script(script_path: str) -> tuple[bool, str]: text=True, timeout=script_timeout, cwd=str(path.parent), - env=run_env, **popen_kwargs, ) stdout = (result.stdout or "").strip() @@ -1382,13 +1304,6 @@ def _scan_assembled_cron_prompt( def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: - """Execute a single cron job, applying any per-job profile override.""" - job_id = job["id"] - with _job_profile_context(job_id, job.get("profile")): - return _run_job_impl(job) - - -def _run_job_impl(job: dict) -> tuple[bool, str, str, Optional[str]]: """ Execute a single cron job. @@ -1625,9 +1540,8 @@ def _run_job_impl(job: dict) -> tuple[bool, str, str, Optional[str]]: # .cursorrules from the job's project dir, AND # - the terminal, file, and code-exec tools run commands from there. # - # tick() serializes jobs that mutate process-global runtime state (workdir - # and/or profile jobs) outside the parallel pool, so mutating - # os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less + # tick() serializes workdir-jobs outside the parallel pool, so mutating + # os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less # jobs we leave TERMINAL_CWD untouched — preserves the original behaviour # (skip_context_files=True, tools use whatever cwd the scheduler has). 
_job_workdir = (job.get("workdir") or "").strip() or None @@ -2174,21 +2088,12 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i mark_job_run(job["id"], False, str(e)) return False - # Partition due jobs: jobs with a per-job workdir and/or profile touch - # process-global runtime state inside run_job. Workdir jobs temporarily - # set os.environ["TERMINAL_CWD"]; profile jobs use a context-local - # Hermes home override, scheduler _hermes_home hook, and temporary - # profile .env load into os.environ with snapshot/restore. They MUST run - # sequentially to avoid corrupting each other. Jobs without either field - # stay parallel-safe. - sequential_jobs = [ - j for j in due_jobs - if (j.get("workdir") or "").strip() or (j.get("profile") or "").strip() - ] - parallel_jobs = [ - j for j in due_jobs - if not ((j.get("workdir") or "").strip() or (j.get("profile") or "").strip()) - ] + # Partition due jobs: those with a per-job workdir mutate + # os.environ["TERMINAL_CWD"] inside run_job, which is process-global — + # so they MUST run sequentially to avoid corrupting each other. Jobs + # without a workdir leave env untouched and stay parallel-safe. + sequential_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()] + parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()] _results: list = [] _all_futures: list = [] @@ -2217,9 +2122,9 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i return pool.submit(_run_and_release) - # Sequential pass for env/context-mutating (workdir/profile) jobs. + # Sequential pass for env-mutating (workdir) jobs. # Queued to a persistent single-thread pool so they run one at a time - # WITHOUT blocking the ticker thread — a long workdir/profile job no + # WITHOUT blocking the ticker thread — a long workdir job no # longer starves the rest of the schedule (same fix as the parallel # pass, just serialized). 
The in-flight guard prevents a still-running # job from being re-queued on the next tick. diff --git a/docs/plans/2026-06-09-003-fix-telegram-stream-overflow-continuations-plan.md b/docs/plans/2026-06-09-003-fix-telegram-stream-overflow-continuations-plan.md new file mode 100644 index 00000000000..cd533ea49b7 --- /dev/null +++ b/docs/plans/2026-06-09-003-fix-telegram-stream-overflow-continuations-plan.md @@ -0,0 +1,240 @@ +--- +title: "fix: Prevent Telegram streamed replies from ending after first overflow chunk" +status: active +date: 2026-06-09 +type: fix +target_repo: hermes-agent +origin: user-reported Telegram topic screenshot +--- + +# fix: Prevent Telegram streamed replies from ending after first overflow chunk + +## Summary + +Fix a Telegram gateway bug where a long streamed assistant reply can appear to stop mid-answer in a topic after the first overflow chunk. The reported screenshot shows a long Hermes response in the `Nehemiah - Coding` Telegram topic ending at `- The visible tool-call summary`, followed by the user noting that the previous message did not finish streaming to that Telegram topic. + +The plan targets the streamed edit overflow path, not general model generation. A completed assistant response must either reach Telegram in full across all continuation messages or leave enough state for the gateway fallback path to deliver the remaining content instead of marking the turn complete after a partial delivery. + +--- + +## Problem Frame + +Telegram limits message text to 4096 UTF-16 code units. Hermes streams gateway responses by editing a message and, when a streamed message grows past the limit, splitting the overflow into additional Telegram messages. The adapter already has a split-and-deliver path for oversized edits, but the partial-continuation failure contract is weak: if chunk 1 is edited successfully and a later continuation fails, the adapter can still report success for the operation. 
The stream consumer may then mark the final response delivered even though the visible topic only contains the first part. + +This is especially visible in Telegram forum topics because a long final response can be split below tool-progress bubbles, and a missing continuation looks exactly like the stream stopped mid-answer. + +--- + +## Requirements + +- R1. Long streamed Telegram replies must preserve all final content across overflow chunks. +- R2. If any continuation chunk fails after the first overflow edit lands, the gateway must not mark the final response as fully delivered. +- R3. Continuation chunks must remain routed to the same Telegram topic/thread as the original response. +- R4. The fix must avoid duplicate full-answer sends when all overflow chunks were delivered successfully. +- R5. Tests must cover the reported failure shape: a final streamed reply that exceeds Telegram's limit, succeeds on the first edit, fails on a continuation, and must not be treated as complete. + +--- + +## Key Technical Decisions + +- Treat overflow delivery as all-or-not-complete. `_edit_overflow_split` should only return a successful final-delivery result when every planned chunk reaches Telegram. Partial delivery is a distinct outcome that downstream code can recover from. +- Carry partial-overflow metadata through `SendResult.raw_response` rather than adding a new public dataclass field unless implementation proves the existing result shape is insufficient. The stream consumer already inspects `SendResult` after adapter edits, so a small raw response contract can keep the change contained. +- Make the stream consumer responsible for final-delivery truth. The adapter knows which chunks landed, but the consumer owns `_final_response_sent`, `_final_content_delivered`, `_fallback_prefix`, and fallback final-send behaviour. +- Keep routing inside Telegram adapter helpers. 
Continuation sends should continue to use `_thread_kwargs_for_send(...)` with metadata-derived `message_thread_id` and reply anchors so forum topic behaviour stays consistent. + +--- + +## High-Level Technical Design + +```mermaid +sequenceDiagram + participant C as GatewayStreamConsumer + participant T as TelegramAdapter.edit_message + participant B as Telegram Bot API + + C->>T: finalize/edit long accumulated response + T->>B: edit original message with chunk 1 + loop remaining chunks + T->>B: send continuation in same topic/thread + end + alt all chunks delivered + T-->>C: success, last message id, continuation ids + C->>C: mark final response delivered + else any continuation failed + T-->>C: partial overflow failure with delivered prefix metadata + C->>C: do not mark final delivered + C->>B: fallback sends missing tail or full final response safely + end +``` + +--- + +## Implementation Units + +### U1. Add a partial-overflow contract for Telegram edit splits + +**Goal:** Make `TelegramAdapter._edit_overflow_split` distinguish complete overflow delivery from partial delivery. + +**Requirements:** R1, R2, R4 + +**Dependencies:** None + +**Files:** +- `gateway/platforms/telegram.py` +- `tests/gateway/test_telegram_send.py` or the existing Telegram adapter test module that already covers `edit_message` overflow behaviour + +**Approach:** +- Keep the successful path unchanged when every chunk is delivered: return `SendResult(success=True, message_id=last_message_id, continuation_message_ids=(...))`. +- When a continuation fails after the first edit, return a result that clearly indicates partial delivery instead of plain success. Prefer `success=False`, `retryable=True`, and `raw_response` metadata such as delivered chunk count, total chunk count, last delivered message id, and the visible delivered prefix. +- Preserve logging, but do not rely on logs as the only signal. The caller must be able to tell partial delivery happened. 
+- Ensure the first edited chunk and all successful continuation chunks still include the existing Markdown/plain-text fallback behaviour. + +**Patterns to follow:** +- Existing overflow handling in `TelegramAdapter.edit_message` and `_edit_overflow_split`. +- Existing `SendResult` semantics in `gateway/platforms/base.py`, especially `retryable`, `raw_response`, and `continuation_message_ids`. + +**Test scenarios:** +- Oversized finalized edit where all continuations succeed returns success, the last continuation id, and all continuation ids. +- Oversized finalized edit where the first continuation send fails returns a partial-overflow failure and does not report success. +- Oversized finalized edit where one continuation succeeds and a later continuation fails reports the last delivered continuation id and delivered count in raw metadata. +- A continuation MarkdownV2 formatting failure still retries plain text before being treated as a delivery failure. + +**Verification:** Adapter tests prove complete overflow remains successful and partial overflow is observable by the caller. + +### U2. Teach the stream consumer to recover from partial overflow + +**Goal:** Ensure a partial Telegram overflow does not set `_final_response_sent` or `_final_content_delivered` unless the full response reached the user. + +**Requirements:** R1, R2, R4, R5 + +**Dependencies:** U1 + +**Files:** +- `gateway/stream_consumer.py` +- `tests/gateway/test_stream_consumer.py` or a focused new `tests/gateway/test_stream_consumer_telegram_overflow.py` + +**Approach:** +- In `_send_or_edit`, when `adapter.edit_message(...)` returns a partial-overflow failure, update consumer state to reflect the last visible prefix/message and enter fallback delivery for the missing content. +- Avoid treating `_already_sent` as final delivery. A partial visible message can be true while final delivery is false. 
+- Use the delivered-prefix metadata if available so `_send_fallback_final(...)` sends only the missing tail. If implementation finds the prefix is unreliable after Markdown formatting, prefer sending the complete final response as a fresh fallback message rather than silently dropping the tail. +- Keep the existing success handling for `continuation_message_ids` when the adapter delivered all chunks. + +**Patterns to follow:** +- Existing fallback mode in `GatewayStreamConsumer._send_or_edit` and `_send_fallback_final`. +- Existing comments around `_final_response_sent`, `_final_content_delivered`, and `_fallback_prefix` for prior partial-delivery regressions. + +**Test scenarios:** +- A final streamed response that overflows and receives a complete-success edit split sets final-delivery flags and does not invoke fallback. +- A final streamed response whose adapter reports partial overflow does not set final-delivery flags immediately. +- After partial overflow, fallback delivery sends the remaining tail and then marks final content delivered only if the fallback send succeeds. +- If fallback delivery also fails, the consumer leaves final-delivery false so the gateway's non-streaming final-send safety path can still run. + +**Verification:** Stream consumer tests reproduce the screenshot shape by simulating first chunk visible and continuation failure, then assert the final answer is not suppressed. + +### U3. Preserve Telegram topic/thread routing for overflow and fallback continuations + +**Goal:** Ensure overflow recovery messages land in the same Telegram forum topic or DM topic fallback context. + +**Requirements:** R3 + +**Dependencies:** U1, U2 + +**Files:** +- `gateway/platforms/telegram.py` +- `gateway/stream_consumer.py` +- `tests/gateway/test_stream_consumer_thread_routing.py` +- Relevant Telegram adapter routing tests, if existing coverage is closer there + +**Approach:** +- Keep passing `metadata` through every overflow continuation and fallback send. 
+- Keep reply anchors where valid, but do not let a missing reply anchor drop the `message_thread_id` for normal forum topics. +- For private DM topic fallback metadata, preserve the existing stricter anchor behaviour documented in the adapter comments. + +**Patterns to follow:** +- `TelegramAdapter._thread_kwargs_for_send(...)`. +- Existing tests around Telegram topic recovery and stream consumer thread routing. + +**Test scenarios:** +- Overflow continuations include `message_thread_id` for a forum topic. +- A continuation retry after `reply message not found` keeps forum topic routing when allowed. +- Partial-overflow fallback sends receive the same metadata passed to the original stream consumer. + +**Verification:** Thread-routing assertions inspect fake bot calls and confirm all continuation/fallback messages carry the expected topic metadata. + +### U4. Add issue evidence and PR body traceability + +**Goal:** Make the upstream issue and PR clearly trace the user-visible bug and verification evidence. + +**Requirements:** R5 + +**Dependencies:** U1, U2, U3 + +**Files:** +- GitHub issue body created via `gh issue create` +- PR body using `.github/PULL_REQUEST_TEMPLATE.md` + +**Approach:** +- Create a GitHub issue with the screenshot evidence: the long message in the `Nehemiah - Coding` Telegram topic stops at `- The visible tool-call summary`, and the user's reply says the previous message did not finish streaming to that Telegram topic. +- Reference affected component as Gateway and platform as Telegram. +- In the PR body, link the issue with `Fixes #...`, describe the split-delivery contract change, and include the screenshot or attach it if GitHub upload is available. +- Follow `CONTRIBUTING.md` and the repository PR template exactly. + +**Patterns to follow:** +- `.github/ISSUE_TEMPLATE/bug_report.yml` +- `.github/PULL_REQUEST_TEMPLATE.md` + +**Test scenarios:** +- Test expectation: none, this is tracker and PR documentation work. 
+ +**Verification:** The GitHub issue exists with screenshot evidence or an explicit screenshot reference, and the PR body links the issue and lists the tests run. + +--- + +## Scope Boundaries + +### In Scope + +- Telegram streamed response overflow splitting and recovery. +- Stream consumer final-delivery truth for partial overflow delivery. +- Topic/thread metadata preservation for overflow and fallback continuation sends. +- Focused unit tests around adapter and stream consumer behaviour. + +### Out of Scope + +- Changing model streaming semantics in `run_agent.py`. +- Reworking Telegram draft streaming, which is DM-only and not the forum-topic path in the screenshot. +- Changing general platform message splitting for Discord, Slack, WhatsApp, or Matrix unless a shared helper must be corrected for the Telegram fix. +- Altering tool-progress display settings or terminal progress rendering. + +### Deferred to Follow-Up Work + +- Broader observability for gateway delivery completeness across all messaging platforms. +- A user-facing resend/recover command for a previous truncated response. + +--- + +## Risks & Mitigations + +- Risk: fallback recovery duplicates already-visible first chunks. Mitigation: use delivered-prefix metadata where reliable and add tests for no-duplicate complete-success behaviour. +- Risk: preserving forum topic routing while dropping invalid reply anchors is easy to regress. Mitigation: include fake bot call assertions for `message_thread_id` and reply behaviour. +- Risk: MarkdownV2 formatting can alter visible/raw prefix comparisons. Mitigation: keep fallback conservative; duplicate content is preferable to silently missing content, but tests should keep the common path tail-only. + +--- + +## Sources & Research + +- User-provided screenshot at `/root/.hermes/image_cache/img_f664e68f6ddf.jpg`. +- `gateway/stream_consumer.py` streamed edit, overflow, fallback, and final-delivery state handling. 
+- `gateway/platforms/telegram.py` Telegram send/edit overflow splitting and topic routing helpers. +- `gateway/platforms/base.py` `SendResult` contract and shared message chunking helper. +- `tests/gateway/test_stream_consumer.py`, `tests/gateway/test_stream_consumer_thread_routing.py`, and Telegram adapter tests for focused regression coverage. + +--- + +## Verification Strategy + +- Run focused Telegram adapter overflow tests. +- Run focused stream consumer overflow/fallback tests. +- Run topic-routing tests affected by metadata changes. +- Run the gateway test subset around Telegram send/edit, stream consumer, and run progress if touched. +- Before PR creation, ensure `git diff` contains only the plan, implementation, tests, and PR/issue-relevant documentation for this bug. diff --git a/gateway/config.py b/gateway/config.py index ebd8af27a2c..f11146e606a 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -1222,17 +1222,30 @@ def load_gateway_config() -> GatewayConfig: if isinstance(matrix_cfg, dict): if "require_mention" in matrix_cfg and not os.getenv("MATRIX_REQUIRE_MENTION"): os.environ["MATRIX_REQUIRE_MENTION"] = str(matrix_cfg["require_mention"]).lower() + allowed_users = matrix_cfg.get("allowed_users") + if allowed_users is not None and not os.getenv("MATRIX_ALLOWED_USERS"): + if isinstance(allowed_users, list): + allowed_users = ",".join(str(v) for v in allowed_users) + os.environ["MATRIX_ALLOWED_USERS"] = str(allowed_users) + allowed_rooms = matrix_cfg.get("allowed_rooms") + if allowed_rooms is not None and not os.getenv("MATRIX_ALLOWED_ROOMS"): + if isinstance(allowed_rooms, list): + allowed_rooms = ",".join(str(v) for v in allowed_rooms) + os.environ["MATRIX_ALLOWED_ROOMS"] = str(allowed_rooms) frc = matrix_cfg.get("free_response_rooms") if frc is not None and not os.getenv("MATRIX_FREE_RESPONSE_ROOMS"): if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["MATRIX_FREE_RESPONSE_ROOMS"] = str(frc) - # allowed_rooms: if set, 
bot ONLY responds in these rooms (whitelist) - ar = matrix_cfg.get("allowed_rooms") - if ar is not None and not os.getenv("MATRIX_ALLOWED_ROOMS"): - if isinstance(ar, list): - ar = ",".join(str(v) for v in ar) - os.environ["MATRIX_ALLOWED_ROOMS"] = str(ar) + ignore_patterns = matrix_cfg.get("ignore_user_patterns") + if ignore_patterns is not None and not os.getenv("MATRIX_IGNORE_USER_PATTERNS"): + if isinstance(ignore_patterns, list): + ignore_patterns = ",".join(str(v) for v in ignore_patterns) + os.environ["MATRIX_IGNORE_USER_PATTERNS"] = str(ignore_patterns) + if "process_notices" in matrix_cfg and not os.getenv("MATRIX_PROCESS_NOTICES"): + os.environ["MATRIX_PROCESS_NOTICES"] = str(matrix_cfg["process_notices"]).lower() + if "session_scope" in matrix_cfg and not os.getenv("MATRIX_SESSION_SCOPE"): + os.environ["MATRIX_SESSION_SCOPE"] = str(matrix_cfg["session_scope"]).lower() if "auto_thread" in matrix_cfg and not os.getenv("MATRIX_AUTO_THREAD"): os.environ["MATRIX_AUTO_THREAD"] = str(matrix_cfg["auto_thread"]).lower() if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"): @@ -1556,8 +1569,14 @@ def _apply_env_overrides(config: GatewayConfig) -> None: matrix_password = os.getenv("MATRIX_PASSWORD", "") if matrix_password: matrix_config.extra["password"] = matrix_password - matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in {"true", "1", "yes"} + matrix_e2ee_mode = os.getenv("MATRIX_E2EE_MODE", "").strip().lower() + matrix_e2ee = ( + matrix_e2ee_mode in ("required", "require", "optional", "prefer", "preferred") + or os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes") + ) matrix_config.extra["encryption"] = matrix_e2ee + if matrix_e2ee_mode: + matrix_config.extra["e2ee_mode"] = matrix_e2ee_mode matrix_device_id = os.getenv("MATRIX_DEVICE_ID", "") if matrix_device_id: matrix_config.extra["device_id"] = matrix_device_id diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 
8a71c75a30c..b9273e7cca0 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1545,6 +1545,13 @@ class SendResult: message_id: Optional[str] = None error: Optional[str] = None raw_response: Any = None + # Adapter-specific metadata. Cross-layer contracts that affect delivery + # semantics must be documented at the producer and consumer sites. Current + # known contract: Telegram edit overflow partials set + # raw_response["partial_overflow"] with delivered_chunks, total_chunks, + # last_message_id, delivered_prefix, and continuation_message_ids so the + # stream consumer can send the missing tail instead of marking a clipped + # response complete. retryable: bool = False # True for transient connection errors — base will retry automatically # When the adapter had to split an oversized payload across multiple # platform messages (e.g. Telegram edit_message overflow split-and-deliver), diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index b00fe5effc6..5253c537259 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -10,32 +10,58 @@ Environment variables: MATRIX_USER_ID Full user ID (@bot:server) — required for password login MATRIX_PASSWORD Password (alternative to access token) MATRIX_ENCRYPTION Set "true" to enable E2EE + MATRIX_E2EE_MODE off | optional | required. Overrides MATRIX_ENCRYPTION + when set. Legacy MATRIX_ENCRYPTION=true maps to required. MATRIX_DEVICE_ID Stable device ID for E2EE persistence across restarts MATRIX_PROXY HTTP(S) or SOCKS proxy URL for Matrix traffic MATRIX_ALLOWED_USERS Comma-separated Matrix user IDs (@user:server) + MATRIX_ALLOWED_ROOMS Comma-separated Matrix room IDs allowed to trigger turns MATRIX_HOME_ROOM Room ID for cron/notification delivery MATRIX_REACTIONS Set "false" to disable processing lifecycle reactions (eyes/checkmark/cross). 
Default: true MATRIX_REQUIRE_MENTION Require @mention in rooms (default: true) - MATRIX_FREE_RESPONSE_ROOMS Comma-separated room IDs exempt from mention requirement (alias of matrix.free_response_rooms) - MATRIX_ALLOWED_ROOMS Comma-separated room IDs; if set, bot ONLY responds in these rooms (whitelist, DMs exempt; alias of matrix.allowed_rooms) + MATRIX_FREE_RESPONSE_ROOMS Comma-separated room IDs exempt from mention requirement + (alias of matrix.free_response_rooms) + MATRIX_ALLOWED_ROOMS Comma-separated room IDs; if set, bot ONLY responds + in these rooms (whitelist, DMs exempt; alias of + matrix.allowed_rooms) + MATRIX_IGNORE_USER_PATTERNS Comma-separated regular expressions for appservice / + bridge ghost user IDs to ignore + MATRIX_PROCESS_NOTICES Set "true" to process inbound m.notice events + (default: false) + MATRIX_ALLOW_ROOM_MENTIONS Allow outbound @room mentions to notify whole rooms + (default: false) + MATRIX_TOOLS_ALLOW_REDACTION + Allow Matrix redaction tool execution (default: false) + MATRIX_TOOLS_ALLOW_INVITES Allow Matrix invite tool execution (default: false) + MATRIX_TOOLS_ALLOW_ROOM_CREATE + Allow Matrix room creation tool execution (default: false) MATRIX_AUTO_THREAD Auto-create threads for room messages (default: true) MATRIX_DM_AUTO_THREAD Auto-create threads for DM messages (default: false) MATRIX_RECOVERY_KEY Recovery key for cross-signing verification after device key rotation MATRIX_DM_MENTION_THREADS Create a thread when bot is @mentioned in a DM (default: false) + MATRIX_ALLOW_PUBLIC_ROOMS Allow Matrix tools to create public rooms (default: false) + MATRIX_APPROVAL_REQUIRE_SENDER + Require reaction controls to come from the original requester + when requester metadata is available (default: true) + MATRIX_APPROVAL_TIMEOUT_SECONDS + Reaction approval/model-picker timeout (default: 300) """ from __future__ import annotations import asyncio +import inspect import logging import mimetypes import os import re import time -from 
dataclasses import dataclass +from urllib.parse import urlsplit, urlunsplit +from dataclasses import dataclass, field from html import escape as _html_escape +from html.parser import HTMLParser from pathlib import Path from typing import Any, Dict, Optional, Set @@ -177,17 +203,139 @@ def _normalize_matrix_bang_command(text: str) -> str: return f"/{resolved}{match.group(2) or ''}" +class _MatrixHtmlSanitizer(HTMLParser): + """Allowlist sanitizer for Matrix-compatible formatted HTML.""" + + _ALLOWED_TAGS = { + "a", "b", "blockquote", "br", "code", "del", "em", "h1", "h2", "h3", + "h4", "h5", "h6", "hr", "i", "li", "ol", "p", "pre", "s", "strike", + "strong", "ul", + } + _VOID_TAGS = {"br", "hr"} + + def __init__(self) -> None: + super().__init__(convert_charrefs=False) + self._parts: list[str] = [] + self._skip_depth = 0 + + @staticmethod + def _safe_url(value: str) -> str: + stripped = re.sub(r"[\x00-\x1f\x7f]+", "", value or "").strip() + match = re.match(r"^([A-Za-z][A-Za-z0-9+.-]*):", stripped) + scheme = match.group(1).lower() if match else "" + if scheme and scheme not in {"http", "https", "matrix", "mailto"}: + return "" + return stripped + + def _safe_attrs(self, tag: str, attrs: list[tuple[str, str | None]]) -> str: + safe: list[str] = [] + for key, value in attrs: + attr = str(key or "").lower() + raw_value = "" if value is None else str(value) + if attr.startswith("on"): + continue + if tag == "a" and attr == "href": + href = self._safe_url(raw_value) + if href: + safe.append(f' href="{_html_escape(href, quote=True)}"') + elif tag == "code" and attr == "class": + if re.fullmatch(r"language-[A-Za-z0-9_+.-]{1,64}", raw_value): + safe.append(f' class="{_html_escape(raw_value, quote=True)}"') + return "".join(safe) + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: + tag = tag.lower() + if tag in {"script", "style"}: + self._skip_depth += 1 + return + if self._skip_depth: + return + if tag not in self._ALLOWED_TAGS: + return 
+ if tag in self._VOID_TAGS: + self._parts.append(f"<{tag}>") + return + self._parts.append(f"<{tag}{self._safe_attrs(tag, attrs)}>") + + def handle_endtag(self, tag: str) -> None: + tag = tag.lower() + if tag in {"script", "style"} and self._skip_depth: + self._skip_depth -= 1 + return + if self._skip_depth or tag not in self._ALLOWED_TAGS or tag in self._VOID_TAGS: + return + self._parts.append(f"") + + def handle_data(self, data: str) -> None: + if not self._skip_depth: + self._parts.append(_html_escape(data)) + + def handle_entityref(self, name: str) -> None: + if not self._skip_depth: + self._parts.append(f"&{name};") + + def handle_charref(self, name: str) -> None: + if not self._skip_depth: + self._parts.append(f"&#{name};") + + def get_html(self) -> str: + return "".join(self._parts) + + +@dataclass(frozen=True) +class MatrixRoomIdentity: + """Resolved Matrix room identity for routing and prompt context.""" + + room_id: str + room_name: str | None + room_topic: str | None + canonical_alias: str | None + server_name: str | None + joined_member_count: int | None + is_direct_account_data: bool + display_name: str + has_explicit_name: bool + chat_type: str + conflict: bool = False + + @dataclass class _MatrixApprovalPrompt: """Tracks a pending Matrix reaction-based exec approval prompt.""" - def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False): + def __init__( + self, + session_key: str, + chat_id: str, + message_id: str, + resolved: bool = False, + requester_user_id: str | None = None, + expires_at: float | None = None, + ): self.session_key = session_key self.chat_id = chat_id self.message_id = message_id self.resolved = resolved + self.requester_user_id = requester_user_id + self.expires_at = expires_at self.bot_reaction_events: dict[str, str] = {} # emoji -> event_id + +@dataclass +class _MatrixModelPickerPrompt: + """Tracks a pending Matrix reaction-based model picker prompt.""" + + chat_id: str + message_id: str + 
session_key: str + choices: dict[str, tuple[str, str]] + on_model_selected: Any + requester_user_id: str | None = None + expires_at: float | None = None + resolved: bool = False + bot_reaction_events: dict[str, str] = field(default_factory=dict) + + # Matrix message size limit (4000 chars practical, spec has no hard limit # but clients render poorly above this). MAX_MESSAGE_LENGTH = 4000 @@ -224,6 +372,40 @@ _MATRIX_IMAGE_FILENAME_EXTS = frozenset({ ".avif", }) +_MATRIX_MODEL_PICKER_REACTIONS = ( + "1\ufe0f\u20e3", + "2\ufe0f\u20e3", + "3\ufe0f\u20e3", + "4\ufe0f\u20e3", + "5\ufe0f\u20e3", + "6\ufe0f\u20e3", + "7\ufe0f\u20e3", + "8\ufe0f\u20e3", + "9\ufe0f\u20e3", + "\U0001f51f", +) + +_MATRIX_CAPABILITIES: Dict[str, str] = { + "text": "yes", + "threads": "yes", + "reactions": "yes", + "approvals": "yes", + "model picker": "yes", + "thinking panes": "yes", + "images": "yes", + "multiple images": "yes", + "files": "yes", + "voice/audio": "yes", + "video": "yes", + "E2EE": "off / optional / required", + "diagnostics": "yes", +} + + +def get_matrix_capabilities() -> Dict[str, str]: + """Return Matrix gateway capabilities for docs and release checks.""" + return dict(_MATRIX_CAPABILITIES) + def _looks_like_matrix_image_filename(text: str) -> bool: """Return True when Matrix image body text is probably just a transport filename. @@ -250,6 +432,26 @@ def _looks_like_matrix_image_filename(text: str) -> bool: return suffix in _MATRIX_IMAGE_FILENAME_EXTS +def _matrix_event_timestamp_seconds(event: Any) -> float: + """Return a Matrix event timestamp in seconds, accepting ms or sec values.""" + raw_ts = ( + getattr(event, "timestamp", None) + or getattr(event, "server_timestamp", None) + or 0 + ) + if not raw_ts: + return 0.0 + try: + ts = float(raw_ts) + except (TypeError, ValueError): + return 0.0 + # Matrix origin_server_ts is milliseconds. Some tests/fakes and SDK objects + # expose seconds; do not turn those into 1970-era timestamps. 
+ if ts > 10_000_000_000: + return ts / 1000.0 + return ts + + def _create_matrix_session(proxy_url: str | None): """Create an ``aiohttp.ClientSession`` whose proxy applies to *all* requests. @@ -306,6 +508,159 @@ def _check_e2ee_deps() -> bool: return False +def _normalize_e2ee_mode(value: Any) -> str: + """Normalize Matrix E2EE mode to off/optional/required.""" + raw = str(value or "").strip().lower() + if raw in ("required", "require", "true", "1", "yes", "on"): + return "required" + if raw in ("optional", "prefer", "preferred"): + return "optional" + return "off" + + +def _resolve_e2ee_mode(extra: Optional[Dict[str, Any]] = None) -> str: + """Resolve E2EE mode with MATRIX_ENCRYPTION backwards compatibility.""" + extra = extra or {} + explicit = extra.get("e2ee_mode") or os.getenv("MATRIX_E2EE_MODE", "") + if explicit: + return _normalize_e2ee_mode(explicit) + legacy_enabled = extra.get( + "encryption", + os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes"), + ) + return "required" if legacy_enabled else "off" + + +def _redact_matrix_value(value: Any) -> str: + """Return a safe, non-reversible preview for Matrix diagnostics.""" + text = str(value or "").strip() + if not text: + return "" + return "***" + + +def _write_matrix_recovery_key_output_file(recovery_key: str) -> Optional[Path]: + """Write a generated Matrix recovery key to an operator-chosen file. + + The file is created with mode 0600 and never overwritten. Returns the path + when written, otherwise None. 
+ """ + output_file = os.getenv("MATRIX_RECOVERY_KEY_OUTPUT_FILE", "").strip() + if not output_file: + return None + path = Path(output_file).expanduser() + path.parent.mkdir(parents=True, exist_ok=True) + flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL + fd = os.open(path, flags, 0o600) + try: + with os.fdopen(fd, "w", encoding="utf-8") as fh: + fh.write(recovery_key) + fh.write("\n") + except Exception: + try: + os.close(fd) + except OSError: + pass + raise + return path + + +def _get_matrix_recovery_key_output_target() -> tuple[Optional[Path], str]: + """Return a usable one-time recovery-key output path, or a redacted reason.""" + output_file = os.getenv("MATRIX_RECOVERY_KEY_OUTPUT_FILE", "").strip() + if not output_file: + return None, "not_configured" + path = Path(output_file).expanduser() + if path.exists(): + return None, "exists" + try: + path.parent.mkdir(parents=True, exist_ok=True) + except Exception as exc: + return None, f"unusable: {exc}" + return path, "" + + +def _handle_generated_matrix_recovery_key(mxid: str, recovery_key: str) -> None: + """Handle a freshly generated Matrix recovery key without logging it.""" + try: + output_path = _write_matrix_recovery_key_output_file(recovery_key) + except FileExistsError: + logger.warning( + "Matrix: bootstrapped cross-signing for %s. Recovery key output file " + "already exists; refusing to overwrite. Store the generated key " + "securely and set MATRIX_RECOVERY_KEY for future restarts.", + mxid, + ) + return + except Exception as exc: + logger.warning( + "Matrix: bootstrapped cross-signing for %s, but failed to write " + "MATRIX_RECOVERY_KEY_OUTPUT_FILE: %s. Store the generated key " + "securely and set MATRIX_RECOVERY_KEY for future restarts.", + mxid, + exc, + ) + return + + if output_path: + logger.warning( + "Matrix: bootstrapped cross-signing for %s. A new recovery key was " + "written to %s with mode 0600. 
Move it to your secret store and set " + "MATRIX_RECOVERY_KEY for future restarts.", + mxid, + output_path, + ) + else: + logger.warning( + "Matrix: bootstrapped cross-signing for %s. A new recovery key was " + "generated but will not be logged. Set MATRIX_RECOVERY_KEY_OUTPUT_FILE " + "to write it once with mode 0600, or configure MATRIX_RECOVERY_KEY " + "from your Matrix client before future restarts.", + mxid, + ) + + +def _sanitize_matrix_html(html: str) -> str: + sanitizer = _MatrixHtmlSanitizer() + try: + sanitizer.feed(html or "") + sanitizer.close() + return sanitizer.get_html() + except Exception: + return _html_escape(html or "") + + +def _redact_url_for_log(url: str) -> str: + """Strip query/fragment from URLs before logging signed media links.""" + try: + parts = urlsplit(str(url)) + if not parts.scheme and not parts.netloc: + return str(url).split("?", 1)[0].split("#", 1)[0] + return urlunsplit((parts.scheme, parts.netloc, parts.path, "", "")) + except Exception: + return "" + + +def _pre_sanitize_matrix_markdown(text: str) -> str: + """Remove unsafe raw HTML before Markdown conversion can escape it.""" + result = re.sub( + r"(?is)<\s*(script|style)\b[^>]*>.*?<\s*/\s*\1\s*>", + "", + text or "", + ) + result = re.sub( + r"""(?is)\s+on[a-z0-9_-]+\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)""", + "", + result, + ) + result = re.sub( + r"""(?is)\s+(href|src)\s*=\s*("[^"]*(?:javascript|data|vbscript):[^"]*"|'[^']*(?:javascript|data|vbscript):[^']*'|[^\s>]*(?:javascript|data|vbscript):[^\s>]*)""", + "", + result, + ) + return result + + def check_matrix_requirements() -> bool: """Return True if the Matrix adapter can be used. @@ -371,21 +726,20 @@ def check_matrix_requirements() -> bool: ) return False - # If encryption is requested, verify E2EE deps are available at startup - # rather than silently degrading to plaintext-only at connect time. 
- encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in { - "true", - "1", - "yes", - } - if encryption_requested and not _check_e2ee_deps(): + e2ee_mode = _resolve_e2ee_mode() + if e2ee_mode == "required" and not _check_e2ee_deps(): logger.error( - "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. " + "Matrix: E2EE is required but dependencies are missing. %s. " "Without this, encrypted rooms will not work. " - "Set MATRIX_ENCRYPTION=false to disable E2EE.", + "Set MATRIX_E2EE_MODE=off to disable E2EE.", _E2EE_INSTALL_HINT, ) return False + if e2ee_mode == "optional" and not _check_e2ee_deps(): + logger.warning( + "Matrix: E2EE optional but dependencies are missing. %s", + _E2EE_INSTALL_HINT, + ) return True @@ -445,10 +799,8 @@ class MatrixAdapter(BasePlatformAdapter): self._password: str = config.extra.get("password", "") or os.getenv( "MATRIX_PASSWORD", "" ) - self._encryption: bool = config.extra.get( - "encryption", - os.getenv("MATRIX_ENCRYPTION", "").lower() in {"true", "1", "yes"}, - ) + self._e2ee_mode: str = _resolve_e2ee_mode(config.extra) + self._encryption: bool = self._e2ee_mode != "off" self._device_id: str = config.extra.get("device_id", "") or os.getenv( "MATRIX_DEVICE_ID", "" ) @@ -469,9 +821,19 @@ class MatrixAdapter(BasePlatformAdapter): self._late_grace_drops: int = 0 self._late_grace_skew: float = 0.0 self._clock_skew_warned: bool = False + self._last_sync_ts: float = 0.0 # Cache: room_id → bool (is DM) self._dm_rooms: Dict[str, bool] = {} + self._room_identities: Dict[str, MatrixRoomIdentity] = {} + self._room_identity_cached_at: Dict[str, float] = {} + try: + self._room_identity_ttl_seconds = float( + os.getenv("MATRIX_ROOM_IDENTITY_TTL_SECONDS", "60") + ) + except ValueError: + self._room_identity_ttl_seconds = 60.0 + self._room_identity_cache_max = 256 # Set of room IDs we've joined self._joined_rooms: Set[str] = set() # Event deduplication (bounded deque keeps newest entries) @@ -486,10 +848,8 @@ class 
MatrixAdapter(BasePlatformAdapter): # Thread participation tracking (for require_mention bypass) self._threads = ThreadParticipationTracker("matrix") - # Mention/thread gating — parsed once from env vars. - self._require_mention: bool = os.getenv( - "MATRIX_REQUIRE_MENTION", "true" - ).lower() not in {"false", "0", "no"} + # Mention/thread gating — parsed once from config.extra or env vars. + self._require_mention: bool = self._parse_require_mention(config) self._thread_require_mention: bool = self._parse_thread_require_mention(config) free_rooms_raw = config.extra.get("free_response_rooms") if free_rooms_raw is None: @@ -514,17 +874,27 @@ class MatrixAdapter(BasePlatformAdapter): self._allowed_rooms: Set[str] = { r.strip() for r in str(allowed_rooms_raw).split(",") if r.strip() } - self._auto_thread: bool = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in { + self._allow_room_mentions: bool = os.getenv( + "MATRIX_ALLOW_ROOM_MENTIONS", "false" + ).lower() in ("true", "1", "yes") + self._auto_thread: bool = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ( "true", "1", "yes", - } + ) self._dm_auto_thread: bool = os.getenv( "MATRIX_DM_AUTO_THREAD", "false" ).lower() in {"true", "1", "yes"} self._dm_mention_threads: bool = os.getenv( "MATRIX_DM_MENTION_THREADS", "false" - ).lower() in {"true", "1", "yes"} + ).lower() in ("true", "1", "yes") + raw_session_scope = os.getenv("MATRIX_SESSION_SCOPE", "auto").strip().lower() + self._matrix_session_scope = ( + raw_session_scope if raw_session_scope in {"auto", "room", "thread"} else "auto" + ) + self._process_notices: bool = os.getenv( + "MATRIX_PROCESS_NOTICES", "false" + ).lower() in ("true", "1", "yes") # Reactions: configurable via MATRIX_REACTIONS (default: true). 
self._reactions_enabled: bool = os.getenv( @@ -542,6 +912,10 @@ class MatrixAdapter(BasePlatformAdapter): self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY") if self._proxy_url: logger.info("Matrix: proxy configured — %s", self._proxy_url) + try: + self._max_media_bytes = int(os.getenv("MATRIX_MAX_MEDIA_BYTES", str(100 * 1024 * 1024))) + except ValueError: + self._max_media_bytes = 100 * 1024 * 1024 # Text batching: merge rapid successive messages (Telegram-style). # Matrix clients split long messages around 4000 chars. @@ -557,14 +931,41 @@ class MatrixAdapter(BasePlatformAdapter): # Matrix reaction-based dangerous command approvals. self._approval_reaction_map = { "✅": "once", + "♾️": "always", + "♾": "always", + "\u267e\ufe0f": "always", + "\u267e": "always", + "❌": "deny", "❎": "deny", } self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {} self._approval_prompt_by_session: Dict[str, str] = {} + self._approval_require_sender: bool = os.getenv( + "MATRIX_APPROVAL_REQUIRE_SENDER", "true" + ).lower() in ("true", "1", "yes") + try: + self._approval_timeout_seconds = int( + os.getenv("MATRIX_APPROVAL_TIMEOUT_SECONDS", "300") + ) + except ValueError: + self._approval_timeout_seconds = 300 + self._model_picker_prompts_by_event: Dict[str, _MatrixModelPickerPrompt] = {} allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "") self._allowed_user_ids: Set[str] = { u.strip() for u in allowed_users_raw.split(",") if u.strip() } + self._allowed_room_ids: Set[str] = set(self._allowed_rooms) + ignore_patterns_raw = os.getenv("MATRIX_IGNORE_USER_PATTERNS", "") + self._ignored_user_patterns: list[re.Pattern[str]] = [] + for pattern in (p.strip() for p in ignore_patterns_raw.split(",") if p.strip()): + try: + self._ignored_user_patterns.append(re.compile(pattern)) + except re.error as exc: + logger.warning( + "Matrix: ignoring invalid MATRIX_IGNORE_USER_PATTERNS entry %r: %s", + pattern, + exc, + ) def _is_duplicate_event(self, 
event_id) -> bool: """Return True if this event was already processed. Tracks the ID otherwise.""" @@ -579,6 +980,25 @@ class MatrixAdapter(BasePlatformAdapter): self._processed_events_set.add(event_id) return False + @staticmethod + def _parse_require_mention(config) -> bool: + """Parse require_mention from config.extra or env var. + + Handles both YAML booleans and string values (``\"true\"``, ``\"false\"``, + ``\"yes\"``, ``\"no\"``, ``\"on\"``, ``\"off\"``, ``\"1\"``, ``\"0\"``). + Falls back to ``MATRIX_REQUIRE_MENTION`` env var, default ``true``. + """ + configured = config.extra.get("require_mention") + if configured is not None: + if isinstance(configured, bool): + return configured + if isinstance(configured, str): + return configured.lower() not in {"false", "0", "no", "off"} + return bool(configured) + return os.getenv( + "MATRIX_REQUIRE_MENTION", "true" + ).lower() not in {"false", "0", "no", "off"} + @staticmethod def _parse_thread_require_mention(config) -> bool: """Parse thread_require_mention from config.extra or env var. @@ -804,173 +1224,180 @@ class MatrixAdapter(BasePlatformAdapter): # Set up E2EE if requested. if self._encryption: if not _check_e2ee_deps(): - logger.error( - "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. " - "Refusing to connect — encrypted rooms would silently fail.", - _E2EE_INSTALL_HINT, - ) - await api.session.close() - return False - try: - from mautrix.crypto import OlmMachine - from mautrix.crypto.store.asyncpg import PgCryptoStore - from mautrix.util.async_db import Database - - _STORE_DIR.mkdir(parents=True, exist_ok=True) - - # Remove legacy pickle file from pre-SQLite era. - legacy_pickle = _STORE_DIR / "crypto_store.pickle" - if legacy_pickle.exists(): - logger.info( - "Matrix: removing legacy crypto_store.pickle (migrated to SQLite)" + if self._e2ee_mode == "optional": + logger.warning( + "Matrix: E2EE optional but dependencies are missing. " + "Continuing without encrypted-room support. 
%s", + _E2EE_INSTALL_HINT, + ) + self._encryption = False + else: + logger.error( + "Matrix: E2EE is required but dependencies are missing. %s. " + "Refusing to connect — encrypted rooms would silently fail.", + _E2EE_INSTALL_HINT, ) - legacy_pickle.unlink() - - # Open SQLite-backed crypto store. - crypto_db = Database.create( - f"sqlite:///{_CRYPTO_DB_PATH}", - upgrade_table=PgCryptoStore.upgrade_table, - ) - await crypto_db.start() - self._crypto_db = crypto_db - - _acct_id = self._user_id or "hermes" - _pickle_key = f"{_acct_id}:{self._device_id or 'default'}" - crypto_store = PgCryptoStore( - account_id=_acct_id, - pickle_key=_pickle_key, - db=crypto_db, - ) - await crypto_store.open() - - # Bind the store to the runtime device_id before any - # put_account() runs. PgCryptoStore defaults _device_id - # to "" and its crypto_account UPSERT never updates the - # device_id column on conflict — so once put_account - # writes blank, it stays blank forever. That breaks - # every downstream device-scoped olm operation: peer - # to-device ciphertext can't find our identity key and - # no megolm sessions ever land. Setting _device_id here - # (in-memory; the on-disk row may not exist yet) makes - # the first put_account write the correct value. - # DeviceID is a NewType(str) so plain str works at runtime. - if client.device_id: - await crypto_store.put_device_id(client.device_id) - - crypto_state = _CryptoStateStore(state_store, self._joined_rooms) - olm = OlmMachine(client, crypto_store, crypto_state) - - # Accept unverified devices so senders share Megolm - # session keys with us automatically. - olm.share_keys_min_trust = TrustState.UNVERIFIED - olm.send_keys_min_trust = TrustState.UNVERIFIED - - await olm.load() - - # Verify our device keys are still on the homeserver. 
- if not await self._verify_device_keys_on_server(client, olm): - await crypto_db.stop() await api.session.close() return False - - # Proactively flush one-time keys to detect stale OTK - # conflicts early. When crypto state is wiped but the - # same device ID is reused, the server may still hold OTKs - # signed with the old ed25519 key. Identity key re-upload - # succeeds but OTK uploads fail ("already exists" with - # mismatched signature). Peers then cannot establish Olm - # sessions and all new messages are undecryptable. + if not self._encryption: + pass + else: try: - await olm.share_keys() + from mautrix.crypto import OlmMachine + from mautrix.crypto.store.asyncpg import PgCryptoStore + from mautrix.util.async_db import Database + + _STORE_DIR.mkdir(parents=True, exist_ok=True) except Exception as exc: - exc_str = str(exc) - if "already exists" in exc_str: - logger.error( - "Matrix: device %s has stale one-time keys on the " - "server signed with a previous identity key. " - "Peers cannot establish new Olm sessions with " - "this device. Delete the device from the " - "homeserver and restart, or generate a new " - "access token to get a fresh device ID.", - client.device_id, + if self._e2ee_mode == "optional": + logger.warning( + "Matrix: failed to import optional E2EE client; " + "continuing without encrypted-room support: %s. %s", + exc, + _E2EE_INSTALL_HINT, ) + self._encryption = False + else: + logger.error( + "Matrix: failed to import E2EE client: %s. %s", + exc, + _E2EE_INSTALL_HINT, + ) + await api.session.close() + return False + if self._encryption: + try: + # Remove legacy pickle file from pre-SQLite era. 
+ legacy_pickle = _STORE_DIR / "crypto_store.pickle" + if legacy_pickle.exists(): + logger.info( + "Matrix: removing legacy crypto_store.pickle (migrated to SQLite)" + ) + legacy_pickle.unlink() + + crypto_db = Database.create( + f"sqlite:///{_CRYPTO_DB_PATH}", + upgrade_table=PgCryptoStore.upgrade_table, + ) + await crypto_db.start() + self._crypto_db = crypto_db + + _acct_id = self._user_id or "hermes" + _pickle_key = f"{_acct_id}:{self._device_id or 'default'}" + crypto_store = PgCryptoStore( + account_id=_acct_id, + pickle_key=_pickle_key, + db=crypto_db, + ) + await crypto_store.open() + + if client.device_id: + await crypto_store.put_device_id(client.device_id) + + crypto_state = _CryptoStateStore(state_store, self._joined_rooms) + olm = OlmMachine(client, crypto_store, crypto_state) + olm.share_keys_min_trust = TrustState.UNVERIFIED + olm.send_keys_min_trust = TrustState.UNVERIFIED + + await olm.load() + + if not await self._verify_device_keys_on_server(client, olm): await crypto_db.stop() await api.session.close() return False - # Non-OTK errors are transient (network, etc.) — log - # but allow startup to continue. - logger.warning( - "Matrix: share_keys() warning during startup: %s", - exc, - ) - # Import cross-signing private keys from SSSS and self-sign - # the current device. Required after any device-key rotation - # (fresh crypto.db, share_keys re-upload) — otherwise the - # device's self-signing signature is stale and peers refuse - # to share Megolm sessions with the rotated device. - recovery_key = os.getenv("MATRIX_RECOVERY_KEY", "").strip() - if recovery_key: try: - await olm.verify_with_recovery_key(recovery_key) - logger.info("Matrix: cross-signing verified via recovery key") + await olm.share_keys() except Exception as exc: - logger.warning( - "Matrix: recovery key verification failed: %s", exc - ) - else: - # No recovery key — bootstrap cross-signing if the bot - # has none yet. 
Without this, Element shows "Encrypted - # by a device not verified by its owner" on every - # message from this bot, indefinitely. mautrix's - # generate_recovery_key does the full flow: generates - # MSK/SSK/USK, uploads private keys to SSSS, publishes - # public keys to the homeserver, and signs the current - # device with the new SSK. Some homeservers require UIA - # for /keys/device_signing/upload — those will need an - # alternate path; Continuwuity and Synapse-with-shared- - # secret accept the unauthenticated upload. - try: - own_xsign = await olm.get_own_cross_signing_public_keys() - except Exception as exc: - own_xsign = None - logger.warning( - "Matrix: cross-signing key lookup failed: %s", exc - ) - if own_xsign is None: + exc_str = str(exc) + if "already exists" in exc_str: + logger.error( + "Matrix: device %s has stale one-time keys on the " + "server signed with a previous identity key. " + "Delete the device from the homeserver and restart, " + "or generate a new access token to get a fresh device ID.", + client.device_id, + ) + await crypto_db.stop() + await api.session.close() + return False + logger.warning("Matrix: share_keys() warning during startup: %s", exc) + + recovery_key = os.getenv("MATRIX_RECOVERY_KEY", "").strip() + if recovery_key: try: - new_recovery_key = await olm.generate_recovery_key() - logger.warning( - "Matrix: bootstrapped cross-signing for %s. 
" - "SAVE THIS RECOVERY KEY — set " - "MATRIX_RECOVERY_KEY for future restarts so " - "the bot can re-sign its device after key " - "rotation: %s", - client.mxid, - new_recovery_key, - ) + await olm.verify_with_recovery_key(recovery_key) + logger.info("Matrix: cross-signing verified via recovery key") except Exception as exc: - logger.warning( - "Matrix: cross-signing bootstrap failed " - "(non-fatal — Element will show 'not " - "verified by its owner'): %s", - exc, - ) + logger.warning("Matrix: recovery key verification failed: %s", exc) + else: + try: + own_xsign = await olm.get_own_cross_signing_public_keys() + except Exception as exc: + own_xsign = None + logger.warning("Matrix: cross-signing key lookup failed: %s", exc) + if own_xsign is None: + _, output_error = _get_matrix_recovery_key_output_target() + if output_error == "not_configured": + logger.warning( + "Matrix: cross-signing keys are missing, but " + "automatic bootstrap is skipped because " + "MATRIX_RECOVERY_KEY_OUTPUT_FILE is not configured. " + "Configure MATRIX_RECOVERY_KEY from your Matrix client " + "or set MATRIX_RECOVERY_KEY_OUTPUT_FILE to write a new " + "recovery key once with mode 0600." + ) + elif output_error == "exists": + logger.warning( + "Matrix: cross-signing keys are missing, but " + "automatic bootstrap is skipped because " + "MATRIX_RECOVERY_KEY_OUTPUT_FILE already exists and " + "will not be overwritten." 
+ ) + elif output_error: + logger.warning( + "Matrix: cross-signing keys are missing, but " + "automatic bootstrap is skipped because " + "MATRIX_RECOVERY_KEY_OUTPUT_FILE is not usable: %s", + output_error, + ) + else: + try: + new_recovery_key = await olm.generate_recovery_key() + _handle_generated_matrix_recovery_key( + str(client.mxid), + new_recovery_key, + ) + except Exception as exc: + logger.warning( + "Matrix: cross-signing bootstrap failed " + "(non-fatal — Element will show 'not verified by its owner'): %s", + exc, + ) - client.crypto = olm - logger.info( - "Matrix: E2EE enabled (store: %s%s)", - str(_CRYPTO_DB_PATH), - f", device_id={client.device_id}" if client.device_id else "", - ) - except Exception as exc: - logger.error( - "Matrix: failed to create E2EE client: %s. %s", - exc, - _E2EE_INSTALL_HINT, - ) - await api.session.close() - return False + client.crypto = olm + logger.info( + "Matrix: E2EE enabled (store: %s%s)", + str(_CRYPTO_DB_PATH), + f", device_id={client.device_id}" if client.device_id else "", + ) + except Exception as exc: + if self._e2ee_mode == "optional": + logger.warning( + "Matrix: failed to create optional E2EE client; " + "continuing without encrypted-room support: %s. %s", + exc, + _E2EE_INSTALL_HINT, + ) + self._encryption = False + else: + logger.error( + "Matrix: failed to create E2EE client: %s. %s", + exc, + _E2EE_INSTALL_HINT, + ) + await api.session.close() + return False # Register event handlers. 
from mautrix.client import InternalEventType as IntEvt @@ -995,9 +1422,12 @@ class MatrixAdapter(BasePlatformAdapter): try: sync_data = await client.sync(timeout=10000, full_state=True) if isinstance(sync_data, dict): + self._last_sync_ts = time.time() rooms_join = sync_data.get("rooms", {}).get("join", {}) self._joined_rooms.clear() self._joined_rooms.update(rooms_join.keys()) + self._room_identities.clear() + self._room_identity_cached_at.clear() # Store the next_batch token so incremental syncs start # from where the initial sync left off. nb = sync_data.get("next_batch") @@ -1013,9 +1443,7 @@ class MatrixAdapter(BasePlatformAdapter): # Dispatch events from the initial sync so the OlmMachine # receives to-device key shares queued while we were offline. try: - tasks = client.handle_sync(sync_data) - if tasks: - await asyncio.gather(*tasks) + await self._dispatch_sync(sync_data) except Exception as exc: logger.warning("Matrix: initial sync event dispatch error: %s", exc) await self._join_pending_invites(sync_data) @@ -1093,20 +1521,7 @@ class MatrixAdapter(BasePlatformAdapter): for i, chunk in enumerate(chunks): msg_content = self._build_text_message_content(chunk) - # Reply-to support. - if reply_to: - msg_content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to}} - - # Thread support: if metadata has thread_id, send as threaded reply. 
- thread_id = (metadata or {}).get("thread_id") - if thread_id: - relates_to = msg_content.get("m.relates_to", {}) - relates_to["rel_type"] = "m.thread" - relates_to["event_id"] = thread_id - relates_to["is_falling_back"] = True - if reply_to and "m.in_reply_to" not in relates_to: - relates_to["m.in_reply_to"] = {"event_id": reply_to} - msg_content["m.relates_to"] = relates_to + self._apply_relation_metadata(msg_content, reply_to=reply_to, metadata=metadata) try: event_id = await asyncio.wait_for( @@ -1153,21 +1568,56 @@ class MatrixAdapter(BasePlatformAdapter): async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: """Return room name and type (dm/group).""" - name = chat_id - chat_type = "dm" if await self._is_dm_room(chat_id) else "group" + identity = await self._resolve_room_identity(chat_id) + chat_type = "dm" if identity.chat_type == "dm" else "group" + return {"name": identity.display_name, "type": chat_type} - if self._client: - try: - name_evt = await self._client.get_state_event( - RoomID(chat_id), - EventType.ROOM_NAME, - ) - if name_evt and hasattr(name_evt, "name") and name_evt.name: - name = name_evt.name - except Exception: - pass - - return {"name": name, "type": chat_type} + def get_diagnostics(self) -> Dict[str, Any]: + """Return redacted Matrix readiness/status diagnostics.""" + now = time.time() + token_present = bool(self._access_token) + user_id = self._user_id or getattr(self._client, "mxid", "") or "" + device_id = self._device_id or getattr(self._client, "device_id", "") or "" + return { + "platform": "matrix", + "homeserver": self._homeserver, + "auth": { + "access_token_present": token_present, + "password_present": bool(self._password), + "token_preview": "***" if token_present else "", + "user_id": user_id, + "device_id_present": bool(device_id), + "device_id_preview": _redact_matrix_value(device_id), + }, + "sync": { + "connected": self._client is not None, + "joined_room_count": len(self._joined_rooms), + 
"last_sync_age_seconds": ( + max(0.0, now - self._last_sync_ts) if self._last_sync_ts else None + ), + }, + "e2ee": { + "mode": self._e2ee_mode, + "enabled": bool(self._encryption), + "deps_available": _check_e2ee_deps(), + "crypto_store_path": str(_CRYPTO_DB_PATH), + "recovery_key_configured": bool(os.getenv("MATRIX_RECOVERY_KEY", "").strip()), + }, + "policy": { + "allowed_user_count": len(self._allowed_user_ids), + "allowed_room_count": len(self._allowed_room_ids), + "ignored_user_pattern_count": len(self._ignored_user_patterns), + "require_mention": self._require_mention, + "free_response_room_count": len(self._free_rooms), + "allow_room_mentions": self._allow_room_mentions, + "process_notices": self._process_notices, + "allow_public_rooms": os.getenv("MATRIX_ALLOW_PUBLIC_ROOMS", "").lower() + in ("true", "1", "yes"), + }, + "media": { + "max_media_bytes": self._max_media_bytes, + }, + } # ------------------------------------------------------------------ # Optional overrides @@ -1242,43 +1692,120 @@ class MatrixAdapter(BasePlatformAdapter): ) try: - # Try aiohttp first (always available), fall back to httpx - try: - import aiohttp as _aiohttp - _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url) - async with _aiohttp.ClientSession(**_sess_kw) as http: - async with http.get( - image_url, - timeout=_aiohttp.ClientTimeout(total=30), - **_req_kw, - ) as resp: - resp.raise_for_status() - data = await resp.read() - ct = resp.content_type or "image/png" - fname = ( - image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png" - ) - except ImportError: - import httpx - _httpx_kw: dict = {} - if self._proxy_url: - _httpx_kw["proxy"] = self._proxy_url - async with httpx.AsyncClient(**_httpx_kw) as http: - resp = await http.get(image_url, follow_redirects=True, timeout=30) - resp.raise_for_status() - data = resp.content - ct = resp.headers.get("content-type", "image/png") - fname = image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png" + data, ct, fname = 
await self._download_external_media_with_cap(image_url) except Exception as exc: - logger.warning("Matrix: failed to download image %s: %s", image_url, exc) + logger.warning( + "Matrix: failed to download image %s: %s", + _redact_url_for_log(image_url), + exc, + ) + fallback = ( + "I couldn't download and upload the image to Matrix. " + "The source URL was not shown because it may contain private tokens." + ) + if caption: + fallback = f"{caption}\n{fallback}" return await self.send( - chat_id, f"{caption or ''}\n{image_url}".strip(), reply_to + chat_id, + fallback, + reply_to, ) return await self._upload_and_send( chat_id, data, fname, ct, "m.image", caption, reply_to, metadata ) + async def _download_external_media_with_cap(self, url: str) -> tuple[bytes, str, str]: + """Download external media while enforcing redirect safety and size caps.""" + from tools.url_safety import is_safe_url + + if not is_safe_url(url): + raise ValueError("blocked unsafe media URL") + + def _check_content_length(headers: Any) -> None: + raw = None + try: + raw = headers.get("Content-Length") or headers.get("content-length") + except Exception: + raw = None + if raw is None: + return + try: + size = int(raw) + except (TypeError, ValueError): + return + if size > self._max_media_bytes: + raise ValueError( + f"media exceeds Matrix limit ({size} > {self._max_media_bytes} bytes)" + ) + + def _check_image_content_type(content_type: str) -> str: + content_type = str(content_type or "").split(";", 1)[0].strip().lower() + if not content_type.startswith("image/"): + raise ValueError("external media is not an image") + return content_type + + def _append_chunk(parts: list[bytes], total: int, chunk: bytes) -> int: + total += len(chunk) + if total > self._max_media_bytes: + raise ValueError( + f"media exceeds Matrix limit (> {self._max_media_bytes} bytes)" + ) + parts.append(chunk) + return total + + fname = url.rsplit("/", 1)[-1].split("?")[0] or "image.png" + + try: + import aiohttp as _aiohttp + 
+ _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url) + async with _aiohttp.ClientSession(**_sess_kw) as http: + async with http.get( + url, + timeout=_aiohttp.ClientTimeout(total=30), + allow_redirects=True, + **_req_kw, + ) as resp: + resp.raise_for_status() + if not is_safe_url(str(resp.url)): + raise ValueError("blocked unsafe redirect URL") + _check_content_length(resp.headers) + parts: list[bytes] = [] + total = 0 + async for chunk in resp.content.iter_chunked(65536): + total = _append_chunk(parts, total, bytes(chunk)) + ct = _check_image_content_type( + getattr(resp, "content_type", None) + or resp.headers.get("content-type", "application/octet-stream") + ) + return b"".join(parts), ct, fname + except ImportError: + import httpx + + _httpx_kw: dict = {} + if self._proxy_url: + _httpx_kw["proxy"] = self._proxy_url + async with httpx.AsyncClient(**_httpx_kw) as http: + async with http.stream( + "GET", + url, + follow_redirects=True, + timeout=30, + ) as resp: + resp.raise_for_status() + if not is_safe_url(str(resp.url)): + raise ValueError("blocked unsafe redirect URL") + _check_content_length(resp.headers) + parts: list[bytes] = [] + total = 0 + async for chunk in resp.aiter_bytes(): + total = _append_chunk(parts, total, bytes(chunk)) + ct = _check_image_content_type( + resp.headers.get("content-type", "application/octet-stream") + ) + return b"".join(parts), ct, fname + async def send_image_file( self, chat_id: str, @@ -1292,6 +1819,42 @@ class MatrixAdapter(BasePlatformAdapter): chat_id, image_path, "m.image", caption, reply_to, metadata=metadata ) + async def send_multiple_images( + self, + chat_id: str, + images: list[tuple[str, str]], + metadata: Optional[Dict[str, Any]] = None, + human_delay: float = 0.0, + ) -> None: + """Send multiple Matrix images as one ordered logical batch.""" + if not images: + return + from urllib.parse import unquote as _unquote + + total = len(images) + for idx, (image_url, alt_text) in enumerate(images, start=1): + 
if human_delay > 0 and idx > 1: + await asyncio.sleep(human_delay) + caption = alt_text or None + if total > 1 and caption: + caption = f"{caption} ({idx}/{total})" + if image_url.startswith("file://"): + result = await self.send_image_file( + chat_id=chat_id, + image_path=_unquote(image_url[7:]), + caption=caption, + metadata=metadata, + ) + else: + result = await self.send_image( + chat_id=chat_id, + image_url=image_url, + caption=caption, + metadata=metadata, + ) + if not result.success: + logger.warning("Matrix: failed to send image %d/%d: %s", idx, total, result.error) + async def send_document( self, chat_id: str, @@ -1350,6 +1913,7 @@ class MatrixAdapter(BasePlatformAdapter): if not self._client: return SendResult(success=False, error="Not connected") + requester_user_id = str((metadata or {}).get("requester_user_id") or "") or None cmd_preview = command[:2000] + "..." if len(command) > 2000 else command text = ( "⚠️ **Dangerous command requires approval**\n" @@ -1370,6 +1934,8 @@ class MatrixAdapter(BasePlatformAdapter): session_key=session_key, chat_id=chat_id, message_id=result.message_id, + requester_user_id=requester_user_id, + expires_at=time.monotonic() + max(self._approval_timeout_seconds, 0), ) old_event = self._approval_prompt_by_session.get(session_key) if old_event: @@ -1377,7 +1943,7 @@ class MatrixAdapter(BasePlatformAdapter): self._approval_prompts_by_event[result.message_id] = prompt self._approval_prompt_by_session[session_key] = result.message_id - for emoji in ("✅", "❎"): + for emoji in ("✅", "♾️", "❌"): try: reaction_result = await self._send_reaction(chat_id, result.message_id, emoji) # Save the bot's reaction event_id for later cleanup @@ -1388,6 +1954,87 @@ class MatrixAdapter(BasePlatformAdapter): return result + async def send_model_picker( + self, + chat_id: str, + providers: list, + current_model: str, + current_provider: str, + session_key: str, + on_model_selected, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + 
"""Send a Matrix reaction-based model picker.""" + if not self._client: + return SendResult(success=False, error="Not connected") + + flat_choices: list[tuple[str, str, str, str]] = [] + for provider in providers or []: + provider_slug = str(provider.get("slug") or "") + provider_name = str(provider.get("name") or provider_slug) + models = provider.get("models") or [] + for model_id in models: + if len(flat_choices) >= len(_MATRIX_MODEL_PICKER_REACTIONS): + break + flat_choices.append(( + _MATRIX_MODEL_PICKER_REACTIONS[len(flat_choices)], + str(model_id), + provider_slug, + provider_name, + )) + if len(flat_choices) >= len(_MATRIX_MODEL_PICKER_REACTIONS): + break + + if not flat_choices: + return await self.send( + chat_id, + "No authenticated models are available for this session.", + metadata=metadata, + ) + + try: + from hermes_cli.providers import get_label + provider_label = get_label(current_provider) + except Exception: + provider_label = current_provider + + lines = [ + "⚙ **Model Configuration**", + f"Current model: `{current_model or 'unknown'}`", + f"Provider: {provider_label or 'unknown'}", + "", + "React to choose a model:", + ] + choices: dict[str, tuple[str, str]] = {} + for emoji, model_id, provider_slug, provider_name in flat_choices: + choices[emoji] = (model_id, provider_slug) + lines.append(f"{emoji} `{model_id}` — {provider_name}") + + result = await self.send(chat_id, "\n".join(lines), metadata=metadata) + if not result.success or not result.message_id: + return result + + prompt = _MatrixModelPickerPrompt( + chat_id=chat_id, + message_id=result.message_id, + session_key=session_key, + choices=choices, + on_model_selected=on_model_selected, + requester_user_id=str((metadata or {}).get("requester_user_id") or "") or None, + expires_at=time.monotonic() + max(self._approval_timeout_seconds, 0), + ) + self._model_picker_prompts_by_event[result.message_id] = prompt + + for emoji in choices: + try: + reaction_event_id = await 
self._send_reaction(chat_id, result.message_id, emoji) + if reaction_event_id: + prompt.bot_reaction_events[emoji] = str(reaction_event_id) + except Exception as exc: + logger.debug("Matrix: failed to add model picker reaction %s: %s", emoji, exc) + + return result + def format_message(self, content: str) -> str: """Pass-through — Matrix supports standard Markdown natively.""" # Strip image markdown; media is uploaded separately. @@ -1411,6 +2058,11 @@ class MatrixAdapter(BasePlatformAdapter): is_voice: bool = False, ) -> SendResult: """Upload bytes to Matrix and send as a media message.""" + if len(data) > self._max_media_bytes: + return SendResult( + success=False, + error=f"Media file exceeds Matrix limit ({len(data)} > {self._max_media_bytes} bytes)", + ) upload_data = data encrypted_file = None @@ -1461,16 +2113,7 @@ class MatrixAdapter(BasePlatformAdapter): if is_voice: msg_content["org.matrix.msc3245.voice"] = {} - if reply_to: - msg_content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to}} - - thread_id = (metadata or {}).get("thread_id") - if thread_id: - relates_to = msg_content.get("m.relates_to", {}) - relates_to["rel_type"] = "m.thread" - relates_to["event_id"] = thread_id - relates_to["is_falling_back"] = True - msg_content["m.relates_to"] = relates_to + self._apply_relation_metadata(msg_content, reply_to=reply_to, metadata=metadata) try: event_id = await self._client.send_message_event( @@ -1499,6 +2142,15 @@ class MatrixAdapter(BasePlatformAdapter): return await self.send( room_id, f"{caption or ''}\n(file not found: {file_path})", reply_to ) + try: + file_size = p.stat().st_size + except OSError: + file_size = 0 + if file_size > self._max_media_bytes: + return SendResult( + success=False, + error=f"Media file exceeds Matrix limit ({file_size} > {self._max_media_bytes} bytes)", + ) fname = file_name or p.name ct = mimetypes.guess_type(fname)[0] or "application/octet-stream" @@ -1543,10 +2195,13 @@ class MatrixAdapter(BasePlatformAdapter): 
return if isinstance(sync_data, dict): + self._last_sync_ts = time.time() # Update joined rooms from sync response. rooms_join = sync_data.get("rooms", {}).get("join", {}) if rooms_join: self._joined_rooms.update(rooms_join.keys()) + self._room_identities.clear() + self._room_identity_cached_at.clear() # Advance the sync token so the next request is # incremental instead of a full initial sync. @@ -1558,9 +2213,7 @@ class MatrixAdapter(BasePlatformAdapter): # Dispatch events to registered handlers so that # _on_room_message / _on_reaction / _on_invite fire. try: - tasks = client.handle_sync(sync_data) - if tasks: - await asyncio.gather(*tasks) + await self._dispatch_sync(sync_data) except Exception as exc: logger.warning("Matrix: sync event dispatch error: %s", exc) await self._join_pending_invites(sync_data) @@ -1589,6 +2242,17 @@ class MatrixAdapter(BasePlatformAdapter): # Event callbacks # ------------------------------------------------------------------ + async def _dispatch_sync(self, sync_data: Dict[str, Any]) -> None: + """Dispatch a sync response through the mautrix event machinery.""" + client = self._client + if not client or not hasattr(client, "handle_sync"): + return + tasks = client.handle_sync(sync_data) + if inspect.isawaitable(tasks): + tasks = await tasks + if tasks: + await asyncio.gather(*tasks) + def _is_self_sender(self, sender: str) -> bool: """Return True if the sender refers to the bot's own account. 
@@ -1645,6 +2309,33 @@ class MatrixAdapter(BasePlatformAdapter): return True return localpart.startswith("_") + def _matches_ignored_user_pattern(self, sender: str) -> bool: + """Return True when sender matches configured Matrix ignore patterns.""" + return any(pattern.search(sender or "") for pattern in self._ignored_user_patterns) + + def _is_allowed_matrix_room(self, room_id: str) -> bool: + """Return True when MATRIX_ALLOWED_ROOMS permits the room.""" + return not self._allowed_room_ids or room_id in self._allowed_room_ids + + async def _is_allowed_matrix_room_event(self, room_id: str) -> bool: + """Return True when a room event may proceed past intake filters. + + MATRIX_ALLOWED_ROOMS constrains shared rooms. Matrix DMs are exempt so + personal chats still work when operators use a room allowlist for + project rooms. + """ + if self._is_allowed_matrix_room(room_id): + return True + try: + return await self._is_dm_room(room_id) + except Exception as exc: + logger.debug( + "Matrix: could not resolve room identity for allowlist check in %s: %s", + room_id, + exc, + ) + return False + async def _on_room_message(self, event: Any) -> None: """Handle incoming room message events (text, media).""" room_id = str(getattr(event, "room_id", "")) @@ -1676,6 +2367,19 @@ class MatrixAdapter(BasePlatformAdapter): room_id, ) return + if self._matches_ignored_user_pattern(sender): + logger.debug( + "Matrix: ignoring sender %s in %s due to configured ignore pattern", + sender, + room_id, + ) + return + if not await self._is_allowed_matrix_room_event(room_id): + logger.info( + "Matrix: ignoring message from unauthorized room %s", + room_id, + ) + return # Deduplicate by event ID. event_id = str(getattr(event, "event_id", "")) @@ -1683,12 +2387,7 @@ class MatrixAdapter(BasePlatformAdapter): return # Startup grace: ignore old messages from initial sync. 
- raw_ts = ( - getattr(event, "timestamp", None) - or getattr(event, "server_timestamp", None) - or 0 - ) - event_ts = raw_ts / 1000.0 if raw_ts else 0.0 + event_ts = _matrix_event_timestamp_seconds(event) if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS: # If we are well past startup but events are still being dropped # by the grace check, the host clock is probably set ahead of @@ -1764,7 +2463,7 @@ class MatrixAdapter(BasePlatformAdapter): # Ignore m.notice to prevent bot-to-bot loops (m.notice is the # conventional msgtype for bot responses in the Matrix ecosystem). - if msgtype == "m.notice": + if msgtype == "m.notice" and not self._process_notices: return # Dispatch by msgtype. @@ -1773,7 +2472,7 @@ class MatrixAdapter(BasePlatformAdapter): await self._handle_media_message( room_id, sender, event_id, event_ts, source_content, relates_to, msgtype ) - elif msgtype == "m.text": + elif msgtype in ("m.text", "m.notice"): await self._handle_text_message( room_id, sender, event_id, event_ts, source_content, relates_to ) @@ -1792,6 +2491,7 @@ class MatrixAdapter(BasePlatformAdapter): Returns (body, is_dm, chat_type, thread_id, display_name, source) or None if the message should be dropped (mention gating). """ + identity = await self._resolve_room_identity(room_id) is_dm = await self._is_dm_room(room_id) chat_type = "dm" if is_dm else "group" @@ -1858,18 +2558,34 @@ class MatrixAdapter(BasePlatformAdapter): if is_mentioned and self._require_mention: body = self._strip_mention(body) - # Auto-thread. - if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)): - thread_id = event_id - self._threads.mark(thread_id) + # Auto-thread/session-scope policy. Real Matrix thread roots are + # preserved above; synthetic thread roots are policy-driven. 
+ if not thread_id: + if is_dm: + if self._dm_auto_thread: + thread_id = event_id + self._threads.mark(thread_id) + elif self._matrix_session_scope == "room": + thread_id = None + elif self._matrix_session_scope == "thread": + thread_id = event_id + self._threads.mark(thread_id) + elif self._auto_thread: + thread_id = event_id + self._threads.mark(thread_id) display_name = await self._get_display_name(room_id, sender) source = self.build_source( chat_id=room_id, + chat_name=identity.display_name, chat_type=chat_type, user_id=sender, user_name=display_name, thread_id=thread_id, + chat_topic=identity.room_topic, + guild_id=identity.server_name, + parent_chat_id=room_id if thread_id else None, + message_id=event_id, ) if thread_id: @@ -1964,6 +2680,12 @@ class MatrixAdapter(BasePlatformAdapter): """Process a media message event (image, audio, video, file).""" body = source_content.get("body", "") or "" url = source_content.get("url", "") + if url and not str(url).startswith("mxc://"): + logger.warning( + "[Matrix] Rejecting inbound media %s with non-MXC URL", + event_id, + ) + return # Convert mxc:// to HTTP URL for downstream processing. http_url = "" @@ -1975,11 +2697,30 @@ class MatrixAdapter(BasePlatformAdapter): if not isinstance(content_info, dict): content_info = {} event_mimetype = content_info.get("mimetype", "") + event_size = content_info.get("size") + try: + event_size_int = int(event_size) if event_size is not None else 0 + except (TypeError, ValueError): + event_size_int = 0 + if event_size_int and event_size_int > self._max_media_bytes: + logger.warning( + "[Matrix] Rejecting oversized inbound media %s (%d > %d bytes)", + event_id, + event_size_int, + self._max_media_bytes, + ) + return # For encrypted media, the URL may be in file.url. 
file_content = source_content.get("file", {}) if not url and isinstance(file_content, dict): url = file_content.get("url", "") or "" + if url and not str(url).startswith("mxc://"): + logger.warning( + "[Matrix] Rejecting inbound encrypted media %s with non-MXC URL", + event_id, + ) + return if url and url.startswith("mxc://"): http_url = self._mxc_to_http(url) @@ -2150,6 +2891,8 @@ class MatrixAdapter(BasePlatformAdapter): try: await self._client.join_room(RoomID(room_id)) self._joined_rooms.add(room_id) + self._room_identities.pop(room_id, None) + self._room_identity_cached_at.pop(room_id, None) logger.info("Matrix: joined %s", room_id) await self._refresh_dm_cache() return True @@ -2319,15 +3062,20 @@ class MatrixAdapter(BasePlatformAdapter): if prompt and not prompt.resolved: if room_id != prompt.chat_id: return - _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in {"true", "1", "yes"} - if not _allow_all and not (self._allowed_user_ids and sender in self._allowed_user_ids): - logger.info( - "Matrix: ignoring approval reaction from unauthorized user %s on %s", - sender, reacts_to, - ) + if self._matrix_prompt_expired(prompt): + await self._expire_matrix_approval_prompt(room_id, reacts_to, prompt) + return + if not await self._validate_matrix_prompt_reactor( + room_id, reacts_to, sender, prompt, "approval" + ): return choice = self._approval_reaction_map.get(key) if not choice: + await self._send_invalid_reaction_feedback( + room_id, + reacts_to, + "That reaction is not valid for this approval prompt.", + ) return try: from tools.approval import resolve_gateway_approval @@ -2346,17 +3094,157 @@ class MatrixAdapter(BasePlatformAdapter): await self._redact_bot_approval_reactions(room_id, prompt) except Exception as exc: logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc) + return + + model_prompt = self._model_picker_prompts_by_event.get(reacts_to) + if model_prompt and not model_prompt.resolved: + if room_id != 
model_prompt.chat_id: + return + if self._matrix_prompt_expired(model_prompt): + await self._expire_matrix_model_picker_prompt(room_id, reacts_to, model_prompt) + return + if not await self._validate_matrix_prompt_reactor( + room_id, reacts_to, sender, model_prompt, "model picker" + ): + return + selection = model_prompt.choices.get(key) + if not selection: + await self._send_invalid_reaction_feedback( + room_id, + reacts_to, + "That reaction is not one of the available model choices.", + ) + return + model_prompt.resolved = True + self._model_picker_prompts_by_event.pop(reacts_to, None) + model_id, provider_slug = selection + try: + confirmation = await model_prompt.on_model_selected( + room_id, model_id, provider_slug + ) + await self._redact_bot_model_picker_reactions(room_id, model_prompt) + if confirmation: + await self.send(room_id, confirmation, reply_to=reacts_to) + except Exception as exc: + logger.error("Failed to switch model from Matrix reaction: %s", exc) + await self.send( + room_id, + f"Failed to switch model: {exc}", + reply_to=reacts_to, + ) + return + + def _matrix_prompt_expired(self, prompt: Any) -> bool: + expires_at = getattr(prompt, "expires_at", None) + return expires_at is not None and time.monotonic() > float(expires_at) + + async def _validate_matrix_prompt_reactor( + self, + room_id: str, + target_event_id: str, + sender: str, + prompt: Any, + prompt_label: str, + ) -> bool: + allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in { + "true", + "1", + "yes", + } + if not allow_all and not ( + self._allowed_user_ids and sender in self._allowed_user_ids + ): + logger.info( + "Matrix: ignoring %s reaction from unauthorized user %s on %s", + prompt_label, sender, target_event_id, + ) + await self._send_invalid_reaction_feedback( + room_id, + target_event_id, + "Only an authorized Matrix user can use these controls.", + ) + return False + + requester = getattr(prompt, "requester_user_id", None) + approval_require_sender = 
getattr(self, "_approval_require_sender", True) + if approval_require_sender and requester and sender != requester: + logger.info( + "Matrix: ignoring %s reaction from %s; requester is %s", + prompt_label, sender, requester, + ) + await self._send_invalid_reaction_feedback( + room_id, + target_event_id, + "Only the user who requested this action can use these controls.", + ) + return False + return True + + async def _send_invalid_reaction_feedback( + self, + room_id: str, + target_event_id: str, + text: str, + ) -> None: + try: + await self.send(room_id, text, reply_to=target_event_id) + except Exception as exc: + logger.debug("Matrix: failed to send invalid reaction feedback: %s", exc) + + async def _expire_matrix_approval_prompt( + self, + room_id: str, + target_event_id: str, + prompt: "_MatrixApprovalPrompt", + ) -> None: + prompt.resolved = True + self._approval_prompts_by_event.pop(target_event_id, None) + self._approval_prompt_by_session.pop(prompt.session_key, None) + await self._redact_bot_approval_reactions(room_id, prompt) + await self._send_invalid_reaction_feedback( + room_id, + target_event_id, + "This approval prompt has expired. Run the command again if you still want to approve it.", + ) + + async def _expire_matrix_model_picker_prompt( + self, + room_id: str, + target_event_id: str, + prompt: "_MatrixModelPickerPrompt", + ) -> None: + prompt.resolved = True + self._model_picker_prompts_by_event.pop(target_event_id, None) + await self._redact_bot_model_picker_reactions(room_id, prompt) + await self._send_invalid_reaction_feedback( + room_id, + target_event_id, + "This model picker has expired. 
Run `/model` again to choose a model.", + ) async def _redact_bot_approval_reactions( self, room_id: str, prompt: "_MatrixApprovalPrompt", ) -> None: - """Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction.""" + """Redact the bot's seeded approval reactions, leaving only the user's reaction.""" for emoji, evt_id in prompt.bot_reaction_events.items(): self._schedule_reaction_redaction(room_id, evt_id, "approval resolved") logger.debug("Matrix: scheduled bot reaction redaction %s (%s)", emoji, evt_id) + async def _redact_bot_model_picker_reactions( + self, + room_id: str, + prompt: "_MatrixModelPickerPrompt", + ) -> None: + """Redact the bot's seeded model picker reactions.""" + for emoji, evt_id in prompt.bot_reaction_events.items(): + try: + await self.redact_message(room_id, evt_id, "model picker resolved") + logger.debug("Matrix: redacted model picker reaction %s (%s)", emoji, evt_id) + except Exception as exc: + logger.debug("Matrix: failed to redact model picker reaction %s: %s", emoji, exc) + # ------------------------------------------------------------------ # Text message aggregation (handles Matrix client-side splits) # ------------------------------------------------------------------ @@ -2505,6 +3393,13 @@ class MatrixAdapter(BasePlatformAdapter): """Create a new Matrix room.""" if not self._client: return None + if preset == "public_chat" and os.getenv("MATRIX_ALLOW_PUBLIC_ROOMS", "").lower() not in ( + "true", + "1", + "yes", + ): + logger.warning("Matrix: refusing to create public room without MATRIX_ALLOW_PUBLIC_ROOMS=true") + return None try: preset_enum = { "private_chat": RoomCreatePreset.PRIVATE, @@ -2539,6 +3434,63 @@ class MatrixAdapter(BasePlatformAdapter): logger.warning("Matrix: invite error: %s", exc) return False + async def fetch_history( + self, + room_id: str, + limit: int = 20, + from_token: str = "", + ) -> list[dict[str, Any]]: + """Fetch recent Matrix room history using the live client.""" + if not self._client: + 
return [] + limit = max(1, min(int(limit or 20), 100)) + try: + direction = getattr(PaginationDirection, "BACKWARD", "b") + if hasattr(self._client, "messages"): + response = await self._client.messages( + RoomID(room_id), + from_token=SyncToken(from_token) if from_token else None, + direction=direction, + limit=limit, + ) + elif hasattr(self._client, "get_messages"): + response = await self._client.get_messages( + RoomID(room_id), + start=SyncToken(from_token) if from_token else None, + direction=direction, + limit=limit, + ) + else: + logger.debug("Matrix: client has no messages/get_messages method") + return [] + chunk = getattr(response, "chunk", None) + if chunk is None and isinstance(response, dict): + chunk = response.get("chunk") + return [self._serialize_history_event(evt) for evt in (chunk or [])] + except Exception as exc: + logger.warning("Matrix: fetch history error: %s", exc) + return [] + + def _serialize_history_event(self, event: Any) -> dict[str, Any]: + content = getattr(event, "content", None) + if content is None and isinstance(event, dict): + content = event.get("content", {}) + if not isinstance(content, dict): + content = dict(content) if hasattr(content, "items") else {} + return { + "event_id": str( + getattr(event, "event_id", "") + or (event.get("event_id", "") if isinstance(event, dict) else "") + ), + "sender": str( + getattr(event, "sender", "") + or (event.get("sender", "") if isinstance(event, dict) else "") + ), + "timestamp": _matrix_event_timestamp_seconds(event), + "msgtype": str(content.get("msgtype", "")), + "body": str(content.get("body", "")), + } + # ------------------------------------------------------------------ # Presence # ------------------------------------------------------------------ @@ -2598,22 +3550,152 @@ class MatrixAdapter(BasePlatformAdapter): # Helpers # ------------------------------------------------------------------ - async def _is_dm_room(self, room_id: str) -> bool: - """Check if a room is a DM.""" - 
if self._dm_rooms.get(room_id, False): - return True - # Fallback: check member count via state store. + @staticmethod + def _state_event_value(event: Any, key: str) -> Optional[str]: + """Extract a simple value from a Matrix state event object or dict.""" + if event is None: + return None + value = getattr(event, key, None) + if value: + return str(value) + if isinstance(event, dict): + if event.get(key): + return str(event[key]) + content = event.get("content") + if isinstance(content, dict) and content.get(key): + return str(content[key]) + content = getattr(event, "content", None) + if isinstance(content, dict) and content.get(key): + return str(content[key]) + if content is not None and getattr(content, key, None): + return str(getattr(content, key)) + return None + + async def _get_room_member_count(self, room_id: str) -> Optional[int]: state_store = ( getattr(self._client, "state_store", None) if self._client else None ) - if state_store: - try: - members = await state_store.get_members(room_id) - if members and len(members) == 2: - return True - except Exception: - pass - return False + if not state_store: + return None + try: + members = await state_store.get_members(room_id) + except Exception: + return None + if members is None: + return None + try: + return len(members) + except TypeError: + return None + + async def _get_room_name(self, room_id: str) -> Optional[str]: + if not self._client or not hasattr(self._client, "get_state_event"): + return None + try: + event = await self._client.get_state_event( + RoomID(room_id), + "m.room.name", + ) + except Exception: + return None + value = self._state_event_value(event, "name") + return value.strip() if value and value.strip() else None + + async def _get_room_canonical_alias(self, room_id: str) -> Optional[str]: + if not self._client or not hasattr(self._client, "get_state_event"): + return None + try: + event = await self._client.get_state_event( + RoomID(room_id), + "m.room.canonical_alias", + ) + 
except Exception: + return None + value = self._state_event_value(event, "alias") + return value.strip() if value and value.strip() else None + + async def _get_room_topic(self, room_id: str) -> Optional[str]: + if not self._client or not hasattr(self._client, "get_state_event"): + return None + try: + event = await self._client.get_state_event( + RoomID(room_id), + "m.room.topic", + ) + except Exception: + return None + value = self._state_event_value(event, "topic") + return value.strip() if value and value.strip() else None + + @staticmethod + def _room_server_name(room_id: str) -> Optional[str]: + if ":" not in room_id: + return None + server = room_id.rsplit(":", 1)[-1].strip() + return server or None + + def _cache_room_identity(self, room_id: str, identity: MatrixRoomIdentity) -> None: + if len(self._room_identities) >= self._room_identity_cache_max: + oldest = min( + self._room_identity_cached_at, + key=self._room_identity_cached_at.get, + default=None, + ) + if oldest: + self._room_identities.pop(oldest, None) + self._room_identity_cached_at.pop(oldest, None) + self._room_identities[room_id] = identity + self._room_identity_cached_at[room_id] = time.monotonic() + + async def _resolve_room_identity( + self, + room_id: str, + *, + force_refresh: bool = False, + ) -> MatrixRoomIdentity: + """Resolve Matrix room identity without member-count DM heuristics. + + Matrix ``m.direct`` account data is the authoritative DM signal, but + explicitly named rooms win over stale/conflicting DM account data. 
+ """ + cached = self._room_identities.get(room_id) + cached_at = self._room_identity_cached_at.get(room_id, 0.0) + cache_fresh = ( + self._room_identity_ttl_seconds <= 0 + or time.monotonic() - cached_at <= self._room_identity_ttl_seconds + ) + if cached is not None and cache_fresh and not force_refresh: + return cached + + room_name = await self._get_room_name(room_id) + room_topic = await self._get_room_topic(room_id) + canonical_alias = await self._get_room_canonical_alias(room_id) + member_count = await self._get_room_member_count(room_id) + has_explicit_name = bool(room_name) + is_direct = bool(self._dm_rooms.get(room_id, False)) + conflict = bool(is_direct and has_explicit_name) + chat_type = "dm" if is_direct and not has_explicit_name else "room" + display_name = room_name or canonical_alias or room_id + + identity = MatrixRoomIdentity( + room_id=room_id, + room_name=room_name, + room_topic=room_topic, + canonical_alias=canonical_alias, + server_name=self._room_server_name(room_id), + joined_member_count=member_count, + is_direct_account_data=is_direct, + display_name=display_name, + has_explicit_name=has_explicit_name, + chat_type=chat_type, + conflict=conflict, + ) + self._cache_room_identity(room_id, identity) + return identity + + async def _is_dm_room(self, room_id: str) -> bool: + """Check if a room is a DM.""" + return (await self._resolve_room_identity(room_id)).chat_type == "dm" async def _refresh_dm_cache(self) -> None: """Refresh the DM room cache from m.direct account data.""" @@ -2637,9 +3719,11 @@ class MatrixAdapter(BasePlatformAdapter): dm_room_ids: Set[str] = set() for user_id, rooms in dm_data.items(): if isinstance(rooms, list): - dm_room_ids.update(str(r) for r in rooms) + dm_room_ids.update(str(r) for r in rooms if isinstance(r, str)) self._dm_rooms = {rid: (rid in dm_room_ids) for rid in self._joined_rooms} + self._room_identities.clear() + self._room_identity_cached_at.clear() # 
------------------------------------------------------------------ # Mention detection helpers @@ -2649,8 +3733,11 @@ class MatrixAdapter(BasePlatformAdapter): """Build Matrix text content with HTML and outbound mention metadata.""" msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text} mention_user_ids = self._extract_outbound_mentions(text) + room_mentioned = self._allow_room_mentions and self._has_outbound_room_mention(text) if mention_user_ids: msg_content["m.mentions"] = {"user_ids": mention_user_ids} + if room_mentioned: + msg_content.setdefault("m.mentions", {})["room"] = True html_source = self._inject_outbound_mention_links(text) html = self._markdown_to_html(html_source) @@ -2660,6 +3747,31 @@ class MatrixAdapter(BasePlatformAdapter): return msg_content + def _apply_relation_metadata( + self, + msg_content: Dict[str, Any], + *, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: + """Apply Matrix reply/thread relation metadata to an outbound payload.""" + thread_id = str((metadata or {}).get("thread_id") or "") + if reply_to: + msg_content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to}} + if thread_id: + relates_to = msg_content.get("m.relates_to", {}) + relates_to["rel_type"] = "m.thread" + relates_to["event_id"] = thread_id + relates_to["is_falling_back"] = True + # Matrix clients that do not render threads still use reply + # fallback. If no explicit reply target is available, fall back + # to the thread root. 
+ relates_to.setdefault( + "m.in_reply_to", + {"event_id": reply_to or thread_id}, + ) + msg_content["m.relates_to"] = relates_to + def _extract_outbound_mentions(self, text: str) -> list[str]: """Return unique Matrix user IDs mentioned in outbound text.""" protected, _ = self._protect_outbound_mention_regions(text) @@ -2672,6 +3784,11 @@ class MatrixAdapter(BasePlatformAdapter): mentions.append(user_id) return mentions + def _has_outbound_room_mention(self, text: str) -> bool: + """Return True when outbound text contains @room outside protected spans.""" + protected, _ = self._protect_outbound_mention_regions(text) + return bool(re.search(r"(? str: """Wrap outbound Matrix mentions in markdown links outside code spans.""" if not text: @@ -2812,6 +3929,7 @@ class MatrixAdapter(BasePlatformAdapter): links, blockquotes, lists, and horizontal rules — everything the Matrix HTML spec allows. """ + text = _pre_sanitize_matrix_markdown(text) try: import markdown as _md @@ -2826,11 +3944,11 @@ class MatrixAdapter(BasePlatformAdapter): if html.count("

<p>") == 1: html = html.replace("<p>", "").replace("</p>

", "") - return html + return _sanitize_matrix_html(html) except ImportError: pass - return self._markdown_to_html_fallback(text) + return _sanitize_matrix_html(self._markdown_to_html_fallback(text)) # ------------------------------------------------------------------ # Regex-based Markdown -> HTML (no extra dependencies) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index fa896db9d3a..eec156bbae9 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2348,10 +2348,15 @@ class TelegramAdapter(BasePlatformAdapter): ) except Exception as fmt_err: if "not modified" not in str(fmt_err).lower(): + logger.warning( + "[%s] Overflow split: MarkdownV2 first-chunk edit " + "failed, falling back to plain text: %s", + self.name, fmt_err, + ) await self._bot.edit_message_text( chat_id=int(chat_id), message_id=int(message_id), - text=first_chunk, + text=_strip_mdv2(first_chunk), ) else: await self._bot.edit_message_text( @@ -2379,6 +2384,7 @@ class TelegramAdapter(BasePlatformAdapter): # are already correctly sized). Best-effort MarkdownV2 with plain # fallback, mirroring send(). continuation_ids: list[str] = [] + delivered_chunks = [first_chunk] prev_id = message_id thread_id = self._metadata_thread_id(metadata) for chunk in chunks[1:]: @@ -2392,7 +2398,14 @@ class TelegramAdapter(BasePlatformAdapter): ) for use_markdown in (True, False) if finalize else (False,): try: - text = self.format_message(chunk) if use_markdown else chunk + if use_markdown: + text = self.format_message(chunk) + else: + # Plain attempt: on finalize the MarkdownV2 attempt + # failed, so degrade to clean stripped text, never + # the raw chunk (raw ** / ``` markers would render + # literally); streaming previews stay raw. 
+ text = _strip_mdv2(chunk) if finalize else chunk sent_msg = await self._bot.send_message( chat_id=int(chat_id), text=text, @@ -2418,7 +2431,7 @@ class TelegramAdapter(BasePlatformAdapter): try: sent_msg = await self._bot.send_message( chat_id=int(chat_id), - text=chunk, + text=_strip_mdv2(chunk) if finalize else chunk, **retry_thread_kwargs, **self._link_preview_kwargs(), **self._notification_kwargs(metadata), @@ -2442,17 +2455,37 @@ class TelegramAdapter(BasePlatformAdapter): break if sent_msg is None: # Continuation failed — the user has chunk 1 + however many - # continuations succeeded. Report success with what we got - # so the stream consumer knows the edit landed; the - # remaining tail is lost on this attempt and the next - # streaming tick may retry. + # continuations succeeded, but NOT the full response. Do not + # report success: the stream consumer treats a successful edit + # as final delivery on got_done, which would suppress fallback + # delivery and leave the Telegram topic clipped after the last + # delivered chunk. 
logger.warning( "[%s] Overflow split: stopped at %d/%d chunks delivered", self.name, 1 + len(continuation_ids), len(chunks), ) - break + delivered_prefix = "".join( + re.sub(r" \(\d+/\d+\)$", "", delivered) + for delivered in delivered_chunks + ) + return SendResult( + success=False, + message_id=prev_id, + error="overflow_continuation_failed", + retryable=True, + raw_response={ + "partial_overflow": True, + "delivered_chunks": 1 + len(continuation_ids), + "total_chunks": len(chunks), + "last_message_id": prev_id, + "delivered_prefix": delivered_prefix, + "continuation_message_ids": tuple(continuation_ids), + }, + continuation_message_ids=tuple(continuation_ids), + ) new_id = str(getattr(sent_msg, "message_id", "")) or prev_id continuation_ids.append(new_id) + delivered_chunks.append(chunk) prev_id = new_id last_id = continuation_ids[-1] if continuation_ids else message_id @@ -3804,6 +3837,33 @@ class TelegramAdapter(BasePlatformAdapter): ) return error + def _telegram_media_too_large_note(self, label: str, file_size: Any, max_bytes: int) -> str: + limit_mb = max(1, max_bytes // (1024 * 1024)) + try: + size_mb = int(file_size or 0) / (1024 * 1024) + size_text = f"{size_mb:.1f} MB" + except (TypeError, ValueError): + size_text = "unknown size" + return ( + f"[Telegram {label} skipped: file size {size_text} exceeds the " + f"{limit_mb} MB limit. 
Ask the user to send a shorter voice note " + "or a smaller audio file.]" + ) + + def _telegram_media_size_allowed(self, source: Any, label: str) -> tuple[bool, Optional[str]]: + """Validate Telegram media size before downloading into memory.""" + max_bytes = int(getattr(self, "_max_doc_bytes", 20 * 1024 * 1024) or 20 * 1024 * 1024) + file_size = getattr(source, "file_size", None) + try: + size = int(file_size or 0) + except (TypeError, ValueError): + size = 0 + if size <= 0: + return True, None + if size <= max_bytes: + return True, None + return False, self._telegram_media_too_large_note(label, size, max_bytes) + async def send_voice( self, chat_id: str, @@ -5569,6 +5629,12 @@ class TelegramAdapter(BasePlatformAdapter): # Download voice/audio messages to cache for STT transcription if msg.voice: try: + allowed, note = self._telegram_media_size_allowed(msg.voice, "voice message") + if not allowed: + event.text = self._append_observed_note(event.text, note or "") + logger.info("[Telegram] Skipped oversized user voice (size=%s)", getattr(msg.voice, "file_size", None)) + await self.handle_message(event) + return file_obj = await msg.voice.get_file() audio_bytes = await file_obj.download_as_bytearray() cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg") @@ -5579,6 +5645,12 @@ class TelegramAdapter(BasePlatformAdapter): logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True) elif msg.audio: try: + allowed, note = self._telegram_media_size_allowed(msg.audio, "audio file") + if not allowed: + event.text = self._append_observed_note(event.text, note or "") + logger.info("[Telegram] Skipped oversized user audio (size=%s)", getattr(msg.audio, "file_size", None)) + await self.handle_message(event) + return file_obj = await msg.audio.get_file() audio_bytes = await file_obj.download_as_bytearray() cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3") diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py 
index 7ec1f84c287..d833d5649a2 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -190,6 +190,22 @@ from gateway.platforms.base import ( ) +def _file_content_hash(path: Path) -> str: + """Return the first 16 hex chars of the SHA-256 of *path*'s contents. + + Used for the bridge staleness handshake: bridge.js reports its own + source hash in ``/health`` (``scriptHash``), and the adapter compares + it against the hash of bridge.js currently on disk. A mismatch means + a long-lived bridge process is serving code from before an update. + Returns ``""`` when the file can't be read. + """ + import hashlib + try: + return hashlib.sha256(path.read_bytes()).hexdigest()[:16] + except OSError: + return "" + + def check_whatsapp_requirements() -> bool: """ Check if WhatsApp dependencies are available. @@ -372,9 +388,21 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e) try: - # Auto-install npm dependencies if node_modules doesn't exist + # Auto-install npm dependencies when node_modules is missing OR + # package.json changed since the last install (e.g. after + # `hermes update` bumps the Baileys pin). The stamp file records + # the package.json hash of the last successful install. bridge_dir = bridge_path.parent - if not (bridge_dir / "node_modules").exists(): + _pkg_json = bridge_dir / "package.json" + _dep_stamp = bridge_dir / "node_modules" / ".hermes-pkg-hash" + _pkg_hash = _file_content_hash(_pkg_json) + _deps_fresh = False + if (bridge_dir / "node_modules").exists(): + try: + _deps_fresh = (_dep_stamp.read_text().strip() == _pkg_hash) and bool(_pkg_hash) + except OSError: + _deps_fresh = False + if not _deps_fresh: print(f"[{self.name}] Installing WhatsApp bridge dependencies...") # Resolve npm path so Windows can execute the .cmd shim. 
# shutil.which honours PATHEXT; on POSIX it returns the @@ -395,6 +423,11 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): print(f"[{self.name}] npm install failed: {install_result.stderr}") return False print(f"[{self.name}] Dependencies installed") + if _pkg_hash: + try: + _dep_stamp.write_text(_pkg_hash) + except OSError: + pass # Stamp is an optimization; install still succeeded except Exception as e: print(f"[{self.name}] Failed to install dependencies: {e}") return False @@ -414,12 +447,28 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): data = await resp.json() bridge_status = data.get("status", "unknown") if bridge_status == "connected": - print(f"[{self.name}] Using existing bridge (status: {bridge_status})") - self._mark_connected() - self._bridge_process = None # Not managed by us - self._http_session = aiohttp.ClientSession() - self._poll_task = asyncio.create_task(self._poll_messages()) - return True + # Staleness handshake: only reuse a running + # bridge if it is serving the same bridge.js + # that is on disk right now. A long-lived + # bridge survives gateway restarts AND + # `hermes update`, so without this check it + # keeps serving pre-update code forever + # (e.g. no inbound media download). Old + # bridges that don't report scriptHash are + # treated as stale by definition. 
+ running_hash = data.get("scriptHash", "") + disk_hash = _file_content_hash(bridge_path) + if running_hash and disk_hash and running_hash == disk_hash: + print(f"[{self.name}] Using existing bridge (status: {bridge_status})") + self._mark_connected() + self._bridge_process = None # Not managed by us + self._http_session = aiohttp.ClientSession() + self._poll_task = asyncio.create_task(self._poll_messages()) + return True + print( + f"[{self.name}] Running bridge is stale " + f"(running={running_hash or 'unversioned'}, disk={disk_hash}), restarting" + ) else: print(f"[{self.name}] Bridge found but not connected (status: {bridge_status}), restarting") except Exception: @@ -444,6 +493,18 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): bridge_env = os.environ.copy() if self._reply_prefix is not None: bridge_env["WHATSAPP_REPLY_PREFIX"] = self._reply_prefix + # Pass the profile-aware cache directories so the bridge writes + # media where the Python side reads it. Without these the bridge + # hardcodes ~/.hermes/{image,audio,document}_cache, which diverges + # under HERMES_HOME overrides, profiles, and the new cache/ layout. + from gateway.platforms.base import ( + get_audio_cache_dir as _get_audio_dir, + get_document_cache_dir as _get_doc_dir, + get_image_cache_dir as _get_img_dir, + ) + bridge_env["HERMES_IMAGE_CACHE_DIR"] = str(_get_img_dir()) + bridge_env["HERMES_AUDIO_CACHE_DIR"] = str(_get_audio_dir()) + bridge_env["HERMES_DOCUMENT_CACHE_DIR"] = str(_get_doc_dir()) self._bridge_process = subprocess.Popen( [ diff --git a/gateway/run.py b/gateway/run.py index 49000c38ad0..ac81eba30f7 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -32,6 +32,7 @@ import logging import os import re import shlex +import site import sys import signal import tempfile @@ -135,6 +136,60 @@ _GATEWAY_SECRET_PATTERNS = ( ) +def _ensure_windows_gateway_venv_imports() -> None: + """Make detached Windows gateway runs see the Hermes venv packages. 
+ + Some Windows restart paths run the gateway under uv's base ``pythonw.exe`` + to avoid the venv launcher respawning a visible console interpreter. That + mode can import the source tree via cwd/PYTHONPATH but still miss optional + packages installed only in ``venv/Lib/site-packages`` (notably the MCP SDK). + Patch the live process before MCP discovery so tool injection does not + depend on every launcher preserving PYTHONPATH perfectly. + """ + if sys.platform != "win32": + return + + project_root = Path(__file__).resolve().parent.parent + candidates: list[Path] = [] + if os.environ.get("VIRTUAL_ENV"): + candidates.append(Path(os.environ["VIRTUAL_ENV"])) + candidates.append(project_root / "venv") + + seen: set[str] = set() + for venv_dir in candidates: + try: + resolved_venv = venv_dir.resolve() + except OSError: + resolved_venv = venv_dir + venv_key = str(resolved_venv).lower() + if venv_key in seen: + continue + seen.add(venv_key) + + site_packages = resolved_venv / "Lib" / "site-packages" + if not site_packages.exists(): + continue + + project_entry = str(project_root) + site_entry = str(site_packages) + if project_entry not in sys.path: + sys.path.insert(0, project_entry) + # addsitepackages() semantics matter here: pywin32, used by the MCP + # SDK on Windows, relies on .pth processing to expose pywintypes. 
+ site.addsitedir(site_entry) + if site_entry in sys.path: + sys.path.remove(site_entry) + insert_at = 1 if sys.path and sys.path[0] == project_entry else 0 + sys.path.insert(insert_at, site_entry) + + os.environ["VIRTUAL_ENV"] = str(resolved_venv) + pythonpath = [project_entry, site_entry] + if os.environ.get("PYTHONPATH"): + pythonpath.append(os.environ["PYTHONPATH"]) + os.environ["PYTHONPATH"] = os.pathsep.join(dict.fromkeys(pythonpath)) + return + + def _gateway_platform_value(platform: Any) -> str: """Return a normalized gateway platform value for enums or raw strings.""" return str(getattr(platform, "value", platform) or "").strip().lower() @@ -4255,10 +4310,25 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew ) """ ).strip() + watcher_env = os.environ.copy() + # This watcher is intentionally outside the running gateway. If it + # inherits the gateway marker, `hermes gateway restart` refuses to + # run as a self-restart loop guard and the gateway stays stopped. 
+ watcher_env.pop("_HERMES_GATEWAY", None) + project_root = Path(__file__).resolve().parent.parent + venv_dir = Path(watcher_env.get("VIRTUAL_ENV") or project_root / "venv") + site_packages = venv_dir / "Lib" / "site-packages" + if site_packages.exists(): + watcher_env["VIRTUAL_ENV"] = str(venv_dir) + pythonpath = [str(project_root), str(site_packages)] + if watcher_env.get("PYTHONPATH"): + pythonpath.append(watcher_env["PYTHONPATH"]) + watcher_env["PYTHONPATH"] = os.pathsep.join(dict.fromkeys(pythonpath)) subprocess.Popen( [sys.executable, "-c", watcher, str(current_pid), *cmd_argv], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + env=watcher_env, **windows_detach_popen_kwargs(), ) return @@ -4268,12 +4338,20 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew f"while kill -0 {current_pid} 2>/dev/null; do sleep 0.2; done; " f"{cmd} gateway restart" ) + # Same marker scrub as the Windows watcher above: this watcher runs + # `hermes gateway restart` from outside the gateway, but it inherits + # _HERMES_GATEWAY=1 from us, and the CLI's self-restart loop guard + # refuses to run when that marker is set — silently (DEVNULL), so the + # gateway stops and never comes back. 
+ watcher_env = os.environ.copy() + watcher_env.pop("_HERMES_GATEWAY", None) setsid_bin = shutil.which("setsid") if setsid_bin: subprocess.Popen( [setsid_bin, "bash", "-lc", shell_cmd], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + env=watcher_env, start_new_session=True, ) else: @@ -4281,6 +4359,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew ["bash", "-lc", shell_cmd], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + env=watcher_env, start_new_session=True, ) @@ -12946,6 +13025,10 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew last_tool = [None] # Mutable container for tracking in closure last_progress_msg = [None] # Track last message for dedup repeat_count = [0] # How many times the same message repeated + # True when the previously enqueued progress line was a terminal + # fenced code block — consecutive terminal calls then drop the + # repeated "💻 terminal" header and render back-to-back blocks. + last_was_terminal_block = [False] # ── Discord voice "verbal ack before tool calls" ──────────────── # When the bot is in a voice channel with the continuous mixer @@ -13102,7 +13185,13 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew ): from agent.display import get_tool_preview_max_len _cmd_full = args["command"].rstrip() - _code_block_full = f"{emoji} {tool_name}\n```\n{_cmd_full}\n```" + # Consecutive terminal calls: drop the repeated + # "💻 terminal" header so back-to-back commands render as + # adjacent code blocks under a single header. + _block_header = ( + "" if last_was_terminal_block[0] else f"{emoji} {tool_name}\n" + ) + _code_block_full = f"{_block_header}```\n{_cmd_full}\n```" # Single-line, capped preview for non-verbose modes. 
_pl = get_tool_preview_max_len() _cap = _pl if _pl > 0 else 40 @@ -13113,13 +13202,15 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew _cmd_short = _cmd_short[:_cap - 3] + "..." elif _multiline: _cmd_short = _cmd_short + " ..." - _code_block_short = f"{emoji} {tool_name}\n```\n{_cmd_short}\n```" + _code_block_short = f"{_block_header}```\n{_cmd_short}\n```" # Verbose mode: show detailed arguments, respects tool_preview_length if progress_mode == "verbose": if _code_block_full is not None: + last_was_terminal_block[0] = True progress_queue.put(_code_block_full) return + last_was_terminal_block[0] = False if args: from agent.display import get_tool_preview_max_len _pl = get_tool_preview_max_len() @@ -13144,6 +13235,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew # fenced block (built above) instead of the truncated preview. if _code_block_short is not None: msg = _code_block_short + last_was_terminal_block[0] = True elif preview: from agent.display import get_tool_preview_max_len _pl = get_tool_preview_max_len() @@ -13151,8 +13243,10 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew if len(preview) > _cap: preview = preview[:_cap - 3] + "..." msg = f"{emoji} {tool_name}: \"{preview}\"" + last_was_terminal_block[0] = False else: msg = f"{emoji} {tool_name}..." + last_was_terminal_block[0] = False # Dedup: collapse consecutive identical progress messages. # Common with execute_code where models iterate with the same @@ -15909,6 +16003,8 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = atexit.register(remove_pid_file) atexit.register(release_gateway_runtime_lock) + _ensure_windows_gateway_venv_imports() + # MCP tool discovery — run in an executor so the asyncio event loop # stays responsive even when a configured MCP server is slow or # unreachable. 
discover_mcp_tools() uses a blocking 120s wait diff --git a/gateway/session.py b/gateway/session.py index 19aa0cdb776..5548139a682 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -294,6 +294,22 @@ def build_session_context_prompt( if context.source.chat_topic: lines.append(f"**Channel Topic:** {context.source.chat_topic}") + if context.source.platform == Platform.MATRIX: + src = context.source + room_name = src.chat_name or src.chat_id + room_id = _hash_chat_id(src.chat_id) if redact_pii else src.chat_id + lines.append("") + lines.append(f"**Matrix Room:** {room_name}") + lines.append(f"**Matrix Room ID:** {room_id}") + if src.thread_id: + thread_id = _hash_chat_id(src.thread_id) if redact_pii else src.thread_id + lines.append(f"**Matrix Thread:** {thread_id}") + lines.append( + "**Matrix room boundary:** Treat this turn as scoped to the current " + "Matrix room/thread only. Do not assume unresolved references are " + "about other Matrix rooms or projects unless the user explicitly says so." + ) + # User identity. # In shared multi-user sessions (shared threads OR shared non-thread groups # when group_sessions_per_user=False), multiple users contribute to the same @@ -1264,6 +1280,17 @@ class SessionStore: entries.sort(key=lambda e: e.updated_at, reverse=True) return entries + + def lookup_by_session_id(self, session_id: str) -> Optional[SessionEntry]: + """Return the active session entry for a persisted session ID, if any.""" + if not session_id: + return None + with self._lock: + self._ensure_loaded_locked() + for entry in self._entries.values(): + if entry.session_id == session_id: + return entry + return None def append_to_transcript(self, session_id: str, message: Dict[str, Any], skip_db: bool = False) -> None: """Append a message to a session's transcript (SQLite). 
diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py index 107b5645ec5..1bb2fc41d1c 100644 --- a/gateway/slash_commands.py +++ b/gateway/slash_commands.py @@ -17,6 +17,7 @@ from __future__ import annotations import asyncio import dataclasses +import hashlib import inspect import logging import os @@ -32,7 +33,7 @@ from agent.account_usage import fetch_account_usage, render_account_usage_lines from agent.i18n import t from gateway.config import HomeChannel, Platform, PlatformConfig from gateway.platforms.base import EphemeralReply, MessageEvent, MessageType -from gateway.session import build_session_key +from gateway.session import SessionSource, build_session_key from hermes_cli.config import cfg_get from utils import ( atomic_json_write, @@ -447,6 +448,22 @@ class GatewaySlashCommandsMixin: ]) if queue_depth: lines.append(t("gateway.status.queued", count=queue_depth)) + if source.platform == Platform.MATRIX: + adapter = self.adapters.get(Platform.MATRIX) + scope = getattr(adapter, "_matrix_session_scope", os.getenv("MATRIX_SESSION_SCOPE", "auto")) + thread = source.thread_id or "none" + lines.extend([ + "", + t("gateway.status.matrix_scope_header"), + t("gateway.status.matrix_scope_room", room=source.chat_name or source.chat_id), + t("gateway.status.matrix_scope_room_id", room_id=source.chat_id), + t("gateway.status.matrix_scope_thread", thread_id=thread), + t("gateway.status.matrix_scope_mode", scope=scope), + t( + "gateway.status.matrix_scope_key", + session_key=self._redact_matrix_session_key(session_key), + ), + ]) lines.extend([ "", t("gateway.status.platforms", platforms=', '.join(connected_platforms)), @@ -454,6 +471,37 @@ class GatewaySlashCommandsMixin: return "\n".join(lines) + @staticmethod + def _redact_matrix_session_key(session_key: str) -> str: + """Return a stable Matrix session-key fingerprint for shared room status.""" + text = str(session_key or "") + digest = hashlib.sha256(text.encode("utf-8")).hexdigest()[:12] + return 
f"sha256:{digest}" + + def _gateway_session_origin_for_id(self, session_id: str) -> Optional[SessionSource]: + """Best-effort origin lookup for gateway session IDs.""" + lookup = getattr(type(self.session_store), "lookup_by_session_id", None) + if callable(lookup): + entry = lookup(self.session_store, session_id) + return getattr(entry, "origin", None) if entry is not None else None + + # Test doubles and older stores may not expose the public lookup helper. + # Keep the Matrix resume guard fail-closed if no origin can be resolved. + entries = getattr(self.session_store, "_entries", {}) or {} + for entry in entries.values(): + if getattr(entry, "session_id", None) == session_id: + return getattr(entry, "origin", None) + return None + + @staticmethod + def _same_matrix_room(current: SessionSource, origin: Optional[SessionSource]) -> bool: + return ( + origin is not None + and origin.platform == Platform.MATRIX + and current.platform == Platform.MATRIX + and origin.chat_id == current.chat_id + ) + async def _handle_agents_command(self, event: MessageEvent) -> str: """Handle /agents command - list active agents and running tasks.""" from gateway.run import _AGENT_PENDING_SENTINEL @@ -2652,7 +2700,14 @@ class GatewaySlashCommandsMixin: source = event.source session_key = self._session_key_for_source(source) - name = event.get_command_args().strip() + raw_args = event.get_command_args().strip() + try: + parts = shlex.split(raw_args) + except ValueError as exc: + return t("gateway.resume.parse_error", error=exc) + allow_all = "--all" in parts + allow_cross_room = "--cross-room" in parts + name = " ".join(p for p in parts if p not in {"--all", "--cross-room"}).strip() # Strip common outer brackets/quotes users may type literally from the # usage hint (e.g. ``/resume ``). Mirrors the CLI behavior. 
@@ -2673,11 +2728,24 @@ class GatewaySlashCommandsMixin: # List recent titled sessions for this user/platform try: titled = _list_titled_sessions() + if source.platform == Platform.MATRIX and not allow_all: + scoped = [] + for s in titled: + origin = self._gateway_session_origin_for_id(str(s.get("id") or "")) + if self._same_matrix_room(source, origin): + scoped.append(s) + titled = scoped if not titled: + if source.platform == Platform.MATRIX and not allow_all: + return t("gateway.resume.matrix_no_named_sessions") return t("gateway.resume.no_named_sessions") lines = [t("gateway.resume.list_header")] for idx, s in enumerate(titled[:10], start=1): title = s["title"] + if source.platform == Platform.MATRIX and allow_all: + origin = self._gateway_session_origin_for_id(str(s.get("id") or "")) + if origin: + title = f"{title} — {origin.chat_name or origin.chat_id}" preview = s.get("preview", "")[:40] preview_part = t("gateway.resume.list_preview_suffix", preview=preview) if preview else "" lines.append(t("gateway.resume.list_item_numbered", index=idx, title=title, preview_part=preview_part)) @@ -2691,6 +2759,13 @@ class GatewaySlashCommandsMixin: if name.isdigit(): try: titled = _list_titled_sessions() + if source.platform == Platform.MATRIX and not allow_all: + scoped = [] + for s in titled: + origin = self._gateway_session_origin_for_id(str(s.get("id") or "")) + if self._same_matrix_room(source, origin): + scoped.append(s) + titled = scoped except Exception as e: logger.debug("Failed to list titled sessions for numeric resume: %s", e) return t("gateway.resume.list_failed", error=e) @@ -2717,6 +2792,17 @@ class GatewaySlashCommandsMixin: except Exception as e: logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e) + if source.platform == Platform.MATRIX: + target_origin = self._gateway_session_origin_for_id(target_id) + if not self._same_matrix_room(source, target_origin) and not allow_cross_room: + if target_origin is None: + return 
t("gateway.resume.matrix_blocked_no_origin", name=name) + return t( + "gateway.resume.matrix_blocked_other_room", + room=target_origin.chat_name or target_origin.chat_id, + name=name, + ) + # Check if already on that session current_entry = self.session_store.get_or_create_session(source) if current_entry.session_id == target_id: @@ -2744,6 +2830,15 @@ class GatewaySlashCommandsMixin: # Count messages for context history = self.session_store.load_transcript(target_id) msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0 + msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else "" + + if source.platform == Platform.MATRIX and allow_cross_room: + return t( + "gateway.resume.matrix_cross_room_success", + title=title, + room=source.chat_name or source.chat_id, + msg_part=msg_part, + ) if not msg_count: return t("gateway.resume.resumed_no_count", title=title) if msg_count == 1: diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index 33910c7b40b..53434da3c40 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -147,8 +147,15 @@ class GatewayStreamConsumer: self._edit_supported = True # Disabled when progressive edits are no longer usable self._last_edit_time = 0.0 self._last_sent_text = "" # Track last-sent text to skip redundant edits + # True when the most recent _send_or_edit split-and-delivered across + # continuation messages (the adapter adopted a new message id). + self._last_edit_overflowed = False self._fallback_final_send = False self._fallback_prefix = "" + # True when fallback is sending only the missing tail after a partial + # Telegram overflow delivery. In that case the already-visible prefix + # is intentional content, not a stale preview to delete. 
+ self._fallback_preserve_partial_messages = False self._flood_strikes = 0 # Consecutive flood-control edit failures self._current_edit_interval = self.cfg.edit_interval # Adaptive backoff self._final_response_sent = False @@ -261,6 +268,7 @@ class GatewayStreamConsumer: self._last_sent_text = "" self._fallback_final_send = False self._fallback_prefix = "" + self._fallback_preserve_partial_messages = False # #29346: a tool/segment boundary means what we delivered was an interim # preamble, not the final answer — clear the flags so a premature setter # can't fool the gateway. Safe: got_done returns before any reset, and @@ -581,14 +589,20 @@ class GatewayStreamConsumer: if self._accumulated: if self._fallback_final_send: await self._send_fallback_final(self._accumulated) - elif ( - current_update_visible - and not self._adapter_requires_finalize + elif current_update_visible and ( + not self._adapter_requires_finalize + or self._last_edit_overflowed ): # Mid-stream edit above already delivered the # final accumulated content. Skip the redundant - # final edit — but only for adapters that don't - # need an explicit finalize signal. + # final edit for adapters that don't need an + # explicit finalize signal, and for any adapter + # when that edit split-and-delivered across + # continuations: the split edit carried + # finalize=True itself, and re-finalizing with + # the full text would overflow-split again into + # the adopted continuation, duplicating chunks + # on screen. self._final_response_sent = True self._final_content_delivered = True elif self._message_id: @@ -647,11 +661,21 @@ class GatewayStreamConsumer: await asyncio.sleep(0.05) # Small yield to not busy-loop except asyncio.CancelledError: - # Best-effort final edit on cancellation + # Best-effort final edit on cancellation. 
finalize=True so + # REQUIRES_EDIT_FINALIZE platforms (Telegram) apply final + # formatting — a plain edit here would leave the entire reply + # rendered as a raw streaming preview while the success flags + # below suppress the gateway's formatted re-send. + # is_turn_final=False keeps _try_fresh_final from setting + # _final_response_sent itself; this handler owns the flags. _best_effort_ok = False if self._accumulated and self._message_id: try: - _best_effort_ok = bool(await self._send_or_edit(self._accumulated)) + _best_effort_ok = bool( + await self._send_or_edit( + self._accumulated, finalize=True, is_turn_final=False, + ) + ) except Exception: pass # Only confirm final delivery if the best-effort send above @@ -867,11 +891,21 @@ class GatewayStreamConsumer: self._notify_new_message() # Remove the frozen partial message so the user only sees the - # complete fallback response. Best-effort — if the platform doesn't + # complete fallback response. ONLY safe when the fallback re-sent + # the FULL final text (continuation == final_text). When the + # prefix-based dedup above sent only the missing TAIL, the partial + # message IS the head of the answer — deleting it leaves the user + # with only the last part of the response (the "Gemini sent only + # the second half" symptom). Best-effort — if the platform doesn't # implement ``delete_message``, the delete fails (flood control still # active, bot lacks permission, message too old to delete), the # partial remains but at least the full answer was delivered. 
- if stale_message_id and stale_message_id != last_message_id: + if ( + stale_message_id + and stale_message_id != last_message_id + and not self._fallback_preserve_partial_messages + and continuation == final_text + ): delete_fn = getattr(self.adapter, "delete_message", None) if delete_fn is not None: try: @@ -888,6 +922,7 @@ class GatewayStreamConsumer: self._final_content_delivered = True self._last_sent_text = chunks[-1] self._fallback_prefix = "" + self._fallback_preserve_partial_messages = False def _is_flood_error(self, result) -> bool: """Check if a SendResult failure is due to flood control / rate limiting.""" @@ -1208,6 +1243,7 @@ class GatewayStreamConsumer: return True # Failure already disabled drafts for this run; fall through to # the regular edit/send path below. + self._last_edit_overflowed = False try: if self._message_id is not None: if self._edit_supported: @@ -1264,6 +1300,7 @@ class GatewayStreamConsumer: and result.message_id and result.message_id != self._message_id ): + self._last_edit_overflowed = True self._message_id = str(result.message_id) self._message_created_ts = time.monotonic() self._last_sent_text = "" @@ -1274,6 +1311,35 @@ class GatewayStreamConsumer: self._flood_strikes = 0 return True else: + raw_response = getattr(result, "raw_response", None) + if isinstance(raw_response, dict) and raw_response.get("partial_overflow"): + # Telegram edited/sent one or more overflow chunks, + # but not the complete response. Preserve the + # visible prefix so the got_done fallback sends the + # missing tail instead of marking a clipped topic + # reply as final delivery. 
+ self._message_id = str( + raw_response.get("last_message_id") + or result.message_id + or self._message_id + ) + delivered_prefix = raw_response.get("delivered_prefix") + if isinstance(delivered_prefix, str) and delivered_prefix: + self._last_sent_text = delivered_prefix + self._fallback_prefix = delivered_prefix + self._fallback_preserve_partial_messages = text.startswith( + delivered_prefix + ) + else: + self._fallback_prefix = self._visible_prefix() + self._fallback_preserve_partial_messages = False + self._fallback_final_send = True + self._edit_supported = False + self._already_sent = True + if getattr(result, "continuation_message_ids", ()): + self._notify_new_message() + return False + # Edit failed. If this looks like flood control / rate # limiting, use adaptive backoff: double the edit interval # and retry on the next cycle. Only permanently disable diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 0c6bf8692fc..62997528bd8 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -31,6 +31,9 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Directory names to skip entirely (matched against each path component) +# ``hermes-agent`` is special-cased to root level only in ``_should_exclude`` +# so that skill directories like ``skills/autonomous-ai-agents/hermes-agent/`` +# are not accidentally excluded. _EXCLUDED_DIRS = { "hermes-agent", # the codebase repo — re-clone instead "__pycache__", # bytecode caches — regenerated on import @@ -69,10 +72,15 @@ def _should_exclude(rel_path: Path) -> bool: """Return True if *rel_path* (relative to hermes root) should be skipped.""" parts = rel_path.parts - # Any path component matches an excluded dir name for part in parts: - if part in _EXCLUDED_DIRS: - return True + if part not in _EXCLUDED_DIRS: + continue + # ``hermes-agent`` only matches at the root level (first component). + # Nested directories with the same name — e.g. 
+ # ``skills/autonomous-ai-agents/hermes-agent/`` — must be preserved. + if part == "hermes-agent" and part != parts[0]: + continue + return True name = rel_path.name @@ -177,10 +185,13 @@ def run_backup(args) -> None: rel_dir = dp.relative_to(hermes_root) # Prune excluded directories in-place so os.walk doesn't descend + # ``hermes-agent`` is only pruned at the root level; nested dirs + # with the same name (e.g. in skills/) must be preserved. + is_root = rel_dir == Path(".") orig_dirnames = dirnames[:] dirnames[:] = [ d for d in dirnames - if d not in _EXCLUDED_DIRS + if d not in _EXCLUDED_DIRS or (d == "hermes-agent" and not is_root) ] for removed in set(orig_dirnames) - set(dirnames): skipped_dirs.add(str(rel_dir / removed)) @@ -211,7 +222,13 @@ def run_backup(args) -> None: try: # Safe copy for SQLite databases (handles WAL mode) if abs_path.suffix == ".db": - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp: + # Stage the snapshot alongside the output zip so that the + # temp file lives on the same filesystem. The system + # default (/tmp) may be a small tmpfs that cannot hold + # large databases, causing silent backup incompleteness. + with tempfile.NamedTemporaryFile( + suffix=".db", delete=False, dir=str(out_path.parent) + ) as tmp: tmp_db = Path(tmp.name) if _safe_copy_db(abs_path, tmp_db): zf.write(tmp_db, arcname=str(rel_path)) @@ -853,7 +870,13 @@ def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]: for abs_path, rel_path in files_to_add: try: if abs_path.suffix == ".db": - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp: + # Stage the snapshot alongside the output zip so that the + # temp file lives on the same filesystem. The system + # default (/tmp) may be a small tmpfs that cannot hold + # large databases, causing silent backup incompleteness. 
+ with tempfile.NamedTemporaryFile( + suffix=".db", delete=False, dir=str(out_path.parent) + ) as tmp: tmp_db = Path(tmp.name) try: if _safe_copy_db(abs_path, tmp_db): diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 1955b009df2..af0bdd5feef 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -11,6 +11,7 @@ import subprocess import threading import time from pathlib import Path +from urllib.parse import urlparse from hermes_constants import get_hermes_home from typing import TYPE_CHECKING, Dict, List, Optional @@ -121,6 +122,53 @@ _UPDATE_CHECK_CACHE_SECONDS = 6 * 3600 UPDATE_AVAILABLE_NO_COUNT = -1 _UPSTREAM_REPO_URL = "https://github.com/NousResearch/hermes-agent.git" +_OFFICIAL_REPO_CANONICAL = "github.com/nousresearch/hermes-agent" + + +def _canonical_github_remote(url: str | None) -> str: + """Return ``host/owner/repo`` for common GitHub remote URL forms.""" + if not url: + return "" + value = url.strip() + if value.startswith("git@github.com:"): + value = "github.com/" + value[len("git@github.com:"):] + elif value.startswith("ssh://git@github.com/"): + value = "github.com/" + value[len("ssh://git@github.com/"):] + else: + parsed = urlparse(value) + if parsed.netloc and parsed.path: + value = f"{parsed.netloc}{parsed.path}" + value = value.strip().rstrip("/") + if value.endswith(".git"): + value = value[:-4] + return value.lower() + + +def _is_ssh_remote(url: str | None) -> bool: + if not url: + return False + value = url.strip().lower() + return value.startswith("git@") or value.startswith("ssh://") + + +def _is_official_ssh_remote(url: str | None) -> bool: + return _is_ssh_remote(url) and _canonical_github_remote(url) == _OFFICIAL_REPO_CANONICAL + + +def _git_stdout(args: list[str], *, cwd: Path, timeout: int = 5) -> Optional[str]: + try: + result = subprocess.run( + ["git", *args], + capture_output=True, + text=True, + timeout=timeout, + cwd=str(cwd), + ) + except Exception: + return None + if result.returncode != 0: + return None 
+ return (result.stdout or "").strip() def _check_via_rev(local_rev: str) -> Optional[int]: @@ -146,6 +194,11 @@ def _check_via_rev(local_rev: str) -> Optional[int]: def _check_via_local_git(repo_dir: Path) -> Optional[int]: """Count commits behind origin/main in a local checkout.""" + origin_url = _git_stdout(["remote", "get-url", "origin"], cwd=repo_dir) + if _is_official_ssh_remote(origin_url): + head_rev = _git_stdout(["rev-parse", "HEAD"], cwd=repo_dir) + return _check_via_rev(head_rev) if head_rev else None + try: subprocess.run( ["git", "fetch", "origin", "--quiet"], diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index f23d1960da7..aded4d41d81 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -1544,12 +1544,140 @@ class SlashCommandCompleter(Completer): except Exception: pass + @staticmethod + def _tools_completions(sub_text: str, sub_lower: str): + """Yield completions for /tools — subcommand + toolset/MCP-server name. + + Handles both ``/tools `` (suggesting ``list|disable|enable``) and + ``/tools enable `` / ``/tools disable `` (suggesting toolset + keys and MCP server prefixes, filtered by current enable state so the + user only sees actionable options). + """ + SUBS = ("list", "disable", "enable") + parts = sub_text.split() + trailing_space = sub_text.endswith(" ") + + # Subcommand stage: zero words typed, or completing the first word. 
+ if len(parts) == 0 or (len(parts) == 1 and not trailing_space): + partial = sub_text if not trailing_space else "" + for sub in SUBS: + if sub.startswith(partial.lower()) and sub != partial.lower(): + yield Completion(sub, start_position=-len(partial), display=sub) + return + + subcommand = parts[0].lower() + if subcommand not in ("enable", "disable"): + return + + partial = "" if trailing_space else parts[-1] + partial_lower = partial.lower() + already = set(parts[1:] if trailing_space else parts[1:-1]) + + try: + from hermes_cli.config import load_config + from hermes_cli.tools_config import ( + CONFIGURABLE_TOOLSETS, + _get_platform_tools, + _get_plugin_toolset_keys, + ) + + config = load_config() + enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False) + + for ts_key, label, _desc in CONFIGURABLE_TOOLSETS: + if ts_key in already or not ts_key.startswith(partial_lower): + continue + is_on = ts_key in enabled + if subcommand == "enable" and is_on: + continue + if subcommand == "disable" and not is_on: + continue + yield Completion( + ts_key, + start_position=-len(partial), + display=ts_key, + display_meta=label, + ) + + for ts_key in sorted(_get_plugin_toolset_keys()): + if ts_key in already or not ts_key.startswith(partial_lower): + continue + is_on = ts_key in enabled + if subcommand == "enable" and is_on: + continue + if subcommand == "disable" and not is_on: + continue + yield Completion( + ts_key, + start_position=-len(partial), + display=ts_key, + display_meta="plugin toolset", + ) + + mcp_servers = config.get("mcp_servers") or {} + if isinstance(mcp_servers, dict): + for server in sorted(mcp_servers): + prefix = f"{server}:" + if prefix in already or not prefix.startswith(partial_lower): + continue + yield Completion( + prefix, + start_position=-len(partial), + display=prefix, + display_meta=f"MCP server '{server}'", + ) + except Exception: + return + + @staticmethod + def _handoff_completions(sub_text: str, sub_lower: str): + 
"""Yield platform completions for /handoff. + + Offers connected (enabled + configured) gateway platforms. A recorded + home channel is NOT required to list a platform — it's often learned at + runtime — so the meta hints whether one is set yet. Completes only the + first arg (the platform); once one is chosen, stop. + """ + parts = sub_text.split() + trailing_space = sub_text.endswith(" ") + if len(parts) > 1 or (len(parts) == 1 and trailing_space): + return + partial = "" if (not parts or trailing_space) else parts[-1] + partial_lower = partial.lower() + try: + from gateway.config import load_gateway_config + + gw = load_gateway_config() + platforms = gw.get_connected_platforms() + except Exception: + return + for platform in platforms: + name = platform.value + if not name.startswith(partial_lower): + continue + try: + home = gw.get_home_channel(platform) + except Exception: + home = None + meta = f"→ {home.name}" if home and getattr(home, "name", None) else "send this session here" + yield Completion( + name, + start_position=-len(partial), + display=name, + display_meta=meta, + ) + @staticmethod def _personality_completions(sub_text: str, sub_lower: str): """Yield completions for /personality from configured personalities.""" try: - from hermes_cli.config import load_config - personalities = load_config().get("agent", {}).get("personalities", {}) + # Resolve from the same source the runtime applies personalities — + # agent.personalities via the CLI config (which ships the built-ins). + # load_config()'s schema has no agent.personalities, so the completer + # used to come back empty even with personalities available. 
+ from cli import load_cli_config + + personalities = (load_cli_config().get("agent") or {}).get("personalities", {}) or {} if "none".startswith(sub_lower) and "none" != sub_lower: yield Completion( "none", @@ -1602,6 +1730,17 @@ class SlashCommandCompleter(Completer): yield from self._personality_completions(sub_text, sub_lower) return + # /tools needs multi-word completion (subcommand + toolset name) + # so it handles both stages itself, bypassing the single-word + # SUBCOMMANDS branch below. + if base_cmd == "/tools": + yield from self._tools_completions(sub_text, sub_lower) + return + + if base_cmd == "/handoff": + yield from self._handoff_completions(sub_text, sub_lower) + return + # Static subcommand completions if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd): for sub in SUBCOMMANDS[base_cmd]: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 494c5ddfe3a..1605527935b 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -270,6 +270,11 @@ _EXTRA_ENV_KEYS = frozenset({ "IRC_SERVER", "IRC_PORT", "IRC_NICKNAME", "IRC_CHANNEL", "IRC_USE_TLS", "IRC_SERVER_PASSWORD", "IRC_NICKSERV_PASSWORD", "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT", + # Deprecated tool-progress env vars — replaced by display.tool_progress in + # config.yaml. Kept known here so .env sanitization/reload still handle + # them for existing users (gateway reads them as a back-compat fallback), + # without surfacing them in user-facing OPTIONAL_ENV_VARS listings. + "HERMES_TOOL_PROGRESS", "HERMES_TOOL_PROGRESS_MODE", "WHATSAPP_MODE", "WHATSAPP_ENABLED", "MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE", "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM", @@ -863,6 +868,19 @@ DEFAULT_CONFIG = { # identity slot (SOUL.md). Empty by default. The HERMES_ENVIRONMENT_HINT # env var overrides this (build-time/container mechanism). 
"environment_hint": "", + # Coding posture — on interactive coding surfaces (CLI, TUI, desktop + # app, ACP) in a code workspace, Hermes adds a coding operating brief + # + a live git/workspace snapshot to the system prompt. See + # agent/coding_context.py. + # "auto" (default) — prompt-only posture when the surface is + # interactive AND cwd is a code workspace. + # Toolsets are never touched; messaging platforms + # unaffected. + # "focus" — auto + collapse the toolset to the lean coding + # set (+ enabled MCP servers). Explicit opt-in. + # "on" — force the prompt posture everywhere. + # "off" — disable entirely. + "coding_context": "auto", # Staged inactivity warning: send a warning to the user at this # threshold before escalating to a full timeout. The warning fires # once per run and does not interrupt the agent. 0 = disable warning. @@ -3544,21 +3562,11 @@ OPTIONAL_ENV_VARS = { }, # HERMES_TOOL_PROGRESS and HERMES_TOOL_PROGRESS_MODE are deprecated — # now configured via display.tool_progress in config.yaml (off|new|all|verbose). - # Gateway falls back to these env vars for backward compatibility. - "HERMES_TOOL_PROGRESS": { - "description": "(deprecated) Use display.tool_progress in config.yaml instead", - "prompt": "Tool progress (deprecated — use config.yaml)", - "url": None, - "password": False, - "category": "setting", - }, - "HERMES_TOOL_PROGRESS_MODE": { - "description": "(deprecated) Use display.tool_progress in config.yaml instead", - "prompt": "Progress mode (deprecated — use config.yaml)", - "url": None, - "password": False, - "category": "setting", - }, + # The gateway still falls back to these env vars for backward compatibility, + # so they live in _EXTRA_ENV_KEYS (known to .env sanitization/reload) but + # are intentionally NOT listed here: OPTIONAL_ENV_VARS feeds user-facing + # surfaces (dashboard keys page, setup checklists) and deprecated knobs + # shouldn't be offered there. 
"HERMES_PREFILL_MESSAGES_FILE": { "description": "Path to JSON file with ephemeral prefill messages for few-shot priming", "prompt": "Prefill messages file path", diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index 683fc73fb73..717c1e97658 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -120,9 +120,6 @@ def cron_list(show_all: bool = False): workdir = job.get("workdir") if workdir: print(f" Workdir: {workdir}") - profile = job.get("profile") - if profile: - print(f" Profile: {profile}") # Execution history last_status = job.get("last_status") @@ -221,7 +218,6 @@ def cron_create(args): skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)), script=getattr(args, "script", None), workdir=getattr(args, "workdir", None), - profile=getattr(args, "profile", None), no_agent=getattr(args, "no_agent", False) or None, ) if not result.get("success"): @@ -239,8 +235,6 @@ def cron_create(args): print(" Mode: no-agent (script stdout delivered directly)") if job_data.get("workdir"): print(f" Workdir: {job_data['workdir']}") - if job_data.get("profile"): - print(f" Profile: {job_data['profile']}") print(f" Next run: {result['next_run_at']}") return 0 @@ -286,7 +280,6 @@ def cron_edit(args): skills=final_skills, script=getattr(args, "script", None), workdir=getattr(args, "workdir", None), - profile=getattr(args, "profile", None), no_agent=getattr(args, "no_agent", None), ) if not result.get("success"): @@ -307,8 +300,6 @@ def cron_edit(args): print(" Mode: no-agent (script stdout delivered directly)") if updated.get("workdir"): print(f" Workdir: {updated['workdir']}") - if updated.get("profile"): - print(f" Profile: {updated['profile']}") return 0 diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 5ff74259185..c1f7c04d7f2 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2531,6 +2531,65 @@ def systemd_unit_is_current(system: bool = False) -> bool: return norm_installed == norm_expected +def 
_temp_home_in_service_definition(definition: str) -> str | None: + """Return the temp-dir HERMES_HOME baked into a service definition, or None. + + A generated systemd unit / launchd plist carries the resolved HERMES_HOME + in its environment block. If that path lives under the system temp dir, + the definition was almost certainly generated by a test/E2E harness that + exported a throwaway ``HERMES_HOME=/tmp/...`` — writing it to the real + service file silently breaks the user's gateway on the next (re)start: + the gateway comes back "active (running)" but pointed at an empty temp + home ("No messaging platforms enabled"), deaf to every platform. + Seen live 2026-06-11: an E2E guard probe ran ``hermes gateway restart`` + with ``HERMES_HOME=/tmp/hermes-e2e-`` exported; the restart path's + unit refresh baked the temp path into the production unit and the + post-update restart produced a zombie gateway for 7+ hours. + + Matches both systemd ``Environment="HERMES_HOME=..."`` lines and launchd + ``HERMES_HOME...`` pairs. + """ + import re + import tempfile + + candidates = re.findall(r'HERMES_HOME=([^"\n]+)', definition) + candidates += re.findall( + r"HERMES_HOME\s*(.*?)", definition, flags=re.S + ) + temp_roots = { + Path(tempfile.gettempdir()).resolve(), + Path("/tmp"), + Path("/var/tmp"), + Path("/private/tmp"), + Path("/private/var/tmp"), + } + for raw in candidates: + try: + resolved = Path(raw.strip().strip('"')).resolve() + except (OSError, ValueError): + continue + for root in temp_roots: + if resolved == root or root in resolved.parents: + return raw.strip() + return None + + +def _refuse_temp_home_service_write(definition: str, kind: str) -> bool: + """Refuse (with guidance) when a service definition carries a temp HERMES_HOME.""" + temp_home = _temp_home_in_service_definition(definition) + if temp_home is None: + return False + print( + f"✗ Refusing to write the gateway {kind}: HERMES_HOME resolves to a " + f"temporary directory ({temp_home})." 
+ ) + print( + " This usually means a test/E2E environment exported HERMES_HOME. " + "Unset it (or run from a clean shell) and retry." + ) + return True + + def refresh_systemd_unit_if_needed(system: bool = False) -> bool: """Rewrite the installed systemd unit when the generated definition has changed.""" unit_path = get_systemd_unit_path(system=system) @@ -2561,6 +2620,12 @@ def refresh_systemd_unit_if_needed(system: bool = False) -> bool: ): return False + # Structural variant of the same belt: refuse to bake ANY temp-dir + # HERMES_HOME into the unit (manual E2E homes like /tmp/hermes-e2e-NNN + # don't carry the pytest markers above but poison the unit identically). + if _refuse_temp_home_service_write(new_unit, "systemd unit"): + return False + unit_path.write_text(new_unit, encoding="utf-8") _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30) print( @@ -2729,10 +2794,11 @@ def systemd_install( return unit_path.parent.mkdir(parents=True, exist_ok=True) + new_unit = generate_systemd_unit(system=system, run_as_user=run_as_user) + if _refuse_temp_home_service_write(new_unit, "systemd unit"): + return print(f"Installing {_service_scope_label(system)} systemd service to: {unit_path}") - unit_path.write_text( - generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8" - ) + unit_path.write_text(new_unit, encoding="utf-8") _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30) if enable_on_startup: @@ -3067,12 +3133,77 @@ def get_launchd_label() -> str: return f"ai.hermes.gateway-{suffix}" if suffix else "ai.hermes.gateway" +# Cached launchd domain result — probing is cheap but should only run once per +# process invocation (each ``hermes gateway start/stop/status`` call). 
+_resolved_launchd_domain: str | None = None + + def _launchd_domain() -> str: - # The `user/` domain (vs the older `gui/`) is reachable from - # non-Aqua/background sessions (SSH, headless, login items) and is the only - # one that supports service management on macOS 26+. `gui/` returns - # error 125 ("Domain does not support specified action") there. See #23387. - return f"user/{os.getuid()}" # windows-footgun: ok — POSIX launchd (macOS) helper, never invoked on Windows + """Return the launchd domain that actually manages the gateway service. + + Probes ``gui/`` first (Aqua sessions), then ``user/`` + (Background/SSH sessions). When neither domain contains a loaded + service, falls back to ``launchctl managername`` as a heuristic. + + The result is cached for the lifetime of the process so that repeated + calls (``start``, ``stop``, ``restart``) use a consistent domain. + + See #40831, #23387. + """ + global _resolved_launchd_domain + if _resolved_launchd_domain is not None: + return _resolved_launchd_domain + + uid = os.getuid() # windows-footgun: ok — POSIX launchd (macOS) helper, never invoked on Windows + label = get_launchd_label() + gui_domain = f"gui/{uid}" + user_domain = f"user/{uid}" + + # 1. Probe gui/ first — in Aqua sessions the service is loaded here. + try: + subprocess.run( + ["launchctl", "print", f"{gui_domain}/{label}"], + check=True, + timeout=5, + capture_output=True, + ) + _resolved_launchd_domain = gui_domain + return gui_domain + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + pass + + # 2. Probe user/ — in Background/SSH sessions this is the working domain. + try: + subprocess.run( + ["launchctl", "print", f"{user_domain}/{label}"], + check=True, + timeout=5, + capture_output=True, + ) + _resolved_launchd_domain = user_domain + return user_domain + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + pass + + # 3. 
Neither domain has the service loaded — use managername as heuristic. + # Aqua → gui/, anything else (Background, loginwindow) → user/. + try: + result = subprocess.run( + ["launchctl", "managername"], + capture_output=True, + text=True, + timeout=5, + ) + if "Aqua" in (result.stdout or ""): + _resolved_launchd_domain = gui_domain + return gui_domain + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError): + pass + + # 4. Default to user/ (matches the pre-probing behavior for + # Background/SSH sessions and is the recommended domain on macOS 26+). + _resolved_launchd_domain = user_domain + return user_domain # On macOS, exit code 125 ("Domain does not support specified action") and @@ -3297,7 +3428,11 @@ def refresh_launchd_plist_if_needed() -> bool: if not plist_path.exists() or launchd_plist_is_current(): return False - plist_path.write_text(generate_launchd_plist(), encoding="utf-8") + new_plist = generate_launchd_plist() + if _refuse_temp_home_service_write(new_plist, "launchd plist"): + return False + + plist_path.write_text(new_plist, encoding="utf-8") label = get_launchd_label() # Bootout/bootstrap so launchd picks up the new definition subprocess.run( @@ -3330,8 +3465,11 @@ def launchd_install(force: bool = False): return plist_path.parent.mkdir(parents=True, exist_ok=True) + new_plist = generate_launchd_plist() + if _refuse_temp_home_service_write(new_plist, "launchd plist"): + return print(f"Installing launchd service to: {plist_path}") - plist_path.write_text(generate_launchd_plist()) + plist_path.write_text(new_plist) try: subprocess.run( @@ -3377,9 +3515,12 @@ def launchd_start(): # Self-heal if the plist is missing entirely (e.g., manual cleanup, failed upgrade) if not plist_path.exists(): + new_plist = generate_launchd_plist() + if _refuse_temp_home_service_write(new_plist, "launchd plist"): + sys.exit(1) print("↻ launchd plist missing; regenerating service definition") plist_path.parent.mkdir(parents=True, 
exist_ok=True) - plist_path.write_text(generate_launchd_plist(), encoding="utf-8") + plist_path.write_text(new_plist, encoding="utf-8") try: subprocess.run( ["launchctl", "bootstrap", _launchd_domain(), str(plist_path)], diff --git a/hermes_cli/main.py b/hermes_cli/main.py index bab1302e850..72fb4ad41b9 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1623,7 +1623,11 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: npm_cwd = _workspace_root(tui_dir) # --workspace ui-tui avoids resolving apps/desktop (Electron + node-pty). # See #38772. - npm_workspace_args: tuple[str, ...] = ("--workspace", "ui-tui") + # When ui-tui/ has its own package-lock.json (e.g. curl install), + # _workspace_root() returns tui_dir itself. Passing --workspace in + # that case fails because npm cannot find a workspace named "ui-tui" + # inside ui-tui/. See #42973. + npm_workspace_args: tuple[str, ...] = () if npm_cwd == tui_dir else ("--workspace", "ui-tui") if termux_startup: npm_cwd, npm_workspace_args = _termux_workspace_install_context( tui_dir, @@ -4661,7 +4665,9 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: # graph (including apps/desktop with its Electron + node-pty deps) is never # resolved here. Without --workspace the root package.json's apps/* glob # would pull in desktop on every web build. See #38772. - npm_workspace_args: tuple[str, ...] = ("--workspace", "web") + # When web/ has its own package-lock.json, _workspace_root() returns + # web_dir itself and --workspace would fail. See #42973. + npm_workspace_args: tuple[str, ...] 
= () if npm_cwd == web_dir else ("--workspace", "web") if _is_termux_startup_environment(): npm_cwd, npm_workspace_args = _termux_workspace_install_context(web_dir) r1 = _run_npm_install_deterministic( @@ -10234,6 +10240,21 @@ def _report_dashboard_status() -> int: return len(pids) +def _dashboard_listening(host: str, port: int) -> bool: + """True when something is accepting TCP connections at host:port. + + Any listener counts — even a 401 response proves a dashboard is up. + Used by the unified profile-launch routing to decide attach-vs-start. + """ + import socket + + try: + with socket.create_connection((host or "127.0.0.1", port), timeout=1.5): + return True + except OSError: + return False + + def cmd_dashboard(args): """Start the web UI server, or (with --stop/--status) manage running ones.""" # --status: report running dashboards and exit, no deps needed. @@ -10254,6 +10275,65 @@ def cmd_dashboard(args): remaining = _find_stale_dashboard_pids() sys.exit(1 if remaining else 0) + # ── Unified profile launch routing ──────────────────────────────── + # The dashboard is a MACHINE management surface: it can read/write any + # profile via the per-request ?profile= scoping. Running one dashboard + # per profile just fragments that (port collisions, N processes, and a + # "which dashboard am I on?" guessing game). So when a NAMED profile + # launches the dashboard (`worker dashboard` → HERMES_HOME points into + # profiles/), default to the machine dashboard: + # - already running → open the browser at ?profile= and exit + # - not running → re-exec as the machine dashboard (pinned to the + # default profile so _apply_profile_override can't re-route through + # the sticky active_profile file) with the launching profile + # preselected in the UI's switcher. + # `--isolated` opts out and preserves the old per-profile behavior. 
+ try: + from hermes_cli.profiles import get_active_profile_name + _launch_profile = get_active_profile_name() + except Exception: + _launch_profile = "default" + + if ( + _launch_profile not in ("default", "custom") + and not getattr(args, "isolated", False) + and not getattr(args, "open_profile", "") + ): + url = f"http://{args.host or '127.0.0.1'}:{args.port}/?profile={_launch_profile}" + if _dashboard_listening(args.host, args.port): + print(f"Machine dashboard already running on port {args.port}.") + print(f" Managing profile '{_launch_profile}': {url}") + if not args.no_open: + try: + import webbrowser + webbrowser.open(url) + except Exception: + pass + sys.exit(0) + + print( + f"Routing to the machine dashboard (profile '{_launch_profile}' " + f"preselected). Use --isolated for a dedicated per-profile server." + ) + reexec_argv = [ + sys.executable, "-m", "hermes_cli.main", + "-p", "default", + "dashboard", + "--port", str(args.port), + "--host", args.host, + "--open-profile", _launch_profile, + ] + if args.no_open: + reexec_argv.append("--no-open") + if getattr(args, "insecure", False): + reexec_argv.append("--insecure") + if getattr(args, "skip_build", False): + reexec_argv.append("--skip-build") + env = os.environ.copy() + # Drop the profile HERMES_HOME so the child binds the machine root. + env.pop("HERMES_HOME", None) + os.execvpe(sys.executable, reexec_argv, env) + # Attach gui.log early so dashboard startup/build failures are captured in # the same logs directory as every other Hermes surface. 
try: @@ -10327,6 +10407,7 @@ def cmd_dashboard(args): port=args.port, open_browser=not args.no_open, allow_public=getattr(args, "insecure", False), + initial_profile=getattr(args, "open_profile", "") or "", ) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index b8165978538..c53a930e9e4 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -491,15 +491,27 @@ def _lift_max_output_tokens(entry: Dict[str, Any], result: Dict[str, Any]) -> No def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]: requested_norm = _normalize_custom_provider_name(requested_provider or "") - if not requested_norm or requested_norm == "custom": + if not requested_norm: return None + # Bare "custom" is normally an incomplete spec — the canonical form is + # "custom:" — and is otherwise owned by the model.base_url "bare + # custom" trust path. BUT a user may literally name a ``providers:`` (or + # legacy ``custom_providers:``) entry "custom" (e.g. ``providers.custom`` + # pointing at cliproxy). We used to return None here *before* scanning + # config, so such an entry was never matched and resolution fell through to + # the global default (Codex) — the cause of cron jobs with + # ``provider: "custom"`` failing with ``auth_unavailable: providers=codex``. + # Fall through to the config scan instead; if no entry is literally named + # "custom" it still returns None at the end, preserving the trust path. + # Raw names should only map to custom providers when they are not already # valid built-in providers or aliases. Explicit menu keys like - # ``custom:local`` always target the saved custom provider. + # ``custom:local`` always target the saved custom provider. Bare "custom" + # is exempt from the shadow check — it is not a built-in to defer to. 
if requested_norm == "auto": return None - if not requested_norm.startswith("custom:"): + if requested_norm != "custom" and not requested_norm.startswith("custom:"): try: canonical = auth_mod.resolve_provider(requested_norm) except AuthError: @@ -634,6 +646,20 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An return None +def has_named_custom_provider(requested_provider: str) -> bool: + """Return True when config defines a custom provider matching the request. + + Thin public wrapper around :func:`_get_named_custom_provider` so other + modules (e.g. the cronjob tool) can decide whether a provider name will + actually resolve to a configured ``providers:`` / ``custom_providers:`` + entry — without reaching into a private helper or duplicating the scan. + """ + try: + return _get_named_custom_provider(requested_provider) is not None + except Exception: + return False + + def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]: extra_body = custom_provider.get("extra_body") if not isinstance(extra_body, dict) or not extra_body: diff --git a/hermes_cli/subcommands/cron.py b/hermes_cli/subcommands/cron.py index 33dd10158f3..c50b3401462 100644 --- a/hermes_cli/subcommands/cron.py +++ b/hermes_cli/subcommands/cron.py @@ -70,10 +70,6 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None: "--workdir", help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).", ) - cron_create.add_argument( - "--profile", - help="Hermes profile name to run the job under. Use 'default' for the root profile. Named profiles must already exist. 
Omit to preserve the scheduler's existing profile.", - ) # cron edit cron_edit = cron_subparsers.add_parser( @@ -138,10 +134,6 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None: "--workdir", help="Absolute path for the job to run from (injects AGENTS.md etc. and sets terminal cwd). Pass empty string to clear.", ) - cron_edit.add_argument( - "--profile", - help="Hermes profile name to run the job under. Use 'default' for the root profile. Pass empty string to clear.", - ) # lifecycle actions cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job") diff --git a/hermes_cli/subcommands/dashboard.py b/hermes_cli/subcommands/dashboard.py index 6bdb858513d..01ee57e2624 100644 --- a/hermes_cli/subcommands/dashboard.py +++ b/hermes_cli/subcommands/dashboard.py @@ -45,6 +45,26 @@ def build_dashboard_parser( "where npm may not be available. Pre-build with: cd web && npm run build" ), ) + dashboard_parser.add_argument( + "--isolated", + action="store_true", + help=( + "When launched from a named profile (e.g. `worker dashboard`), run " + "a dedicated dashboard server scoped to that profile instead of " + "routing to the machine dashboard. Default behavior is unified: " + "profile launches attach to (or start) ONE machine-level dashboard " + "and preselect the profile in the UI's profile switcher." + ), + ) + # Internal flag set by the unified-launch re-exec (cmd_dashboard) to + # preselect the launching profile in the SPA switcher. Hidden from + # --help: users get this behavior automatically via `<profile> dashboard`. + dashboard_parser.add_argument( + "--open-profile", + dest="open_profile", + default="", + help=argparse.SUPPRESS, + ) # Lifecycle flags — mutually exclusive with each other and with the # start-a-server flags above (if both are passed, --stop / --status win # because they exit before the server is started).
The dashboard has diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index ae97dbf54a2..d71fd5edb73 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -1437,6 +1437,10 @@ def _get_platform_tools( continue if ts_def.get("includes"): continue + # Posture toolsets (e.g. ``coding``) are session-level selections made + # by agent/coding_context.py — not per-platform capabilities to recover. + if ts_def.get("posture"): + continue ts_tools = set(resolve_toolset(ts_key)) if not ts_tools or not ts_tools.issubset(platform_tool_universe): continue @@ -2178,8 +2182,13 @@ def _toolset_needs_configuration_prompt( tts_cfg = config.get("tts", {}) return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg if ts_key == "web": - web_cfg = config.get("web", {}) - return not isinstance(web_cfg, dict) or "backend" not in web_cfg + # Web works out of the box via Parallel's free Search MCP (no key), so + # don't force setup just because ``web.backend`` is unset — only prompt + # when web isn't actually usable (e.g. an explicit backend configured + # without its credentials). Lazy import: web_tools is heavy and most + # tools_config callers don't need it. 
+ from tools.web_tools import check_web_api_key + return not check_web_api_key() if ts_key == "browser": browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index e1f1c62051d..ac48354f0b1 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -9,7 +9,7 @@ Usage: python -m hermes_cli.main web --port 8080 """ -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager, contextmanager import asyncio import base64 @@ -625,19 +625,23 @@ CONFIG_SCHEMA = _ordered_schema class ConfigUpdate(BaseModel): config: dict + profile: Optional[str] = None class EnvVarUpdate(BaseModel): key: str value: str + profile: Optional[str] = None class EnvVarDelete(BaseModel): key: str + profile: Optional[str] = None class EnvVarReveal(BaseModel): key: str + profile: Optional[str] = None class MessagingPlatformUpdate(BaseModel): @@ -716,6 +720,74 @@ class ModelAssignment(BaseModel): # the path that actually wires a local endpoint into resolution. base_url: str = "" confirm_expensive_model: bool = False + profile: Optional[str] = None + + +def _normalize_main_model_assignment(provider: str, model: str) -> tuple[str, str]: + """Normalize a main-slot (provider, model) pair before persisting. + + The Models page has two assignment paths and only one of them was safe: + + - The "Change" picker sends a real Hermes provider slug — fine. + - The per-card "Use as → Main model" menu sends ``entry.provider`` + from the analytics rows, falling back to the model's VENDOR prefix + (``modelVendor("anthropic/claude-opus-4.6") == "anthropic"``) when + the session row has no ``billing_provider`` (older sessions, NULL + rows). That wrote ``provider: anthropic`` + + ``default: anthropic/claude-opus-4.6`` to config — a vendor-prefixed + OpenRouter slug on the NATIVE Anthropic provider. 
New sessions then + 400 against api.anthropic.com ("model: anthropic/claude-opus-4.6 not + found") and the user reads it as "changing models does nothing". + + Two repairs, both at this single chokepoint so every caller inherits: + + 1. Vendor-name → Hermes-provider mapping: when the provider string is + not a known Hermes provider/alias (e.g. ``moonshotai``, ``x-ai`` is + known but ``poolside`` isn't) but the model is a vendor-prefixed + aggregator slug, keep the user's CURRENT aggregator if they're on + one, else fall back to openrouter. + 2. Model-format normalization for the resolved provider via + ``normalize_model_for_provider`` (e.g. ``anthropic/claude-opus-4.6`` + on native anthropic → ``claude-opus-4-6``). + """ + from hermes_cli.models import _KNOWN_PROVIDER_NAMES, normalize_provider + from hermes_cli.model_normalize import normalize_model_for_provider + + prov_in = (provider or "").strip() + model_in = (model or "").strip() + canonical = normalize_provider(prov_in) + + if canonical not in _KNOWN_PROVIDER_NAMES and "/" in model_in: + # Vendor prefix posing as a provider (analytics fallback). Resolve + # against the user's current provider when it's an aggregator that + # serves vendor-prefixed slugs; otherwise default to openrouter. + try: + cur_cfg = load_config().get("model", {}) + cur_provider = ( + str(cur_cfg.get("provider", "") or "").strip().lower() + if isinstance(cur_cfg, dict) else "" + ) + except Exception: + cur_provider = "" + from hermes_cli.models import _AGGREGATOR_PROVIDERS + if cur_provider and normalize_provider(cur_provider) in _AGGREGATOR_PROVIDERS: + canonical = normalize_provider(cur_provider) + prov_in = cur_provider + else: + canonical = "openrouter" + prov_in = "openrouter" + + # Custom/user-config providers keep the model verbatim — the registry + # normalizer doesn't know their namespaces. 
+ if canonical in _KNOWN_PROVIDER_NAMES and not canonical.startswith("custom"): + try: + normalized_model = normalize_model_for_provider(model_in, canonical) + if normalized_model: + model_in = normalized_model + except Exception: + _log.debug("model normalization failed for %s/%s", prov_in, model_in, exc_info=True) + + return prov_in, model_in def _apply_main_model_assignment( @@ -1707,6 +1779,28 @@ def _spawn_gateway_restart() -> Tuple[subprocess.Popen, bool]: return _spawn_hermes_action(["gateway", "restart"], "gateway-restart"), False +def _restart_gateway_after_webhook_enable() -> dict[str, Any]: + """Best-effort gateway restart after enabling the webhook platform.""" + try: + proc, reused = _spawn_gateway_restart() + except Exception as exc: + _log.exception("Failed to auto-restart gateway after enabling webhooks") + return { + "restart_started": False, + "restart_error": str(exc), + } + if reused: + _log.info( + "Webhook enable: reusing in-flight gateway restart (pid %s)", + proc.pid, + ) + return { + "restart_started": True, + "restart_action": "gateway-restart", + "restart_pid": proc.pid, + } + + @app.post("/api/gateway/restart") async def restart_gateway(): """Kick off a ``hermes gateway restart`` in the background.""" @@ -2495,8 +2589,9 @@ def _normalize_config_for_web(config: Dict[str, Any]) -> Dict[str, Any]: @app.get("/api/config") -async def get_config(): - config = _normalize_config_for_web(load_config()) +async def get_config(profile: Optional[str] = None): + with _profile_scope(profile): + config = _normalize_config_for_web(load_config()) # Strip internal keys that the frontend shouldn't see or send back return {k: v for k, v in config.items() if not k.startswith("_")} @@ -2522,7 +2617,7 @@ _EMPTY_MODEL_INFO: dict = { @app.get("/api/model/info") -def get_model_info(): +def get_model_info(profile: Optional[str] = None): """Return resolved model metadata for the currently configured model. 
Calls the same context-length resolution chain the agent uses, so the @@ -2530,7 +2625,8 @@ def get_model_info(): Also returns model capabilities (vision, reasoning, tools) when available. """ try: - cfg = load_config() + with _profile_scope(profile): + cfg = load_config() model_cfg = cfg.get("model", "") # Extract model name and provider from the config @@ -2593,6 +2689,10 @@ def get_model_info(): "effective_context_length": effective_ctx, "capabilities": caps, } + except HTTPException: + # Unknown/invalid profile must surface as 404, not degrade into a + # 200 with empty model info (which would render as "no model set"). + raise except Exception: _log.exception("GET /api/model/info failed") return dict(_EMPTY_MODEL_INFO) @@ -2622,13 +2722,17 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = ( @app.get("/api/model/options") -def get_model_options(): +def get_model_options(profile: Optional[str] = None): """Return authenticated providers + their curated model lists. REST equivalent of the ``model.options`` JSON-RPC on tui_gateway, so the dashboard Models page can render the picker without a live chat session. The response shape matches ``model.options`` 1:1 so ``ModelPickerDialog`` can share the same types. + + ``profile`` scopes the picker context (current model/provider, custom + providers from config, per-profile .env auth state) so the Models page + reads the SAME profile /api/model/set writes. """ try: from hermes_cli.inventory import build_models_payload, load_picker_context @@ -2641,15 +2745,18 @@ def get_model_options(): # come back as skeleton rows carrying `authenticated=False` + # `auth_type`/`key_env`/`warning` so the GUI can render a setup # affordance instead of hiding the provider entirely. 
- return build_models_payload( - load_picker_context(), - max_models=50, - include_unconfigured=True, - picker_hints=True, - canonical_order=True, - pricing=True, - capabilities=True, - ) + with _profile_scope(profile): + return build_models_payload( + load_picker_context(), + max_models=50, + include_unconfigured=True, + picker_hints=True, + canonical_order=True, + pricing=True, + capabilities=True, + ) + except HTTPException: + raise except Exception: _log.exception("GET /api/model/options failed") raise HTTPException(status_code=500, detail="Failed to list model options") @@ -2728,7 +2835,7 @@ def get_recommended_default_model(provider: str = ""): @app.get("/api/model/auxiliary") -def get_auxiliary_models(): +def get_auxiliary_models(profile: Optional[str] = None): """Return current auxiliary task assignments. Shape: @@ -2739,9 +2846,14 @@ def get_auxiliary_models(): ], "main": {"provider": "openrouter", "model": "anthropic/claude-opus-4.7"}, } + + ``profile`` scopes the read — without it, the Models page would show + the dashboard profile's auxiliary pins while /api/model/set wrote the + selected profile's (read/write asymmetry). """ try: - cfg = load_config() + with _profile_scope(profile): + cfg = load_config() aux_cfg = cfg.get("auxiliary", {}) if not isinstance(aux_cfg, dict): aux_cfg = {} @@ -2766,13 +2878,15 @@ def get_auxiliary_models(): main = {"provider": "", "model": str(model_cfg) if model_cfg else ""} return {"tasks": tasks, "main": main} + except HTTPException: + raise except Exception: _log.exception("GET /api/model/auxiliary failed") raise HTTPException(status_code=500, detail="Failed to read auxiliary config") @app.post("/api/model/set") -async def set_model_assignment(body: ModelAssignment): +async def set_model_assignment(body: ModelAssignment, profile: Optional[str] = None): """Assign a model to the main slot or an auxiliary task slot. Writes to ``~/.hermes/config.yaml`` — applies to **new** sessions only. 
@@ -2789,8 +2903,10 @@ async def set_model_assignment(body: ModelAssignment): raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'") try: - cfg = load_config() - + # Expensive-model warning runs BEFORE the profile scope is entered: + # _profile_scope must never be held across an await (the RLock is + # reentrant per-thread, so a second coroutine interleaving on the + # event-loop thread could cross-restore the module globals). if model and not body.confirm_expensive_model: try: from hermes_cli.model_cost_guard import expensive_model_warning @@ -2815,125 +2931,13 @@ async def set_model_assignment(body: ModelAssignment): "confirm_message": warning.message, } - if scope == "main": - if not provider or not model: - raise HTTPException(status_code=400, detail="provider and model required for main") - model_cfg = _apply_main_model_assignment( - cfg.get("model", {}), provider, model, base_url - ) - cfg["model"] = model_cfg + def _apply_assignment(): + with _profile_scope(body.profile or profile): + return _apply_model_assignment_sync( + scope, provider, model, task, base_url + ) - # When switching the main provider to Nous, mirror the CLI's - # post-model-selection behaviour (hermes_cli/main.py - # prompt_enable_tool_gateway / tools_config apply_nous_managed_defaults): - # auto-route any *unconfigured* tools through the Nous Tool Gateway. - # This is purely additive — apply_nous_managed_defaults skips every - # tool where the user already has a direct key (FIRECRAWL_API_KEY, - # FAL_KEY, etc.) or an explicit backend/provider in config, so it - # never overwrites a user's own setup. GUI users thus land on the - # gateway the same way CLI users do, without a separate prompt. 
- gateway_tools: list[str] = [] - if provider.strip().lower() == "nous": - try: - from hermes_cli.nous_subscription import apply_nous_managed_defaults - from hermes_cli.tools_config import _get_platform_tools - - enabled = _get_platform_tools( - cfg, "cli", include_default_mcp_servers=False - ) - changed = apply_nous_managed_defaults( - cfg, - enabled_toolsets=enabled, - force_fresh=True, - ) - gateway_tools = sorted(changed) - except Exception: - # Portal lookup hiccups / non-subscriber / non-nous gating - # must never block saving the model assignment. - _log.debug("apply_nous_managed_defaults skipped", exc_info=True) - - save_config(cfg) - - # Surface auxiliary slots still pinned to a *different* provider than - # the new main one. Switching the main model does NOT touch aux pins - # (they're independent, sticky per-task overrides — see - # auxiliary_client._resolve_auto). A user who switches main away from - # a now-unpaid provider (e.g. nous with $0 balance) keeps paying 402s - # on every background aux call until they reset those pins. We never - # auto-clear them — pinning aux to a cheaper/different model is a - # legitimate config — but we tell the caller so the UI can offer a - # "reset to main" nudge instead of silently burning credits. 
- new_provider = provider.strip().lower() - stale_aux: list[dict] = [] - aux_cfg = cfg.get("auxiliary", {}) - if isinstance(aux_cfg, dict): - for slot in _AUX_TASK_SLOTS: - slot_cfg = aux_cfg.get(slot) - if not isinstance(slot_cfg, dict): - continue - slot_provider = str(slot_cfg.get("provider", "") or "").strip() - if ( - slot_provider - and slot_provider.lower() not in {"auto", ""} - and slot_provider.lower() != new_provider - ): - stale_aux.append({ - "task": slot, - "provider": slot_provider, - "model": str(slot_cfg.get("model", "") or ""), - }) - - return { - "ok": True, - "scope": "main", - "provider": provider, - "model": model, - "base_url": model_cfg.get("base_url", ""), - "gateway_tools": gateway_tools, - "stale_aux": stale_aux, - } - - # scope == "auxiliary" - aux = cfg.get("auxiliary") - if not isinstance(aux, dict): - aux = {} - - if task == "__reset__": - # Reset every slot to provider="auto", model="" — keeps other fields intact. - for slot in _AUX_TASK_SLOTS: - slot_cfg = aux.get(slot) - if not isinstance(slot_cfg, dict): - slot_cfg = {} - slot_cfg["provider"] = "auto" - slot_cfg["model"] = "" - aux[slot] = slot_cfg - cfg["auxiliary"] = aux - save_config(cfg) - return {"ok": True, "scope": "auxiliary", "reset": True} - - if not provider: - raise HTTPException(status_code=400, detail="provider required for auxiliary") - - targets = [task] if task else list(_AUX_TASK_SLOTS) - for slot in targets: - if slot not in _AUX_TASK_SLOTS: - raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {slot}") - slot_cfg = aux.get(slot) - if not isinstance(slot_cfg, dict): - slot_cfg = {} - slot_cfg["provider"] = provider - slot_cfg["model"] = model - aux[slot] = slot_cfg - - cfg["auxiliary"] = aux - save_config(cfg) - return { - "ok": True, - "scope": "auxiliary", - "tasks": targets, - "provider": provider, - "model": model, - } + return await asyncio.to_thread(_apply_assignment) except HTTPException: raise except Exception: @@ -2941,6 +2945,139 @@ 
async def set_model_assignment(body: ModelAssignment): raise HTTPException(status_code=500, detail="Failed to save model assignment") +def _apply_model_assignment_sync( + scope: str, provider: str, model: str, task: str, base_url: str +): + """Synchronous body of POST /api/model/set. + + Runs inside ``_profile_scope`` (in a worker thread) so every + load_config/save_config lands in the requested profile. Raises + HTTPException for validation errors — the async wrapper re-raises them. + """ + cfg = load_config() + + if scope == "main": + if not provider or not model: + raise HTTPException(status_code=400, detail="provider and model required for main") + provider, model = _normalize_main_model_assignment(provider, model) + model_cfg = _apply_main_model_assignment( + cfg.get("model", {}), provider, model, base_url + ) + cfg["model"] = model_cfg + + # When switching the main provider to Nous, mirror the CLI's + # post-model-selection behaviour (hermes_cli/main.py + # prompt_enable_tool_gateway / tools_config apply_nous_managed_defaults): + # auto-route any *unconfigured* tools through the Nous Tool Gateway. + # This is purely additive — apply_nous_managed_defaults skips every + # tool where the user already has a direct key (FIRECRAWL_API_KEY, + # FAL_KEY, etc.) or an explicit backend/provider in config, so it + # never overwrites a user's own setup. GUI users thus land on the + # gateway the same way CLI users do, without a separate prompt. 
+ gateway_tools: list[str] = [] + if provider.strip().lower() == "nous": + try: + from hermes_cli.nous_subscription import apply_nous_managed_defaults + from hermes_cli.tools_config import _get_platform_tools + + enabled = _get_platform_tools( + cfg, "cli", include_default_mcp_servers=False + ) + changed = apply_nous_managed_defaults( + cfg, + enabled_toolsets=enabled, + force_fresh=True, + ) + gateway_tools = sorted(changed) + except Exception: + # Portal lookup hiccups / non-subscriber / non-nous gating + # must never block saving the model assignment. + _log.debug("apply_nous_managed_defaults skipped", exc_info=True) + + save_config(cfg) + + # Surface auxiliary slots still pinned to a *different* provider than + # the new main one. Switching the main model does NOT touch aux pins + # (they're independent, sticky per-task overrides — see + # auxiliary_client._resolve_auto). A user who switches main away from + # a now-unpaid provider (e.g. nous with $0 balance) keeps paying 402s + # on every background aux call until they reset those pins. We never + # auto-clear them — pinning aux to a cheaper/different model is a + # legitimate config — but we tell the caller so the UI can offer a + # "reset to main" nudge instead of silently burning credits. 
+ new_provider = provider.strip().lower() + stale_aux: list[dict] = [] + aux_cfg = cfg.get("auxiliary", {}) + if isinstance(aux_cfg, dict): + for slot in _AUX_TASK_SLOTS: + slot_cfg = aux_cfg.get(slot) + if not isinstance(slot_cfg, dict): + continue + slot_provider = str(slot_cfg.get("provider", "") or "").strip() + if ( + slot_provider + and slot_provider.lower() not in {"auto", ""} + and slot_provider.lower() != new_provider + ): + stale_aux.append({ + "task": slot, + "provider": slot_provider, + "model": str(slot_cfg.get("model", "") or ""), + }) + + return { + "ok": True, + "scope": "main", + "provider": provider, + "model": model, + "base_url": model_cfg.get("base_url", ""), + "gateway_tools": gateway_tools, + "stale_aux": stale_aux, + } + + # scope == "auxiliary" + aux = cfg.get("auxiliary") + if not isinstance(aux, dict): + aux = {} + + if task == "__reset__": + # Reset every slot to provider="auto", model="" — keeps other fields intact. + for slot in _AUX_TASK_SLOTS: + slot_cfg = aux.get(slot) + if not isinstance(slot_cfg, dict): + slot_cfg = {} + slot_cfg["provider"] = "auto" + slot_cfg["model"] = "" + aux[slot] = slot_cfg + cfg["auxiliary"] = aux + save_config(cfg) + return {"ok": True, "scope": "auxiliary", "reset": True} + + if not provider: + raise HTTPException(status_code=400, detail="provider required for auxiliary") + + targets = [task] if task else list(_AUX_TASK_SLOTS) + for slot in targets: + if slot not in _AUX_TASK_SLOTS: + raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {slot}") + slot_cfg = aux.get(slot) + if not isinstance(slot_cfg, dict): + slot_cfg = {} + slot_cfg["provider"] = provider + slot_cfg["model"] = model + aux[slot] = slot_cfg + + cfg["auxiliary"] = aux + save_config(cfg) + return { + "ok": True, + "scope": "auxiliary", + "tasks": targets, + "provider": provider, + "model": model, + } + + def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]: @@ -2996,18 +3133,22 @@ def 
_denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]: @app.put("/api/config") -async def update_config(body: ConfigUpdate): +async def update_config(body: ConfigUpdate, profile: Optional[str] = None): try: - save_config(_denormalize_config_from_web(body.config)) + with _profile_scope(body.profile or profile): + save_config(_denormalize_config_from_web(body.config)) return {"ok": True} + except HTTPException: + raise except Exception: _log.exception("PUT /api/config failed") raise HTTPException(status_code=500, detail="Internal server error") @app.get("/api/env") -async def get_env_vars(): - env_on_disk = load_env() +async def get_env_vars(profile: Optional[str] = None): + with _profile_scope(profile): + env_on_disk = load_env() channel_keys = _channel_managed_env_keys() result = {} for var_name, info in OPTIONAL_ENV_VARS.items(): @@ -3030,9 +3171,10 @@ async def get_env_vars(): @app.put("/api/env") -async def set_env_var(body: EnvVarUpdate): +async def set_env_var(body: EnvVarUpdate, profile: Optional[str] = None): try: - save_env_value(body.key, body.value) + with _profile_scope(body.profile or profile): + save_env_value(body.key, body.value) return {"ok": True, "key": body.key} except ValueError as exc: # save_env_value raises ValueError for invalid names and for keys @@ -3143,9 +3285,10 @@ async def validate_provider_credential(body: EnvVarUpdate, request: Request): @app.delete("/api/env") -async def remove_env_var(body: EnvVarDelete): +async def remove_env_var(body: EnvVarDelete, profile: Optional[str] = None): try: - removed = remove_env_value(body.key) + with _profile_scope(body.profile or profile): + removed = remove_env_value(body.key) if not removed: raise HTTPException(status_code=404, detail=f"{body.key} not found in .env") return {"ok": True, "key": body.key} @@ -3162,7 +3305,9 @@ async def remove_env_var(body: EnvVarDelete): @app.post("/api/env/reveal") -async def reveal_env_var(body: EnvVarReveal, request: Request): +async def 
reveal_env_var( + body: EnvVarReveal, request: Request, profile: Optional[str] = None +): """Return the real (unredacted) value of a single env var. Protected by: @@ -3182,7 +3327,8 @@ async def reveal_env_var(body: EnvVarReveal, request: Request): _reveal_timestamps.append(now) # --- Reveal --- - env_on_disk = load_env() + with _profile_scope(body.profile or profile): + env_on_disk = load_env() value = env_on_disk.get(body.key) if value is None: raise HTTPException(status_code=404, detail=f"{body.key} not found in .env") @@ -4322,22 +4468,27 @@ def _truncate_token(value: Optional[str], visible: int = 6) -> str: def _anthropic_oauth_status() -> Dict[str, Any]: - """Combined status across the three Anthropic credential sources we read. + """Status for the "Anthropic API Key" catalog entry. - Hermes resolves Anthropic creds in this order at runtime: - 1. ``~/.hermes/.anthropic_oauth.json`` — Hermes-managed PKCE flow - 2. ``~/.claude/.credentials.json`` — Claude Code CLI credentials (auto) - 3. ``ANTHROPIC_TOKEN`` / ``ANTHROPIC_API_KEY`` env vars - The dashboard reports the highest-priority source that's actually present. + Two sources, in priority order: + 1. ``~/.hermes/.anthropic_oauth.json`` — Hermes-managed PKCE flow (what + this entry's Connect button writes) + 2. ``ANTHROPIC_API_KEY`` → ``ANTHROPIC_TOKEN`` → ``CLAUDE_CODE_OAUTH_TOKEN`` + env vars (registry order) — from ``.env``, the shell, or an external + secret source like Bitwarden (whose keys are injected into the process + env during ``load_hermes_dotenv()``, so the same check covers them) + + Claude Code's ``~/.claude/.credentials.json`` is deliberately NOT read + here — it has its own dedicated catalog entry (``claude-code`` → + ``_claude_code_only_status``). Reporting it under the API-key entry + double-counts the token and shadows a real ANTHROPIC_API_KEY. 
""" try: from agent.anthropic_adapter import ( read_hermes_oauth_credentials, - read_claude_code_credentials, _HERMES_OAUTH_FILE, ) except ImportError: - read_claude_code_credentials = None # type: ignore read_hermes_oauth_credentials = None # type: ignore _HERMES_OAUTH_FILE = None # type: ignore @@ -4357,29 +4508,33 @@ def _anthropic_oauth_status() -> Dict[str, Any]: "has_refresh_token": bool(hermes_creds.get("refreshToken")), } - cc_creds = None - if read_claude_code_credentials: - try: - cc_creds = read_claude_code_credentials() - except Exception: - cc_creds = None - if cc_creds and cc_creds.get("accessToken"): - return { - "logged_in": True, - "source": "claude_code", - "source_label": "Claude Code (~/.claude/.credentials.json)", - "token_preview": _truncate_token(cc_creds.get("accessToken")), - "expires_at": cc_creds.get("expiresAt"), - "has_refresh_token": bool(cc_creds.get("refreshToken")), - } + # Env-var / secret-source path. ``get_env_value`` checks the process + # environment first (where Bitwarden-sourced secrets land) then .env. 
+ env_var_order: tuple = ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN") + try: + from hermes_cli.auth import PROVIDER_REGISTRY + env_var_order = PROVIDER_REGISTRY["anthropic"].api_key_env_vars + except (ImportError, KeyError): + pass + try: + from hermes_cli.config import get_env_value + except ImportError: + get_env_value = None # type: ignore + try: + from hermes_cli.env_loader import format_secret_source_suffix + except ImportError: + format_secret_source_suffix = None # type: ignore - env_token = os.getenv("ANTHROPIC_TOKEN") or os.getenv("CLAUDE_CODE_OAUTH_TOKEN") - if env_token: + for var in env_var_order: + value = (get_env_value(var) if get_env_value else None) or os.getenv(var) + if not value: + continue + suffix = format_secret_source_suffix(var) if format_secret_source_suffix else "" return { "logged_in": True, "source": "env_var", - "source_label": "ANTHROPIC_TOKEN environment variable", - "token_preview": _truncate_token(env_token), + "source_label": f"{var}{suffix}", + "token_preview": _truncate_token(value), "expires_at": None, "has_refresh_token": False, } @@ -6102,6 +6257,7 @@ class CronJobCreate(BaseModel): schedule: str name: str = "" deliver: str = "local" + skills: Optional[List[str]] = None class CronJobUpdate(BaseModel): @@ -6273,6 +6429,7 @@ async def create_cron_job(body: CronJobCreate, profile: str = "default"): schedule=body.schedule, name=body.name, deliver=body.deliver, + skills=body.skills, ) except Exception as e: _log.exception("POST /api/cron/jobs failed") @@ -6387,6 +6544,7 @@ class MCPServerCreate(BaseModel): env: Dict[str, str] = {} # auth: "oauth" | "header" | None auth: Optional[str] = None + profile: Optional[str] = None def _redact_mcp_env(env: Dict[str, Any]) -> Dict[str, str]: @@ -6417,10 +6575,11 @@ def _mcp_server_summary(name: str, cfg: Dict[str, Any]) -> Dict[str, Any]: @app.get("/api/mcp/servers") -async def list_mcp_servers(): +async def list_mcp_servers(profile: Optional[str] = None): from 
hermes_cli.mcp_config import _get_mcp_servers - servers = _get_mcp_servers() + with _profile_scope(profile): + servers = _get_mcp_servers() return { "servers": [ _mcp_server_summary(name, cfg) for name, cfg in sorted(servers.items()) @@ -6429,13 +6588,15 @@ async def list_mcp_servers(): @app.post("/api/mcp/servers") -async def add_mcp_server(body: MCPServerCreate): +async def add_mcp_server(body: MCPServerCreate, profile: Optional[str] = None): from hermes_cli.mcp_config import _get_mcp_servers, _save_mcp_server name = (body.name or "").strip() if not name: raise HTTPException(status_code=400, detail="Server name is required") - if name in _get_mcp_servers(): + with _profile_scope(body.profile or profile): + existing = _get_mcp_servers() + if name in existing: raise HTTPException(status_code=409, detail=f"Server '{name}' already exists") if not body.url and not body.command: raise HTTPException( @@ -6456,7 +6617,10 @@ async def add_mcp_server(body: MCPServerCreate): server_config["auth"] = body.auth try: - _save_mcp_server(name, server_config) + with _profile_scope(body.profile or profile): + _save_mcp_server(name, server_config) + except HTTPException: + raise except Exception as exc: _log.exception("POST /api/mcp/servers failed") raise HTTPException(status_code=400, detail=str(exc)) from exc @@ -6465,27 +6629,44 @@ async def add_mcp_server(body: MCPServerCreate): @app.delete("/api/mcp/servers/{name}") -async def remove_mcp_server(name: str): +async def remove_mcp_server(name: str, profile: Optional[str] = None): from hermes_cli.mcp_config import _remove_mcp_server - if not _remove_mcp_server(name): + with _profile_scope(profile): + removed = _remove_mcp_server(name) + if not removed: raise HTTPException(status_code=404, detail=f"Server '{name}' not found") return {"ok": True} @app.post("/api/mcp/servers/{name}/test") -async def test_mcp_server(name: str): +async def test_mcp_server(name: str, profile: Optional[str] = None): """Connect to the server, list its 
tools, disconnect. Returns tool list.""" from hermes_cli.mcp_config import _get_mcp_servers, _probe_single_server - servers = _get_mcp_servers() + with _profile_scope(profile): + servers = _get_mcp_servers() if name not in servers: raise HTTPException(status_code=404, detail=f"Server '{name}' not found") + def _probe_scoped(): + # Re-enter the scope INSIDE the worker thread so call-time + # resolution during the probe — env-placeholder expansion in + # _resolve_mcp_server_config reading the profile's .env — sees the + # selected profile, matching the config the server was saved into. + # (asyncio.to_thread copies contextvars, but entering explicitly + # keeps the lock-protected SKILLS_DIR swap balanced per-thread.) + # The probe's dedicated MCP event-loop thread is covered too: + # _run_on_mcp_loop wraps scheduled coroutines with the caller's + # HERMES_HOME override (see mcp_tool._wrap_with_home_override), so + # OAuth token stores resolve against the selected profile as well. + with _profile_scope(profile): + return _probe_single_server(name, servers[name]) + try: # Probe blocks on a dedicated MCP event loop — run in a thread so the # FastAPI event loop is never blocked. - tools = await asyncio.to_thread(_probe_single_server, name, servers[name]) + tools = await asyncio.to_thread(_probe_scoped) except Exception as exc: return { "ok": False, @@ -6500,34 +6681,40 @@ async def test_mcp_server(name: str): class MCPEnabledToggle(BaseModel): enabled: bool + profile: Optional[str] = None @app.put("/api/mcp/servers/{name}/enabled") -async def set_mcp_server_enabled(name: str, body: MCPEnabledToggle): +async def set_mcp_server_enabled( + name: str, body: MCPEnabledToggle, profile: Optional[str] = None +): """Enable or disable an MCP server (takes effect on next session/gateway). Toggles the ``enabled`` key on the server's config.yaml entry — the same flag the agent reads at startup. 
Disabled servers stay in config so they can be re-enabled without re-entering their settings. """ - cfg = load_config() - servers = cfg.get("mcp_servers") - if not isinstance(servers, dict) or name not in servers: - raise HTTPException(status_code=404, detail=f"Server '{name}' not found") - if not isinstance(servers[name], dict): - raise HTTPException(status_code=400, detail="Malformed server config") - servers[name]["enabled"] = bool(body.enabled) - save_config(cfg) + with _profile_scope(body.profile or profile): + cfg = load_config() + servers = cfg.get("mcp_servers") + if not isinstance(servers, dict) or name not in servers: + raise HTTPException(status_code=404, detail=f"Server '{name}' not found") + if not isinstance(servers[name], dict): + raise HTTPException(status_code=400, detail="Malformed server config") + servers[name]["enabled"] = bool(body.enabled) + save_config(cfg) return {"ok": True, "name": name, "enabled": bool(body.enabled)} @app.get("/api/mcp/catalog") -async def list_mcp_catalog(): +async def list_mcp_catalog(profile: Optional[str] = None): """Browse the Nous-approved MCP catalog (the optional-mcps/ manifests). Each entry reports whether it's already installed and enabled so the UI can show install / enabled state inline. This is the same catalog - `hermes mcp catalog` / `hermes mcp install` read. + `hermes mcp catalog` / `hermes mcp install` read. ``profile`` scopes + the installed/enabled annotations (the catalog itself is repo-shipped + and identical for every profile). 
""" try: from hermes_cli import mcp_catalog @@ -6537,7 +6724,13 @@ async def list_mcp_catalog(): entries = [] try: - for entry in mcp_catalog.list_catalog(): + with _profile_scope(profile): + catalog_entries = list(mcp_catalog.list_catalog()) + installed_state = { + e.name: (mcp_catalog.is_installed(e.name), mcp_catalog.is_enabled(e.name)) + for e in catalog_entries + } + for entry in catalog_entries: auth = entry.auth entries.append({ "name": entry.name, @@ -6551,9 +6744,12 @@ async def list_mcp_catalog(): for e in getattr(auth, "env", []) or [] ], "needs_install": entry.install is not None, - "installed": mcp_catalog.is_installed(entry.name), - "enabled": mcp_catalog.is_enabled(entry.name), + "installed": installed_state.get(entry.name, (False, False))[0], + "enabled": installed_state.get(entry.name, (False, False))[1], }) + except HTTPException: + # Unknown/invalid profile → 404, not a silently-empty catalog. + raise except Exception: _log.exception("list_mcp_catalog failed") @@ -6574,10 +6770,11 @@ class MCPCatalogInstall(BaseModel): # env: KEY=VALUE map for catalog entries that declare required env vars. env: Dict[str, str] = {} enable: bool = True + profile: Optional[str] = None @app.post("/api/mcp/catalog/install") -async def install_mcp_catalog_entry(body: MCPCatalogInstall): +async def install_mcp_catalog_entry(body: MCPCatalogInstall, profile: Optional[str] = None): """Install a catalog MCP into config.yaml. For HTTP/stdio entries with required env vars, those are written to .env @@ -6594,23 +6791,42 @@ async def install_mcp_catalog_entry(body: MCPCatalogInstall): # Persist any supplied env vars first (catalog entries declare which names # they need; we only write the ones the user provided). 
+ effective_profile = body.profile or profile if body.env: - for k, v in body.env.items(): - if v: - save_env_value(k, v) + with _profile_scope(effective_profile): + for k, v in body.env.items(): + if v: + save_env_value(k, v) # Git-bootstrap entries can take a while to clone — run via the background # action path so the request returns immediately and the UI can tail logs. + # The -p subprocess rebinds HERMES_HOME-derived paths in the child. if entry.install is not None: try: - proc = _spawn_hermes_action(["mcp", "install", name], "mcp-install") + proc = _spawn_hermes_action( + _profile_cli_args(effective_profile) + ["mcp", "install", name], + "mcp-install", + ) + except HTTPException: + raise except Exception as exc: raise HTTPException(status_code=500, detail=f"Install failed: {exc}") return {"ok": True, "name": name, "background": True, "action": "mcp-install"} - # No git step — install synchronously via the catalog API. + # No git step — install synchronously via the catalog API. install_entry + # routes through load_config/save_config + save_env_value, all call-time + # resolvers, so the context override scopes it. Wrap the to_thread body + # in the scope INSIDE the thread (contextvars don't propagate into + # to_thread the other way around — asyncio.to_thread copies context, so + # setting it here works; keep it explicit for clarity). 
+ def _install_scoped(): + with _profile_scope(effective_profile): + mcp_catalog.install_entry(entry, enable=body.enable) + try: - await asyncio.to_thread(mcp_catalog.install_entry, entry, enable=body.enable) + await asyncio.to_thread(_install_scoped) + except HTTPException: + raise except Exception as exc: _log.exception("install_mcp_catalog_entry failed") raise HTTPException(status_code=400, detail=str(exc)) @@ -6753,6 +6969,27 @@ async def list_webhooks(): } +@app.post("/api/webhooks/enable") +async def enable_webhooks(): + try: + _write_platform_enabled("webhook", True) + except Exception as exc: + _log.exception("Failed to enable webhook platform from dashboard") + raise HTTPException( + status_code=500, + detail="Failed to enable webhook platform.", + ) from exc + + restart_result = _restart_gateway_after_webhook_enable() + return { + "ok": True, + "platform": "webhook", + "enabled": True, + "needs_restart": not restart_result["restart_started"], + **restart_result, + } + + @app.post("/api/webhooks") async def create_webhook(body: WebhookCreate): import re as _re @@ -6763,7 +7000,7 @@ async def create_webhook(body: WebhookCreate): if not wh._is_webhook_enabled(): raise HTTPException( status_code=400, - detail="Webhook platform is not enabled. Enable it in messaging settings first.", + detail="Webhook platform is not enabled. Enable it from the Webhooks page first.", ) name = (body.name or "").strip().lower().replace(" ", "-") @@ -7144,6 +7381,14 @@ async def run_backup(body: BackupRequest): class ImportRequest(BaseModel): archive: str + # Pass --force to `hermes import`. The spawned action runs with + # stdin=DEVNULL, so the CLI's interactive "Continue? [y/N]" overwrite + # prompt hits EOF and auto-aborts ("Aborted.", exit 1) whenever the + # target already has a config — which it always does when the dashboard + # itself is running from it. 
The dashboard shows its own confirm modal + # before calling this endpoint, then sends force=True so the restore + # proceeds non-interactively. + force: bool = False @app.post("/api/ops/import") @@ -7153,8 +7398,11 @@ async def run_import(body: ImportRequest): raise HTTPException(status_code=400, detail="archive path is required") if not os.path.isfile(archive): raise HTTPException(status_code=404, detail=f"Archive not found: {archive}") + args = ["import", archive] + if body.force: + args.append("--force") try: - proc = _spawn_hermes_action(["import", archive], "import") + proc = _spawn_hermes_action(args, "import") except Exception as exc: _log.exception("Failed to spawn import") raise HTTPException(status_code=500, detail=f"Failed to run import: {exc}") @@ -7373,15 +7621,38 @@ async def prune_checkpoints(): class SkillInstallRequest(BaseModel): identifier: str + profile: Optional[str] = None + + +def _profile_cli_args(profile: Optional[str]) -> List[str]: + """Return ``["-p", ]`` for a validated non-default profile. + + Hub install/uninstall/update run in a fresh ``hermes`` subprocess, and + ``_apply_profile_override()`` reads ``-p`` from argv in the child — the + only mechanism that reaches import-time-bound globals like + ``skills_hub.SKILLS_DIR``. Empty/"current" means the dashboard's own + profile (no args, legacy behavior). 
+ """ + requested = (profile or "").strip() + if not requested or requested.lower() == "current": + return [] + from hermes_cli import profiles as profiles_mod + _resolve_profile_dir(requested) + return ["-p", profiles_mod.normalize_profile_name(requested)] @app.post("/api/skills/hub/install") -async def install_skill_hub(body: SkillInstallRequest): +async def install_skill_hub(body: SkillInstallRequest, profile: Optional[str] = None): identifier = (body.identifier or "").strip() if not identifier: raise HTTPException(status_code=400, detail="identifier is required") try: - proc = _spawn_hermes_action(["skills", "install", identifier], "skills-install") + proc = _spawn_hermes_action( + _profile_cli_args(body.profile or profile) + ["skills", "install", identifier], + "skills-install", + ) + except HTTPException: + raise except Exception as exc: _log.exception("Failed to spawn skills install") raise HTTPException(status_code=500, detail=f"Failed to install skill: {exc}") @@ -7390,25 +7661,42 @@ async def install_skill_hub(body: SkillInstallRequest): class SkillUninstallRequest(BaseModel): name: str + profile: Optional[str] = None @app.post("/api/skills/hub/uninstall") -async def uninstall_skill_hub(body: SkillUninstallRequest): +async def uninstall_skill_hub(body: SkillUninstallRequest, profile: Optional[str] = None): name = (body.name or "").strip() if not name: raise HTTPException(status_code=400, detail="name is required") try: - proc = _spawn_hermes_action(["skills", "uninstall", name, "--yes"], "skills-uninstall") + proc = _spawn_hermes_action( + _profile_cli_args(body.profile or profile) + ["skills", "uninstall", name, "--yes"], + "skills-uninstall", + ) + except HTTPException: + raise except Exception as exc: _log.exception("Failed to spawn skills uninstall") raise HTTPException(status_code=500, detail=f"Failed to uninstall skill: {exc}") return {"ok": True, "pid": proc.pid, "name": "skills-uninstall"} +class SkillsUpdateRequest(BaseModel): + profile: 
Optional[str] = None + + @app.post("/api/skills/hub/update") -async def update_skills_hub(): +async def update_skills_hub( + body: Optional[SkillsUpdateRequest] = None, profile: Optional[str] = None +): try: - proc = _spawn_hermes_action(["skills", "update"], "skills-update") + effective = (body.profile if body else None) or profile + proc = _spawn_hermes_action( + _profile_cli_args(effective) + ["skills", "update"], "skills-update" + ) + except HTTPException: + raise except Exception as exc: _log.exception("Failed to spawn skills update") raise HTTPException(status_code=500, detail=f"Failed to update skills: {exc}") @@ -7443,17 +7731,25 @@ def _skill_meta_to_payload(m) -> dict: } -def _installed_hub_identifiers() -> dict: +def _installed_hub_identifiers(profile: Optional[str] = None) -> dict: """Map identifier -> installed lock entry for hub-installed skills. - Lets the UI mark search results that are already installed. Best-effort: - returns an empty dict if the lock file can't be read. + Lets the UI mark search results that are already installed. Scoped to + ``profile``'s skills/.hub/lock.json when provided (HubLockFile takes an + explicit path, sidestepping the import-time LOCK_FILE binding). + Best-effort: returns an empty dict if the lock file can't be read. 
""" try: from tools.skills_hub import HubLockFile + requested = (profile or "").strip() + if requested and requested.lower() != "current": + profile_dir = _resolve_profile_dir(requested) + lock = HubLockFile(profile_dir / "skills" / ".hub" / "lock.json") + else: + lock = HubLockFile() out = {} - for entry in HubLockFile().list_installed(): + for entry in lock.list_installed(): ident = entry.get("identifier") if ident: out[ident] = { @@ -7467,13 +7763,14 @@ def _installed_hub_identifiers() -> dict: @app.get("/api/skills/hub/sources") -async def list_skills_hub_sources(): +async def list_skills_hub_sources(profile: Optional[str] = None): """List the configured skill-hub sources and installed-skill provenance. Gives the dashboard something to show BEFORE a search runs — which hubs are wired up, their trust tier, and a set of featured skills pulled from the centralized index (zero extra API calls). Without this the Browse-hub tab is a blank page with no indication it's even connected to anything. + ``profile`` scopes the installed-skill provenance to that profile. """ def _run(): @@ -7514,18 +7811,22 @@ async def list_skills_hub_sources(): "sources": out, "index_available": index_available, "featured": featured, - "installed": _installed_hub_identifiers(), + "installed": _installed_hub_identifiers(profile), } try: return await asyncio.to_thread(_run) + except HTTPException: + raise except Exception as exc: _log.exception("skills hub sources listing failed") raise HTTPException(status_code=502, detail=f"Hub sources failed: {exc}") @app.get("/api/skills/hub/search") -async def search_skills_hub(q: str = "", source: str = "all", limit: int = 20): +async def search_skills_hub( + q: str = "", source: str = "all", limit: int = 20, profile: Optional[str] = None +): """Search the skill hub across all configured sources. 
Network-bound (parallel source search); runs in a thread so the FastAPI @@ -7560,11 +7861,13 @@ async def search_skills_hub(q: str = "", source: str = "all", limit: int = 20): "results": [_skill_meta_to_payload(m) for m in deduped], "source_counts": source_counts, "timed_out": timed_out, - "installed": _installed_hub_identifiers(), + "installed": _installed_hub_identifiers(profile), } try: return await asyncio.to_thread(_run) + except HTTPException: + raise except Exception as exc: _log.exception("skills hub search failed") raise HTTPException(status_code=502, detail=f"Hub search failed: {exc}") @@ -7893,6 +8196,7 @@ def _write_profile_model(profile_dir: Path, provider: str, model: str) -> None: token = set_hermes_home_override(str(profile_dir)) try: + provider, model = _normalize_main_model_assignment(provider, model) cfg = load_config() cfg["model"] = _apply_main_model_assignment(cfg.get("model", {}), provider, model) save_config(cfg) @@ -8333,41 +8637,193 @@ async def describe_profile_auto_endpoint(name: str, body: ProfileDescribeAuto): # --------------------------------------------------------------------------- # Skills & Tools endpoints +# +# Every read/write below accepts an optional ``profile`` query param so the +# dashboard can manage ANY profile's skills/toolsets, not just the profile +# the dashboard process happens to be running under. Without this, "Set as +# active" on the Profiles page (which only flips the sticky ``active_profile`` +# file for FUTURE CLI/gateway invocations) misled users into thinking skill +# toggles would land in the activated profile — they silently wrote into the +# dashboard's own config instead. See _profile_scope() for the mechanism. # --------------------------------------------------------------------------- +_SKILLS_PROFILE_LOCK = threading.RLock() + + +@contextmanager +def _profile_scope(profile: Optional[str]): + """Scope config + skill-directory resolution to ``profile`` for one request. 
+ + Two seams must be redirected for skills/toolsets endpoints: + + 1. ``load_config``/``save_config`` resolve ``get_hermes_home()`` at call + time — the context-local override from ``set_hermes_home_override`` + reaches them (same pattern as ``_write_profile_model``). + 2. ``tools.skills_tool`` and ``tools.skill_manager_tool`` bind + ``SKILLS_DIR`` at import time, so the override CANNOT reach them. + Like ``_call_cron_for_profile`` does for cron's module globals, + temporarily retarget both under a lock and restore them + immediately after. + + ``profile`` of None/""/"current" means "the dashboard's own profile" — + config resolution is untouched, but the skill-module globals are still + retargeted to the *current* ``get_hermes_home()`` so writes land in the + live home even when the import-time binding is stale (e.g. the process + imported the modules before a HERMES_HOME override, or under test + isolation). + """ + requested = (profile or "").strip() + + from hermes_constants import ( + get_hermes_home, + set_hermes_home_override, + reset_hermes_home_override, + ) + from tools import skills_tool as _skills_tool + from tools import skill_manager_tool as _skill_mgr + + token = None + if not requested or requested.lower() == "current": + profile_dir = get_hermes_home() + else: + profile_dir = _resolve_profile_dir(requested) + token = set_hermes_home_override(str(profile_dir)) + + with _SKILLS_PROFILE_LOCK: + old_home = _skills_tool.HERMES_HOME + old_skills_dir = _skills_tool.SKILLS_DIR + old_mgr_home = _skill_mgr.HERMES_HOME + old_mgr_skills_dir = _skill_mgr.SKILLS_DIR + _skills_tool.HERMES_HOME = profile_dir + _skills_tool.SKILLS_DIR = profile_dir / "skills" + _skill_mgr.HERMES_HOME = profile_dir + _skill_mgr.SKILLS_DIR = profile_dir / "skills" + try: + yield profile_dir if token is not None else None + finally: + _skills_tool.HERMES_HOME = old_home + _skills_tool.SKILLS_DIR = old_skills_dir + _skill_mgr.HERMES_HOME = old_mgr_home + _skill_mgr.SKILLS_DIR = 
old_mgr_skills_dir + if token is not None: + reset_hermes_home_override(token) + + class SkillToggle(BaseModel): name: str enabled: bool + profile: Optional[str] = None @app.get("/api/skills") -async def get_skills(): +async def get_skills(profile: Optional[str] = None): from tools.skills_tool import _find_all_skills from hermes_cli.skills_config import get_disabled_skills - config = load_config() - disabled = get_disabled_skills(config) - skills = _find_all_skills(skip_disabled=True) + with _profile_scope(profile): + config = load_config() + disabled = get_disabled_skills(config) + skills = _find_all_skills(skip_disabled=True) for s in skills: s["enabled"] = s["name"] not in disabled return skills @app.put("/api/skills/toggle") -async def toggle_skill(body: SkillToggle): +async def toggle_skill(body: SkillToggle, profile: Optional[str] = None): from hermes_cli.skills_config import get_disabled_skills, save_disabled_skills - config = load_config() - disabled = get_disabled_skills(config) - if body.enabled: - disabled.discard(body.name) - else: - disabled.add(body.name) - save_disabled_skills(config, disabled) + with _profile_scope(body.profile or profile): + config = load_config() + disabled = get_disabled_skills(config) + if body.enabled: + disabled.discard(body.name) + else: + disabled.add(body.name) + save_disabled_skills(config, disabled) return {"ok": True, "name": body.name, "enabled": body.enabled} +class SkillCreate(BaseModel): + name: str + content: str + category: Optional[str] = None + profile: Optional[str] = None + + +class SkillContentUpdate(BaseModel): + name: str + content: str + profile: Optional[str] = None + + +def _clear_skills_prompt_cache() -> None: + """Best-effort: invalidate the skills system-prompt snapshot after a write. + + Mirrors what ``skill_manage`` does so a dashboard-authored skill is picked + up by the next session without a manual cache reset. 
+ """ + try: + from agent.prompt_builder import clear_skills_system_prompt_cache + clear_skills_system_prompt_cache(clear_snapshot=True) + except Exception: + pass + + +@app.get("/api/skills/content") +async def get_skill_content(name: str, profile: Optional[str] = None): + """Return the raw SKILL.md text for a skill, for the dashboard editor.""" + from tools.skill_manager_tool import _find_skill + + with _profile_scope(profile): + found = _find_skill(name) + if not found: + raise HTTPException(status_code=404, detail=f"Skill '{name}' not found.") + skill_md = found["path"] / "SKILL.md" + if not skill_md.exists(): + raise HTTPException(status_code=404, detail=f"Skill '{name}' has no SKILL.md.") + try: + content = skill_md.read_text(encoding="utf-8") + except OSError as exc: + raise HTTPException(status_code=500, detail=str(exc)) from exc + return {"name": name, "content": content, "path": str(skill_md)} + + +@app.post("/api/skills") +async def create_skill(body: SkillCreate): + """Create a new custom skill (SKILL.md) from the dashboard editor. + + Calls the same validated write path as the agent's ``skill_manage`` + tool (frontmatter validation, name/category validation, size limit, + optional security scan) — but bypasses the agent write-approval gate: + a write from the authenticated dashboard IS the user acting directly. 
+ """ + from tools.skill_manager_tool import _create_skill + + with _profile_scope(body.profile): + result = _create_skill(body.name, body.content, body.category or None) + if not result.get("success"): + raise HTTPException(status_code=400, detail=result.get("error", "Failed to create skill.")) + _clear_skills_prompt_cache() + return result + + +@app.put("/api/skills/content") +async def update_skill_content(body: SkillContentUpdate): + """Replace the SKILL.md of an existing skill (full rewrite) from the editor.""" + from tools.skill_manager_tool import _edit_skill + + with _profile_scope(body.profile): + result = _edit_skill(body.name, body.content) + if not result.get("success"): + err = result.get("error", "Failed to update skill.") + status = 404 if "not found" in str(err).lower() else 400 + raise HTTPException(status_code=status, detail=err) + _clear_skills_prompt_cache() + return result + + @app.get("/api/tools/toolsets") -async def get_toolsets(): +async def get_toolsets(profile: Optional[str] = None): from hermes_cli.tools_config import ( _get_effective_configurable_toolsets, _get_platform_tools, @@ -8376,12 +8832,13 @@ async def get_toolsets(): ) from toolsets import resolve_toolset - config = load_config() - enabled_toolsets = _get_platform_tools( - config, - "cli", - include_default_mcp_servers=False, - ) + with _profile_scope(profile): + config = load_config() + enabled_toolsets = _get_platform_tools( + config, + "cli", + include_default_mcp_servers=False, + ) result = [] for name, label, desc in _get_effective_configurable_toolsets(): try: @@ -8403,15 +8860,17 @@ async def get_toolsets(): class ToolsetToggle(BaseModel): enabled: bool + profile: Optional[str] = None @app.put("/api/tools/toolsets/{name}") -async def toggle_toolset(name: str, body: ToolsetToggle): +async def toggle_toolset(name: str, body: ToolsetToggle, profile: Optional[str] = None): """Enable/disable a configurable toolset for the desktop (cli) platform. 
Persists to ``platform_toolsets.cli`` via the same ``_save_platform_tools`` helper the CLI ``hermes tools`` picker uses, so the GUI and CLI stay in - lockstep. Returns 400 for unknown toolset keys. + lockstep. Scoped to ``body.profile`` when provided. Returns 400 for + unknown toolset keys. """ from hermes_cli.tools_config import ( _get_effective_configurable_toolsets, @@ -8423,20 +8882,21 @@ async def toggle_toolset(name: str, body: ToolsetToggle): if name not in valid: raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}") - config = load_config() - enabled = set( - _get_platform_tools(config, "cli", include_default_mcp_servers=False) - ) - if body.enabled: - enabled.add(name) - else: - enabled.discard(name) - _save_platform_tools(config, "cli", enabled) + with _profile_scope(body.profile or profile): + config = load_config() + enabled = set( + _get_platform_tools(config, "cli", include_default_mcp_servers=False) + ) + if body.enabled: + enabled.add(name) + else: + enabled.discard(name) + _save_platform_tools(config, "cli", enabled) return {"ok": True, "name": name, "enabled": body.enabled} @app.get("/api/tools/toolsets/{name}/config") -async def get_toolset_config(name: str): +async def get_toolset_config(name: str, profile: Optional[str] = None): """Return the provider matrix + key status for a toolset's config panel. 
Surfaces the same provider rows the CLI ``hermes tools`` picker shows @@ -8457,38 +8917,39 @@ async def get_toolset_config(name: str): if name not in valid: raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}") - config = load_config() - cat = TOOL_CATEGORIES.get(name) - providers = [] - active_provider = None - if cat: - for prov in _visible_providers(cat, config, force_fresh=True): - env_vars = [ - { - "key": e["key"], - "prompt": e.get("prompt", e["key"]), - "url": e.get("url"), - "default": e.get("default"), - "is_set": bool(get_env_value(e["key"])), - } - for e in prov.get("env_vars", []) - ] - # Surface the same active-provider determination the CLI picker - # uses (``_is_provider_active``) so the GUI highlights the provider - # actually written to config (e.g. web.backend), not just the first - # keyless one in the list. - is_active = _is_provider_active(prov, config, force_fresh=True) - if is_active and active_provider is None: - active_provider = prov["name"] - providers.append({ - "name": prov["name"], - "badge": prov.get("badge", ""), - "tag": prov.get("tag", ""), - "env_vars": env_vars, - "post_setup": prov.get("post_setup"), - "requires_nous_auth": bool(prov.get("requires_nous_auth")), - "is_active": is_active, - }) + with _profile_scope(profile): + config = load_config() + cat = TOOL_CATEGORIES.get(name) + providers = [] + active_provider = None + if cat: + for prov in _visible_providers(cat, config, force_fresh=True): + env_vars = [ + { + "key": e["key"], + "prompt": e.get("prompt", e["key"]), + "url": e.get("url"), + "default": e.get("default"), + "is_set": bool(get_env_value(e["key"])), + } + for e in prov.get("env_vars", []) + ] + # Surface the same active-provider determination the CLI picker + # uses (``_is_provider_active``) so the GUI highlights the provider + # actually written to config (e.g. web.backend), not just the first + # keyless one in the list. 
+ is_active = _is_provider_active(prov, config, force_fresh=True) + if is_active and active_provider is None: + active_provider = prov["name"] + providers.append({ + "name": prov["name"], + "badge": prov.get("badge", ""), + "tag": prov.get("tag", ""), + "env_vars": env_vars, + "post_setup": prov.get("post_setup"), + "requires_nous_auth": bool(prov.get("requires_nous_auth")), + "is_active": is_active, + }) return { "name": name, "has_category": cat is not None, @@ -8499,10 +8960,13 @@ async def get_toolset_config(name: str): class ToolsetProviderSelect(BaseModel): provider: str + profile: Optional[str] = None @app.put("/api/tools/toolsets/{name}/provider") -async def select_toolset_provider(name: str, body: ToolsetProviderSelect): +async def select_toolset_provider( + name: str, body: ToolsetProviderSelect, profile: Optional[str] = None +): """Persist a provider selection for a toolset (no key prompting). Delegates to ``apply_provider_selection`` — the shared, non-interactive @@ -8520,21 +8984,23 @@ async def select_toolset_provider(name: str, body: ToolsetProviderSelect): if name not in valid: raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}") - config = load_config() - try: - apply_provider_selection(name, body.provider, config) - except KeyError as exc: - raise HTTPException(status_code=400, detail=str(exc).strip('"')) - save_config(config) + with _profile_scope(body.profile or profile): + config = load_config() + try: + apply_provider_selection(name, body.provider, config) + except KeyError as exc: + raise HTTPException(status_code=400, detail=str(exc).strip('"')) + save_config(config) return {"ok": True, "name": name, "provider": body.provider} class ToolsetEnvUpdate(BaseModel): env: Dict[str, str] + profile: Optional[str] = None @app.put("/api/tools/toolsets/{name}/env") -async def save_toolset_env(name: str, body: ToolsetEnvUpdate): +async def save_toolset_env(name: str, body: ToolsetEnvUpdate, profile: Optional[str] = None): """Persist 
API keys for a toolset's provider env vars. Writes each ``key: value`` to ``~/.hermes/.env`` via ``save_env_value`` — @@ -8556,43 +9022,47 @@ async def save_toolset_env(name: str, body: ToolsetEnvUpdate): if name not in valid_ts: raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}") - config = load_config() - cat = TOOL_CATEGORIES.get(name) - allowed: set[str] = set() - if cat: - for prov in _visible_providers(cat, config, force_fresh=True): - for e in prov.get("env_vars", []): - allowed.add(e["key"]) + with _profile_scope(body.profile or profile): + config = load_config() + cat = TOOL_CATEGORIES.get(name) + allowed: set[str] = set() + if cat: + for prov in _visible_providers(cat, config, force_fresh=True): + for e in prov.get("env_vars", []): + allowed.add(e["key"]) - unknown = [k for k in body.env if k not in allowed] - if unknown: - raise HTTPException( - status_code=400, - detail=f"Unknown env var(s) for toolset {name}: {', '.join(sorted(unknown))}", - ) + unknown = [k for k in body.env if k not in allowed] + if unknown: + raise HTTPException( + status_code=400, + detail=f"Unknown env var(s) for toolset {name}: {', '.join(sorted(unknown))}", + ) - saved: List[str] = [] - skipped: List[str] = [] - for key, value in body.env.items(): - if value and value.strip(): - try: - save_env_value(key, value.strip()) - except ValueError as exc: - raise HTTPException(status_code=400, detail=str(exc)) - saved.append(key) - else: - skipped.append(key) + saved: List[str] = [] + skipped: List[str] = [] + for key, value in body.env.items(): + if value and value.strip(): + try: + save_env_value(key, value.strip()) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + saved.append(key) + else: + skipped.append(key) - status = {k: bool(get_env_value(k)) for k in allowed} + status = {k: bool(get_env_value(k)) for k in allowed} return {"ok": True, "name": name, "saved": saved, "skipped": skipped, "is_set": status} class 
ToolsetPostSetup(BaseModel): key: str + profile: Optional[str] = None @app.post("/api/tools/toolsets/{name}/post-setup") -async def run_toolset_post_setup(name: str, body: ToolsetPostSetup): +async def run_toolset_post_setup( + name: str, body: ToolsetPostSetup, profile: Optional[str] = None +): """Spawn a provider's post-setup install hook as a background action. Post-setup hooks (npm install for browser/Camofox, pip install for @@ -8602,6 +9072,12 @@ async def run_toolset_post_setup(name: str, body: ToolsetPostSetup): ``GET /api/actions/tools-post-setup/status``. The ``key`` is validated against the declared post-setup allowlist before spawning. Returns 400 for unknown toolset or post-setup key. + + ``profile`` spawns the hook as ``hermes -p <name> tools post-setup``. + Most hooks install machine-level artifacts (repo node_modules, shared + pip packages) where the scope is inert, but hooks that read config or + write per-profile state must see the same HERMES_HOME the rest of the + drawer's writes targeted — so the scope is threaded for consistency.
""" from hermes_cli.tools_config import ( _get_effective_configurable_toolsets, @@ -8619,8 +9095,12 @@ async def run_toolset_post_setup(name: str, body: ToolsetPostSetup): try: proc = _spawn_hermes_action( - ["tools", "post-setup", body.key], "tools-post-setup" + _profile_cli_args(body.profile or profile) + + ["tools", "post-setup", body.key], + "tools-post-setup", ) + except HTTPException: + raise except Exception as exc: _log.exception("Failed to spawn tools post-setup") raise HTTPException( @@ -8636,23 +9116,26 @@ async def run_toolset_post_setup(name: str, body: ToolsetPostSetup): class RawConfigUpdate(BaseModel): yaml_text: str + profile: Optional[str] = None @app.get("/api/config/raw") -async def get_config_raw(): - path = get_config_path() +async def get_config_raw(profile: Optional[str] = None): + with _profile_scope(profile): + path = get_config_path() if not path.exists(): return {"yaml": ""} return {"yaml": path.read_text(encoding="utf-8")} @app.put("/api/config/raw") -async def update_config_raw(body: RawConfigUpdate): +async def update_config_raw(body: RawConfigUpdate, profile: Optional[str] = None): try: parsed = yaml.safe_load(body.yaml_text) if not isinstance(parsed, dict): raise HTTPException(status_code=400, detail="YAML must be a mapping") - save_config(parsed) + with _profile_scope(body.profile or profile): + save_config(parsed) return {"ok": True} except yaml.YAMLError as e: raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}") @@ -9100,6 +9583,7 @@ def _ws_auth_ok(ws: "WebSocket") -> bool: def _resolve_chat_argv( resume: Optional[str] = None, sidecar_url: Optional[str] = None, + profile: Optional[str] = None, ) -> tuple[list[str], Optional[str], Optional[dict]]: """Resolve the argv + cwd + env for the chat PTY. 
@@ -9119,9 +9603,24 @@ def _resolve_chat_argv( `sidecar_url` (when set) is forwarded as ``HERMES_TUI_SIDECAR_URL`` so the spawned ``tui_gateway.entry`` can mirror dispatcher emits to the dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`). + + `profile` (when set) scopes the ENTIRE chat to that profile by pointing + ``HERMES_HOME`` at the profile dir in the child env. Every spawned + process (the TUI and the ``tui_gateway.entry`` it launches) resolves + ``get_hermes_home()`` from that env var at its own import, so the child + binds the profile's config, skills, memory, and state.db from the start + — the same propagation ``hermes -p <name>`` performs. The in-process + ``HERMES_TUI_GATEWAY_URL`` attach is SKIPPED for scoped chats: the + dashboard's in-memory gateway runs under the dashboard's own profile, + so a profile-scoped chat must spawn its own gateway subprocess. """ from hermes_cli.main import PROJECT_ROOT, _make_tui_argv + profile_dir: Optional[Path] = None + requested = (profile or "").strip() + if requested and requested.lower() != "current": + profile_dir = _resolve_profile_dir(requested) + argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False) env = os.environ.copy() try: @@ -9139,6 +9638,9 @@ def _resolve_chat_argv( env.setdefault("HERMES_TUI_DISABLE_MOUSE", "1") env.setdefault("HERMES_TUI_INLINE", "1") + if profile_dir is not None: + env["HERMES_HOME"] = str(profile_dir) + if resume: latest_resume, _latest_path = _session_latest_descendant(resume) if latest_resume: @@ -9148,8 +9650,13 @@ def _resolve_chat_argv( if sidecar_url: env["HERMES_TUI_SIDECAR_URL"] = sidecar_url - if gateway_ws_url := _build_gateway_ws_url(): - env["HERMES_TUI_GATEWAY_URL"] = gateway_ws_url + # Profile-scoped chats must NOT attach to the dashboard's in-memory + # gateway — it runs under the dashboard's own profile. Without the + # attach URL, gatewayClient spawns its own `tui_gateway.entry`, which + # inherits the profile HERMES_HOME set above. 
+ if profile_dir is None: + if gateway_ws_url := _build_gateway_ws_url(): + env["HERMES_TUI_GATEWAY_URL"] = gateway_ws_url return list(argv), str(cwd) if cwd else None, env @@ -9312,11 +9819,19 @@ async def pty_ws(ws: WebSocket) -> None: # --- spawn PTY ------------------------------------------------------ resume = ws.query_params.get("resume") or None + profile = ws.query_params.get("profile") or None channel = _channel_or_close_code(ws) sidecar_url = _build_sidecar_url(channel) if channel else None try: - argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url) + argv, cwd, env = _resolve_chat_argv( + resume=resume, sidecar_url=sidecar_url, profile=profile + ) + except HTTPException as exc: + # Unknown/invalid profile from _resolve_profile_dir. + await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc.detail}\x1b[0m\r\n") + await ws.close(code=1011) + return except SystemExit as exc: # _make_tui_argv calls sys.exit(1) when node/npm is missing. await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n") @@ -10594,8 +11109,15 @@ def start_server( port: int = 9119, open_browser: bool = True, allow_public: bool = False, + initial_profile: str = "", ): - """Start the web UI server.""" + """Start the web UI server. + + ``initial_profile`` (when set) is appended to the auto-opened browser + URL as ``?profile=<name>`` so the SPA's profile switcher preselects it + — used when a profile alias (``<name> dashboard``) routes to the + machine dashboard. 
+ """ import uvicorn # Phase 0: stash the auth-gate flag on app.state so middleware / SPA-token @@ -10686,10 +11208,15 @@ def start_server( ) if _has_display: + _open_url = f"http://{host}:{port}" + if initial_profile: + from urllib.parse import quote + _open_url += f"/?profile={quote(initial_profile)}" + def _open(): try: time.sleep(1.0) - webbrowser.open(f"http://{host}:{port}") + webbrowser.open(_open_url) except Exception: pass diff --git a/hermes_state.py b/hermes_state.py index bda6eeacd62..0f97ebdf098 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -1715,15 +1715,51 @@ class SessionDB: """Archive or unarchive a session. Archived sessions are hidden from the default session list but keep all - their messages — this is a soft hide, not a delete. Returns True when a - row was updated. + their messages — this is a soft hide, not a delete. For compression + chains, archive the whole logical conversation. Desktop lists compression + roots projected forward to their latest continuation; updating only the + displayed tip lets the still-unarchived root resurrect it on refresh. + Returns True when at least one row was updated. """ def _do(conn): cursor = conn.execute( - "UPDATE sessions SET archived = ? WHERE id = ?", - (1 if archived else 0, session_id), + """ + WITH RECURSIVE + ancestors(id) AS ( + SELECT ? + UNION + SELECT parent.id + FROM ancestors a + JOIN sessions child ON child.id = a.id + JOIN sessions parent ON parent.id = child.parent_session_id + WHERE parent.end_reason = 'compression' + AND child.started_at >= parent.ended_at + ), + descendants(id) AS ( + SELECT ? + UNION + SELECT child.id + FROM descendants d + JOIN sessions parent ON parent.id = d.id + JOIN sessions child ON child.parent_session_id = parent.id + WHERE parent.end_reason = 'compression' + AND child.started_at >= parent.ended_at + ), + lineage(id) AS ( + SELECT id FROM ancestors + UNION + SELECT id FROM descendants + ) + UPDATE sessions + SET archived = ? 
+ WHERE id IN (SELECT id FROM lineage) + """, + (session_id, session_id, 1 if archived else 0), ) - return cursor.rowcount + rowcount = cursor.rowcount + if rowcount is None or rowcount < 0: + rowcount = conn.execute("SELECT changes()").fetchone()[0] + return rowcount rowcount = self._execute_write(_do) return rowcount > 0 @@ -3658,6 +3694,48 @@ class SessionDB: self._remove_session_files(sessions_dir, session_id) return deleted + def delete_session_if_empty( + self, + session_id: str, + sessions_dir: Optional[Path] = None, + ) -> bool: + """Delete *session_id* only when it never gained resumable content. + + A session is considered empty when it has no messages and no + user-assigned title. Used by CLI exit / session-rotation paths so + immediately-started-and-quit sessions don't pile up in ``/resume`` + and ``hermes sessions list`` output. (Pattern ported from + google-gemini/gemini-cli#27770.) + + The emptiness check and delete run in one transaction, so a message + flushed concurrently by another writer can't be lost. Sessions with + children (delegate subagent runs) are preserved — a parent that + spawned work is not "empty" even if its own transcript never + flushed. Returns True if the session was deleted. + """ + def _do(conn): + cursor = conn.execute( + """ + DELETE FROM sessions + WHERE id = ? + AND title IS NULL + AND NOT EXISTS ( + SELECT 1 FROM messages WHERE messages.session_id = sessions.id + ) + AND NOT EXISTS ( + SELECT 1 FROM sessions child + WHERE child.parent_session_id = sessions.id + ) + """, + (session_id,), + ) + return cursor.rowcount > 0 + + deleted = self._execute_write(_do) + if deleted: + self._remove_session_files(sessions_dir, session_id) + return bool(deleted) + def delete_sessions( self, session_ids: List[str], diff --git a/locales/af.yaml b/locales/af.yaml index bb3fae463ee..1ac315a1d4d 100644 --- a/locales/af.yaml +++ b/locales/af.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Sessie-databasis is nie beskikbaar nie." 
+ parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Geen benoemde sessies gevind nie.\nGebruik `/title My Sessie` om jou huidige sessie 'n naam te gee, en dan `/resume My Sessie` om later daarheen terug te keer." list_header: "📋 **Benoemde Sessies**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes Gateway Status**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**Sessie-ID:** `{session_id}`" title: "**Titel:** {title}" created: "**Geskep:** {timestamp}" diff --git a/locales/de.yaml b/locales/de.yaml index 437a90a9476..f83181c9815 100644 --- a/locales/de.yaml +++ b/locales/de.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Sitzungsdatenbank nicht verfügbar." 
+ parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Keine benannten Sitzungen gefunden.\nVerwenden Sie `/title Meine Sitzung`, um die aktuelle Sitzung zu benennen, dann `/resume Meine Sitzung`, um später dorthin zurückzukehren." 
list_header: "📋 **Benannte Sitzungen**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes-Gateway-Status**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**Sitzungs-ID:** `{session_id}`" title: "**Titel:** {title}" created: "**Erstellt:** {timestamp}" diff --git a/locales/en.yaml b/locales/en.yaml index 1516977ccb6..acf15ae1a12 100644 --- a/locales/en.yaml +++ b/locales/en.yaml @@ -234,6 +234,11 @@ gateway: resume: db_unavailable: "Session database not available." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}.\nUse quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room.\nUse `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**.\nFuture messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "No named sessions found.\nUse `/title My Session` to name your current session, then `/resume My Session` to return to it later." 
list_header: "📋 **Named Sessions**\n" list_item: "• **{title}**{preview_part}" @@ -266,6 +271,12 @@ gateway: status: header: "📊 **Hermes Gateway Status**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**Session ID:** `{session_id}`" title: "**Title:** {title}" created: "**Created:** {timestamp}" diff --git a/locales/es.yaml b/locales/es.yaml index b22fc2ec429..429f9f0f987 100644 --- a/locales/es.yaml +++ b/locales/es.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Base de datos de sesiones no disponible." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "No se encontraron sesiones con nombre.\nUsa `/title Mi sesión` para nombrar la sesión actual y luego `/resume Mi sesión` para volver a ella." 
list_header: "📋 **Sesiones con nombre**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Estado de Hermes Gateway**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID de sesión:** `{session_id}`" title: "**Título:** {title}" created: "**Creado:** {timestamp}" diff --git a/locales/fr.yaml b/locales/fr.yaml index 8201df6c3f3..ad17ee61fa9 100644 --- a/locales/fr.yaml +++ b/locales/fr.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Base de données des sessions indisponible." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Aucune session nommée trouvée.\nUtilisez `/title Ma session` pour nommer la session actuelle, puis `/resume Ma session` pour y revenir plus tard." 
list_header: "📋 **Sessions nommées**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **État de Hermes Gateway**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID de session :** `{session_id}`" title: "**Titre :** {title}" created: "**Créé :** {timestamp}" diff --git a/locales/ga.yaml b/locales/ga.yaml index eaf957a2912..8acb02e3814 100644 --- a/locales/ga.yaml +++ b/locales/ga.yaml @@ -223,6 +223,14 @@ gateway: resume: db_unavailable: "Níl bunachar sonraí na seisiún ar fáil." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Níor aimsíodh aon seisiún ainmnithe.\nÚsáid `/title M'Ainm Seisiúin` chun do sheisiún reatha a ainmniú, ansin `/resume M'Ainm Seisiúin` chun filleadh air níos déanaí." list_header: "📋 **Seisiúin Ainmnithe**\n" list_item: "• **{title}**{preview_part}" @@ -255,6 +263,12 @@ gateway: status: header: "📊 **Stádas Hermes Gateway**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID Seisiúin:** `{session_id}`" title: "**Teideal:** {title}" created: "**Cruthaithe:** {timestamp}" diff --git a/locales/hu.yaml b/locales/hu.yaml index 78b18ac1942..8afe07bba47 100644 --- a/locales/hu.yaml +++ b/locales/hu.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "A munkamenet-adatbázis nem érhető el." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." 
+ matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Nem található elnevezett munkamenet.\nHasználd a `/title Saját munkamenet` parancsot a jelenlegi munkamenet elnevezéséhez, majd a `/resume Saját munkamenet` paranccsal térhetsz vissza hozzá." list_header: "📋 **Elnevezett munkamenetek**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes Gateway állapot**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**Munkamenet-azonosító:** `{session_id}`" title: "**Cím:** {title}" created: "**Létrehozva:** {timestamp}" diff --git a/locales/it.yaml b/locales/it.yaml index 89d4e0796bb..2e355c94c68 100644 --- a/locales/it.yaml +++ b/locales/it.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Database delle sessioni non disponibile." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). 
Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Nessuna sessione con nome trovata.\nUsa `/title My Session` per dare un nome alla sessione attuale, poi `/resume My Session` per tornare a essa in seguito." list_header: "📋 **Sessioni con nome**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Stato del Gateway Hermes**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID sessione:** `{session_id}`" title: "**Titolo:** {title}" created: "**Creata:** {timestamp}" diff --git a/locales/ja.yaml b/locales/ja.yaml index 1758746df02..d860684acf2 100644 --- a/locales/ja.yaml +++ b/locales/ja.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "セッションデータベースは利用できません。" + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). 
Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "名前付きセッションが見つかりません。\n`/title セッション名` で現在のセッションに名前を付けると、後で `/resume セッション名` で戻れます。" list_header: "📋 **名前付きセッション**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes ゲートウェイ状態**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**セッション ID:** `{session_id}`" title: "**タイトル:** {title}" created: "**作成日時:** {timestamp}" diff --git a/locales/ko.yaml b/locales/ko.yaml index 19fbd28cb30..0966fb22ce2 100644 --- a/locales/ko.yaml +++ b/locales/ko.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "세션 데이터베이스를 사용할 수 없습니다." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." 
+ matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. +Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "이름이 지정된 세션이 없습니다.\n현재 세션에 이름을 지정하려면 `/title 내 세션`을 사용하고, 나중에 `/resume 내 세션`으로 돌아오세요." list_header: "📋 **이름이 지정된 세션**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes 게이트웨이 상태**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**세션 ID:** `{session_id}`" title: "**제목:** {title}" created: "**생성됨:** {timestamp}" diff --git a/locales/pt.yaml b/locales/pt.yaml index 191ad1413ec..fa74c6f90e9 100644 --- a/locales/pt.yaml +++ b/locales/pt.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Base de dados de sessões indisponível." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Não foram encontradas sessões com nome.\nUsa `/title A minha sessão` para nomear a sessão atual e depois `/resume A minha sessão` para voltar a ela." list_header: "📋 **Sessões com nome**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Estado do Hermes Gateway**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID da sessão:** `{session_id}`" title: "**Título:** {title}" created: "**Criada:** {timestamp}" diff --git a/locales/ru.yaml b/locales/ru.yaml index ce526d7b47f..979601aedaa 100644 --- a/locales/ru.yaml +++ b/locales/ru.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "База данных сеансов недоступна." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Именованных сеансов не найдено.\nИспользуйте `/title Мой сеанс`, чтобы назвать текущий сеанс, затем `/resume Мой сеанс`, чтобы вернуться к нему позже." list_header: "📋 **Именованные сеансы**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Состояние Hermes Gateway**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID сеанса:** `{session_id}`" title: "**Название:** {title}" created: "**Создано:** {timestamp}" diff --git a/locales/tr.yaml b/locales/tr.yaml index ecd23d8e977..259e56fa273 100644 --- a/locales/tr.yaml +++ b/locales/tr.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "Oturum veritabanı kullanılamıyor." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Adlandırılmış oturum bulunamadı.\nMevcut oturumu adlandırmak için `/title Oturumum`, daha sonra geri dönmek için `/resume Oturumum` kullanın." list_header: "📋 **Adlandırılmış Oturumlar**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes Gateway Durumu**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**Oturum kimliği:** `{session_id}`" title: "**Başlık:** {title}" created: "**Oluşturuldu:** {timestamp}" diff --git a/locales/uk.yaml b/locales/uk.yaml index b564ec30545..8f7d10ebfb7 100644 --- a/locales/uk.yaml +++ b/locales/uk.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "База даних сеансів недоступна." + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "Іменованих сеансів не знайдено.\nВикористайте `/title Мій сеанс`, щоб назвати поточний сеанс, потім `/resume Мій сеанс`, щоб повернутися до нього." list_header: "📋 **Іменовані сеанси**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Стан Hermes Gateway**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**ID сесії:** `{session_id}`" title: "**Назва:** {title}" created: "**Створено:** {timestamp}" diff --git a/locales/zh-hant.yaml b/locales/zh-hant.yaml index a2210d2c225..982a9b2918b 100644 --- a/locales/zh-hant.yaml +++ b/locales/zh-hant.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "工作階段資料庫無法使用。" + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "找不到已命名的工作階段。\n使用 `/title 我的工作階段` 為目前工作階段命名,然後使用 `/resume 我的工作階段` 返回。" list_header: "📋 **已命名工作階段**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes 閘道狀態**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**工作階段 ID:** `{session_id}`" title: "**標題:** {title}" created: "**建立時間:** {timestamp}" diff --git a/locales/zh.yaml b/locales/zh.yaml index 896a958778f..ee20289e16d 100644 --- a/locales/zh.yaml +++ b/locales/zh.yaml @@ -219,6 +219,14 @@ gateway: resume: db_unavailable: "会话数据库不可用。" + parse_error: "⚠️ Could not parse `/resume` arguments: {error}. +Use quotes around titles with spaces, for example: `/resume \"Project A Plan\"`." + matrix_no_named_sessions: "No named sessions found for this Matrix room. +Use `/title My Session` to name the current room session, `/resume --all` to list all Matrix sessions, or `/resume --cross-room ` to explicitly cross room boundaries." + matrix_blocked_no_origin: "⚠️ Matrix /resume blocked: this named session has no recorded room origin, so Hermes will not resume it inside the current room by default. Use `/resume --cross-room {name}` if you intentionally want to cross room boundaries." + matrix_blocked_other_room: "⚠️ Matrix /resume blocked: that session belongs to a different Matrix room ({room}). Use `/resume --cross-room {name}` if you intentionally want to resume it here." + matrix_cross_room_success: "⚠️ Cross-room resume: resumed **{title}** inside Matrix room **{room}**. 
+Future messages in this room will use that transcript until `/reset` or another `/resume`.{msg_part}" no_named_sessions: "未找到已命名的会话。\n使用 `/title 我的会话` 为当前会话命名,然后用 `/resume 我的会话` 返回。" list_header: "📋 **已命名会话**\n" list_item: "• **{title}**{preview_part}" @@ -251,6 +259,12 @@ gateway: status: header: "📊 **Hermes 网关状态**" + matrix_scope_header: "**Matrix scope:**" + matrix_scope_room: " room: {room}" + matrix_scope_room_id: " room_id: {room_id}" + matrix_scope_thread: " thread_id: {thread_id}" + matrix_scope_mode: " session_scope: {scope}" + matrix_scope_key: " session_key: {session_key}" session_id: "**会话 ID:** `{session_id}`" title: "**标题:** {title}" created: "**创建时间:** {timestamp}" diff --git a/optional-skills/blockchain/hyperliquid/SKILL.md b/optional-skills/blockchain/hyperliquid/SKILL.md index ec0671e0508..51843bbf1b3 100644 --- a/optional-skills/blockchain/hyperliquid/SKILL.md +++ b/optional-skills/blockchain/hyperliquid/SKILL.md @@ -36,7 +36,7 @@ Read-only — no API key, no signing, no order placement. Stdlib only — no external packages, no API key. -The script reads `~/.hermes/.env` for two optional defaults: +The script reads `${HERMES_HOME:-~/.hermes}/.env` for two optional defaults: - `HYPERLIQUID_API_URL` — defaults to `https://api.hyperliquid.xyz`. Set to `https://api.hyperliquid-testnet.xyz` for testnet. @@ -80,7 +80,7 @@ hyperliquid_client.py export [--interval 1h] [--hours N] [--output PATH] ``` For `state`, `spot-balances`, `fills`, `orders`, and `review`, the address is -optional when `HYPERLIQUID_USER_ADDRESS` is set in `~/.hermes/.env`. +optional when `HYPERLIQUID_USER_ADDRESS` is set in `${HERMES_HOME:-~/.hermes}/.env`. 
--- diff --git a/optional-skills/blockchain/hyperliquid/scripts/hyperliquid_client.py b/optional-skills/blockchain/hyperliquid/scripts/hyperliquid_client.py index 1079f6b6267..be2a95d5f99 100644 --- a/optional-skills/blockchain/hyperliquid/scripts/hyperliquid_client.py +++ b/optional-skills/blockchain/hyperliquid/scripts/hyperliquid_client.py @@ -115,7 +115,7 @@ def _resolve_user(user: Optional[str]) -> str: sys.exit( "Missing Hyperliquid address. Pass
explicitly or set " - f"{DEFAULT_USER_ENV} in your environment or ~/.hermes/.env." + f"{DEFAULT_USER_ENV} in your environment or {_hermes_home() / '.env'}." ) diff --git a/optional-skills/creative/kanban-video-orchestrator/SKILL.md b/optional-skills/creative/kanban-video-orchestrator/SKILL.md index f06972abd5f..c5ac2a8c96e 100644 --- a/optional-skills/creative/kanban-video-orchestrator/SKILL.md +++ b/optional-skills/creative/kanban-video-orchestrator/SKILL.md @@ -182,7 +182,7 @@ task graphs. See **[references/examples.md](references/examples.md)**. right human-review gates. 8. **Verify API keys BEFORE firing.** External APIs (TTS, image-gen, - image-to-video) need keys in `~/.hermes/.env` or the user's secret store. + image-to-video) need keys in `${HERMES_HOME:-~/.hermes}/.env` or the user's secret store. A worker that hits a missing-key error wastes a task slot. The setup script's `check_key` helper aborts cleanly if a required key is missing. diff --git a/optional-skills/creative/kanban-video-orchestrator/assets/setup.sh.tmpl b/optional-skills/creative/kanban-video-orchestrator/assets/setup.sh.tmpl index 01d836def8d..3f7629d6293 100644 --- a/optional-skills/creative/kanban-video-orchestrator/assets/setup.sh.tmpl +++ b/optional-skills/creative/kanban-video-orchestrator/assets/setup.sh.tmpl @@ -23,8 +23,9 @@ check_key() { local var="$1" local kc_account="${2:-hermes}" local kc_service="${3:-$1}" - if grep -q "^${var}=" "$HOME/.hermes/.env" 2>/dev/null && \ - [ -n "$(grep "^${var}=" "$HOME/.hermes/.env" | cut -d= -f2-)" ]; then + local _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env" + if grep -q "^${var}=" "$_hermes_env" 2>/dev/null && \ + [ -n "$(grep "^${var}=" "$_hermes_env" | cut -d= -f2-)" ]; then echo " ✓ ${var} (env)" return 0 fi @@ -33,7 +34,7 @@ check_key() { echo " ✓ ${var} (Keychain ${kc_account}/${kc_service})" return 0 fi - echo " ✗ ${var} not set in ~/.hermes/.env or Keychain (${kc_account}/${kc_service})" + echo " ✗ ${var} not set in ${_hermes_env} 
or Keychain (${kc_account}/${kc_service})" return 1 } diff --git a/optional-skills/creative/kanban-video-orchestrator/references/kanban-setup.md b/optional-skills/creative/kanban-video-orchestrator/references/kanban-setup.md index ab449a0b0a4..53e4f269997 100644 --- a/optional-skills/creative/kanban-video-orchestrator/references/kanban-setup.md +++ b/optional-skills/creative/kanban-video-orchestrator/references/kanban-setup.md @@ -218,22 +218,24 @@ The director turns this into actual `kanban_create` calls. ## API-key prerequisites check Before firing the kanban, verify required keys are available. Check both -`~/.hermes/.env` and macOS Keychain (if on macOS): +the Hermes `.env` (`${HERMES_HOME:-$HOME/.hermes}/.env`) and macOS Keychain +(if on macOS): ```bash check_key() { local var="$1" local kc_account="$2" local kc_service="$3" - if grep -q "^${var}=" ~/.hermes/.env 2>/dev/null && \ - [ -n "$(grep "^${var}=" ~/.hermes/.env | cut -d= -f2-)" ]; then + local _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env" + if grep -q "^${var}=" "$_hermes_env" 2>/dev/null && \ + [ -n "$(grep "^${var}=" "$_hermes_env" | cut -d= -f2-)" ]; then return 0 fi if command -v security >/dev/null 2>&1 && \ security find-generic-password -a "${kc_account}" -s "${kc_service}" -w >/dev/null 2>&1; then return 0 fi - echo "ERROR: ${var} not set in ~/.hermes/.env or Keychain (${kc_account}/${kc_service})" + echo "ERROR: ${var} not set in ${_hermes_env} or Keychain (${kc_account}/${kc_service})" return 1 } diff --git a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md index 5a52d15ddd0..b5e59c31478 100644 --- a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md +++ b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md @@ -284,7 +284,7 @@ skills: ## API key requirements Track these in the project setup. 
The setup script should verify each required -key is present in `~/.hermes/.env` (or macOS Keychain) before firing the kanban. +key is present in `${HERMES_HOME:-~/.hermes}/.env` (or macOS Keychain) before firing the kanban. | Service | Env var | Used by | |---------|---------|---------| @@ -301,7 +301,7 @@ key is present in `~/.hermes/.env` (or macOS Keychain) before firing the kanban. | Anthropic | `ANTHROPIC_API_KEY` | every Hermes profile (Claude) | If a key is missing, prompt the user to add it. Storage methods, in order of -preference: macOS Keychain → `~/.hermes/.env` → environment variable. +preference: macOS Keychain → `${HERMES_HOME:-~/.hermes}/.env` → environment variable. ## Skill version pinning diff --git a/optional-skills/devops/watchers/SKILL.md b/optional-skills/devops/watchers/SKILL.md index 628f340b4c8..7c326ae7e4b 100644 --- a/optional-skills/devops/watchers/SKILL.md +++ b/optional-skills/devops/watchers/SKILL.md @@ -62,7 +62,7 @@ python $HERMES_HOME/skills/devops/watchers/scripts/watch_rss.py \ --name hn --url https://news.ycombinator.com/rss --max 5 ``` -Watch a GitHub repo (set `GITHUB_TOKEN` in `~/.hermes/.env` to avoid the 60 req/hr anonymous rate limit): +Watch a GitHub repo (set `GITHUB_TOKEN` in `${HERMES_HOME:-~/.hermes}/.env` to avoid the 60 req/hr anonymous rate limit): ```bash python $HERMES_HOME/skills/devops/watchers/scripts/watch_github.py \ diff --git a/optional-skills/devops/watchers/scripts/watch_github.py b/optional-skills/devops/watchers/scripts/watch_github.py index bb4a3ca6f30..4b42d4ed3ee 100755 --- a/optional-skills/devops/watchers/scripts/watch_github.py +++ b/optional-skills/devops/watchers/scripts/watch_github.py @@ -8,7 +8,8 @@ Usage (via cron with --no-agent): --script "$HERMES_HOME/skills/devops/watchers/scripts/watch_github.py" \\ --script-args "--name hermes-issues --repo NousResearch/hermes-agent --scope issues" -Set GITHUB_TOKEN (or GH_TOKEN) in ~/.hermes/.env to avoid the 60 req/hr +Set GITHUB_TOKEN (or 
GH_TOKEN) in the Hermes .env file +(``${HERMES_HOME:-~/.hermes}/.env``) to avoid the 60 req/hr anonymous rate limit. Scopes: issues | pulls | releases | commits. Or pass --search QUERY to diff --git a/optional-skills/productivity/canvas/SKILL.md b/optional-skills/productivity/canvas/SKILL.md index fbcfec5853a..68d6402e554 100644 --- a/optional-skills/productivity/canvas/SKILL.md +++ b/optional-skills/productivity/canvas/SKILL.md @@ -26,7 +26,7 @@ Read-only access to Canvas LMS for listing courses and assignments. 2. Go to **Account → Settings** (click your profile icon, then Settings) 3. Scroll to **Approved Integrations** and click **+ New Access Token** 4. Name the token (e.g., "Hermes Agent"), set an optional expiry, and click **Generate Token** -5. Copy the token and add to `~/.hermes/.env`: +5. Copy the token and add to `${HERMES_HOME:-~/.hermes}/.env`: ``` CANVAS_API_TOKEN=your_token_here diff --git a/optional-skills/productivity/canvas/scripts/canvas_api.py b/optional-skills/productivity/canvas/scripts/canvas_api.py index 13599c57556..2390d5ff513 100644 --- a/optional-skills/productivity/canvas/scripts/canvas_api.py +++ b/optional-skills/productivity/canvas/scripts/canvas_api.py @@ -28,9 +28,12 @@ def _check_config(): if not CANVAS_BASE_URL: missing.append("CANVAS_BASE_URL") if missing: + hermes_env = os.path.join( + os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), ".env" + ) print( f"Missing required environment variables: {', '.join(missing)}\n" - "Set them in ~/.hermes/.env or export them in your shell.\n" + f"Set them in {hermes_env} or export them in your shell.\n" "See the canvas skill SKILL.md for setup instructions.", file=sys.stderr, ) diff --git a/optional-skills/productivity/shopify/SKILL.md b/optional-skills/productivity/shopify/SKILL.md index 0062674069a..4dc8dc93ad8 100644 --- a/optional-skills/productivity/shopify/SKILL.md +++ b/optional-skills/productivity/shopify/SKILL.md @@ -36,7 +36,7 @@ The REST Admin API is legacy since 
2024-04 and only receives security fixes. **U 1. In Shopify admin: **Settings → Apps and sales channels → Develop apps → Create an app**. 2. Click **Configure Admin API scopes**, select what you need (examples below), save. 3. **Install app** → the Admin API access token appears ONCE. Copy it immediately — Shopify will never show it again. Tokens start with `shpat_`. -4. Save to `~/.hermes/.env`: +4. Save to `${HERMES_HOME:-~/.hermes}/.env`: ``` SHOPIFY_ACCESS_TOKEN=shpat_xxxxxxxxxxxxxxxxxxxx SHOPIFY_STORE_DOMAIN=my-store.myshopify.com diff --git a/optional-skills/productivity/siyuan/SKILL.md b/optional-skills/productivity/siyuan/SKILL.md index 0417ba6c4c5..3f199776438 100644 --- a/optional-skills/productivity/siyuan/SKILL.md +++ b/optional-skills/productivity/siyuan/SKILL.md @@ -30,7 +30,7 @@ Use the [SiYuan](https://github.com/siyuan-note/siyuan) kernel API via curl to s 1. Install and run SiYuan (desktop or Docker) 2. Get your API token: **Settings > About > API token** -3. Store it in `~/.hermes/.env`: +3. Store it in `${HERMES_HOME:-~/.hermes}/.env`: ``` SIYUAN_TOKEN=your_token_here SIYUAN_URL=http://127.0.0.1:6806 diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md index b3d1d5884eb..f0d28614912 100644 --- a/optional-skills/productivity/telephony/SKILL.md +++ b/optional-skills/productivity/telephony/SKILL.md @@ -17,7 +17,7 @@ metadata: This optional skill gives Hermes practical phone capabilities while keeping telephony out of the core tool list. 
It ships with a helper script, `scripts/telephony.py`, that can: -- save provider credentials into `~/.hermes/.env` +- save provider credentials into `${HERMES_HOME:-~/.hermes}/.env` - search for and buy a Twilio phone number - remember that owned number for later sessions - send SMS / MMS from the owned number @@ -104,7 +104,7 @@ Why: The skill persists telephony state in two places: -### `~/.hermes/.env` +### `${HERMES_HOME:-~/.hermes}/.env` Used for long-lived provider credentials and owned-number IDs, for example: - `TWILIO_ACCOUNT_SID` - `TWILIO_AUTH_TOKEN` @@ -241,7 +241,7 @@ python3 "$SCRIPT" save-twilio AC... auth_token_here python3 "$SCRIPT" twilio-search --country US --area-code 702 --limit 10 ``` -3. Buy it and save it into `~/.hermes/.env` + state: +3. Buy it and save it into `${HERMES_HOME:-~/.hermes}/.env` + state: ```bash python3 "$SCRIPT" twilio-buy "+17025551234" --save-env ``` @@ -403,7 +403,7 @@ After setup, you should be able to do all of the following with just this skill: 1. `diagnose` shows provider readiness and remembered state 2. search and buy a Twilio number -3. persist that number to `~/.hermes/.env` +3. persist that number to `${HERMES_HOME:-~/.hermes}/.env` 4. send an SMS from the owned number 5. poll inbound texts for the owned number later 6. place a direct Twilio call diff --git a/optional-skills/productivity/telephony/scripts/telephony.py b/optional-skills/productivity/telephony/scripts/telephony.py index 188b6be2ad9..291fd8629ab 100644 --- a/optional-skills/productivity/telephony/scripts/telephony.py +++ b/optional-skills/productivity/telephony/scripts/telephony.py @@ -2,7 +2,7 @@ """Telephony helper for the Hermes optional telephony skill. 
Capabilities: -- Persist telephony provider credentials to ~/.hermes/.env +- Persist telephony provider credentials to the Hermes .env file ($HERMES_HOME/.env) - Search for, buy, and remember Twilio phone numbers - Make direct Twilio calls (TwiML or ) - Send SMS / MMS via Twilio @@ -286,7 +286,7 @@ def _twilio_creds() -> tuple[str, str]: if not sid or not token: raise TelephonyError( "Twilio credentials are not configured. Use 'save-twilio' or set " - "TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN in ~/.hermes/.env." + f"TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN in {_env_path()}." ) return sid, token @@ -420,7 +420,7 @@ def _resolve_twilio_number(identifier: str | None = None) -> OwnedTwilioNumber: raise TelephonyError( "No default Twilio phone number is set. Use 'twilio-buy --save-env', " - "'twilio-set-default', or set TWILIO_PHONE_NUMBER in ~/.hermes/.env." + f"'twilio-set-default', or set TWILIO_PHONE_NUMBER in {_env_path()}." ) @@ -756,7 +756,7 @@ def _vapi_import_twilio_number( api_key = _vapi_api_key() if not api_key: raise TelephonyError( - "Vapi is not configured. Use 'save-vapi' or set VAPI_API_KEY in ~/.hermes/.env first." + f"Vapi is not configured. Use 'save-vapi' or set VAPI_API_KEY in {_env_path()} first." ) owned = _resolve_twilio_number(phone_identifier) sid, token = _twilio_creds() @@ -803,7 +803,7 @@ def _bland_call( api_key = _bland_api_key() if not api_key: raise TelephonyError( - "Bland.ai is not configured. Use 'save-bland' or set BLAND_API_KEY in ~/.hermes/.env." + f"Bland.ai is not configured. Use 'save-bland' or set BLAND_API_KEY in {_env_path()}." ) normalized = _normalize_phone(phone_number) if voice is None: @@ -881,13 +881,13 @@ def _vapi_call( api_key = _vapi_api_key() if not api_key: raise TelephonyError( - "Vapi is not configured. Use 'save-vapi' or set VAPI_API_KEY in ~/.hermes/.env." + f"Vapi is not configured. Use 'save-vapi' or set VAPI_API_KEY in {_env_path()}." 
) phone_number_id = _vapi_phone_number_id() if not phone_number_id: raise TelephonyError( "No Vapi phone number id is configured. Import an owned Twilio number with " - "'vapi-import-twilio --save-env' or set VAPI_PHONE_NUMBER_ID in ~/.hermes/.env." + f"'vapi-import-twilio --save-env' or set VAPI_PHONE_NUMBER_ID in {_env_path()}." ) normalized = _normalize_phone(phone_number) voice_provider = _env_or_config( @@ -1091,7 +1091,7 @@ def save_twilio(account_sid: str, auth_token: str, phone_number: str = "", phone "provider": "twilio", "saved_env_keys": sorted(updates), "env_path": str(env_file), - "message": "Twilio credentials saved to ~/.hermes/.env.", + "message": f"Twilio credentials saved to {env_file}.", } if phone_number: result.update(_remember_twilio_number(phone_number=updates["TWILIO_PHONE_NUMBER"], phone_sid=phone_sid.strip(), save_env=False)) @@ -1111,7 +1111,7 @@ def save_bland(api_key: str, voice: str = BLAND_DEFAULT_VOICE) -> dict[str, Any] "provider": "bland", "saved_env_keys": ["BLAND_API_KEY", "BLAND_DEFAULT_VOICE", "PHONE_PROVIDER"], "env_path": str(env_file), - "message": "Bland.ai configuration saved to ~/.hermes/.env.", + "message": f"Bland.ai configuration saved to {env_file}.", } @@ -1138,7 +1138,7 @@ def save_vapi( "provider": "vapi", "saved_env_keys": sorted(updates), "env_path": str(env_file), - "message": "Vapi configuration saved to ~/.hermes/.env.", + "message": f"Vapi configuration saved to {env_file}.", } if phone_number_id: result.update(_remember_vapi_number(phone_number_id=phone_number_id.strip(), save_env=False)) @@ -1151,17 +1151,17 @@ def _build_parser() -> argparse.ArgumentParser: sub.add_parser("diagnose", help="Show saved telephony state and provider readiness") - p = sub.add_parser("save-twilio", help="Save Twilio credentials to ~/.hermes/.env") + p = sub.add_parser("save-twilio", help="Save Twilio credentials to the Hermes .env file") p.add_argument("account_sid") p.add_argument("auth_token") p.add_argument("--phone-number", 
default="") p.add_argument("--phone-sid", default="") - p = sub.add_parser("save-bland", help="Save Bland.ai settings to ~/.hermes/.env") + p = sub.add_parser("save-bland", help="Save Bland.ai settings to the Hermes .env file") p.add_argument("api_key") p.add_argument("--voice", default=BLAND_DEFAULT_VOICE) - p = sub.add_parser("save-vapi", help="Save Vapi settings to ~/.hermes/.env") + p = sub.add_parser("save-vapi", help="Save Vapi settings to the Hermes .env file") p.add_argument("api_key") p.add_argument("--phone-number-id", default="") p.add_argument("--voice-provider", default=VAPI_DEFAULT_VOICE_PROVIDER) @@ -1312,7 +1312,7 @@ def _dispatch(args: argparse.Namespace) -> dict[str, Any]: ) raise TelephonyError( f"Unsupported AI call provider '{provider}'. Use --provider bland or --provider vapi, " - "or set PHONE_PROVIDER in ~/.hermes/.env." + f"or set PHONE_PROVIDER in {_env_path()}." ) if cmd == "ai-status": provider = (args.provider or _ai_provider()).lower().strip() @@ -1322,7 +1322,7 @@ def _dispatch(args: argparse.Namespace) -> dict[str, Any]: return _bland_status(args.call_id, analyze=args.analyze or None) raise TelephonyError( f"Unsupported AI call provider '{provider}'. Use --provider bland or --provider vapi, " - "or set PHONE_PROVIDER in ~/.hermes/.env." + f"or set PHONE_PROVIDER in {_env_path()}." ) raise TelephonyError(f"Unknown command: {cmd}") diff --git a/optional-skills/security/1password/SKILL.md b/optional-skills/security/1password/SKILL.md index 2a6cc8e18b0..152cd13e60b 100644 --- a/optional-skills/security/1password/SKILL.md +++ b/optional-skills/security/1password/SKILL.md @@ -41,7 +41,7 @@ Use this skill when the user wants secrets managed through 1Password instead of ### Service Account (recommended for Hermes) -Set `OP_SERVICE_ACCOUNT_TOKEN` in `~/.hermes/.env` (the skill will prompt for this on first load). +Set `OP_SERVICE_ACCOUNT_TOKEN` in `${HERMES_HOME:-~/.hermes}/.env` (the skill will prompt for this on first load). 
No desktop app needed. Supports `op read`, `op inject`, `op run`. ```bash diff --git a/optional-skills/security/godmode/SKILL.md b/optional-skills/security/godmode/SKILL.md index 27751e93ecb..deda3471785 100644 --- a/optional-skills/security/godmode/SKILL.md +++ b/optional-skills/security/godmode/SKILL.md @@ -400,4 +400,4 @@ Claude Sonnet 4 is robust against all current techniques for clearly harmful con 9. **Always use `load_godmode.py` in execute_code** — The individual scripts (`parseltongue.py`, `godmode_race.py`, `auto_jailbreak.py`) have argparse CLI entry points with `if __name__ == '__main__'` blocks. When loaded via `exec()` in execute_code, `__name__` is `'__main__'` and argparse fires, crashing the script. The `load_godmode.py` loader handles this by setting `__name__` to a non-main value and managing sys.argv. 10. **boundary_inversion is model-version specific** — Works on Claude 3.5 Sonnet but NOT Claude Sonnet 4 or Claude 4.6. The strategy order in auto_jailbreak tries it first for Claude models, but falls through to refusal_inversion when it fails. Update the strategy order if you know the model version. 11. **Gray-area vs hard queries** — Jailbreak techniques work much better on "dual-use" queries (lock picking, security tools, chemistry) than on overtly harmful ones (phishing templates, malware). For hard queries, skip directly to ULTRAPLINIAN or use Hermes/Grok models that don't refuse. -12. **execute_code sandbox has no env vars** — When Hermes runs auto_jailbreak via execute_code, the sandbox doesn't inherit `~/.hermes/.env`. Load dotenv explicitly: `from dotenv import load_dotenv; load_dotenv(os.path.expanduser("~/.hermes/.env"))` +12. **execute_code sandbox has no env vars** — When Hermes runs auto_jailbreak via execute_code, the sandbox doesn't inherit the Hermes `.env`. 
Load dotenv explicitly: `import os; from dotenv import load_dotenv; load_dotenv(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), ".env"))` diff --git a/optional-skills/software-development/rest-graphql-debug/SKILL.md b/optional-skills/software-development/rest-graphql-debug/SKILL.md index 78f90f2a91f..64b96b3cdd3 100644 --- a/optional-skills/software-development/rest-graphql-debug/SKILL.md +++ b/optional-skills/software-development/rest-graphql-debug/SKILL.md @@ -397,7 +397,7 @@ class TestAPISmoke: ### Token handling - Never log full tokens. Redact: `Bearer `. -- Never hardcode tokens in scripts. Read from env (`os.environ["API_TOKEN"]`) or `~/.hermes/.env`. +- Never hardcode tokens in scripts. Read from env (`os.environ["API_TOKEN"]`) or `${HERMES_HOME:-~/.hermes}/.env`. - Rotate immediately if a token surfaces in logs, error messages, or git history. ### Safe logging diff --git a/plugins/web/parallel/provider.py b/plugins/web/parallel/provider.py index 38578e6b52c..7a15b3d3f80 100644 --- a/plugins/web/parallel/provider.py +++ b/plugins/web/parallel/provider.py @@ -1,14 +1,20 @@ """Parallel.ai web search + content extraction — plugin form. -Subclasses :class:`agent.web_search_provider.WebSearchProvider`. Uses two -distinct Parallel SDK clients: +Subclasses :class:`agent.web_search_provider.WebSearchProvider`. -- ``Parallel`` (sync) — for :meth:`search` -- ``AsyncParallel`` (async) — for :meth:`extract` +Search runs on one of two transports, picked by credential: -This is the first plugin to exercise the **async-extract** code path in -the ABC: :meth:`extract` is declared ``async def``, and the dispatcher -in :func:`tools.web_tools.web_extract_tool` detects coroutines via +- **No key →** the free hosted Search MCP at ``https://search.parallel.ai/mcp`` + (anonymous Streamable-HTTP JSON-RPC). 
This makes ``web_search`` work out of + the box with zero setup, which is why ``parallel`` is the keyless default + backend in :func:`tools.web_tools._get_backend`. +- **``PARALLEL_API_KEY`` →** the ``parallel`` SDK's v1 ``search`` / ``extract`` + REST endpoints (objective-tuned, mode-selectable, higher rate limits). + +Extract mirrors search: keyed uses the async SDK (``AsyncParallel``) v1 +``extract``; keyless uses the free MCP's ``web_fetch``. :meth:`extract` is +declared ``async def`` and the dispatcher in +:func:`tools.web_tools.web_extract_tool` detects coroutines via :func:`inspect.iscoroutinefunction` and awaits. Config keys this provider responds to:: @@ -17,25 +23,66 @@ Config keys this provider responds to:: search_backend: "parallel" # explicit per-capability extract_backend: "parallel" # explicit per-capability backend: "parallel" # shared fallback - # Optional: search mode (default "agentic"; also "fast" or "one-shot") - # via the PARALLEL_SEARCH_MODE env var. + # Optional: search mode (default "advanced"; also "basic") + # via the PARALLEL_SEARCH_MODE env var. REST path only. Env vars:: - PARALLEL_API_KEY=... # https://parallel.ai (required) - PARALLEL_SEARCH_MODE=agentic # optional: agentic|fast|one-shot + PARALLEL_API_KEY=... # https://parallel.ai (optional — unlocks + # the v1 REST Search API; without it, + # search and extract use the free MCP) + PARALLEL_SEARCH_MODE=advanced # optional: basic|advanced (legacy + # fast/one-shot map to basic, agentic to + # advanced). REST path only. """ from __future__ import annotations +import asyncio +import json import logging import os +import uuid from typing import Any, Dict, List +import httpx + from agent.web_search_provider import WebSearchProvider logger = logging.getLogger(__name__) +# Free hosted Search MCP — anonymous-friendly, used when no PARALLEL_API_KEY is +# configured. 
Docs: https://docs.parallel.ai/integrations/mcp/search-mcp +_MCP_SEARCH_URL = "https://search.parallel.ai/mcp" +_MCP_PROTOCOL_VERSION = "2025-06-18" +# Deliberately generic client identity. Project policy (see the telemetry PR +# policy in AGENTS.md) forbids third-party usage attribution without an +# explicit user opt-in, so neither clientInfo nor the User-Agent names +# hermes. MCP requires *a* clientInfo; a neutral one satisfies the spec +# without attributing traffic. +_MCP_CLIENT_NAME = "mcp-web-client" +_MCP_CLIENT_VERSION = "1.0.0" +_MCP_USER_AGENT = f"{_MCP_CLIENT_NAME}/{_MCP_CLIENT_VERSION}" +_MCP_TIMEOUT_SECONDS = 30.0 + +# Free-tier attribution. The hosted Search MCP is free to use; surfacing this +# on keyless results credits Parallel and matches the free-tier terms +# (https://parallel.ai/customer-terms). +_FREE_MCP_ATTRIBUTION = ( + "Search powered by the free Parallel Web Search MCP (https://parallel.ai)." +) + + +def _new_session_id() -> str: + """Mint a fresh Parallel ``session_id`` for a single tool call. + + Per-call rather than process-global: one process serves many unrelated + chats in the gateway/batch runners, and a shared id would pool their + searches into one Parallel session. The prefix is deliberately generic + (no hermes attribution — telemetry policy). + """ + return f"{_MCP_CLIENT_NAME}-{uuid.uuid4().hex}" + # Module-level note: the canonical cache slots ``_parallel_client`` and # ``_async_parallel_client`` live on :mod:`tools.web_tools` so tests that do # ``tools.web_tools._parallel_client = None`` between cases see fresh state. @@ -133,11 +180,319 @@ _get_async_parallel_client = _get_async_client def _resolve_search_mode() -> str: - """Return the validated PARALLEL_SEARCH_MODE value (default "agentic").""" - mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip() - if mode not in {"fast", "one-shot", "agentic"}: - mode = "agentic" - return mode + """Return the validated v1 search mode (default "advanced"). 
+ + V1 collapses the three Beta modes into two. We accept the v1 values + directly and map the legacy Beta values for back-compat with anyone who + still sets ``PARALLEL_SEARCH_MODE=fast|one-shot|agentic``: + + - ``fast`` / ``one-shot`` → ``basic`` (lower latency) + - ``agentic`` → ``advanced`` (higher quality, the v1 default) + """ + mode = os.getenv("PARALLEL_SEARCH_MODE", "advanced").lower().strip() + if mode == "basic" or mode in {"fast", "one-shot"}: + return "basic" + # advanced, legacy "agentic", and anything unrecognized → the v1 default. + return "advanced" + + +# --------------------------------------------------------------------------- +# Free Search MCP transport (keyless path) +# --------------------------------------------------------------------------- +# +# A small hand-rolled Streamable-HTTP JSON-RPC client for the hosted Search +# MCP, rather than the full MCP-client subsystem: we only call two tools +# (``web_search`` / ``web_fetch``), so keeping it inline lets web_search and +# web_extract stay ordinary tools with the MCP endpoint as just their wire +# protocol. + + +def _mcp_headers( + session_id: str | None, + api_key: str | None, + protocol_version: str | None = None, +) -> Dict[str, str]: + """Headers for an MCP request. + + A Bearer token is attached only when we actually hold a key — the free + endpoint is anonymous, and sending an empty/garbage token would make it + 401 instead of serving the anonymous tier. After ``initialize`` the + Streamable-HTTP spec expects the negotiated ``MCP-Protocol-Version`` on + every follow-up request, so we echo it once known. 
+ """ + headers = { + "Content-Type": "application/json", + "Accept": "application/json, text/event-stream", + "User-Agent": _MCP_USER_AGENT, + } + if session_id: + headers["Mcp-Session-Id"] = session_id + if protocol_version: + headers["MCP-Protocol-Version"] = protocol_version + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + return headers + + +def _iter_mcp_messages(text: str): + """Yield JSON-RPC message dicts from a plain-JSON or SSE response body. + + Handles ``application/json`` (a single object) and ``text/event-stream`` + (SSE: events separated by blank lines; an event's one-or-more ``data:`` + lines concatenate into a single JSON payload). Unparseable chunks and + non-``data`` SSE fields (``event:``/``id:``/comments) are skipped. + """ + def _emit(payload): + # Streamable HTTP allows batching responses/notifications into a JSON + # array — flatten so callers always see individual message dicts. + if isinstance(payload, list): + yield from payload + elif payload is not None: + yield payload + + body = (text or "").strip() + if not body: + return + if body.startswith("{") or body.startswith("["): + try: + parsed = json.loads(body) + except json.JSONDecodeError: + return + yield from _emit(parsed) + return + + data_lines: List[str] = [] + + def _flush(): + if not data_lines: + return None + try: + return json.loads("\n".join(data_lines)) + except json.JSONDecodeError: + return None + + for raw in body.split("\n"): + line = raw.rstrip("\r") + if line.startswith("data:"): + data_lines.append(line[len("data:"):].lstrip()) + elif line.strip() == "": # event boundary + yield from _emit(_flush()) + data_lines = [] + yield from _emit(_flush()) + + +def _mcp_response_envelope(text: str, request_id: str) -> Dict[str, Any]: + """Select the JSON-RPC response for *request_id* from an MCP response body. 
+ + Streamable-HTTP servers may emit progress/log notifications before the + final result, so we scan the whole stream and return the result/error + message whose ``id`` matches our request. Falls back to the last + result/error-bearing message if no id matches; ``{}`` if none is present. + """ + fallback: Dict[str, Any] = {} + for msg in _iter_mcp_messages(text): + if not isinstance(msg, dict) or not ("result" in msg or "error" in msg): + continue + if msg.get("id") == request_id: + return msg + fallback = msg + return fallback + + +def _mcp_payload(envelope: Dict[str, Any]) -> Dict[str, Any]: + """Extract the tool result payload from a ``tools/call`` envelope. + + Prefers ``structuredContent`` (authoritative machine-readable form); + otherwise scans text blocks for the first JSON-parseable one. Raises on a + JSON-RPC error or a tool-level ``isError``. + """ + if "error" in envelope: + raise RuntimeError(f"Parallel MCP error: {str(envelope['error'])[:500]}") + result = envelope.get("result") or {} + if result.get("isError"): + raise RuntimeError(f"Parallel MCP tool error: {str(result)[:500]}") + + structured = result.get("structuredContent") + if isinstance(structured, dict): + return structured + + for block in result.get("content", []) or []: + if isinstance(block, dict) and block.get("type") == "text": + text = str(block.get("text") or "") + if not text: + continue + try: + return json.loads(text) + except json.JSONDecodeError: + continue + raise RuntimeError( + f"Parallel MCP returned no parseable content: {str(result)[:500]}" + ) + + +def _mcp_call( + tool_name: str, arguments: Dict[str, Any], api_key: str | None +) -> Dict[str, Any]: + """Run the MCP handshake then a single ``tools/call`` and return its payload. + + initialize → (capture ``Mcp-Session-Id``) → notifications/initialized → + tools/call ``tool_name``. Returns the parsed tool payload dict (see + :func:`_mcp_payload`). A Bearer token is attached only when *api_key* is set. 
+ """ + with httpx.Client(timeout=_MCP_TIMEOUT_SECONDS) as client: + # 1. initialize — capture the server-assigned MCP session id. + init_id = str(uuid.uuid4()) + init = client.post( + _MCP_SEARCH_URL, + headers=_mcp_headers(None, api_key), + json={ + "jsonrpc": "2.0", + "id": init_id, + "method": "initialize", + "params": { + "protocolVersion": _MCP_PROTOCOL_VERSION, + "capabilities": {}, + "clientInfo": { + "name": _MCP_CLIENT_NAME, + "version": _MCP_CLIENT_VERSION, + }, + }, + }, + ) + init.raise_for_status() + # Only echo a session id the server actually issued. Stateless + # Streamable-HTTP servers may omit it; inventing one and sending it on + # follow-up requests can get those requests rejected (the server never + # created that session). When absent, the Mcp-Session-Id header is simply + # omitted (see _mcp_headers). This is separate from the tool-arg + # ``session_id`` below, which is a client-minted rate-limit/grouping id. + mcp_session_id = init.headers.get("mcp-session-id") + init_env = _mcp_response_envelope(init.text, init_id) + # Echo the negotiated protocol version on every post-init request, per + # the Streamable-HTTP spec (servers may enforce it). + negotiated_version = ( + (init_env.get("result") or {}).get("protocolVersion") + or _MCP_PROTOCOL_VERSION + ) + + # 2. notifications/initialized — required handshake ack. + client.post( + _MCP_SEARCH_URL, + headers=_mcp_headers(mcp_session_id, api_key, negotiated_version), + json={"jsonrpc": "2.0", "method": "notifications/initialized"}, + ) + + # 3. tools/call. 
+ call_id = str(uuid.uuid4()) + call = client.post( + _MCP_SEARCH_URL, + headers=_mcp_headers(mcp_session_id, api_key, negotiated_version), + json={ + "jsonrpc": "2.0", + "id": call_id, + "method": "tools/call", + "params": {"name": tool_name, "arguments": arguments}, + }, + ) + call.raise_for_status() + return _mcp_payload(_mcp_response_envelope(call.text, call_id)) + + +def _mcp_web_search(query: str, limit: int, api_key: str | None) -> Dict[str, Any]: + """Run a ``web_search`` tool call against the hosted Search MCP. + + Returns the standard provider search shape + (``{"success": True, "data": {"web": [...]}}``). The MCP serves a fixed + result count, so ``limit`` is applied client-side. The MCP requires + ``objective`` (REST treats it as optional), so we mirror the query. + """ + payload = _mcp_call( + "web_search", + { + "objective": query, + "search_queries": [query], + "session_id": _new_session_id(), + }, + api_key, + ) + + web_results: List[Dict[str, Any]] = [] + for i, result in enumerate((payload.get("results") or [])[: max(limit, 1)]): + if not isinstance(result, dict): + continue + excerpts = result.get("excerpts") or [] + web_results.append( + { + "url": result.get("url") or "", + "title": result.get("title") or "", + "description": " ".join(excerpts) if excerpts else "", + "position": i + 1, + } + ) + + # Credit the free tier (anonymous path only — keyed search uses REST and + # carries no attribution). + return { + "success": True, + "data": {"web": web_results}, + "provider": "parallel", + "attribution": _FREE_MCP_ATTRIBUTION, + } + + +def _mcp_web_fetch(urls: List[str], api_key: str | None) -> List[Dict[str, Any]]: + """Run a ``web_fetch`` tool call against the hosted Search MCP. + + Returns the per-URL extract shape that + :func:`tools.web_tools.web_extract_tool` expects — exactly one row per input + URL, in request order (including duplicates). 
We pass ``full_content=True`` + so the page body comes back as markdown (matching the keyed SDK path and + what extract callers/summarizers expect), falling back to excerpts only when + full content is absent. Any input the MCP didn't return is emitted as a + per-URL error row. + """ + payload = _mcp_call( + "web_fetch", + {"urls": list(urls), "full_content": True, "session_id": _new_session_id()}, + api_key, + ) + + # Index the response by URL, then emit one row per *input* URL in order so + # duplicates and positional alignment with the request list are preserved. + by_url: Dict[str, Dict[str, Any]] = {} + for item in payload.get("results") or []: + if isinstance(item, dict) and item.get("url"): + by_url.setdefault(item["url"], item) + + results: List[Dict[str, Any]] = [] + for url in urls: + item = by_url.get(url) + if item is None: + results.append( + { + "url": url, + "title": "", + "content": "", + "error": "extraction failed (no content returned)", + "metadata": {"sourceURL": url}, + } + ) + continue + title = item.get("title") or "" + # Prefer the full page body; fall back to joined excerpts (mirrors the + # keyed SDK extract path). + content = item.get("full_content") or "\n\n".join(item.get("excerpts") or []) + results.append( + { + "url": url, + "title": title, + "content": content, + "raw_content": content, + "metadata": {"sourceURL": url, "title": title}, + } + ) + + return results class ParallelWebSearchProvider(WebSearchProvider): @@ -152,7 +507,14 @@ class ParallelWebSearchProvider(WebSearchProvider): return "Parallel" def is_available(self) -> bool: - """Return True when ``PARALLEL_API_KEY`` is set to a non-empty value.""" + """Return True when ``PARALLEL_API_KEY`` is set. + + Deliberately key-based: this gates the registry's active-provider walk + and the ``hermes tools`` picker (auto-selecting Parallel for a user who + hasn't named it), so it must not claim availability on the keyless path. 
+ The keyless free-MCP path is reached independently via + :func:`tools.web_tools._get_backend`'s ``parallel`` terminal default. + """ return bool(os.getenv("PARALLEL_API_KEY", "").strip()) def supports_search(self) -> bool: @@ -164,9 +526,11 @@ class ParallelWebSearchProvider(WebSearchProvider): def search(self, query: str, limit: int = 5) -> Dict[str, Any]: """Execute a Parallel search (sync). - Uses the ``beta.search`` endpoint with the configured mode - (``PARALLEL_SEARCH_MODE`` env var, default "agentic"). Limit is - capped at 20 server-side. + With ``PARALLEL_API_KEY`` set, uses the v1 ``search`` REST endpoint with + the configured mode (``PARALLEL_SEARCH_MODE`` env var, default + "advanced"; limit requested via advanced_settings.max_results, capped at + 20). Without a key, falls back to the free hosted Search MCP so search + still works with zero setup. """ try: from tools.interrupt import is_interrupted @@ -174,19 +538,31 @@ class ParallelWebSearchProvider(WebSearchProvider): if is_interrupted(): return {"success": False, "error": "Interrupted"} + api_key = os.getenv("PARALLEL_API_KEY", "").strip() + if not api_key: + logger.info( + "Parallel search (free MCP): '%s' (limit=%d)", query, limit + ) + return _mcp_web_search(query, limit, api_key=None) + mode = _resolve_search_mode() logger.info( - "Parallel search: '%s' (mode=%s, limit=%d)", query, mode, limit + "Parallel search (v1 REST): '%s' (mode=%s, limit=%d)", + query, mode, limit, ) - response = _get_sync_client().beta.search( + # v1 Search API. Request the caller's limit via max_results (capped + # at 20) so we don't rely on the API default — the slice below can + # only trim, not ask for more. 
+ response = _get_sync_client().search( search_queries=[query], objective=query, mode=mode, - max_results=min(limit, 20), + session_id=_new_session_id(), + advanced_settings={"max_results": min(max(limit, 1), 20)}, ) web_results = [] - for i, result in enumerate(response.results or []): + for i, result in enumerate((response.results or [])[: max(limit, 1)]): excerpts = result.excerpts or [] web_results.append( { @@ -197,6 +573,8 @@ class ParallelWebSearchProvider(WebSearchProvider): } ) + # Paid/REST path: no attribution and no "[Parallel]" label — the + # branding is specifically for the free Search MCP tier. return {"success": True, "data": {"web": web_results}} except ValueError as exc: return {"success": False, "error": str(exc)} @@ -212,7 +590,12 @@ class ParallelWebSearchProvider(WebSearchProvider): async def extract( self, urls: List[str], **kwargs: Any ) -> List[Dict[str, Any]]: - """Extract content from one or more URLs via the async SDK. + """Extract content from one or more URLs. + + With ``PARALLEL_API_KEY`` set, uses the async SDK's v1 ``extract`` for + full page content. Without a key, falls back to the free hosted Search + MCP's ``web_fetch`` tool so extraction works with zero setup, mirroring + the keyless search path. Returns the legacy list-of-results shape that :func:`tools.web_tools.web_extract_tool` expects: one entry per @@ -227,10 +610,21 @@ class ParallelWebSearchProvider(WebSearchProvider): {"url": u, "error": "Interrupted", "title": ""} for u in urls ] - logger.info("Parallel extract: %d URL(s)", len(urls)) - response = await _get_async_client().beta.extract( + api_key = os.getenv("PARALLEL_API_KEY", "").strip() + if not api_key: + logger.info( + "Parallel extract (free MCP web_fetch): %d URL(s)", len(urls) + ) + # _mcp_web_fetch is sync httpx; run off the event loop. 
+ return await asyncio.to_thread(_mcp_web_fetch, list(urls), None) + + logger.info("Parallel extract (v1 REST): %d URL(s)", len(urls)) + # v1 Extract API (client.extract, /v1/extract); full_content is set + # via advanced_settings. + response = await _get_async_client().extract( urls=urls, - full_content=True, + advanced_settings={"full_content": True}, + session_id=_new_session_id(), ) results: List[Dict[str, Any]] = [] @@ -251,13 +645,20 @@ class ParallelWebSearchProvider(WebSearchProvider): ) for error in response.errors or []: + err_url = getattr(error, "url", "") or "" + err_msg = ( + getattr(error, "message", None) + or getattr(error, "content", None) + or getattr(error, "error_type", None) + or "extraction failed" + ) results.append( { - "url": error.url or "", + "url": err_url, "title": "", "content": "", - "error": error.content or error.error_type or "extraction failed", - "metadata": {"sourceURL": error.url or ""}, + "error": err_msg, + "metadata": {"sourceURL": err_url}, } ) @@ -279,12 +680,16 @@ class ParallelWebSearchProvider(WebSearchProvider): def get_setup_schema(self) -> Dict[str, Any]: return { "name": "Parallel", - "badge": "paid", - "tag": "Objective-tuned search + parallel page extraction.", + "badge": "free", + "tag": ( + "Free web search + extraction via Parallel's hosted Search MCP " + "— no key needed. Add PARALLEL_API_KEY for the v1 REST Search " + "API (richer modes, higher limits)." + ), "env_vars": [ { "key": "PARALLEL_API_KEY", - "prompt": "Parallel API key", + "prompt": "Parallel API key (optional — unlocks the v1 REST Search API)", "url": "https://parallel.ai", }, ], diff --git a/pyproject.toml b/pyproject.toml index b2a486aefd0..e5bf882d87a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,7 +122,7 @@ anthropic = ["anthropic==0.87.0"] # CVE-2026-34450, CVE-2026-34452 # search provider (configured via `hermes tools` or config.yaml). 
exa = ["exa-py==2.10.2"] firecrawl = ["firecrawl-py==4.17.0"] -parallel-web = ["parallel-web==0.4.2"] +parallel-web = ["parallel-web==0.6.0"] # Image generation backends fal = ["fal-client==0.13.1"] # Edge TTS — default TTS provider but still optional (users can pick diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 21e3d495816..b316a99e4f7 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -892,6 +892,42 @@ function Test-Node { return $true } +function Update-ProcessPathForPackages { + # Make freshly-installed shims (rg.exe, ffmpeg.exe) visible to Get-Command in + # THIS process without spawning a new shell, by folding the persisted + # User+Machine hives plus winget's alias-shim directory into $env:Path. + # Called after every package-manager attempt (winget/choco/scoop): previously + # PATH was only refreshed inside the winget branch, so a successful + # choco/scoop fallback -- or any install on a box without winget -- could be + # misreported as "not installed". + # + # MERGE rather than overwrite: start from the existing process PATH so any + # process-only entries added earlier in this installer run survive, then + # APPEND hive/winget-Links entries not already present (case-insensitive, + # order-preserving dedupe). A wholesale replace would silently drop those + # process-only entries. 
+ $candidates = @() + $candidates += $env:Path + $candidates += [Environment]::GetEnvironmentVariable("Path", "User") + $candidates += [Environment]::GetEnvironmentVariable("Path", "Machine") + $wingetLinks = Join-Path $env:LOCALAPPDATA "Microsoft\WinGet\Links" + if (Test-Path $wingetLinks) { + $candidates += $wingetLinks + } + $seen = New-Object System.Collections.Generic.HashSet[string] ([StringComparer]::OrdinalIgnoreCase) + $ordered = New-Object System.Collections.Generic.List[string] + foreach ($chunk in $candidates) { + if ([string]::IsNullOrEmpty($chunk)) { continue } + foreach ($entry in $chunk.Split(';')) { + $trimmed = $entry.Trim() + if ($trimmed -and $seen.Add($trimmed)) { + $ordered.Add($trimmed) + } + } + } + $env:Path = [string]::Join(';', $ordered) +} + function Install-SystemPackages { $script:HasRipgrep = $false $script:HasFfmpeg = $false @@ -961,25 +997,33 @@ function Install-SystemPackages { try { $output = winget install --exact --id $pkg --source winget --silent ` --accept-package-agreements --accept-source-agreements 2>&1 + $code = $LASTEXITCODE $output | Out-File -FilePath $log -Encoding utf8 - "winget exit: $LASTEXITCODE" | Out-File -FilePath $log -Encoding utf8 -Append + "winget exit: $code" | Out-File -FilePath $log -Encoding utf8 -Append + # 0x8A15002B (-1978335189) = APPINSTALLER_CLI_ERROR_UPDATE_NOT_APPLICABLE. + # winget treats `install` on a package it already has registered as + # an *upgrade*, finds no newer version, and bails with this code -- + # even when the binary is gone from disk/PATH (stale registration, + # files removed outside winget, or a missing alias shim). We KNOW the + # command was missing (that's why we're here), so a plain install + # dead-ends forever. Force a reinstall to repair the registration so + # the shim reappears. 
+ if ($code -eq -1978335189) { + "-> already-installed/no-upgrade; retrying with --force" | Out-File -FilePath $log -Encoding utf8 -Append + $output = winget install --exact --id $pkg --source winget --silent --force ` + --accept-package-agreements --accept-source-agreements 2>&1 + $output | Out-File -FilePath $log -Encoding utf8 -Append + "winget exit (force): $LASTEXITCODE" | Out-File -FilePath $log -Encoding utf8 -Append + } } catch { $_ | Out-File -FilePath $log -Encoding utf8 -Append "winget exit: " | Out-File -FilePath $log -Encoding utf8 -Append } } - # Refresh PATH from both env-var hives AND winget's alias shim directory. - # winget exposes packages via "command line aliases" in %LOCALAPPDATA%\ - # Microsoft\WinGet\Links, which is added to PATH by the AppExecutionAlias - # machinery only in *newly-spawned* shells -- not the current process. - # Without this addition, Get-Command rg below would falsely return null - # immediately after a successful install. - $wingetLinks = Join-Path $env:LOCALAPPDATA "Microsoft\WinGet\Links" - $envPath = [Environment]::GetEnvironmentVariable("Path", "User") + ";" + [Environment]::GetEnvironmentVariable("Path", "Machine") - if (Test-Path $wingetLinks) { - $envPath = "$envPath;$wingetLinks" - } - $env:Path = $envPath + # Refresh PATH so packages winget exposed via "command line aliases" in + # %LOCALAPPDATA%\Microsoft\WinGet\Links (added to PATH only in + # newly-spawned shells, not this process) are visible to Get-Command below. 
+ Update-ProcessPathForPackages if ($needRipgrep -and (Get-Command rg -ErrorAction SilentlyContinue)) { Write-Success "ripgrep installed" $script:HasRipgrep = $true @@ -1005,6 +1049,7 @@ function Install-SystemPackages { foreach ($pkg in $chocoPkgs) { try { choco install $pkg -y 2>&1 | Out-Null } catch { } } + Update-ProcessPathForPackages if ($needRipgrep -and (Get-Command rg -ErrorAction SilentlyContinue)) { Write-Success "ripgrep installed via chocolatey" $script:HasRipgrep = $true @@ -1023,6 +1068,7 @@ function Install-SystemPackages { foreach ($pkg in $scoopPkgs) { try { scoop install $pkg 2>&1 | Out-Null } catch { } } + Update-ProcessPathForPackages if ($needRipgrep -and (Get-Command rg -ErrorAction SilentlyContinue)) { Write-Success "ripgrep installed via scoop" $script:HasRipgrep = $true diff --git a/scripts/release.py b/scripts/release.py index a9d08577b76..b7dc877b246 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -75,6 +75,10 @@ AUTHOR_MAP = { "129007007+HeLLGURD@users.noreply.github.com": "HeLLGURD", "290859878+synapsesx@users.noreply.github.com": "synapsesx", "dirtyren@users.noreply.github.com": "dirtyren", + "mvanhorn@MacBook-Pro.local": "mvanhorn", + "470766206@qq.com": "youjunxiaji", + "mharris@parallel.ai": "NormallyGaussian", + "roger@roger.local": "mollusk", "ted.malone@outlook.com": "temalo", "adityamalik2833@gmail.com": "alarcritty", "islam666@users.noreply.github.com": "islam666", @@ -943,6 +947,8 @@ AUTHOR_MAP = { "michel.belleau@malaiwah.com": "malaiwah", "gnanasekaran.sekareee@gmail.com": "gnanam1990", "jz.pentest@gmail.com": "0xyg3n", + "56406949+RaumfahrerSpiffy@users.noreply.github.com": "Spaceman-Spiffy", # PR #35586 (renamed account) + "ian@culling.ca": "ianculling", # PR #36087 "7093928+0xyg3n@users.noreply.github.com": "0xyg3n", "nftpoetrist@gmail.com": "nftpoetrist", # PR #18982 "millerc79@users.noreply.github.com": "millerc79", # PR #19033 @@ -1510,6 +1516,7 @@ AUTHOR_MAP = { "josephjohnson.joel@gmail.com": 
"JoelJJohnson", # PR #39913 salvage (Windows ConPTY dashboard chat bridge) "andreas@schwarz-ketsch.de": "Nea74", # PR #40022 co-author credit (same Windows ConPTY bridge design) "chanhokyim@gmail.com": "joel611", # PR #33958 salvage (DISCORD_ALLOWED_ROLES role_authorized gateway flag) + "desg38@gmail.com": "dschnurbusch", # PR #42373 salvage (archive compressed conversation lineages) } diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 5723d8b543b..4c65740c017 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -24,7 +24,8 @@ import { Boom } from '@hapi/boom'; import pino from 'pino'; import path from 'path'; import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync, unlinkSync } from 'fs'; -import { randomBytes } from 'crypto'; +import { fileURLToPath } from 'url'; +import { randomBytes, createHash } from 'crypto'; import { execSync } from 'child_process'; import { tmpdir } from 'os'; import qrcode from 'qrcode-terminal'; @@ -45,9 +46,28 @@ const WHATSAPP_DEBUG = const PORT = parseInt(getArg('port', '3000'), 10); const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session')); -const IMAGE_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'image_cache'); -const DOCUMENT_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'document_cache'); -const AUDIO_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'audio_cache'); +// Cache directories: the Python gateway passes the profile-aware paths via +// env (HERMES_HOME-aware, new cache/ layout). Fall back to the legacy +// hardcoded locations for bridges launched outside the gateway. 
+const IMAGE_CACHE_DIR = process.env.HERMES_IMAGE_CACHE_DIR + || path.join(process.env.HOME || '~', '.hermes', 'image_cache'); +const DOCUMENT_CACHE_DIR = process.env.HERMES_DOCUMENT_CACHE_DIR + || path.join(process.env.HOME || '~', '.hermes', 'document_cache'); +const AUDIO_CACHE_DIR = process.env.HERMES_AUDIO_CACHE_DIR + || path.join(process.env.HOME || '~', '.hermes', 'audio_cache'); + +// Self-hash of this script file. Reported in /health so the Python gateway +// can detect a running bridge that predates the current bridge.js and +// restart it instead of silently reusing stale code (stale-bridge trap: +// `hermes update` updates bridge.js on disk but a long-lived bridge process +// keeps serving the old behavior forever). +let SCRIPT_HASH = ''; +try { + SCRIPT_HASH = createHash('sha256') + .update(readFileSync(fileURLToPath(import.meta.url))) + .digest('hex') + .slice(0, 16); +} catch {} const PAIR_ONLY = args.includes('--pair-only'); const WHATSAPP_MODE = getArg('mode', process.env.WHATSAPP_MODE || 'self-chat'); // "bot" or "self-chat" const ALLOWED_USERS = parseAllowedUsers(process.env.WHATSAPP_ALLOWED_USERS || ''); @@ -700,6 +720,7 @@ app.get('/health', (req, res) => { status: connectionState, queueLength: messageQueue.length, uptime: process.uptime(), + scriptHash: SCRIPT_HASH, }); }); diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 08a4fd2b43a..d02ac7933cb 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -343,7 +343,7 @@ The registry of record is `hermes_cli/commands.py` — every consumer ``` ~/.hermes/config.yaml Main configuration -~/.hermes/.env API keys and secrets +~/.hermes/.env API keys and secrets (under $HERMES_HOME if set) $HERMES_HOME/skills/ Installed skills ~/.hermes/sessions/ Gateway routing index, request dumps, *.jsonl transcripts (and optional per-session JSON snapshots when 
sessions.write_json_snapshots: true) ~/.hermes/state.db Canonical session store (SQLite + FTS5) @@ -908,7 +908,7 @@ hermes-agent/ └── website/ # Docusaurus docs site ``` -Config: `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys). +Config: `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys) — both under `$HERMES_HOME` when it is set. ### Adding a Tool (3 files) diff --git a/skills/autonomous-ai-agents/hermes-agent/references/webhooks.md b/skills/autonomous-ai-agents/hermes-agent/references/webhooks.md index a1758d64f09..0af935ea23c 100644 --- a/skills/autonomous-ai-agents/hermes-agent/references/webhooks.md +++ b/skills/autonomous-ai-agents/hermes-agent/references/webhooks.md @@ -30,7 +30,7 @@ platforms: ``` ### Option 3: Environment variables -Add to `~/.hermes/.env`: +Add to `${HERMES_HOME:-~/.hermes}/.env`: ```bash WEBHOOK_ENABLED=true WEBHOOK_PORT=8644 diff --git a/skills/github/github-auth/SKILL.md b/skills/github/github-auth/SKILL.md index 6b929a408d5..95606d36709 100644 --- a/skills/github/github-auth/SKILL.md +++ b/skills/github/github-auth/SKILL.md @@ -220,8 +220,8 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then echo "AUTH_METHOD=gh" elif [ -n "$GITHUB_TOKEN" ]; then echo "AUTH_METHOD=curl" -elif [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then - export GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') +elif _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env"; [ -f "$_hermes_env" ] && grep -q "^GITHUB_TOKEN=" "$_hermes_env"; then + export GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$_hermes_env" | head -1 | cut -d= -f2 | tr -d '\n\r') echo "AUTH_METHOD=curl" elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then export GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') diff --git a/skills/github/github-auth/scripts/gh-env.sh b/skills/github/github-auth/scripts/gh-env.sh index 
043c6b5551b..47b3ff98c5c 100755 --- a/skills/github/github-auth/scripts/gh-env.sh +++ b/skills/github/github-auth/scripts/gh-env.sh @@ -23,8 +23,8 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null 2>&1; then GH_USER=$(gh api user --jq '.login' 2>/dev/null) elif [ -n "$GITHUB_TOKEN" ]; then GH_AUTH_METHOD="curl" -elif [ -f "$HOME/.hermes/.env" ] && grep -q "^GITHUB_TOKEN=" "$HOME/.hermes/.env" 2>/dev/null; then - GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$HOME/.hermes/.env" | head -1 | cut -d= -f2 | tr -d '\n\r') +elif _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env"; [ -f "$_hermes_env" ] && grep -q "^GITHUB_TOKEN=" "$_hermes_env" 2>/dev/null; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$_hermes_env" | head -1 | cut -d= -f2 | tr -d '\n\r') if [ -n "$GITHUB_TOKEN" ]; then GH_AUTH_METHOD="curl" fi diff --git a/skills/github/github-code-review/SKILL.md b/skills/github/github-code-review/SKILL.md index 3b50ac45279..b5830923512 100644 --- a/skills/github/github-code-review/SKILL.md +++ b/skills/github/github-code-review/SKILL.md @@ -28,8 +28,8 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then else AUTH="git" if [ -z "$GITHUB_TOKEN" ]; then - if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then - GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + if _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env"; [ -f "$_hermes_env" ] && grep -q "^GITHUB_TOKEN=" "$_hermes_env"; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$_hermes_env" | head -1 | cut -d= -f2 | tr -d '\n\r') elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') fi diff --git a/skills/github/github-issues/SKILL.md b/skills/github/github-issues/SKILL.md index 338074f885c..bded118a10a 100644 --- a/skills/github/github-issues/SKILL.md +++ b/skills/github/github-issues/SKILL.md @@ -28,8 +28,8 @@ if command -v gh 
&>/dev/null && gh auth status &>/dev/null; then else AUTH="git" if [ -z "$GITHUB_TOKEN" ]; then - if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then - GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + if _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env"; [ -f "$_hermes_env" ] && grep -q "^GITHUB_TOKEN=" "$_hermes_env"; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$_hermes_env" | head -1 | cut -d= -f2 | tr -d '\n\r') elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') fi diff --git a/skills/github/github-pr-workflow/SKILL.md b/skills/github/github-pr-workflow/SKILL.md index 0b02eca3d1e..69eb21183d3 100644 --- a/skills/github/github-pr-workflow/SKILL.md +++ b/skills/github/github-pr-workflow/SKILL.md @@ -30,8 +30,8 @@ else AUTH="git" # Ensure we have a token for API calls if [ -z "$GITHUB_TOKEN" ]; then - if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then - GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + if _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env"; [ -f "$_hermes_env" ] && grep -q "^GITHUB_TOKEN=" "$_hermes_env"; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$_hermes_env" | head -1 | cut -d= -f2 | tr -d '\n\r') elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') fi diff --git a/skills/github/github-repo-management/SKILL.md b/skills/github/github-repo-management/SKILL.md index 0ba049e2787..1026ce36e49 100644 --- a/skills/github/github-repo-management/SKILL.md +++ b/skills/github/github-repo-management/SKILL.md @@ -27,8 +27,8 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then else AUTH="git" if [ -z "$GITHUB_TOKEN" ]; then - if [ -f ~/.hermes/.env ] && grep -q 
"^GITHUB_TOKEN=" ~/.hermes/.env; then - GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + if _hermes_env="${HERMES_HOME:-$HOME/.hermes}/.env"; [ -f "$_hermes_env" ] && grep -q "^GITHUB_TOKEN=" "$_hermes_env"; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$_hermes_env" | head -1 | cut -d= -f2 | tr -d '\n\r') elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') fi diff --git a/skills/media/gif-search/SKILL.md b/skills/media/gif-search/SKILL.md index 1a28b8b293d..5416290a9d0 100644 --- a/skills/media/gif-search/SKILL.md +++ b/skills/media/gif-search/SKILL.md @@ -23,7 +23,7 @@ Useful for finding reaction GIFs, creating visual content, and sending GIFs in c ## Setup -Set your Tenor API key in your environment (add to `~/.hermes/.env`): +Set your Tenor API key in your environment (add to `${HERMES_HOME:-~/.hermes}/.env`): ```bash TENOR_API_KEY=your_key_here diff --git a/skills/note-taking/obsidian/SKILL.md b/skills/note-taking/obsidian/SKILL.md index 15810900889..e3a9872309a 100644 --- a/skills/note-taking/obsidian/SKILL.md +++ b/skills/note-taking/obsidian/SKILL.md @@ -12,7 +12,7 @@ Use this skill for filesystem-first Obsidian vault work: reading notes, listing Use a known or resolved vault path before calling file tools. -The documented vault-path convention is the `OBSIDIAN_VAULT_PATH` environment variable, for example from `~/.hermes/.env`. If it is unset, use `~/Documents/Obsidian Vault`. +The documented vault-path convention is the `OBSIDIAN_VAULT_PATH` environment variable, for example from `${HERMES_HOME:-~/.hermes}/.env`. If it is unset, use `~/Documents/Obsidian Vault`. File tools do not expand shell variables. Do not pass paths containing `$OBSIDIAN_VAULT_PATH` to `read_file`, `write_file`, `patch`, or `search_files`; resolve the vault path first and pass a concrete absolute path. 
Vault paths may contain spaces, which is another reason to prefer file tools over shell commands. diff --git a/skills/productivity/airtable/SKILL.md b/skills/productivity/airtable/SKILL.md index 547e2a14b73..3fa1b0ab973 100644 --- a/skills/productivity/airtable/SKILL.md +++ b/skills/productivity/airtable/SKILL.md @@ -26,7 +26,7 @@ Work with Airtable's REST API directly via `curl` using the `terminal` tool. No - `data.records:write` — create / update / delete rows - `schema.bases:read` — list bases and tables 3. **Important:** in the same token UI, add each base you want to access to the token's **Access** list. PATs are scoped per-base — a valid token on the wrong base returns `403`. -4. Store the token in `~/.hermes/.env` (or via `hermes setup`): +4. Store the token in `${HERMES_HOME:-~/.hermes}/.env` (or via `hermes setup`): ``` AIRTABLE_API_KEY=pat_your_token_here ``` @@ -222,7 +222,7 @@ done ## Important Notes for Hermes - **Always use the `terminal` tool with `curl`.** Do NOT use `web_extract` (it can't send auth headers) or `browser_navigate` (needs UI auth and is slow). -- **`AIRTABLE_API_KEY` flows from `~/.hermes/.env` into the subprocess automatically** when this skill is loaded — no need to re-export it before each `curl` call. +- **`AIRTABLE_API_KEY` flows from `${HERMES_HOME:-~/.hermes}/.env` into the subprocess automatically** when this skill is loaded — no need to re-export it before each `curl` call. - **Escape curly braces in formulas carefully.** In a heredoc body, `{Status}` is literal. In a shell argument, `{Status}` is safe outside `{...}` brace-expansion context — but pass dynamic strings through `python3 urllib.parse.quote` before splicing into a URL. - **Pretty-print with `python3 -m json.tool`** (always present) rather than `jq` (optional). Only reach for `jq` when you need filtering/projection. - **Pagination is per-page, not global.** Airtable's 100-record cap is a hard limit; there is no way to bump it. 
Loop with `offset` until the field is absent. diff --git a/skills/productivity/notion/SKILL.md b/skills/productivity/notion/SKILL.md index 83222ffd938..22010c6241d 100644 --- a/skills/productivity/notion/SKILL.md +++ b/skills/productivity/notion/SKILL.md @@ -26,7 +26,7 @@ Talk to Notion two ways. Same integration token works for both — pick by what' 1. Create an integration at https://notion.so/my-integrations 2. Copy the API key (starts with `ntn_` or `secret_`) -3. Store in `~/.hermes/.env`: +3. Store in `${HERMES_HOME:-~/.hermes}/.env`: ``` NOTION_API_KEY=ntn_your_key_here ``` @@ -50,7 +50,7 @@ export NOTION_API_TOKEN=$NOTION_API_KEY # ntn reads NOTION_API_TOKEN export NOTION_KEYRING=0 # don't try to use the OS keychain ``` -Add those exports to your shell profile (or to `~/.hermes/.env`) so every session inherits them. +Add those exports to your shell profile (or to `${HERMES_HOME:-~/.hermes}/.env`) so every session inherits them. ### 3. Choose path at runtime diff --git a/skills/productivity/teams-meeting-pipeline/SKILL.md b/skills/productivity/teams-meeting-pipeline/SKILL.md index 4ad37c4758a..11960aa3201 100644 --- a/skills/productivity/teams-meeting-pipeline/SKILL.md +++ b/skills/productivity/teams-meeting-pipeline/SKILL.md @@ -39,7 +39,7 @@ Multilingual trigger examples (not exhaustive): ## Prerequisites -Before using the pipeline, verify these are set in `~/.hermes/.env`: +Before using the pipeline, verify these are set in `${HERMES_HOME:-~/.hermes}/.env`: ```bash MSGRAPH_TENANT_ID=... diff --git a/skills/research/llm-wiki/SKILL.md b/skills/research/llm-wiki/SKILL.md index 839c2f682a0..7dc708c9a5f 100644 --- a/skills/research/llm-wiki/SKILL.md +++ b/skills/research/llm-wiki/SKILL.md @@ -35,7 +35,7 @@ Use this skill when the user: ## Wiki Location -**Location:** Set via `WIKI_PATH` environment variable (e.g. in `~/.hermes/.env`). +**Location:** Set via `WIKI_PATH` environment variable (e.g. in `${HERMES_HOME:-~/.hermes}/.env`). 
If unset, defaults to `~/wiki`. diff --git a/tests/agent/test_anthropic_output_field_leak.py b/tests/agent/test_anthropic_output_field_leak.py new file mode 100644 index 00000000000..a691f34ec0b --- /dev/null +++ b/tests/agent/test_anthropic_output_field_leak.py @@ -0,0 +1,94 @@ +"""Regression: output-only SDK fields must not leak into Anthropic request input. + +Reproduces HTTP 400 `messages.N.content.M.text.parsed_output: Extra inputs are +not permitted`. Anthropic SDK response blocks carry output-only attributes +(text blocks: `parsed_output`, `citations=None`; tool_use blocks: `caller`) +that the Messages *input* schema forbids. normalize_response captured blocks +verbatim via _to_plain_data and replayed them as input → 400. + +Fix: whitelist input-permitted fields per block type at three points — +normalize_response capture, _sanitize_replay_block (ordered-blocks replay), and +_convert_content_part_to_anthropic (content-list replay). +""" + +import pytest +from agent.anthropic_adapter import ( + _sanitize_replay_block, + _convert_content_part_to_anthropic, + _convert_assistant_message, +) + +FORBIDDEN = {"parsed_output", "caller"} + + +def _assert_clean(block): + """No forbidden output-only key, and no null citations, anywhere.""" + assert isinstance(block, dict) + for k in FORBIDDEN: + assert k not in block, f"forbidden field {k!r} survived: {block}" + if "citations" in block: + assert isinstance(block["citations"], list) and block["citations"], \ + "citations must be a non-empty list if present (None/[] is input-invalid)" + + +class TestSanitizeReplayBlock: + def test_text_block_strips_parsed_output_and_null_citations(self): + poisoned = {"type": "text", "text": "hi", "parsed_output": None, "citations": None} + out = _sanitize_replay_block(poisoned) + _assert_clean(out) + assert out == {"type": "text", "text": "hi"} + + def test_tool_use_strips_caller(self): + poisoned =
{"type": "tool_use", "id": "toolu_1", "name": "read_file", + "input": {"path": "a"}, "caller": {"type": "agent"}} + out = _sanitize_replay_block(poisoned) + _assert_clean(out) + assert out["name"] == "read_file" and out["input"] == {"path": "a"} + + def test_thinking_preserves_signature(self): + b = {"type": "thinking", "thinking": "x", "signature": "sig-AAA"} + out = _sanitize_replay_block(b) + assert out == {"type": "thinking", "thinking": "x", "signature": "sig-AAA"} + + def test_text_keeps_real_citations(self): + real = [{"type": "char_location", "cited_text": "q"}] + out = _sanitize_replay_block({"type": "text", "text": "t", "citations": real}) + assert out["citations"] == real + + def test_unknown_type_dropped(self): + assert _sanitize_replay_block({"type": "server_tool_use", "foo": 1}) is None + + +class TestContentPartConversion: + def test_stored_text_block_with_parsed_output_cleaned(self): + # The exact content.N.text.parsed_output failure shape. + part = {"type": "text", "text": "hello", "parsed_output": None, "citations": None} + out = _convert_content_part_to_anthropic(part) + _assert_clean(out) + + +class TestAssistantReplay: + def test_interleaved_blocks_replayed_clean_and_ordered(self): + m = { + "role": "assistant", + "anthropic_content_blocks": [ + {"type": "thinking", "thinking": "plan", "signature": "s1"}, + {"type": "text", "text": "doing it", "parsed_output": None, "citations": None}, + {"type": "tool_use", "id": "toolu_1", "name": "read_file", + "input": {"path": "a"}, "caller": {"type": "agent"}}, + ], + } + out = _convert_assistant_message(m) + blocks = out["content"] + # order preserved + assert [b["type"] for b in blocks] == ["thinking", "text", "tool_use"] + # every block clean + for b in blocks: + _assert_clean(b) + # signature + tool fields intact + assert blocks[0]["signature"] == "s1" + assert blocks[2]["name"] == "read_file" + + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__, "-v"])) diff --git 
a/tests/agent/test_anthropic_thinking_block_order.py b/tests/agent/test_anthropic_thinking_block_order.py new file mode 100644 index 00000000000..5455b3339a7 --- /dev/null +++ b/tests/agent/test_anthropic_thinking_block_order.py @@ -0,0 +1,314 @@ +"""Regression test for the Anthropic interleaved thinking-block 400. + +Reproduces: HTTP 400 ``messages.N.content.M: thinking or redacted_thinking +blocks in the latest assistant message cannot be modified. These blocks must +remain as they were in the original response.`` + +Root cause under test +---------------------- +With adaptive / interleaved thinking (Claude 4.6+, e.g. Opus 4.8), a single +assistant turn can emit content blocks in an interleaved order:: + + thinking_1 (signed) · tool_use_1 · thinking_2 (signed) · tool_use_2 + +Anthropic signs each thinking block against the turn content that precedes it +at its position. ``thinking_2`` is signed with ``tool_use_1`` before it. + +``AnthropicTransport.normalize_response`` (agent/transports/anthropic.py) +splits the turn into two *parallel* lists — ``reasoning_details`` (thinking +blocks) and ``tool_calls`` (tool_use blocks) — discarding the cross-type +ordering. ``run_agent`` stores those as separate fields on the assistant +message. On replay, ``_convert_assistant_message`` (agent/anthropic_adapter.py) +rebuilds the content as ``[all thinking][text][all tool_use]``, which reorders +``thinking_2`` ahead of ``tool_use_1``. The signature no longer matches its +original position, so Anthropic rejects the latest assistant message with the +400 above. + +This test asserts that an interleaved turn round-trips through +normalize_response -> stored message -> convert_messages_to_anthropic with its +block order preserved. It failed before the ordered ``anthropic_content_blocks`` +channel existed and now guards that replay path against regressions.
+""" + +import json +from types import SimpleNamespace + +import pytest + +from agent.transports import get_transport +from agent.anthropic_adapter import convert_messages_to_anthropic + + +def _thinking_block(text: str, signature: str) -> SimpleNamespace: + """A signed Anthropic thinking block, shaped like the SDK object.""" + return SimpleNamespace(type="thinking", thinking=text, signature=signature) + + +def _tool_use_block(block_id: str, name: str, payload: dict) -> SimpleNamespace: + return SimpleNamespace(type="tool_use", id=block_id, name=name, input=payload) + + +def _interleaved_response() -> SimpleNamespace: + """An assistant turn with thinking interleaved between two tool_use blocks.""" + return SimpleNamespace( + content=[ + _thinking_block("Plan: inspect file A first.", "sig-AAA"), + _tool_use_block("toolu_1", "read_file", {"path": "a.py"}), + _thinking_block("A looked fine; now inspect B.", "sig-BBB"), + _tool_use_block("toolu_2", "read_file", {"path": "b.py"}), + ], + stop_reason="tool_use", + usage=None, + ) + + +def _stored_assistant_message(normalized) -> dict: + """Reconstruct the OpenAI-style assistant message the way run_agent stores it. + + run_agent.py persists assistant turns as separate fields: content, + reasoning_details (from provider_data), and tool_calls. See + run_agent.py L1513-1516 and hermes_state.py. + """ + provider_data = normalized.provider_data or {} + tool_calls = [] + for tc in (normalized.tool_calls or []): + tool_calls.append({ + "id": tc.id, + "type": "function", + "function": {"name": tc.name, "arguments": tc.arguments}, + }) + msg = { + "role": "assistant", + "content": normalized.content or "", + "reasoning_details": provider_data.get("reasoning_details"), + "tool_calls": tool_calls, + } + # build_assistant_message lifts the verbatim ordered-block channel onto + # the stored message; mirror that here. 
+ blocks = provider_data.get("anthropic_content_blocks") + if blocks: + msg["anthropic_content_blocks"] = blocks + return msg + + +def _original_block_order(response) -> list: + """The (type, key) sequence of the original interleaved response.""" + order = [] + for b in response.content: + if b.type == "thinking": + order.append(("thinking", b.signature)) + elif b.type == "tool_use": + order.append(("tool_use", b.id)) + return order + + +def _replayed_block_order(assistant_content) -> list: + order = [] + for b in assistant_content: + if not isinstance(b, dict): + continue + if b.get("type") in ("thinking", "redacted_thinking"): + order.append(("thinking", b.get("signature"))) + elif b.get("type") == "tool_use": + order.append(("tool_use", b.get("id"))) + return order + + +class TestInterleavedThinkingBlockOrder: + def test_normalize_response_loses_interleaving(self): + """Confirm the lossy split: normalize_response stores thinking and + tool_use in independent fields with no positional linkage.""" + transport = get_transport("anthropic_messages") + normalized = transport.normalize_response(_interleaved_response()) + + # Both thinking blocks are captured... + details = (normalized.provider_data or {}).get("reasoning_details") + assert details is not None and len(details) == 2 + # ...and both tool calls... + assert normalized.tool_calls is not None and len(normalized.tool_calls) == 2 + # ...but they live in separate fields. There is no single ordered + # structure recording that thinking_2 sat between the two tool calls. + # (This is the structural precondition for the reorder bug.) + + def test_interleaved_order_preserved_on_replay(self): + """The latest assistant message must replay blocks in their ORIGINAL + order, or Anthropic rejects the signed thinking blocks with a 400. 
+ + Regression guard: reconstruction from the parallel fields alone + front-loads all thinking blocks in _convert_assistant_message, producing + thinking_1 · thinking_2 · tool_use_1 · tool_use_2 + instead of the original + thinking_1 · tool_use_1 · thinking_2 · tool_use_2 + """ + response = _interleaved_response() + original_order = _original_block_order(response) + + transport = get_transport("anthropic_messages") + normalized = transport.normalize_response(response) + assistant_msg = _stored_assistant_message(normalized) + + # Build a minimal conversation where this assistant turn is the LATEST + # assistant message (the one whose signed blocks are sent verbatim). + messages = [ + {"role": "user", "content": "Inspect a.py and b.py."}, + assistant_msg, + {"role": "tool", "tool_call_id": "toolu_1", "content": "a.py: ok"}, + {"role": "tool", "tool_call_id": "toolu_2", "content": "b.py: ok"}, + ] + + _system, anthropic_messages = convert_messages_to_anthropic( + messages, + base_url=None, # direct Anthropic + model="claude-opus-4-8", # adaptive thinking family + ) + + # Find the (latest) assistant message in the converted output. + assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"] + assert assistant_out, "no assistant message in converted output" + replayed_order = _replayed_block_order(assistant_out[-1]["content"]) + + assert replayed_order == original_order, ( + "Interleaved thinking/tool_use order was not preserved on replay.\n" + f" original: {original_order}\n" + f" replayed: {replayed_order}\n" + "Anthropic signs thinking blocks against their original position; " + "reordering invalidates the signature -> HTTP 400 'thinking blocks " + "in the latest assistant message cannot be modified'." + ) + + def test_replay_falls_back_gracefully_without_ordered_blocks(self): + """Without the ordered-block channel, conversion must not crash.
+ + The channel is intentionally NOT persisted to state.db (in-memory + only): a session reloaded from disk after a crash loses the field + and falls back to reconstruction. That replay may take one HTTP 400, + which the thinking-signature recovery (#43667) absorbs by stripping + reasoning_details and retrying. This test pins the fallback shape: + conversion still produces a valid assistant message from the + parallel reasoning_details + tool_calls fields. + """ + response = _interleaved_response() + transport = get_transport("anthropic_messages") + normalized = transport.normalize_response(response) + assistant_msg = _stored_assistant_message(normalized) + # Simulate a disk reload: the in-memory-only channel is gone. + assistant_msg.pop("anthropic_content_blocks", None) + + messages = [ + assistant_msg, + {"role": "tool", "tool_call_id": "toolu_1", "content": "a ok"}, + {"role": "tool", "tool_call_id": "toolu_2", "content": "b ok"}, + ] + _system, anthropic_messages = convert_messages_to_anthropic( + messages, base_url=None, model="claude-opus-4-8", + ) + assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"] + assert assistant_out, "no assistant message in converted output" + content = assistant_out[-1]["content"] + assert isinstance(content, list) and content, "fallback produced empty content" + # Reconstruction keeps both tool_use blocks (answered by results). + tool_ids = [b.get("id") for b in content if isinstance(b, dict) and b.get("type") == "tool_use"] + assert set(tool_ids) == {"toolu_1", "toolu_2"} + + +class TestInterleavedReplayCredentialRedaction: + """The verbatim-replay fast path must not leak un-redacted secrets. + + anthropic_content_blocks captures each tool_use ``input`` from the RAW API + response (normalize_response), which is NOT credential-redacted. The + parallel tool_calls[].function.arguments IS redacted at storage time + (build_assistant_message, #19798). 
If the fast path replays the block's raw + input verbatim, a secret the model inlined into a tool call rides back onto + the wire — even though it is redacted everywhere else in history. The fix + re-sources tool_use input from the redacted tool_calls map by id. + """ + + def test_tool_use_input_resourced_from_redacted_tool_calls(self): + REDACTED = "[REDACTED_SECRET]" + # Ordered channel: raw input carries the live secret (as captured from + # the unredacted API response). + ordered = [ + {"type": "thinking", "thinking": "Call the API.", "signature": "sig-AAA"}, + { + "type": "tool_use", + "id": "toolu_1", + "name": "terminal", + "input": {"command": "curl -H 'Authorization: Bearer sk-LIVE-SECRET-123'"}, + }, + {"type": "thinking", "thinking": "Now the second call.", "signature": "sig-BBB"}, + { + "type": "tool_use", + "id": "toolu_2", + "name": "terminal", + "input": {"command": "echo done"}, + }, + ] + # Stored tool_calls: arguments already redacted (the #19798 path). + assistant_msg = { + "role": "assistant", + "content": "", + "reasoning_details": [b for b in ordered if b["type"] == "thinking"], + "tool_calls": [ + { + "id": "toolu_1", + "type": "function", + "function": { + "name": "terminal", + "arguments": json.dumps( + {"command": f"curl -H 'Authorization: Bearer {REDACTED}'"} + ), + }, + }, + { + "id": "toolu_2", + "type": "function", + "function": { + "name": "terminal", + "arguments": json.dumps({"command": "echo done"}), + }, + }, + ], + "anthropic_content_blocks": ordered, + } + messages = [ + {"role": "user", "content": "Hit the API twice."}, + assistant_msg, + {"role": "tool", "tool_call_id": "toolu_1", "content": "200 OK"}, + {"role": "tool", "tool_call_id": "toolu_2", "content": "done"}, + ] + + _system, anthropic_messages = convert_messages_to_anthropic( + messages, base_url=None, model="claude-opus-4-8", + ) + assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"] + assert assistant_out, "no assistant message in 
converted output" + blocks = assistant_out[-1]["content"] + + tool_uses = {b["id"]: b for b in blocks if b.get("type") == "tool_use"} + assert set(tool_uses) == {"toolu_1", "toolu_2"}, "tool_use blocks missing/renamed" + + # The replayed input must be the REDACTED value, not the live secret. + replayed_cmd = tool_uses["toolu_1"]["input"]["command"] + assert "sk-LIVE-SECRET-123" not in replayed_cmd, ( + "Un-redacted secret leaked onto the wire via the verbatim-replay " + "fast path. tool_use input must be re-sourced from the redacted " + "tool_calls map, not the raw captured block." + ) + assert REDACTED in replayed_cmd + + # Interleave order is still preserved (the reason the channel exists). + order = [ + ("thinking", b.get("signature")) if b.get("type") == "thinking" + else ("tool_use", b.get("id")) + for b in blocks if b.get("type") in ("thinking", "tool_use") + ] + assert order == [ + ("thinking", "sig-AAA"), + ("tool_use", "toolu_1"), + ("thinking", "sig-BBB"), + ("tool_use", "toolu_2"), + ] + + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__, "-v"])) diff --git a/tests/agent/test_bedrock_adapter.py b/tests/agent/test_bedrock_adapter.py index 5f98fe5cf78..e1112b12df5 100644 --- a/tests/agent/test_bedrock_adapter.py +++ b/tests/agent/test_bedrock_adapter.py @@ -1471,3 +1471,127 @@ class TestCallConverseInvalidatesOnStaleError: ) assert _bedrock_runtime_client_cache.get("us-east-1") is live_client + + +class TestStreamingAccessDeniedDetection: + """is_streaming_access_denied_error() recognizes IAM denials of + bedrock:InvokeModelWithResponseStream (InvokeModel-only policies).""" + + def _denied_client_error(self): + from botocore.exceptions import ClientError + return ClientError( + error_response={ + "Error": { + "Code": "AccessDeniedException", + "Message": ( + "User: arn:aws:iam::123456789012:user/x is not " + "authorized to perform: " + "bedrock:InvokeModelWithResponseStream on resource: " + "arn:aws:bedrock:us-east-1::foundation-model/" 
+ "anthropic.claude-3-sonnet-20240229-v1:0" + ), + } + }, + operation_name="ConverseStream", + ) + + def test_matches_access_denied_client_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") + from agent.bedrock_adapter import is_streaming_access_denied_error + assert is_streaming_access_denied_error(self._denied_client_error()) is True + + def test_ignores_access_denied_for_other_actions(self): + """AccessDenied on InvokeModel itself is NOT a streaming-only denial.""" + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") + from agent.bedrock_adapter import is_streaming_access_denied_error + from botocore.exceptions import ClientError + exc = ClientError( + error_response={ + "Error": { + "Code": "AccessDeniedException", + "Message": ( + "User is not authorized to perform: bedrock:InvokeModel" + ), + } + }, + operation_name="Converse", + ) + assert is_streaming_access_denied_error(exc) is False + + def test_ignores_validation_error_mentioning_action(self): + """Non-authz ClientErrors don't match even if the action name appears.""" + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") + from agent.bedrock_adapter import is_streaming_access_denied_error + from botocore.exceptions import ClientError + exc = ClientError( + error_response={ + "Error": { + "Code": "ValidationException", + "Message": "InvokeModelWithResponseStream input malformed", + } + }, + operation_name="ConverseStream", + ) + assert is_streaming_access_denied_error(exc) is False + + def test_matches_wrapped_sdk_permission_error(self): + """Non-ClientError wrappers (AnthropicBedrock SDK) match on message.""" + from agent.bedrock_adapter import is_streaming_access_denied_error + exc = RuntimeError( + "PermissionDeniedError: user is not authorized to perform: " + "bedrock:InvokeModelWithResponseStream" + ) + assert is_streaming_access_denied_error(exc) is True + + def 
test_ignores_unrelated_errors(self): + from agent.bedrock_adapter import is_streaming_access_denied_error + assert is_streaming_access_denied_error(ValueError("boom")) is False + assert is_streaming_access_denied_error( + RuntimeError("stream not supported") + ) is False + + +class TestCallConverseStreamIamFallback: + """call_converse_stream() falls back to converse() when IAM denies the + streaming action — InvokeModel-only policies keep working.""" + + def test_falls_back_to_converse_on_streaming_denial(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + call_converse_stream, + reset_client_cache, + ) + from botocore.exceptions import ClientError + + reset_client_cache() + client = MagicMock() + client.converse_stream.side_effect = ClientError( + error_response={ + "Error": { + "Code": "AccessDeniedException", + "Message": ( + "User is not authorized to perform: " + "bedrock:InvokeModelWithResponseStream" + ), + } + }, + operation_name="ConverseStream", + ) + client.converse.return_value = { + "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}}, + "stopReason": "end_turn", + "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2}, + } + _bedrock_runtime_client_cache["us-east-1"] = client + + result = call_converse_stream( + region="us-east-1", + model="anthropic.claude-3-sonnet-20240229-v1:0", + messages=[{"role": "user", "content": "hi"}], + ) + + client.converse.assert_called_once() + assert result.choices[0].message.content == "hi" + # Not a stale connection — client stays cached. 
+ assert _bedrock_runtime_client_cache.get("us-east-1") is client diff --git a/tests/agent/test_coding_context.py b/tests/agent/test_coding_context.py new file mode 100644 index 00000000000..ab88e391ad1 --- /dev/null +++ b/tests/agent/test_coding_context.py @@ -0,0 +1,411 @@ +"""Tests for agent.coding_context — RuntimeMode seam, resolver, toolset, git probe.""" + +import json +import os +import subprocess +from pathlib import Path + +import pytest + +from agent import coding_context as cc + + +def _git_init(path): + """Create a git repo at *path* with a single empty commit on ``main``.""" + env = { + "GIT_AUTHOR_NAME": "t", "GIT_AUTHOR_EMAIL": "t@t", + "GIT_COMMITTER_NAME": "t", "GIT_COMMITTER_EMAIL": "t@t", + # Keep the environment minimal, but forward PATH: without it git is + # only looked up on the interpreter's default search path. + "HOME": str(path), + "PATH": os.environ.get("PATH", os.defpath), + } + for args in ( + ["init", "-q", "-b", "main"], + ["commit", "-q", "--allow-empty", "-m", "init commit"], + ): + subprocess.run(["git", "-C", str(path), *args], check=True, env=env) + + +# ── resolver ────────────────────────────────────────────────────────────── + +class TestIsCodingContext: + def test_off_never_activates(self, tmp_path): + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "off"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is False + + def test_on_forces_even_without_git(self, tmp_path): + cfg = {"agent": {"coding_context": "on"}} + assert cc.is_coding_context(platform="telegram", cwd=tmp_path, config=cfg) is True + + def test_auto_requires_git_repo(self, tmp_path): + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is False + _git_init(tmp_path) + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_auto_skips_messaging_surfaces(self, tmp_path): + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="discord", cwd=tmp_path, config=cfg) is False + assert cc.is_coding_context(platform="tui", cwd=tmp_path, config=cfg) is True + + def test_default_mode_is_auto(self, tmp_path): + # Unknown/missing value normalizes to
auto. + _git_init(tmp_path) + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config={}) is True + + +# ── toolset substitution ──────────────────────────────────────────────────── + +class TestCodingSelection: + def test_selects_coding_under_focus(self, tmp_path): + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "focus"}} + out = cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) + assert out is not None + assert out[0] == cc.CODING_TOOLSET + + def test_auto_is_prompt_only(self, tmp_path): + # Default posture must never override the user's configured toolsets — + # off-by-default toolsets are already off, and explicit opt-ins + # (image-gen, spotify, …) survive entering a code workspace. + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + # …while the prompt posture is still active. + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_on_is_prompt_only(self, tmp_path): + cfg = {"agent": {"coding_context": "on"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_focus_requires_workspace(self, tmp_path): + # focus inherits auto's detection gate — bare dir stays general. + cfg = {"agent": {"coding_context": "focus"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + + def test_none_when_inactive(self, tmp_path): + cfg = {"agent": {"coding_context": "off"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + + def test_coding_toolset_is_registered(self): + from toolsets import resolve_toolset + + tools = resolve_toolset(cc.CODING_TOOLSET) + # Coding essentials present… + for t in ("read_file", "write_file", "patch", "search_files", "terminal", "todo"): + assert t in tools + # …and the noise is gone. 
+ for t in ("send_message", "text_to_speech", "image_generate", "computer_use"): + assert t not in tools + + +# ── git/workspace probe ───────────────────────────────────────────────────── + +class TestWorkspaceBlock: + def test_empty_outside_repo(self, tmp_path): + assert cc.build_coding_workspace_block(tmp_path) == "" + + def test_reports_branch_and_clean_status(self, tmp_path): + _git_init(tmp_path) + block = cc.build_coding_workspace_block(tmp_path) + assert "Workspace" in block + assert f"Root: {tmp_path.resolve()}" in block or "Root:" in block + assert "Branch: main" in block + assert "Status: clean" in block + assert "init commit" in block + + def test_reports_dirty_counts(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "untracked.txt").write_text("hi") + block = cc.build_coding_workspace_block(tmp_path) + assert "untracked" in block + assert "clean" not in block.split("Status:")[1].splitlines()[0] + + +# ── project facts (verify-loop detection) ─────────────────────────────────── + +class TestProjectFacts: + def test_package_json_scripts_surface_verify_commands(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "package.json").write_text( + json.dumps({"scripts": {"test": "vitest", "lint": "eslint .", "dev": "vite"}}) + ) + (tmp_path / "pnpm-lock.yaml").write_text("") + block = cc.build_coding_workspace_block(tmp_path) + assert "Project: package.json (pnpm)" in block + assert "pnpm run test" in block and "pnpm run lint" in block + # Non-verify scripts (dev servers, …) stay out of the snapshot. 
+ assert "run dev" not in block + + def test_pytest_config_and_run_tests_script(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "pyproject.toml").write_text("[tool.pytest.ini_options]\n") + scripts = tmp_path / "scripts" + scripts.mkdir() + (scripts / "run_tests.sh").write_text("#!/bin/sh\n") + block = cc.build_coding_workspace_block(tmp_path) + assert "scripts/run_tests.sh" in block + assert "pytest" in block.split("Verify:")[1] + + def test_makefile_verify_targets_only(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "Makefile").write_text("test:\n\tgo test ./...\n\ndeploy:\n\t./deploy.sh\n") + block = cc.build_coding_workspace_block(tmp_path) + assert "make test" in block + assert "make deploy" not in block + + def test_context_files_listed(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "AGENTS.md").write_text("# rules") + block = cc.build_coding_workspace_block(tmp_path) + assert "Context files: AGENTS.md" in block + + def test_marker_only_project_gets_snapshot_without_git(self, tmp_path): + # A non-git project (manifest only) still gets a workspace snapshot — + # just without the git lines. 
+ (tmp_path / "package.json").write_text("{}") + block = cc.build_coding_workspace_block(tmp_path) + assert f"Root: {tmp_path.resolve()}" in block + assert "package.json" in block + assert "Branch:" not in block and "Status:" not in block + + def test_malformed_package_json_is_ignored(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "package.json").write_text("{not json") + block = cc.build_coding_workspace_block(tmp_path) + assert "Project: package.json" in block + assert "Verify:" not in block + + +# ── $HOME dotfiles guard ──────────────────────────────────────────────────── + +class TestHomeDotfilesGuard: + def test_dotfiles_repo_at_home_is_not_coding(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + _git_init(home) + monkeypatch.setattr(Path, "home", lambda: home) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=home, config=cfg) is False + # …and a plain subdirectory of the dotfiles repo stays general too. + docs = home / "Documents" + docs.mkdir() + assert cc.is_coding_context(platform="cli", cwd=docs, config=cfg) is False + + def test_marker_at_home_is_not_a_project_signal(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + (home / "Makefile").write_text("all:\n") + monkeypatch.setattr(Path, "home", lambda: home) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=home, config=cfg) is False + + def test_real_project_under_dotfiles_home_still_detects(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + _git_init(home) + monkeypatch.setattr(Path, "home", lambda: home) + proj = home / "www" / "app" + proj.mkdir(parents=True) + (proj / "package.json").write_text("{}") + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=proj, config=cfg) is True + + def test_on_mode_bypasses_the_guard(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + 
monkeypatch.setattr(Path, "home", lambda: home) + cfg = {"agent": {"coding_context": "on"}} + assert cc.is_coding_context(platform="cli", cwd=home, config=cfg) is True + + +# ── prompt assembly integration ───────────────────────────────────────────── + +class TestStatusParsing: + def test_parse_status_counts_and_branch(self): + porcelain = ( + "# branch.head feature\n" + "# branch.upstream origin/feature\n" + "# branch.ab +2 -1\n" + "1 M. N... 100644 100644 100644 aaa bbb staged.py\n" + "1 .M N... 100644 100644 100644 ccc ddd modified.py\n" + "? new.py\n" + "u UU N... 1 2 3 abc def conflict.py\n" + ) + branch, counts = cc._parse_status(porcelain) + assert branch["head"] == "feature" + assert branch["upstream"] == "origin/feature" + assert branch["ahead"] == "2" and branch["behind"] == "1" + assert counts["staged"] == 1 + assert counts["modified"] == 1 + assert counts["untracked"] == 1 + assert counts["conflicts"] == 1 + + +# ── RuntimeMode seam ──────────────────────────────────────────────────────── + +class TestRuntimeMode: + def test_resolves_coding_in_repo(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + assert mode.is_coding is True + assert mode.kind == "coding" + assert mode.profile is cc.CODING_PROFILE + + def test_resolves_general_outside_workspace(self, tmp_path): + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + assert mode.is_coding is False + assert mode.kind == "general" + # General posture pins no toolset and injects no blocks. 
+ assert mode.toolset_selection() is None + assert mode.system_blocks() == [] + + def test_is_frozen(self, tmp_path): + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + with pytest.raises(Exception): + mode.profile = cc.CODING_PROFILE # type: ignore[misc] + + def test_system_blocks_include_brief_and_workspace(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "on"}}) + blocks = mode.system_blocks() + assert any("coding agent" in b for b in blocks) + assert any("Workspace" in b for b in blocks) + + def test_toolset_selection_gated_on_focus(self, tmp_path): + _git_init(tmp_path) + focus = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "focus"}}) + sel = focus.toolset_selection() + assert sel and sel[0] == cc.CODING_TOOLSET + # auto/on resolve the coding profile but stay prompt-only. + for raw in ("auto", "on"): + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={"agent": {"coding_context": raw}}) + assert mode.is_coding is True + assert mode.toolset_selection() is None + + +# ── edit-format steering (per-model harness tuning) ────────────────────────── + +class TestEditFormatSteering: + def test_family_detection(self): + assert cc._model_family("openai/gpt-5.4") == "patch" + assert cc._model_family("openai/codex-mini") == "patch" + assert cc._model_family("anthropic/claude-opus-4.8") == "replace" + assert cc._model_family("anthropic/claude-sonnet-4") == "replace" + # Gemini + open-weight coding models (RL'd on str_replace-style + # editors) steer to replace, not neutral. + for m in ( + "google/gemini-3-pro", "deepseek-v3.2", "qwen3-coder", + "moonshot/kimi-k2", "zai/glm-4.6", "nousresearch/hermes-4-405b", + ): + assert cc._model_family(m) == "replace" + # Unknown family and no model both fall through to neutral wording. 
+ assert cc._model_family("acme/foo-1") is None + assert cc._model_family(None) is None + assert cc._model_family("") is None + + def test_openai_family_gets_v4a_nudge(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, model="openai/gpt-5.4", + ) + brief = mode.system_blocks()[0] + assert "mode='patch'" in brief + assert "V4A" in brief + assert "write_file" in brief # new files authored, not patched + + def test_anthropic_family_gets_replace_nudge(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, + model="anthropic/claude-opus-4.8", + ) + brief = mode.system_blocks()[0] + assert "mode='replace'" in brief + assert "write_file" in brief # new files authored, not patched + + def test_unknown_model_keeps_neutral_brief(self, tmp_path): + # No edit-format line appended — brief equals the bare profile guidance. + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, model="acme/foo-1", + ) + assert mode.system_blocks()[0] == cc.CODING_AGENT_GUIDANCE + + def test_no_model_keeps_neutral_brief(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, + ) + assert mode.system_blocks()[0] == cc.CODING_AGENT_GUIDANCE + + def test_general_posture_emits_nothing_regardless_of_model(self, tmp_path): + # Edit steering only fires inside the coding posture. 
+ mode = cc.resolve_runtime_mode( + platform="telegram", cwd=tmp_path, config={}, model="openai/gpt-5.4", + ) + assert mode.system_blocks() == [] + + +# ── profile registry ──────────────────────────────────────────────────────── + +class TestProfiles: + def test_registered_profiles(self): + assert cc.get_profile("coding") is cc.CODING_PROFILE + assert cc.get_profile("general") is cc.GENERAL_PROFILE + + def test_unknown_profile_falls_back_to_general(self): + assert cc.get_profile("nonsense") is cc.GENERAL_PROFILE + + def test_coding_profile_shape(self): + # The coding profile declares the seams other domains read. + assert cc.CODING_PROFILE.toolset == cc.CODING_TOOLSET + assert cc.CODING_PROFILE.guidance + assert cc.CODING_PROFILE.model_hint == "coding" + # General is inert. + assert cc.GENERAL_PROFILE.toolset is None + assert cc.GENERAL_PROFILE.guidance == "" + + def test_skill_pruning_scoped_to_coding_posture(self, tmp_path): + # Coding posture hides clearly-non-coding categories; coding-adjacent + # ones stay visible (deny-list semantics). + _git_init(tmp_path) + coding = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + hidden = coding.hidden_skill_categories() + assert "social-media" in hidden and "smart-home" in hidden + for kept in ("github", "devops", "software-development", "data-science"): + assert kept not in hidden + # General posture hides nothing. 
+ general = cc.resolve_runtime_mode( + platform="telegram", cwd=tmp_path, config={} + ) + assert general.hidden_skill_categories() == frozenset() + + +# ── detection signals ─────────────────────────────────────────────────────── + +class TestDetection: + @pytest.mark.parametrize("marker", ["pyproject.toml", "package.json", "go.mod", "AGENTS.md"]) + def test_project_manifest_triggers_without_git(self, tmp_path, marker): + (tmp_path / marker).write_text("x") + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_marker_in_parent_counts_from_subdir(self, tmp_path): + (tmp_path / "pyproject.toml").write_text("x") + sub = tmp_path / "src" / "pkg" + sub.mkdir(parents=True) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=sub, config=cfg) is True + + def test_bare_dir_is_not_coding(self, tmp_path): + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is False diff --git a/tests/agent/test_display.py b/tests/agent/test_display.py index 994aae28648..0203e38b3cd 100644 --- a/tests/agent/test_display.py +++ b/tests/agent/test_display.py @@ -12,6 +12,7 @@ from agent.display import ( set_tool_preview_max_len, _render_inline_unified_diff, _summarize_rendered_diff_sections, + _used_free_parallel, render_edit_diff_with_delta, ) @@ -171,6 +172,46 @@ class TestCuteToolMessagePreviewLength: assert "[error]" not in line +class TestWebProviderLabel: + """The free-path "Parallel search"/"Parallel fetch" verb labeling.""" + + def test_free_search_verb_is_parallel(self): + result = json.dumps({"success": True, "data": {"web": []}, "provider": "parallel"}) + line = get_cute_tool_message("web_search", {"query": "hello"}, 0.1, result=result) + assert "Parallel search" in line + assert "hello" in line + + def test_paid_search_verb_is_plain(self): + result = json.dumps({"success": True, "data": 
{"web": [{"url": "u"}]}}) + line = get_cute_tool_message("web_search", {"query": "hi"}, 0.1, result=result) + assert "Parallel" not in line + assert "search" in line + + def test_missing_result_verb_is_plain(self): + line = get_cute_tool_message("web_search", {"query": "hello"}, 0.1) + assert "Parallel" not in line + assert "search" in line + + def test_helper_is_parallel_free_specific(self): + # Only Parallel's free MCP path marks results; nothing else does. + assert _used_free_parallel(json.dumps({"provider": "parallel"})) is True + assert _used_free_parallel(json.dumps({"provider": "exa"})) is False + assert _used_free_parallel(json.dumps({"provider": "firecrawl"})) is False + assert _used_free_parallel(json.dumps({"success": True, "data": {}})) is False + assert _used_free_parallel('not json') is False + assert _used_free_parallel(None) is False + + def test_free_extract_verb_is_parallel(self): + result = json.dumps({"results": [{"url": "u", "content": "x"}], "provider": "parallel"}) + line = get_cute_tool_message("web_extract", {"urls": ["https://a.test"]}, 0.1, result=result) + assert "Parallel fetch" in line + + def test_paid_extract_verb_is_plain(self): + result = json.dumps({"results": [{"url": "u", "content": "x"}]}) + line = get_cute_tool_message("web_extract", {"urls": ["https://a.test"]}, 0.1, result=result) + assert "Parallel" not in line + + class TestEditDiffPreview: def test_extract_edit_diff_for_patch(self): diff = extract_edit_diff("patch", '{"success": true, "diff": "--- a/x\\n+++ b/x\\n"}') diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index ab6f27d6965..9708d7aadc3 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -661,6 +661,42 @@ class TestClassifyApiError: # Without "thinking" in the message, it shouldn't be thinking_signature assert result.reason != FailoverReason.thinking_signature + def test_anthropic_thinking_blocks_cannot_be_modified(self): + 
"""Frozen-block mutation 400 (no 'signature' token) must route to + thinking_signature recovery, not hard-abort. Regression for the + real-world error: latest-assistant thinking blocks 'cannot be + modified' after upstream message mutation.""" + e = MockAPIError( + "messages.73.content.10: `thinking` or `redacted_thinking` blocks " + "in the latest assistant message cannot be modified. These blocks " + "must remain as they were in the original response.", + status_code=400, + ) + result = classify_api_error(e, provider="anthropic") + assert result.reason == FailoverReason.thinking_signature + assert result.retryable is True + + def test_anthropic_thinking_cannot_be_modified_via_openrouter(self): + """Same frozen-block error proxied through OpenRouter must also be + caught (provider is not gated).""" + e = MockAPIError( + "`thinking` or `redacted_thinking` blocks in the latest assistant " + "message cannot be modified.", + status_code=400, + ) + result = classify_api_error(e, provider="openrouter") + assert result.reason == FailoverReason.thinking_signature + assert result.retryable is True + + def test_400_cannot_be_modified_without_thinking_not_classified(self): + """A 400 'cannot be modified' that has nothing to do with thinking + blocks must NOT be swept into thinking_signature recovery.""" + e = MockAPIError( + "this field cannot be modified after creation", status_code=400, + ) + result = classify_api_error(e, provider="anthropic", approx_tokens=0) + assert result.reason != FailoverReason.thinking_signature + def test_invalid_encrypted_content_classified_as_retryable_replay_failure(self): body = { "error": { diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 651256a148e..def7c6b4120 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -276,6 +276,42 @@ class TestBuildSkillsSystemPrompt: # "search" should appear only once per category assert result.count("- search") == 1 + def 
test_hidden_categories_pruned_with_note(self, monkeypatch, tmp_path): + """Posture-driven pruning drops whole categories and discloses it.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + for cat, name in (("social-media", "tweet-stuff"), ("github", "pr-review")): + d = tmp_path / "skills" / cat / name + d.mkdir(parents=True) + (d / "SKILL.md").write_text( + f"---\nname: {name}\ndescription: Does {name} things\n---\n" + ) + + result = build_skills_system_prompt( + hidden_categories=frozenset({"social-media"}) + ) + assert "pr-review" in result + assert "tweet-stuff" not in result + # Disclosure note so the model knows the full catalog exists. + assert "skills_list" in result + + def test_hidden_categories_prune_nested_and_miss_cache_separately( + self, monkeypatch, tmp_path + ): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + d = tmp_path / "skills" / "social-media" / "twitter" / "thread-writer" + d.mkdir(parents=True) + (d / "SKILL.md").write_text( + "---\nname: thread-writer\ndescription: Write threads\n---\n" + ) + # Nested category ("social-media/twitter") pruned via its parent. + pruned = build_skills_system_prompt( + hidden_categories=frozenset({"social-media"}) + ) + assert "thread-writer" not in pruned + # Unfiltered call must not be served from the filtered cache entry. + full = build_skills_system_prompt() + assert "thread-writer" in full + def test_excludes_incompatible_platform_skills(self, monkeypatch, tmp_path): """Skills with platforms: [macos] should not appear on Linux.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) diff --git a/tests/agent/test_stream_read_timeout_floor.py b/tests/agent/test_stream_read_timeout_floor.py new file mode 100644 index 00000000000..0949866a046 --- /dev/null +++ b/tests/agent/test_stream_read_timeout_floor.py @@ -0,0 +1,111 @@ +"""Stream read timeout must never preempt the stale-stream detector. + +Reasoning models (e.g. Opus) routinely pause mid-stream for minutes during +extended thinking. 
The stale-stream detector is deliberately scaled up to +tolerate this (180s base, raised to 240s/300s for large contexts). The httpx +socket read timeout, however, defaulted to a flat 120s for cloud providers and +fired *first* — tearing down a healthy reasoning stream before the stale +detector (which owns retry + diagnostics) could act. + +These tests pin the invariant: for a cloud provider on the default read +timeout, the httpx socket read timeout is floored at the stale-stream timeout +so it can never fire before the detector. They mirror the inline logic in +``agent/chat_completion_helpers.py`` (the real builder lives deep inside a +worker thread, so — like ``test_local_stream_timeout.py`` — the resolution is +reproduced here rather than driven end-to-end). +""" + +import os + +import pytest + +from agent.model_metadata import is_local_endpoint + + +def _resolve_stale_timeout(base_url, est_tokens, stale_base=180.0): + """Mirror of the stale-stream detector resolution.""" + if stale_base == 180.0 and base_url and is_local_endpoint(base_url): + return float("inf") # detector disabled for local providers + if est_tokens > 100_000: + return max(stale_base, 300.0) + if est_tokens > 50_000: + return max(stale_base, 240.0) + return stale_base + + +def _resolve_read_timeout(base_url, stale_timeout, base_timeout=1800.0): + """Mirror of the httpx socket read-timeout builder (cloud branch).""" + read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) + if read_timeout == 120.0 and base_url and is_local_endpoint(base_url): + read_timeout = base_timeout + elif ( + read_timeout == 120.0 + and stale_timeout is not None + and stale_timeout != float("inf") + and stale_timeout > read_timeout + ): + read_timeout = stale_timeout + return read_timeout + + +CLOUD_URLS = [ + "https://api.githubcopilot.com", + "https://api.openai.com", + "https://openrouter.ai/api", + "https://api.anthropic.com", +] + + +class TestCloudReadTimeoutFloor: + @pytest.fixture(autouse=True) + 
def _clear_env(self): + with pytest.MonkeyPatch.context() as mp: + mp.delenv("HERMES_STREAM_READ_TIMEOUT", raising=False) + yield + + @pytest.mark.parametrize("base_url", CLOUD_URLS) + @pytest.mark.parametrize("est_tokens", [0, 10_000, 60_000, 150_000]) + def test_read_timeout_never_below_stale(self, base_url, est_tokens): + """Core invariant: the socket read timeout >= the stale detector.""" + stale = _resolve_stale_timeout(base_url, est_tokens) + read = _resolve_read_timeout(base_url, stale) + assert read >= stale + + @pytest.mark.parametrize("base_url", CLOUD_URLS) + def test_small_context_floored_to_stale_base(self, base_url): + """Reported case: ~120s timeouts on Copilot are raised to the 180s base.""" + stale = _resolve_stale_timeout(base_url, est_tokens=37_000) + read = _resolve_read_timeout(base_url, stale) + assert read == 180.0 + + @pytest.mark.parametrize("base_url", CLOUD_URLS) + def test_large_context_tracks_scaled_stale(self, base_url): + """Big contexts scale the stale detector; the read timeout follows.""" + assert _resolve_read_timeout(base_url, _resolve_stale_timeout(base_url, 60_000)) == 240.0 + assert _resolve_read_timeout(base_url, _resolve_stale_timeout(base_url, 150_000)) == 300.0 + + def test_user_override_is_respected(self): + """An explicit HERMES_STREAM_READ_TIMEOUT is never overridden by the floor.""" + with pytest.MonkeyPatch.context() as mp: + mp.setenv("HERMES_STREAM_READ_TIMEOUT", "90") + stale = _resolve_stale_timeout("https://api.githubcopilot.com", est_tokens=0) + assert _resolve_read_timeout("https://api.githubcopilot.com", stale) == 90.0 + + +class TestLocalUnaffected: + @pytest.fixture(autouse=True) + def _clear_env(self): + with pytest.MonkeyPatch.context() as mp: + mp.delenv("HERMES_STREAM_READ_TIMEOUT", raising=False) + yield + + def test_local_still_raised_to_base(self): + """Local providers keep their existing behavior (raise to base timeout).""" + stale = _resolve_stale_timeout("http://localhost:11434", est_tokens=0) + 
assert stale == float("inf") # detector disabled for local + read = _resolve_read_timeout("http://localhost:11434", stale) + assert read == 1800.0 # not clamped by inf + + def test_stale_none_falls_back_to_default(self): + """If the stale value is unresolved, the read timeout keeps its default.""" + assert _resolve_read_timeout("https://api.githubcopilot.com", None) == 120.0 diff --git a/tests/agent/test_system_prompt.py b/tests/agent/test_system_prompt.py index 75bf28b54d8..b9e1439e1a9 100644 --- a/tests/agent/test_system_prompt.py +++ b/tests/agent/test_system_prompt.py @@ -55,3 +55,44 @@ class TestContextFileCwd: def test_configured_dir_when_terminal_cwd_set(self, monkeypatch, tmp_path): monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) assert _captured_context_cwd(_make_agent()) == tmp_path + + +def _stable_prompt(agent): + with ( + patch("run_agent.load_soul_md", return_value=""), + patch("run_agent.build_nous_subscription_prompt", return_value=""), + patch("run_agent.build_environment_hints", return_value=""), + patch("run_agent.build_context_files_prompt", return_value=""), + ): + return build_system_prompt_parts(agent)["stable"] + + +class TestCodingContextBlock: + def test_injected_when_active(self, monkeypatch, tmp_path): + import subprocess + + subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True) + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + agent = _make_agent(valid_tool_names=["read_file"], platform="cli") + stable = _stable_prompt(agent) + assert "coding agent" in stable + assert "Workspace" in stable + + def test_absent_when_off(self, monkeypatch, tmp_path): + import subprocess + + subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True) + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + agent = _make_agent(valid_tool_names=["read_file"], platform="cli") + # Drive the real path: force the resolved mode to "off" via config. 
+ with patch("agent.coding_context._coding_mode", return_value="off"): + stable = _stable_prompt(agent) + assert "coding agent" not in stable + + def test_absent_without_tools(self, monkeypatch, tmp_path): + import subprocess + + subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True) + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + agent = _make_agent(valid_tool_names=[], platform="cli") + assert "coding agent" not in _stable_prompt(agent) diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py index 47a78c57044..c6a131a5131 100644 --- a/tests/cli/test_cli_status_bar.py +++ b/tests/cli/test_cli_status_bar.py @@ -676,3 +676,54 @@ class TestStatusBarWidthSource: mock_get_app.assert_not_called() mock_shutil.assert_not_called() assert len(text) > 0 + + +class TestIdleSinceLastTurn: + """Time-since-last-final-agent-response read-out on the status bar.""" + + def test_hidden_before_first_turn(self): + assert HermesCLI._format_idle_since(None, turn_live=False) == "" + + def test_hidden_while_turn_is_live(self): + assert HermesCLI._format_idle_since(time.time() - 30, turn_live=True) == "" + + def test_shows_compact_idle_time_after_turn(self): + label = HermesCLI._format_idle_since(time.time() - 42, turn_live=False) + assert label.startswith("✓ ") + assert label == "✓ 42s" + + def test_scales_to_minutes(self): + label = HermesCLI._format_idle_since(time.time() - 3 * 60, turn_live=False) + assert label == "✓ 3m" + + def test_snapshot_carries_idle_since(self): + cli_obj = _make_cli() + cli_obj._last_turn_finished_at = time.time() - 10 + cli_obj._prompt_start_time = None + cli_obj._prompt_duration = 5.0 + snapshot = cli_obj._get_status_bar_snapshot() + assert snapshot["idle_since"].startswith("✓ ") + + def test_snapshot_idle_empty_during_live_turn(self): + cli_obj = _make_cli() + cli_obj._last_turn_finished_at = time.time() - 10 + cli_obj._prompt_start_time = time.time() + cli_obj._prompt_duration = 0.0 + snapshot = 
cli_obj._get_status_bar_snapshot() + assert snapshot["idle_since"] == "" + + def test_wide_status_bar_text_includes_idle(self): + cli_obj = _attach_agent( + _make_cli(), + prompt_tokens=10_230, + completion_tokens=2_220, + total_tokens=12_450, + api_calls=7, + context_tokens=12_450, + context_length=200_000, + ) + cli_obj._last_turn_finished_at = time.time() - 42 + cli_obj._prompt_start_time = None + cli_obj._prompt_duration = 7.0 + text = cli_obj._build_status_bar_text(width=160) + assert "✓ 42s" in text diff --git a/tests/cron/test_cron_profile.py b/tests/cron/test_cron_profile.py deleted file mode 100644 index 677082e2410..00000000000 --- a/tests/cron/test_cron_profile.py +++ /dev/null @@ -1,449 +0,0 @@ -"""Tests for per-job profile support in cron jobs. - -Covers data-layer validation/storage, cronjob tool plumbing, scheduler runtime -HERMES_HOME scoping, and tick() serialization for profile jobs. -""" - -from __future__ import annotations - -import json -import os - -import pytest - - -@pytest.fixture() -def isolated_cron_profile_home(tmp_path, monkeypatch): - """Create an isolated Hermes root with a named profile and temp cron store.""" - root = tmp_path / "hermes-root" - profile_home = root / "profiles" / "support" - profile_home.mkdir(parents=True) - (root / "cron").mkdir(parents=True) - - monkeypatch.setenv("HERMES_HOME", str(root)) - monkeypatch.setattr("cron.jobs.CRON_DIR", root / "cron") - monkeypatch.setattr("cron.jobs.JOBS_FILE", root / "cron" / "jobs.json") - monkeypatch.setattr("cron.jobs.OUTPUT_DIR", root / "cron" / "output") - - return root, profile_home - - -class TestNormalizeProfile: - def test_none_and_empty_return_none(self, isolated_cron_profile_home): - from cron.jobs import _normalize_profile - - assert _normalize_profile(None) is None - assert _normalize_profile("") is None - assert _normalize_profile(" ") is None - - def test_default_profile_is_valid_and_normalized(self, isolated_cron_profile_home): - from cron.jobs import 
_normalize_profile - - assert _normalize_profile("Default") == "default" - - def test_named_profile_must_exist_and_is_normalized(self, isolated_cron_profile_home): - from cron.jobs import _normalize_profile - - assert _normalize_profile("Support") == "support" - - def test_invalid_profile_name_is_rejected(self, isolated_cron_profile_home): - from cron.jobs import _normalize_profile - - with pytest.raises(ValueError): - _normalize_profile("invalid!") - - def test_missing_named_profile_is_rejected(self, isolated_cron_profile_home): - from cron.jobs import _normalize_profile - - with pytest.raises(FileNotFoundError): - _normalize_profile("missing") - - -class TestCreateAndUpdateJobProfile: - def test_create_stores_profile_id(self, isolated_cron_profile_home): - from cron.jobs import create_job, get_job - - job = create_job(prompt="hello", schedule="every 1h", profile="Support") - stored = get_job(job["id"]) - - assert stored is not None - assert stored["profile"] == "support" - - def test_create_without_profile_preserves_old_behaviour(self, isolated_cron_profile_home): - from cron.jobs import create_job, get_job - - job = create_job(prompt="hello", schedule="every 1h") - stored = get_job(job["id"]) - - assert stored is not None - assert stored.get("profile") is None - - def test_create_accepts_explicit_default(self, isolated_cron_profile_home): - from cron.jobs import create_job, get_job - - job = create_job(prompt="hello", schedule="every 1h", profile="default") - stored = get_job(job["id"]) - - assert stored is not None - assert stored["profile"] == "default" - - def test_update_sets_and_clears_profile(self, isolated_cron_profile_home): - from cron.jobs import create_job, get_job, update_job - - job = create_job(prompt="x", schedule="every 1h") - update_job(job["id"], {"profile": "Support"}) - stored = get_job(job["id"]) - assert stored is not None - assert stored["profile"] == "support" - - update_job(job["id"], {"profile": ""}) - stored = get_job(job["id"]) - 
assert stored is not None - assert stored["profile"] is None - - def test_update_rejects_missing_profile(self, isolated_cron_profile_home): - from cron.jobs import create_job, update_job - - job = create_job(prompt="x", schedule="every 1h") - with pytest.raises(FileNotFoundError): - update_job(job["id"], {"profile": "missing"}) - - -class TestCronjobToolProfile: - def test_create_and_list_with_profile(self, isolated_cron_profile_home): - from tools.cronjob_tools import cronjob - - created = json.loads( - cronjob( - action="create", - prompt="hi", - schedule="every 1h", - profile="Support", - ) - ) - assert created["success"] is True - assert created["job"]["profile"] == "support" - - listing = json.loads(cronjob(action="list")) - assert listing["jobs"][0]["profile"] == "support" - - def test_update_clears_profile_with_empty_string(self, isolated_cron_profile_home): - from tools.cronjob_tools import cronjob - - created = json.loads( - cronjob( - action="create", - prompt="hi", - schedule="every 1h", - profile="Support", - ) - ) - updated = json.loads( - cronjob(action="update", job_id=created["job_id"], profile="") - ) - - assert updated["success"] is True - assert "profile" not in updated["job"] - - def test_schema_advertises_profile(self): - from tools.cronjob_tools import CRONJOB_SCHEMA - - assert "profile" in CRONJOB_SCHEMA["parameters"]["properties"] - desc = CRONJOB_SCHEMA["parameters"]["properties"]["profile"]["description"] - desc_lower = desc.lower() - assert "hermes profile" in desc_lower - assert "context-local" in desc_lower - assert "subprocess" in desc_lower - assert "temporarily sets hermes_home" not in desc_lower - - -class TestRunJobProfileContext: - @staticmethod - def _install_agent_stubs(monkeypatch, observed: dict): - import sys - import cron.scheduler as sched - - class FakeAgent: - def __init__(self, **kwargs): - from hermes_constants import get_hermes_home - - observed["env_home_during_init"] = os.environ.get("HERMES_HOME") - 
observed["profile_env_only_during_init"] = os.environ.get( - "HERMES_PROFILE_TEST_ONLY" - ) - observed["profile_env_shared_during_init"] = os.environ.get( - "HERMES_PROFILE_TEST_SHARED" - ) - observed["hermes_home_during_init"] = str(get_hermes_home()) - observed["scheduler_home_during_init"] = str(sched._get_hermes_home()) - observed["skip_context_files"] = kwargs.get("skip_context_files") - - def run_conversation(self, *_a, **_kw): - from hermes_constants import get_hermes_home - - observed["env_home_during_run"] = os.environ.get("HERMES_HOME") - observed["profile_env_only_during_run"] = os.environ.get( - "HERMES_PROFILE_TEST_ONLY" - ) - observed["profile_env_shared_during_run"] = os.environ.get( - "HERMES_PROFILE_TEST_SHARED" - ) - observed["hermes_home_during_run"] = str(get_hermes_home()) - observed["scheduler_home_during_run"] = str(sched._get_hermes_home()) - return {"final_response": "done", "messages": []} - - def get_activity_summary(self): - return {"seconds_since_activity": 0.0} - - def close(self): - observed["closed"] = True - - fake_mod = type(sys)("run_agent") - fake_mod.AIAgent = FakeAgent - monkeypatch.setitem(sys.modules, "run_agent", fake_mod) - - from hermes_cli import runtime_provider as runtime_provider - - monkeypatch.setattr( - runtime_provider, - "resolve_runtime_provider", - lambda **_kw: { - "provider": "test", - "api_key": "test-key", - "base_url": "http://test.local", - "api_mode": "chat_completions", - }, - ) - - monkeypatch.setattr(sched, "_build_job_prompt", lambda job, prerun_script=None: "hi") - monkeypatch.setattr(sched, "_resolve_origin", lambda job: None) - monkeypatch.setattr(sched, "_resolve_delivery_target", lambda job: None) - monkeypatch.setattr(sched, "_resolve_cron_enabled_toolsets", lambda job, cfg: None) - monkeypatch.setattr(sched, "_hermes_home", None) - monkeypatch.setenv("HERMES_CRON_TIMEOUT", "0") - - import dotenv - - def fake_load_dotenv(path, *_a, **_kw): - observed.setdefault("dotenv_paths", 
[]).append(str(path)) - return True - - monkeypatch.setattr(dotenv, "load_dotenv", fake_load_dotenv) - - def test_run_job_sets_and_restores_profile_home( - self, isolated_cron_profile_home, monkeypatch - ): - import cron.scheduler as sched - - root, profile_home = isolated_cron_profile_home - observed: dict = {} - self._install_agent_stubs(monkeypatch, observed) - - job = { - "id": "abc", - "name": "profile-job", - "profile": "support", - "schedule_display": "manual", - } - - success, _output, response, error = sched.run_job(job) - - assert success is True, f"run_job failed: error={error!r} response={response!r}" - assert observed["dotenv_paths"] == [str(profile_home / ".env")] - assert observed["env_home_during_init"] == str(root) - assert observed["env_home_during_run"] == str(root) - assert observed["hermes_home_during_init"] == str(profile_home.resolve()) - assert observed["hermes_home_during_run"] == str(profile_home.resolve()) - assert observed["scheduler_home_during_init"] == str(profile_home.resolve()) - assert observed["scheduler_home_during_run"] == str(profile_home.resolve()) - assert observed["skip_context_files"] is True - assert os.environ["HERMES_HOME"] == str(root) - assert sched._get_hermes_home() == root - - def test_profile_dotenv_environment_is_restored( - self, isolated_cron_profile_home, monkeypatch - ): - import dotenv - import cron.scheduler as sched - - root, profile_home = isolated_cron_profile_home - observed: dict = {} - self._install_agent_stubs(monkeypatch, observed) - monkeypatch.setenv("HERMES_PROFILE_TEST_SHARED", "outer") - monkeypatch.delenv("HERMES_PROFILE_TEST_ONLY", raising=False) - - def fake_load_dotenv(path, *_a, **_kw): - observed.setdefault("dotenv_paths", []).append(str(path)) - os.environ["HERMES_PROFILE_TEST_SHARED"] = "profile-value" - os.environ["HERMES_PROFILE_TEST_ONLY"] = "profile-only" - os.environ["HERMES_CRON_TIMEOUT"] = "123" - return True - - monkeypatch.setattr(dotenv, "load_dotenv", fake_load_dotenv) - - job 
= { - "id": "env-profile", - "name": "profile-env-job", - "profile": "support", - "schedule_display": "manual", - } - - success, _output, _response, error = sched.run_job(job) - - assert success is True, error - assert observed["dotenv_paths"] == [str(profile_home / ".env")] - assert observed["profile_env_only_during_init"] == "profile-only" - assert observed["profile_env_shared_during_init"] == "profile-value" - assert observed["profile_env_only_during_run"] == "profile-only" - assert observed["profile_env_shared_during_run"] == "profile-value" - assert os.environ["HERMES_PROFILE_TEST_SHARED"] == "outer" - assert "HERMES_PROFILE_TEST_ONLY" not in os.environ - assert os.environ["HERMES_CRON_TIMEOUT"] == "0" - assert os.environ["HERMES_HOME"] == str(root) - assert sched._get_hermes_home() == root - - def test_no_agent_profile_uses_profile_scripts_dir_and_restores_env( - self, isolated_cron_profile_home, monkeypatch - ): - import cron.scheduler as sched - - root, profile_home = isolated_cron_profile_home - scripts_dir = profile_home / "scripts" - scripts_dir.mkdir(parents=True) - (scripts_dir / "print_home.py").write_text( - "import os\nprint(os.environ.get('HERMES_HOME', ''))\n", - encoding="utf-8", - ) - monkeypatch.setattr(sched, "_hermes_home", None) - - job = { - "id": "script1", - "name": "profile-script", - "profile": "support", - "script": "print_home.py", - "no_agent": True, - } - - success, _doc, response, error = sched.run_job(job) - - assert success is True, error - assert response.strip() == str(profile_home.resolve()) - assert os.environ["HERMES_HOME"] == str(root) - assert sched._get_hermes_home() == root - - def test_run_job_without_profile_leaves_hermes_home_untouched( - self, isolated_cron_profile_home, monkeypatch - ): - import cron.scheduler as sched - - root, _profile_home = isolated_cron_profile_home - observed: dict = {} - self._install_agent_stubs(monkeypatch, observed) - - job = { - "id": "noprof", - "name": "no-profile-job", - "profile": 
None, - "schedule_display": "manual", - } - - success, *_ = sched.run_job(job) - - assert success is True - assert observed["hermes_home_during_init"] == str(root) - assert os.environ["HERMES_HOME"] == str(root) - - def test_run_job_falls_back_on_missing_runtime_profile( - self, isolated_cron_profile_home, monkeypatch - ): - import cron.scheduler as sched - - root, _profile_home = isolated_cron_profile_home - observed: dict = {} - self._install_agent_stubs(monkeypatch, observed) - - job = { - "id": "missing-profile", - "name": "missing-profile-job", - "profile": "missing", - "schedule_display": "manual", - } - - # Should succeed with fallback, not raise - success, _output, response, error = sched.run_job(job) - - assert success is True, f"run_job should fallback, not fail: error={error!r}" - # Verify it used the default home, not the missing profile - assert observed["hermes_home_during_init"] == str(root) - assert os.environ["HERMES_HOME"] == str(root) - - -class TestTickProfilePartition: - def test_profile_and_workdir_combined(self, isolated_cron_profile_home, monkeypatch): - """Both profile and workdir set — verify both are applied and restored.""" - import cron.scheduler as sched - - root, profile_home = isolated_cron_profile_home - observed: dict = {} - TestRunJobProfileContext._install_agent_stubs(monkeypatch, observed) - fake_workdir = str(root / "myproject") - (root / "myproject").mkdir() - - job = { - "id": "combo", - "name": "combo-job", - "profile": "support", - "workdir": fake_workdir, - "schedule_display": "manual", - } - - success, _output, _response, error = sched.run_job(job) - - assert success is True, error - assert observed["hermes_home_during_init"] == str(profile_home.resolve()) - assert os.environ.get("TERMINAL_CWD", "") != fake_workdir, \ - "TERMINAL_CWD should be restored after job" - assert os.environ["HERMES_HOME"] == str(root) - assert sched._get_hermes_home() == root - - def test_profile_jobs_run_sequentially(self, 
isolated_cron_profile_home, monkeypatch): - import threading - import cron.scheduler as sched - - # Two profile jobs (both sequential) + one parallel job. - profile_a = {"id": "a", "name": "A", "profile": "default"} - profile_b = {"id": "b", "name": "B", "profile": "default"} - parallel_job = {"id": "c", "name": "C", "profile": None} - - monkeypatch.setattr(sched, "get_due_jobs", lambda: [profile_a, profile_b, parallel_job]) - monkeypatch.setattr(sched, "advance_next_run", lambda *_a, **_kw: None) - - calls: list[tuple[str, str]] = [] - order_lock = threading.Lock() - - def fake_run_job(job): - with order_lock: - calls.append((job["id"], threading.current_thread().name)) - return True, "output", "response", None - - monkeypatch.setattr(sched, "run_job", fake_run_job) - monkeypatch.setattr(sched, "save_job_output", lambda _jid, _o: None) - monkeypatch.setattr(sched, "mark_job_run", lambda *_a, **_kw: None) - monkeypatch.setattr(sched, "_deliver_result", lambda *_a, **_kw: None) - - n = sched.tick(verbose=False) - - assert n == 3 - ids = [job_id for job_id, _thread_name in calls] - # Sequential profile jobs preserve submission order relative to each - # other (single-thread pool). - assert ids.index("a") < ids.index("b") - # Sequential (profile) jobs run on the persistent single-thread - # cron-seq pool — NOT the main thread — so a long profile job never - # blocks the ticker. Parallel jobs run on the cron-parallel pool. 
- for jid in ("a", "b"): - seq_thread = next(t for job_id, t in calls if job_id == jid) - assert seq_thread != threading.current_thread().name - assert seq_thread.startswith("cron-seq"), seq_thread - par_thread = next(t for job_id, t in calls if job_id == "c") - assert par_thread.startswith("cron-parallel"), par_thread diff --git a/tests/cron/test_parallel_pool.py b/tests/cron/test_parallel_pool.py index 146853c4a00..01e65adc4fb 100644 --- a/tests/cron/test_parallel_pool.py +++ b/tests/cron/test_parallel_pool.py @@ -172,10 +172,10 @@ class TestSyncMode: class TestSequentialPool: - """Sequential (workdir/profile) jobs use the persistent cron-seq pool. + """Sequential (workdir) jobs use the persistent cron-seq pool. - Verifies the follow-up fix: env/context-mutating jobs no longer run inline - in the ticker thread, so a long workdir/profile job can't starve the + Verifies the follow-up fix: env-mutating jobs no longer run inline + in the ticker thread, so a long workdir job can't starve the schedule the same way the parallel path used to. """ diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 4bf22296131..fd445de8ca6 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1487,7 +1487,7 @@ class TestRunJobConfigLogging: } # Mock heavy post-yaml work so the test only exercises the warning - # path. Without these mocks, _run_job_impl continues into provider + # path. Without these mocks, run_job continues into provider # resolution and MCP discovery, both of which can spawn subprocesses # / hit the network and have caused this test to time out on CI # (>30s wall clock) under load. See PR #33661 follow-up. 
diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py index 00b100d5a89..34388817655 100644 --- a/tests/gateway/test_matrix.py +++ b/tests/gateway/test_matrix.py @@ -1,6 +1,9 @@ """Tests for Matrix platform adapter (mautrix-python backend).""" import asyncio +import re +import stat import sys +import time import types import pytest from unittest.mock import MagicMock, patch, AsyncMock @@ -138,7 +141,14 @@ def _make_fake_mautrix(): return {} class MemorySyncStore: - pass + def __init__(self): + self.next_batch = None + + async def get_next_batch(self): + return self.next_batch + + async def put_next_batch(self, token): + self.next_batch = token mautrix_client_state_store.MemoryStateStore = MemoryStateStore mautrix_client_state_store.MemorySyncStore = MemorySyncStore @@ -295,6 +305,20 @@ class TestMatrixConfigLoading: mc = config.platforms[Platform.MATRIX] assert mc.extra.get("encryption") is True + def test_matrix_e2ee_mode_optional_sets_config(self, monkeypatch): + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123") + monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.setenv("MATRIX_E2EE_MODE", "optional") + monkeypatch.delenv("MATRIX_ENCRYPTION", raising=False) + + from gateway.config import GatewayConfig, _apply_env_overrides + config = GatewayConfig() + _apply_env_overrides(config) + + mc = config.platforms[Platform.MATRIX] + assert mc.extra.get("encryption") is True + assert mc.extra.get("e2ee_mode") == "optional" + def test_matrix_encryption_default_off(self, monkeypatch): monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123") monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") @@ -465,6 +489,102 @@ class TestMatrixDmDetection: assert self.adapter._dm_rooms["!room_b:ex.org"] is True assert self.adapter._dm_rooms["!room_c:ex.org"] is False + @pytest.mark.asyncio + async def test_m_direct_room_is_dm(self): + """m.direct account data is the authoritative DM signal.""" + 
self.adapter._joined_rooms = {"!dm_room:ex.org"} + self.adapter._dm_rooms = {"!dm_room:ex.org": True} + self.adapter._client = MagicMock() + self.adapter._client.get_state_event = AsyncMock(side_effect=Exception("no state")) + self.adapter._client.state_store = MagicMock() + self.adapter._client.state_store.get_members = AsyncMock(return_value=["@bot:ex.org", "@alice:ex.org"]) + + assert await self.adapter._is_dm_room("!dm_room:ex.org") is True + + @pytest.mark.asyncio + async def test_named_two_member_room_is_not_dm(self): + """A named two-member room must remain a room, not a DM.""" + self.adapter._joined_rooms = {"!project:ex.org"} + self.adapter._dm_rooms = {} + self.adapter._client = MagicMock() + self.adapter._client.get_state_event = AsyncMock( + side_effect=lambda room_id, event_type: {"name": "Project Room"} + if event_type == "m.room.name" + else (_ for _ in ()).throw(Exception("no alias")) + ) + self.adapter._client.state_store = MagicMock() + self.adapter._client.state_store.get_members = AsyncMock( + return_value=["@bot:ex.org", "@alice:ex.org"] + ) + + identity = await self.adapter._resolve_room_identity("!project:ex.org") + + assert identity.chat_type == "room" + assert identity.display_name == "Project Room" + assert identity.joined_member_count == 2 + assert await self.adapter._is_dm_room("!project:ex.org") is False + + @pytest.mark.asyncio + async def test_named_room_overrides_stale_dm_cache(self): + """Explicit room names should defeat stale/conflicting m.direct data.""" + self.adapter._joined_rooms = {"!stale:ex.org"} + self.adapter._dm_rooms = {"!stale:ex.org": True} + self.adapter._client = MagicMock() + self.adapter._client.get_state_event = AsyncMock( + side_effect=lambda room_id, event_type: {"content": {"name": "Ops Room"}} + if event_type == "m.room.name" + else (_ for _ in ()).throw(Exception("no alias")) + ) + self.adapter._client.state_store = MagicMock() + self.adapter._client.state_store.get_members = 
AsyncMock(return_value=["@bot:ex.org", "@alice:ex.org"]) + + identity = await self.adapter._resolve_room_identity("!stale:ex.org") + + assert identity.chat_type == "room" + assert identity.conflict is True + assert await self.adapter._is_dm_room("!stale:ex.org") is False + + @pytest.mark.asyncio + async def test_canonical_alias_used_when_name_missing(self): + self.adapter._joined_rooms = {"!alias:ex.org"} + self.adapter._dm_rooms = {} + self.adapter._client = MagicMock() + + async def get_state_event(room_id, event_type): + if event_type == "m.room.name": + raise Exception("no name") + if event_type == "m.room.canonical_alias": + return {"content": {"alias": "#hermes:ex.org"}} + raise Exception("unknown") + + self.adapter._client.get_state_event = AsyncMock(side_effect=get_state_event) + self.adapter._client.state_store = MagicMock() + self.adapter._client.state_store.get_members = AsyncMock(return_value=None) + + identity = await self.adapter._resolve_room_identity("!alias:ex.org") + + assert identity.display_name == "#hermes:ex.org" + assert identity.chat_type == "room" + + @pytest.mark.asyncio + async def test_non_string_m_direct_entries_ignored(self): + self.adapter._joined_rooms = {"!room_a:ex.org", "!room_b:ex.org"} + + mock_client = MagicMock() + mock_resp = MagicMock() + mock_resp.content = { + "@alice:ex.org": ["!room_a:ex.org", 42, None], + } + mock_client.get_account_data = AsyncMock(return_value=mock_resp) + self.adapter._client = mock_client + + await self.adapter._refresh_dm_cache() + + assert self.adapter._dm_rooms == { + "!room_a:ex.org": True, + "!room_b:ex.org": False, + } + # --------------------------------------------------------------------------- # Reply fallback stripping @@ -805,6 +925,101 @@ class TestMatrixFormatMessage: assert "http://b.com/2.png" in result +# --------------------------------------------------------------------------- +# Rendering payloads +# --------------------------------------------------------------------------- + 
+class TestMatrixRenderingPayloads: + def setup_method(self): + self.adapter = _make_adapter() + self.mock_client = MagicMock() + self.mock_client.send_message_event = AsyncMock(return_value="$evt") + self.adapter._client = self.mock_client + + def _sent_contents(self): + return [ + call.args[2] if len(call.args) > 2 else call.kwargs["content"] + for call in self.mock_client.send_message_event.await_args_list + ] + + @pytest.mark.asyncio + async def test_render_plain_and_html_body(self): + result = await self.adapter.send("!room:example.org", "**Bold** and plain") + + assert result.success is True + content = self._sent_contents()[0] + assert content["body"] == "**Bold** and plain" + assert content["format"] == "org.matrix.custom.html" + assert "Bold" in content["formatted_body"] + + @pytest.mark.asyncio + async def test_thread_payload_uses_m_thread_with_reply_fallback(self): + result = await self.adapter.send( + "!room:example.org", + "threaded", + metadata={"thread_id": "$root"}, + ) + + assert result.success is True + relates_to = self._sent_contents()[0]["m.relates_to"] + assert relates_to == { + "rel_type": "m.thread", + "event_id": "$root", + "is_falling_back": True, + "m.in_reply_to": {"event_id": "$root"}, + } + + @pytest.mark.asyncio + async def test_thread_payload_preserves_explicit_reply_target(self): + result = await self.adapter.send( + "!room:example.org", + "threaded reply", + reply_to="$reply", + metadata={"thread_id": "$root"}, + ) + + assert result.success is True + relates_to = self._sent_contents()[0]["m.relates_to"] + assert relates_to["event_id"] == "$root" + assert relates_to["m.in_reply_to"] == {"event_id": "$reply"} + + @pytest.mark.asyncio + async def test_edit_payload_uses_m_replace(self): + result = await self.adapter.edit_message( + "!room:example.org", + "$original", + "edited **body**", + ) + + assert result.success is True + content = self._sent_contents()[0] + assert content["m.relates_to"] == { + "rel_type": "m.replace", + 
"event_id": "$original", + } + assert content["m.new_content"]["body"] == "edited **body**" + assert content["body"] == "* edited **body**" + + @pytest.mark.asyncio + async def test_long_response_split_preserves_thread_context(self): + long_text = "Intro\n```python\n" + ("print('hello')\n" * 500) + "```\nDone" + + result = await self.adapter.send( + "!room:example.org", + long_text, + metadata={"thread_id": "$root"}, + ) + + assert result.success is True + contents = self._sent_contents() + assert len(contents) > 1 + for content in contents: + assert content["m.relates_to"]["rel_type"] == "m.thread" + assert content["m.relates_to"]["event_id"] == "$root" + assert content["m.relates_to"]["m.in_reply_to"] == {"event_id": "$root"} + assert content["body"].count("```") % 2 == 0 + + # --------------------------------------------------------------------------- # Markdown to HTML conversion # --------------------------------------------------------------------------- @@ -834,6 +1049,47 @@ class TestMatrixMarkdownToHtml: result = self.adapter._markdown_to_html("Hello world") assert "Hello world" in result + def test_matrix_markdown_strips_script_tag(self): + result = self.adapter._markdown_to_html("Hello ") + assert "bold') + assert "onclick" not in result.lower() + assert "bold" in result + + def test_matrix_markdown_rejects_javascript_links(self): + result = self.adapter._markdown_to_html("[click](javascript:alert(1))") + assert "javascript:" not in result.lower() + assert "click') + assert "javascript:" not in result.lower() + assert "href=" not in result.lower() + assert "click" in result + + def test_matrix_markdown_preserves_code_fences(self): + result = self.adapter._markdown_to_html("```python\nprint('x')\n```") + assert "
" in result
+        assert "= 0
+        assert diagnostics["e2ee"]["recovery_key_configured"] is True
+        assert diagnostics["media"]["max_media_bytes"] == 123
+
+    def test_matrix_recovery_key_is_never_logged(self, caplog, monkeypatch):
+        """The generated recovery key must not show up in log records at any level."""
+        from gateway.platforms.matrix import _handle_generated_matrix_recovery_key
+        caplog.set_level("DEBUG")  # default threshold would hide a DEBUG/INFO leak of the key
+        secret = "super-secret-generated-recovery-key"
+        monkeypatch.delenv("MATRIX_RECOVERY_KEY_OUTPUT_FILE", raising=False)
+        _handle_generated_matrix_recovery_key("@bot:example.org", secret)
+
+        assert secret not in caplog.text
+        assert "will not be logged" in caplog.text
+
+    def test_matrix_recovery_key_output_file_is_0600(self, tmp_path, monkeypatch, caplog):
+        """The key lands in the configured file with mode 0600 and stays out of the logs."""
+        from gateway.platforms.matrix import _handle_generated_matrix_recovery_key
+
+        recovery_key = "super-secret-generated-recovery-key"
+        key_file = tmp_path / "matrix-recovery-key.txt"
+        monkeypatch.setenv("MATRIX_RECOVERY_KEY_OUTPUT_FILE", str(key_file))
+        _handle_generated_matrix_recovery_key("@bot:example.org", recovery_key)
+
+        assert key_file.read_text().strip() == recovery_key
+        assert stat.S_IMODE(key_file.stat().st_mode) == 0o600
+        assert recovery_key not in caplog.text
+
+    @pytest.mark.asyncio
+    async def test_matrix_recovery_key_bootstrap_skips_without_output_file(
+        self,
+        monkeypatch,
+        caplog,
+    ):
+        """connect() must not generate a recovery key when no output file is configured."""
+        from gateway.platforms.matrix import MatrixAdapter
+        # Start with neither a recovery key nor an output file in the environment.
+        monkeypatch.delenv("MATRIX_RECOVERY_KEY", raising=False)
+        monkeypatch.delenv("MATRIX_RECOVERY_KEY_OUTPUT_FILE", raising=False)
+        config = PlatformConfig(
+            enabled=True,
+            token="syt_test_token",
+            extra={
+                "homeserver": "https://matrix.example.org",
+                "user_id": "@bot:example.org",
+                "encryption": True,
+            },
+        )
+        adapter = MatrixAdapter(config)
+        fake_mautrix_mods = _make_fake_mautrix()
+        # Fake mautrix client; the patched Client constructor below returns it.
+        mock_client = MagicMock()
+        mock_client.mxid = "@bot:example.org"
+        mock_client.device_id = None
+        mock_client.state_store = MagicMock()
+        mock_client.sync_store = MagicMock()
+        mock_client.crypto = None
+        mock_client.whoami = AsyncMock(return_value=MagicMock(user_id="@bot:example.org", device_id="DEV123"))
+        mock_client.sync = AsyncMock(return_value={"rooms": {"join": {}}})
+        mock_client.add_event_handler = MagicMock()
+        mock_client.add_dispatcher = MagicMock()
+        mock_client.handle_sync = MagicMock(return_value=[])
+        mock_client.query_keys = AsyncMock(return_value={
+            "device_keys": {"@bot:example.org": {"DEV123": {
+                "keys": {"ed25519:DEV123": "fake_ed25519_key"},
+            }}},
+        })
+        mock_client.api = MagicMock()
+        mock_client.api.token = "syt_test_token"
+        mock_client.api.session = MagicMock()
+        mock_client.api.session.close = AsyncMock()
+        # Fake OlmMachine; generate_recovery_key is mocked so any call can be detected.
+        mock_olm = MagicMock()
+        mock_olm.load = AsyncMock()
+        mock_olm.share_keys = AsyncMock()
+        mock_olm.get_own_cross_signing_public_keys = AsyncMock(return_value=None)
+        mock_olm.generate_recovery_key = AsyncMock(return_value="super-secret-key")
+        mock_olm.share_keys_min_trust = None
+        mock_olm.send_keys_min_trust = None
+        mock_olm.account = MagicMock()
+        mock_olm.account.identity_keys = {"ed25519": "fake_ed25519_key"}
+        # Make the fake mautrix modules hand out the mocks built above.
+        fake_mautrix_mods["mautrix.client"].Client = MagicMock(return_value=mock_client)
+        fake_mautrix_mods["mautrix.crypto"].OlmMachine = MagicMock(return_value=mock_olm)
+        from gateway.platforms import matrix as matrix_mod
+        with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True):
+            with patch.dict("sys.modules", fake_mautrix_mods):
+                with patch.object(adapter, "_refresh_dm_cache", AsyncMock()):
+                    with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)):
+                        assert await adapter.connect() is True
+
+        mock_olm.generate_recovery_key.assert_not_called()
+        assert "MATRIX_RECOVERY_KEY_OUTPUT_FILE is not configured" in caplog.text
+        assert "super-secret-key" not in caplog.text
+        await adapter.disconnect()
+
+    @pytest.mark.asyncio
+    async def test_matrix_recovery_key_bootstrap_skips_existing_output_file(
+        self,
+        tmp_path,
+        monkeypatch,
+        caplog,
+    ):
+        """connect() must neither generate a key nor overwrite an existing output file."""
+        from gateway.platforms.matrix import MatrixAdapter
+        # Pre-create the configured output file; its content is re-checked at the end.
+        output_path = tmp_path / "matrix-recovery-key.txt"
+        output_path.write_text("existing\n")
+        monkeypatch.delenv("MATRIX_RECOVERY_KEY", raising=False)
+        monkeypatch.setenv("MATRIX_RECOVERY_KEY_OUTPUT_FILE", str(output_path))
+        config = PlatformConfig(
+            enabled=True,
+            token="syt_test_token",
+            extra={
+                "homeserver": "https://matrix.example.org",
+                "user_id": "@bot:example.org",
+                "encryption": True,
+            },
+        )
+        adapter = MatrixAdapter(config)
+        fake_mautrix_mods = _make_fake_mautrix()
+        # Fake mautrix client; the patched Client constructor below returns it.
+        mock_client = MagicMock()
+        mock_client.mxid = "@bot:example.org"
+        mock_client.device_id = None
+        mock_client.state_store = MagicMock()
+        mock_client.sync_store = MagicMock()
+        mock_client.crypto = None
+        mock_client.whoami = AsyncMock(return_value=MagicMock(user_id="@bot:example.org", device_id="DEV123"))
+        mock_client.sync = AsyncMock(return_value={"rooms": {"join": {}}})
+        mock_client.add_event_handler = MagicMock()
+        mock_client.add_dispatcher = MagicMock()
+        mock_client.handle_sync = MagicMock(return_value=[])
+        mock_client.query_keys = AsyncMock(return_value={
+            "device_keys": {"@bot:example.org": {"DEV123": {
+                "keys": {"ed25519:DEV123": "fake_ed25519_key"},
+            }}},
+        })
+        mock_client.api = MagicMock()
+        mock_client.api.token = "syt_test_token"
+        mock_client.api.session = MagicMock()
+        mock_client.api.session.close = AsyncMock()
+        # Fake OlmMachine; generate_recovery_key is mocked so any call can be detected.
+        mock_olm = MagicMock()
+        mock_olm.load = AsyncMock()
+        mock_olm.share_keys = AsyncMock()
+        mock_olm.get_own_cross_signing_public_keys = AsyncMock(return_value=None)
+        mock_olm.generate_recovery_key = AsyncMock(return_value="super-secret-key")
+        mock_olm.share_keys_min_trust = None
+        mock_olm.send_keys_min_trust = None
+        mock_olm.account = MagicMock()
+        mock_olm.account.identity_keys = {"ed25519": "fake_ed25519_key"}
+        # Make the fake mautrix modules hand out the mocks built above.
+        fake_mautrix_mods["mautrix.client"].Client = MagicMock(return_value=mock_client)
+        fake_mautrix_mods["mautrix.crypto"].OlmMachine = MagicMock(return_value=mock_olm)
+        from gateway.platforms import matrix as matrix_mod
+        with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True):
+            with patch.dict("sys.modules", fake_mautrix_mods):
+                with patch.object(adapter, "_refresh_dm_cache", AsyncMock()):
+                    with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)):
+                        assert await adapter.connect() is True
+
+        mock_olm.generate_recovery_key.assert_not_called()
+        assert "already exists" in caplog.text
+        assert "super-secret-key" not in caplog.text
+        assert output_path.read_text() == "existing\n"
+        await adapter.disconnect()
+
+    def test_matrix_diagnostics_redacts_recovery_key(self, monkeypatch):
+        """Diagnostics flag that a recovery key is configured without echoing its value."""
+        recovery_key = "diagnostic-secret-recovery-key"
+        monkeypatch.setenv("MATRIX_RECOVERY_KEY", recovery_key)
+        report = _make_adapter().get_diagnostics()
+
+        assert report["e2ee"]["recovery_key_configured"] is True
+        assert recovery_key not in str(report)
+
+    def test_capability_matrix_is_declared_for_docs(self):
+        """Pin the exact capability table returned by get_matrix_capabilities()."""
+        from gateway.platforms.matrix import get_matrix_capabilities
+
+        expected = {
+            "text": "yes",
+            "threads": "yes",
+            "reactions": "yes",
+            "approvals": "yes",
+            "model picker": "yes",
+            "thinking panes": "yes",
+            "images": "yes",
+            "multiple images": "yes",
+            "files": "yes",
+            "voice/audio": "yes",
+            "video": "yes",
+            "E2EE": "off / optional / required",
+            "diagnostics": "yes",
+        }
+        assert get_matrix_capabilities() == expected
+
+    def test_matrix_capability_claims_match_adapter_surfaces(self):
+        """Each capability declared "yes" must be backed by an attribute on MatrixAdapter."""
+        from gateway.platforms.matrix import MatrixAdapter, get_matrix_capabilities
+
+        surface_by_capability = {
+            "text": "send",
+            "threads": "_apply_relation_metadata",
+            "reactions": "_send_reaction",
+            "approvals": "send_exec_approval",
+            "model picker": "send_model_picker",
+            "thinking panes": "edit_message",
+            "images": "send_image",
+            "multiple images": "send_multiple_images",
+            "files": "send_document",
+            "voice/audio": "send_voice",
+            "video": "send_video",
+            "diagnostics": "get_diagnostics",
+        }
+        declared = get_matrix_capabilities()
+        for name, attr in surface_by_capability.items():
+            assert declared[name] == "yes"
+            assert hasattr(MatrixAdapter, attr), f"{name} needs {attr}"
+        assert declared["E2EE"] == "off / optional / required"
+
+    def test_matrix_docs_capability_table_matches_declaration(self):
+        """Every declared capability must appear as a `| name | status |` row in matrix.md."""
+        from pathlib import Path
+
+        from gateway.platforms.matrix import get_matrix_capabilities
+        docs = (
+            Path(__file__).resolve().parents[2]
+            / "website"
+            / "docs"
+            / "user-guide"
+            / "messaging"
+            / "matrix.md"
+        ).read_text(encoding="utf-8")  # explicit codec: the locale default may not decode the file
+
+        for capability, status in get_matrix_capabilities().items():
+            assert f"| {capability} | {status} |" in docs
+
 
 class TestMatrixEncryptedSendFallback:
     @pytest.mark.asyncio
@@ -2282,6 +3188,354 @@ class TestMatrixImageOnlyMediaNormalization:
 
         assert captured_event is not None
         assert captured_event.text == "Please describe this chart"
+
+    @pytest.mark.asyncio
+    async def test_inbound_oversized_media_is_rejected(self):
+        """An m.image whose declared size exceeds the cap is dropped without a download."""
+        delivered = None
+
+        async def record(message):
+            nonlocal delivered
+            delivered = message
+
+        self.adapter.handle_message = record
+        self.adapter._max_media_bytes = 10  # info.size below is 11
+        await self.adapter._handle_media_message(
+            room_id="!room:example.org",
+            sender="@alice:example.org",
+            event_id="$image-big",
+            event_ts=0.0,
+            source_content={
+                "msgtype": "m.image",
+                "body": "huge.png",
+                "url": "mxc://example/huge.png",
+                "info": {"mimetype": "image/png", "size": 11},
+            },
+            relates_to={},
+            msgtype="m.image",
+        )
+
+        assert delivered is None
+        self.adapter._client.download_media.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_external_media_download_rejects_oversized_content_length(self, monkeypatch):
+        """A Content-Length of 11 against a 10-byte cap raises ValueError."""
+        import aiohttp
+
+        class _Body:
+            async def iter_chunked(self, _chunk_size):
+                yield b"x"
+
+        class _Reply:
+            url = "https://example.com/image.png"
+            headers = {"Content-Length": "11"}
+            content_type = "image/png"
+            content = _Body()
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def raise_for_status(self):
+                return None
+
+        class _FakeSession:
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def get(self, *_args, **_kwargs):
+                return _Reply()
+
+        monkeypatch.setattr(aiohttp, "ClientSession", lambda **_options: _FakeSession())
+        self.adapter._max_media_bytes = 10
+        with pytest.raises(ValueError, match="exceeds Matrix limit"):
+            await self.adapter._download_external_media_with_cap(
+                "https://example.com/image.png"
+            )
+
+    @pytest.mark.asyncio
+    async def test_external_media_download_rejects_oversized_stream(self, monkeypatch):
+        """A body streaming 11 bytes against a 10-byte cap raises ValueError."""
+        import aiohttp
+
+        class _Body:
+            async def iter_chunked(self, _chunk_size):
+                yield b"12345"
+                yield b"67890"
+                yield b"!"
+
+        class _Reply:
+            url = "https://example.com/image.png"
+            headers = {}
+            content_type = "image/png"
+            content = _Body()
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def raise_for_status(self):
+                return None
+
+        class _FakeSession:
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def get(self, *_args, **_kwargs):
+                return _Reply()
+
+        monkeypatch.setattr(aiohttp, "ClientSession", lambda **_options: _FakeSession())
+        self.adapter._max_media_bytes = 10
+        with pytest.raises(ValueError, match="exceeds Matrix limit"):
+            await self.adapter._download_external_media_with_cap(
+                "https://example.com/image.png"
+            )
+
+    @pytest.mark.asyncio
+    async def test_external_media_download_rejects_unsafe_redirect(self, monkeypatch):
+        """A response reporting a loopback URL (127.0.0.1) is rejected as an unsafe redirect."""
+        import aiohttp
+
+        class _Body:
+            async def iter_chunked(self, _chunk_size):
+                yield b"ok"
+
+        class _Reply:
+            url = "http://127.0.0.1/private.png"
+            headers = {}
+            content_type = "image/png"
+            content = _Body()
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def raise_for_status(self):
+                return None
+
+        class _FakeSession:
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def get(self, *_args, **_kwargs):
+                return _Reply()
+
+        monkeypatch.setattr(aiohttp, "ClientSession", lambda **_options: _FakeSession())
+        with pytest.raises(ValueError, match="unsafe redirect"):
+            await self.adapter._download_external_media_with_cap(
+                "https://example.com/image.png"
+            )
+
+    @pytest.mark.asyncio
+    async def test_external_media_download_rejects_unsafe_initial_url(self):
+        """A file:// URL is refused with an "unsafe media URL" ValueError."""
+        local_file_url = "file:///etc/passwd"
+        with pytest.raises(ValueError, match="unsafe media URL"):
+            await self.adapter._download_external_media_with_cap(local_file_url)
+
+    @pytest.mark.asyncio
+    async def test_external_media_download_rejects_non_image_content_type(self, monkeypatch):
+        """A response served as text/html is rejected as not being an image."""
+        import aiohttp
+
+        class _Body:
+            async def iter_chunked(self, _chunk_size):
+                yield b""
+
+        class _Reply:
+            url = "https://example.com/image.png"
+            headers = {}
+            content_type = "text/html"
+            content = _Body()
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def raise_for_status(self):
+                return None
+
+        class _FakeSession:
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *_exc_info):
+                return None
+
+            def get(self, *_args, **_kwargs):
+                return _Reply()
+
+        monkeypatch.setattr(aiohttp, "ClientSession", lambda **_options: _FakeSession())
+        with pytest.raises(ValueError, match="not an image"):
+            await self.adapter._download_external_media_with_cap(
+                "https://example.com/image.png"
+            )
+
+    @pytest.mark.asyncio
+    async def test_send_image_failure_log_redacts_signed_url(self, caplog):
+        """A failed download logs the bare image URL without its query string or fragment."""
+        from gateway.platforms.base import SendResult
+
+        failing_download = AsyncMock(side_effect=ValueError("download failed"))
+        self.adapter._download_external_media_with_cap = failing_download
+        self.adapter.send = AsyncMock(return_value=SendResult(success=True))
+        image_url = "https://example.com/image.png?signature=secret-token#frag"
+        await self.adapter.send_image("!room:example.org", image_url)
+
+        logged = caplog.text
+        assert "https://example.com/image.png" in logged
+        assert "secret-token" not in logged
+        assert "#frag" not in logged
+
+    @pytest.mark.asyncio
+    async def test_send_image_failure_response_does_not_expose_signed_url_query(self):
+        """The text sent after a failed download omits the URL and its signed query."""
+        from gateway.platforms.base import SendResult
+
+        failing_download = AsyncMock(side_effect=ValueError("download failed"))
+        self.adapter._download_external_media_with_cap = failing_download
+        self.adapter.send = AsyncMock(return_value=SendResult(success=True))
+        image_url = "https://example.com/image.png?signature=secret-token"
+        await self.adapter.send_image("!room:example.org", image_url)
+
+        # Second positional argument of the mocked send() is the message text.
+        fallback_text = self.adapter.send.await_args.args[1]
+        assert "signature=" not in fallback_text
+        assert "secret-token" not in fallback_text
+        assert image_url not in fallback_text
+        assert "source URL was not shown" in fallback_text
+
+    @pytest.mark.asyncio
+    async def test_send_image_failure_response_does_not_expose_signed_url_fragment(self):
+        """The text sent after a failed download omits the URL and its fragment."""
+        from gateway.platforms.base import SendResult
+
+        failing_download = AsyncMock(side_effect=ValueError("download failed"))
+        self.adapter._download_external_media_with_cap = failing_download
+        self.adapter.send = AsyncMock(return_value=SendResult(success=True))
+        image_url = "https://example.com/image.png#fragment-secret"
+        await self.adapter.send_image("!room:example.org", image_url)
+
+        # Second positional argument of the mocked send() is the message text.
+        fallback_text = self.adapter.send.await_args.args[1]
+        assert "#fragment-secret" not in fallback_text
+        assert "fragment-secret" not in fallback_text
+        assert image_url not in fallback_text
+        assert "source URL was not shown" in fallback_text
+
+    @pytest.mark.asyncio
+    async def test_send_image_failure_response_preserves_caption(self):
+        """The text sent after a failed download keeps the caption but not the signed URL."""
+        from gateway.platforms.base import SendResult
+
+        failing_download = AsyncMock(side_effect=ValueError("download failed"))
+        self.adapter._download_external_media_with_cap = failing_download
+        self.adapter.send = AsyncMock(return_value=SendResult(success=True))
+        image_url = "https://example.com/image.png?signature=secret-token#fragment"
+        caption = "Here is the image"
+        await self.adapter.send_image(
+            "!room:example.org",
+            image_url,
+            caption=caption,
+        )
+
+        fallback_text = self.adapter.send.await_args.args[1]
+        assert caption in fallback_text
+        assert "signature=" not in fallback_text
+        assert "secret-token" not in fallback_text
+        assert "#fragment" not in fallback_text
+        assert image_url not in fallback_text
+
+    @pytest.mark.asyncio
+    async def test_send_image_failure_log_still_redacts_signed_url(self, caplog):
+        """The failure log keeps the bare URL and drops the query string and fragment."""
+        from gateway.platforms.base import SendResult
+
+        failing_download = AsyncMock(side_effect=ValueError("download failed"))
+        self.adapter._download_external_media_with_cap = failing_download
+        self.adapter.send = AsyncMock(return_value=SendResult(success=True))
+        image_url = "https://example.com/image.png?signature=secret-token#fragment"
+        await self.adapter.send_image("!room:example.org", image_url)
+
+        logged = caplog.text
+        assert "https://example.com/image.png" in logged
+        assert "signature=" not in logged
+        assert "secret-token" not in logged
+        assert "#fragment" not in logged
+
+    @pytest.mark.asyncio
+    async def test_inbound_non_mxc_media_url_is_rejected(self):
+        """An m.image whose url is https:// rather than mxc:// is dropped without a download."""
+        delivered = None
+
+        async def record(message):
+            nonlocal delivered
+            delivered = message
+
+        self.adapter.handle_message = record
+        await self.adapter._handle_media_message(
+            room_id="!room:example.org",
+            sender="@alice:example.org",
+            event_id="$image-http",
+            event_ts=0.0,
+            source_content={
+                "msgtype": "m.image",
+                "body": "remote.png",
+                "url": "https://evil.example.org/remote.png",
+                "info": {"mimetype": "image/png", "size": 1},
+            },
+            relates_to={},
+            msgtype="m.image",
+        )
+
+        assert delivered is None
+        self.adapter._client.download_media.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_inbound_encrypted_non_mxc_media_url_is_rejected(self):
+        """An m.image whose file.url is https:// rather than mxc:// is dropped undownloaded."""
+        delivered = None
+
+        async def record(message):
+            nonlocal delivered
+            delivered = message
+
+        self.adapter.handle_message = record
+        await self.adapter._handle_media_message(
+            room_id="!room:example.org",
+            sender="@alice:example.org",
+            event_id="$image-enc-http",
+            event_ts=0.0,
+            source_content={
+                "msgtype": "m.image",
+                "body": "remote.png",
+                "file": {"url": "https://evil.example.org/remote.png"},
+                "info": {"mimetype": "image/png", "size": 1},
+            },
+            relates_to={},
+            msgtype="m.image",
+        )
+
+        assert delivered is None
+        self.adapter._client.download_media.assert_not_called()
 # ---------------------------------------------------------------------------
 # Message redaction
 # ---------------------------------------------------------------------------
@@ -2471,11 +3725,11 @@ class TestMatrixOnRoomMessageFilter:
         self.adapter._handle_media_message = AsyncMock()
 
     @staticmethod
-    def _mk_event(sender, body="hi", msgtype="m.text", event_id=None, ts=None):
+    def _mk_event(sender, body="hi", msgtype="m.text", event_id=None, ts=None, room_id=None):
         import time as _t
 
         ev = MagicMock()
-        ev.room_id = "!room:example.org"
+        ev.room_id = room_id or "!room:example.org"
         ev.sender = sender
         ev.event_id = event_id or f"$evt-{sender}-{body}"
         ev.timestamp = int((ts or _t.time()) * 1000)
@@ -2520,6 +3774,234 @@ class TestMatrixOnRoomMessageFilter:
         await self.adapter._on_room_message(ev)
         self.adapter._handle_text_message.assert_awaited_once()
 
+    @pytest.mark.asyncio
+    async def test_unauthorized_user_reaches_text_handler(self):
+        """Adapter intake does not apply MATRIX_ALLOWED_USERS; gateway authz enforces it."""
+        self.adapter._allowed_user_ids = {"@alice:example.org"}
+        from_stranger = self._mk_event(body="hello bot", sender="@mallory:example.org")
+        await self.adapter._on_room_message(from_stranger)
+        self.adapter._handle_text_message.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_authorized_user_reaches_text_handler(self):
+        self.adapter._allowed_user_ids = {"@alice:example.org"}  # the sender below is listed
+        from_alice = self._mk_event(body="hello bot", sender="@alice:example.org")
+        await self.adapter._on_room_message(from_alice)
+        self.adapter._handle_text_message.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_unauthorized_room_is_dropped(self):
+        """A non-DM message from a room outside _allowed_room_ids never reaches the handler."""
+        self.adapter._is_dm_room = AsyncMock(return_value=False)
+        self.adapter._allowed_room_ids = {"!allowed:example.org"}
+        stray = self._mk_event(
+            sender="@alice:example.org", body="hello bot", room_id="!other:example.org"
+        )
+
+        await self.adapter._on_room_message(stray)
+        self.adapter._handle_text_message.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_dm_room_bypasses_allowed_room_gate(self):
+        """A DM room is handled even though it is not listed in _allowed_room_ids."""
+        self.adapter._is_dm_room = AsyncMock(return_value=True)
+        self.adapter._allowed_room_ids = {"!project:example.org"}
+        direct = self._mk_event(
+            sender="@alice:example.org", body="hello bot", room_id="!dm:example.org"
+        )
+
+        await self.adapter._on_room_message(direct)
+        self.adapter._handle_text_message.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_configured_bridge_pattern_is_dropped(self):
+        self.adapter._ignored_user_patterns = [re.compile(r"^@telegram_")]  # matches the sender below
+        bridged = self._mk_event(body="hello bot", sender="@telegram_123:example.org")
+        await self.adapter._on_room_message(bridged)
+        self.adapter._handle_text_message.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_notice_message_is_dropped_by_default(self):
+        """With the fixture's settings, an m.notice event never reaches the text handler."""
+        notice = self._mk_event(
+            sender="@alice:example.org", body="bot notice", msgtype="m.notice"
+        )
+
+        await self.adapter._on_room_message(notice)
+        self.adapter._handle_text_message.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_notice_message_can_be_enabled(self):
+        """With _process_notices set, an m.notice event reaches the text handler."""
+        self.adapter._process_notices = True
+        notice = self._mk_event(
+            sender="@alice:example.org", body="human-authored notice", msgtype="m.notice"
+        )
+
+        await self.adapter._on_room_message(notice)
+        self.adapter._handle_text_message.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_duplicate_event_id_dropped(self):
+        """A second event reusing an already-seen event_id is not handled again."""
+        first = self._mk_event(sender="@alice:example.org", body="hello bot", event_id="$dup")
+        replay = self._mk_event(sender="@alice:example.org", body="hello again bot", event_id="$dup")
+        for incoming in (first, replay):
+            await self.adapter._on_room_message(incoming)
+
+        self.adapter._handle_text_message.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_old_startup_event_dropped(self):
+        """An event stamped 60 seconds before _startup_ts is not handled."""
+        started_at = time.time()
+        self.adapter._startup_ts = started_at
+        stale = self._mk_event(
+            sender="@alice:example.org",
+            body="hello bot",
+            event_id="$old",
+            ts=started_at - 60,
+        )
+        await self.adapter._on_room_message(stale)
+
+        self.adapter._handle_text_message.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_seconds_timestamp_reaches_text_handler(self):
+        """An event whose timestamps are in seconds, not milliseconds, is still handled."""
+        current = time.time()
+        self.adapter._startup_ts = current - 10
+        fresh = self._mk_event(
+            sender="@alice:example.org",
+            body="hello bot",
+            event_id="$seconds-filter",
+            ts=current,
+        )
+        fresh.timestamp = current  # replace the millisecond value set by _mk_event
+        fresh.server_timestamp = current
+        await self.adapter._on_room_message(fresh)
+
+        self.adapter._handle_text_message.assert_awaited_once()
+
+
+class TestMatrixRequireMention:
+    """The require_mention setting is read from config.extra, with an env-var fallback."""
+
+    def test_require_mention_from_config_extra_false(self):
+        """extra["require_mention"] = False switches the mention requirement off."""
+        from gateway.platforms.matrix import MatrixAdapter
+
+        settings = PlatformConfig(
+            enabled=True,
+            token="syt_test",
+            extra={
+                "homeserver": "https://matrix.example.org",
+                "require_mention": False,
+            },
+        )
+        assert MatrixAdapter(settings)._require_mention is False
+
+    def test_require_mention_from_env_when_extra_unset(self, monkeypatch):
+        """MATRIX_REQUIRE_MENTION applies when config.extra has no require_mention key."""
+        monkeypatch.setenv("MATRIX_REQUIRE_MENTION", "false")
+
+        from gateway.platforms.matrix import MatrixAdapter
+
+        settings = PlatformConfig(
+            enabled=True,
+            token="syt_test",
+            extra={"homeserver": "https://matrix.example.org"},
+        )
+        assert MatrixAdapter(settings)._require_mention is False
+
+    def test_require_mention_config_takes_precedence_over_env(self, monkeypatch):
+        """config.extra wins when it disagrees with MATRIX_REQUIRE_MENTION."""
+        monkeypatch.setenv("MATRIX_REQUIRE_MENTION", "true")
+
+        from gateway.platforms.matrix import MatrixAdapter
+
+        settings = PlatformConfig(
+            enabled=True,
+            token="syt_test",
+            extra={
+                "homeserver": "https://matrix.example.org",
+                "require_mention": False,
+            },
+        )
+        assert MatrixAdapter(settings)._require_mention is False
+
+    @pytest.mark.asyncio
+    async def test_require_mention_false_allows_unmentioned_group_message(self):
+        """With require_mention off, an unmentioned non-DM message still yields a context."""
+        from gateway.platforms.matrix import MatrixAdapter
+
+        settings = PlatformConfig(
+            enabled=True,
+            token="syt_test",
+            extra={
+                "homeserver": "https://matrix.example.org",
+                "user_id": "@bot:example.org",
+                "require_mention": False,
+            },
+        )
+        bot = MatrixAdapter(settings)
+        bot._is_dm_room = AsyncMock(return_value=False)
+        bot._resolve_room_identity = AsyncMock(
+            return_value=MagicMock(display_name="Project Room")
+        )
+        bot._get_display_name = AsyncMock(return_value="Alice")
+        bot._background_read_receipt = MagicMock()
+        resolved = await bot._resolve_message_context(
+            room_id="!project:example.org",
+            sender="@alice:example.org",
+            event_id="$unmentioned",
+            body="hello there",
+            source_content={"body": "hello there"},
+            relates_to={},
+        )
+
+        assert resolved is not None
+
+
+class TestMatrixFreeResponsePolicy:
+    """Rooms in ``_free_rooms`` skip the mention requirement; other rooms keep it."""
+
+    def setup_method(self):
+        bot = _make_adapter()
+        bot._user_id = "@bot:example.org"
+        bot._require_mention = True
+        bot._free_rooms = {"!free:example.org"}
+        bot._is_dm_room = AsyncMock(return_value=False)
+        bot._resolve_room_identity = AsyncMock(
+            return_value=MagicMock(display_name="Free Room")
+        )
+        bot._get_display_name = AsyncMock(return_value="Alice")
+        bot._background_read_receipt = MagicMock()
+        self.adapter = bot
+
+    async def _resolve_unmentioned(self, room_id, event_id):
+        """Resolve an unmentioned "hello there" message from Alice in *room_id*."""
+        return await self.adapter._resolve_message_context(
+            room_id=room_id,
+            sender="@alice:example.org",
+            event_id=event_id,
+            body="hello there",
+            source_content={"body": "hello there"},
+            relates_to={},
+        )
+
+    @pytest.mark.asyncio
+    async def test_free_response_room_allows_unmentioned_message(self):
+        """An unmentioned message in a free-response room yields a context."""
+        ctx = await self._resolve_unmentioned("!free:example.org", "$free")
+        assert ctx is not None
+
+    @pytest.mark.asyncio
+    async def test_non_free_room_requires_mention(self):
+        """An unmentioned message in any other room yields no context."""
+        ctx = await self._resolve_unmentioned("!locked:example.org", "$locked")
+        assert ctx is None
+
 
 class TestMatrixClockSkewWarning:
     """Clock-skew detector for #12614.
diff --git a/tests/gateway/test_matrix_approval_reaction_fail_closed.py b/tests/gateway/test_matrix_approval_reaction_fail_closed.py
index c9b5277ee6a..be181f62e08 100644
--- a/tests/gateway/test_matrix_approval_reaction_fail_closed.py
+++ b/tests/gateway/test_matrix_approval_reaction_fail_closed.py
@@ -28,13 +28,38 @@ def _stub_mautrix():
         sys.modules.setdefault(sub, types.ModuleType(sub))
     sys.modules.setdefault("mautrix", stub)
     m = sys.modules["mautrix.types"]
-    for attr in (
-        "ContentURI", "EventID", "EventType", "PaginationDirection",
-        "PresenceState", "RoomCreatePreset", "RoomID", "SyncToken",
-        "TrustState", "UserID",
-    ):
-        if not hasattr(m, attr):
-            setattr(m, attr, str)
+
+    class EventType:
+        ROOM_MESSAGE = "m.room.message"
+        REACTION = "m.reaction"
+        ROOM_ENCRYPTED = "m.room.encrypted"
+        ROOM_NAME = "m.room.name"
+
+    class PaginationDirection:
+        BACKWARD = "b"
+        FORWARD = "f"
+
+    class PresenceState:
+        ONLINE = "online"
+        OFFLINE = "offline"
+        UNAVAILABLE = "unavailable"
+
+    class RoomCreatePreset:
+        PRIVATE = "private_chat"
+        PUBLIC = "public_chat"
+        TRUSTED_PRIVATE = "trusted_private_chat"
+
+    class TrustState:
+        UNVERIFIED = 0
+        VERIFIED = 1
+
+    for attr in ("ContentURI", "EventID", "RoomID", "SyncToken", "UserID"):
+        setattr(m, attr, str)
+    m.EventType = EventType
+    m.PaginationDirection = PaginationDirection
+    m.PresenceState = PresenceState
+    m.RoomCreatePreset = RoomCreatePreset
+    m.TrustState = TrustState
 
 
 _stub_mautrix()
diff --git a/tests/gateway/test_matrix_exec_approval.py b/tests/gateway/test_matrix_exec_approval.py
index a7afe912cba..f3a8eaf86ca 100644
--- a/tests/gateway/test_matrix_exec_approval.py
+++ b/tests/gateway/test_matrix_exec_approval.py
@@ -27,9 +27,9 @@ class TestMatrixExecApprovalReactions:
         assert result.success is True
         assert adapter._approval_prompt_by_session["sess-1"] == "$evt1"
         assert adapter._approval_prompts_by_event["$evt1"].session_key == "sess-1"
-        assert adapter._send_reaction.await_count == 2
+        assert adapter._send_reaction.await_count == 3
         emojis = [call.args[2] for call in adapter._send_reaction.await_args_list]
-        assert emojis == ["✅", "❎"]
+        assert emojis == ["✅", "♾️", "❌"]
 
     @pytest.mark.asyncio
     async def test_reaction_resolves_pending_approval(self, monkeypatch):
diff --git a/tests/gateway/test_matrix_project_context_isolation.py b/tests/gateway/test_matrix_project_context_isolation.py
new file mode 100644
index 00000000000..871f4a855f5
--- /dev/null
+++ b/tests/gateway/test_matrix_project_context_isolation.py
@@ -0,0 +1,510 @@
+"""Matrix Project A / Project B context-isolation regressions."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import (
+    SessionContext,
+    SessionEntry,
+    SessionSource,
+    build_session_context_prompt,
+    build_session_key,
+)
+
+PROJECT_A_ROOM_ID = "!projectA:example.org"
+PROJECT_B_ROOM_ID = "!projectB:example.org"
+PROJECT_A_NAME = "Project - Project A"
+PROJECT_B_NAME = "Project - Project B"
+PROJECT_A_TOPIC = "Architecture and deploy plan for Project A"
+PROJECT_B_TOPIC = "Migration and branch plan for Project B"
+PROJECT_A_ALIAS = "#project-a:example.org"
+PROJECT_B_ALIAS = "#project-b:example.org"
+SENDER = "@alice:example.org"
+
+
+def _make_adapter():  # Build a MatrixAdapter backed by _FakeMatrixClient, with room-scoped sessions.
+    from gateway.platforms.matrix import MatrixAdapter
+
+    adapter = MatrixAdapter(
+        PlatformConfig(
+            enabled=True,
+            token="test-token",
+            extra={"homeserver": "https://matrix.example.org", "user_id": "@bot:example.org"},
+        )
+    )
+    adapter._user_id = "@bot:example.org"
+    adapter._require_mention = False  # the helpers below send bodies that never mention the bot
+    adapter._auto_thread = False
+    adapter._matrix_session_scope = "room"
+    adapter._text_batch_delay_seconds = 0
+    adapter._background_read_receipt = MagicMock()
+    adapter._get_display_name = AsyncMock(return_value="Alice")
+    adapter._client = _FakeMatrixClient()  # canned room state instead of a real client
+    return adapter
+
+
+class _FakeMatrixClient:  # Client stand-in: canned member list plus per-room state events.
+    def __init__(self):
+        self.state_store = MagicMock()
+        self.state_store.get_members = AsyncMock(return_value=["@bot:example.org", SENDER])
+
+    async def get_state_event(self, room_id, event_type):  # canned state lookup; KeyError when absent
+        rid = str(room_id)
+        state = {
+            PROJECT_A_ROOM_ID: {
+                "m.room.name": {"content": {"name": PROJECT_A_NAME}},
+                "m.room.topic": {"content": {"topic": PROJECT_A_TOPIC}},
+                "m.room.canonical_alias": {"content": {"alias": PROJECT_A_ALIAS}},
+            },
+            PROJECT_B_ROOM_ID: {
+                "m.room.name": {"content": {"name": PROJECT_B_NAME}},
+                "m.room.topic": {"content": {"topic": PROJECT_B_TOPIC}},
+                "m.room.canonical_alias": {"content": {"alias": PROJECT_B_ALIAS}},
+            },
+        }
+        value = state.get(rid, {}).get(str(event_type))  # both keys are compared as strings
+        if value is None:
+            raise KeyError((rid, event_type))  # unknown room or unknown event type
+        return value
+
+
+async def _source_for(adapter, room_id: str, event_id: str = "$event"):  # Resolve one plain message and return the context's last element.
+    ctx = await adapter._resolve_message_context(
+        room_id=room_id,
+        sender=SENDER,
+        event_id=event_id,
+        body="What is next?",
+        source_content={"body": "What is next?"},
+        relates_to={},
+    )
+    assert ctx is not None  # fail here if the adapter dropped the message
+    return ctx[-1]  # callers in this file use the last element as the SessionSource
+
+
+def _matrix_event(room_id: str, event_id: str, body: str = "What is next?"):  # MagicMock carrying the fields of an m.text room event from SENDER.
+    event = MagicMock()
+    event.room_id = room_id
+    event.sender = SENDER
+    event.event_id = event_id
+    event.timestamp = int(time.time() * 1000)  # current wall-clock time in milliseconds
+    event.server_timestamp = event.timestamp  # same value under the second attribute name
+    event.content = {"msgtype": "m.text", "body": body}
+    return event
+
+
+def _context_for(source: SessionSource) -> SessionContext:  # Wrap source in a Matrix-only SessionContext with a fixed session id.
+    return SessionContext(
+        source=source,
+        connected_platforms=[Platform.MATRIX],
+        home_channels={},
+        session_key=build_session_key(source),  # key is derived from the source itself
+        session_id="session-test",
+    )
+
+
+@pytest.mark.asyncio
+async def test_matrix_source_includes_room_name_topic_and_message_id():  # Source carries room B's canned name/topic and the event id.
+    adapter = _make_adapter()
+    source = await _source_for(adapter, PROJECT_B_ROOM_ID, "$project-b-msg")
+
+    assert source.chat_id == PROJECT_B_ROOM_ID
+    assert source.chat_name == PROJECT_B_NAME
+    assert source.chat_topic == PROJECT_B_TOPIC
+    assert source.guild_id == "example.org"  # NOTE(review): presumably the room id's server part; confirm in adapter
+    assert source.message_id == "$project-b-msg"
+    assert source.parent_chat_id is None
+
+
+@pytest.mark.asyncio
+async def test_matrix_project_a_and_project_b_have_distinct_session_keys():  # Two rooms must not share a session key.
+    adapter = _make_adapter()
+    source_a = await _source_for(adapter, PROJECT_A_ROOM_ID, "$a")
+    source_b = await _source_for(adapter, PROJECT_B_ROOM_ID, "$b")
+
+    assert source_a.chat_id != source_b.chat_id
+    assert source_a.chat_name == PROJECT_A_NAME
+    assert source_b.chat_name == PROJECT_B_NAME
+    assert build_session_key(source_a) != build_session_key(source_b)  # the isolation property under test
+
+
+@pytest.mark.asyncio
+async def test_matrix_project_b_prompt_contains_project_b_not_project_a():  # Prompt for room B must not contain room A's name or topic.
+    adapter = _make_adapter()
+    source_b = await _source_for(adapter, PROJECT_B_ROOM_ID, "$b")
+
+    prompt = build_session_context_prompt(_context_for(source_b))
+
+    assert PROJECT_B_NAME in prompt
+    assert PROJECT_B_TOPIC in prompt
+    assert PROJECT_B_ROOM_ID in prompt
+    assert "Matrix room boundary" in prompt  # literal text expected somewhere in the prompt
+    assert PROJECT_A_NAME not in prompt
+    assert PROJECT_A_TOPIC not in prompt
+
+
+@pytest.mark.asyncio
+async def test_matrix_project_context_survives_sequential_messages():  # Room scope: two events in one room share a key and get no thread id.
+    adapter = _make_adapter()
+    adapter._matrix_session_scope = "room"  # same value _make_adapter already assigns
+    first = await _source_for(adapter, PROJECT_B_ROOM_ID, "$b1")
+    second = await _source_for(adapter, PROJECT_B_ROOM_ID, "$b2")
+
+    assert first.thread_id is None
+    assert second.thread_id is None
+    assert first.chat_name == PROJECT_B_NAME
+    assert second.chat_name == PROJECT_B_NAME
+    assert build_session_key(first) == build_session_key(second)
+
+
+@pytest.mark.asyncio
+async def test_matrix_session_scope_auto_and_thread_preserve_synthetic_threads():  # Auto-thread on: "auto"/"thread" scopes keep per-message thread ids.
+    adapter = _make_adapter()
+    adapter._auto_thread = True
+    adapter._matrix_session_scope = "auto"
+    auto_source = await _source_for(adapter, PROJECT_B_ROOM_ID, "$auto")
+    assert auto_source.thread_id == "$auto"  # thread id equals the message's own event id
+
+    adapter._matrix_session_scope = "thread"
+    thread_source = await _source_for(adapter, PROJECT_B_ROOM_ID, "$thread")
+    assert thread_source.thread_id == "$thread"  # same expectation under the "thread" scope
+
+    real_thread = await adapter._resolve_message_context(
+        room_id=PROJECT_B_ROOM_ID,
+        sender=SENDER,
+        event_id="$reply",
+        body="thread reply",
+        source_content={"body": "thread reply"},
+        relates_to={"rel_type": "m.thread", "event_id": "$root"},
+    )
+    assert real_thread is not None
+    assert real_thread[-1].thread_id == "$root"  # an m.thread relation yields the related event id
+
+
+@pytest.mark.asyncio
+async def test_matrix_project_context_survives_concurrent_messages():  # Session env set in one gathered coroutine must not show up in the other.
+    from gateway.run import GatewayRunner
+    from gateway.session_context import get_session_env
+
+    async def observe(room_id: str):  # set the session env for one room, yield once, then read it back
+        adapter = _make_adapter()
+        source = await _source_for(adapter, room_id, f"${room_id}")
+        context = _context_for(source)
+        runner = object.__new__(GatewayRunner)  # allocate without running GatewayRunner.__init__
+        tokens = runner._set_session_env(context)
+        try:
+            await asyncio.sleep(0)  # yield to the event loop so the sibling coroutine can run
+            return SimpleNamespace(
+                chat_id=get_session_env("HERMES_SESSION_CHAT_ID"),
+                chat_name=get_session_env("HERMES_SESSION_CHAT_NAME"),
+                session_key=get_session_env("HERMES_SESSION_KEY"),
+            )
+        finally:
+            runner._clear_session_env(tokens)  # always undo the env change made above
+
+    observed_a, observed_b = await asyncio.gather(
+        observe(PROJECT_A_ROOM_ID),
+        observe(PROJECT_B_ROOM_ID),
+    )
+
+    assert observed_a.chat_id == PROJECT_A_ROOM_ID
+    assert observed_b.chat_id == PROJECT_B_ROOM_ID
+    assert observed_a.chat_name == PROJECT_A_NAME
+    assert observed_b.chat_name == PROJECT_B_NAME
+    assert observed_a.session_key != observed_b.session_key
+
+
+@pytest.mark.asyncio
+async def test_matrix_inbound_handler_emits_project_b_metadata_not_project_a():  # _on_room_message passes on an event whose source describes room B only.
+    adapter = _make_adapter()
+    captured = []
+
+    async def capture(event):  # stand-in for handle_message that just records the event
+        captured.append(event)
+
+    adapter.handle_message = capture
+
+    await adapter._on_room_message(_matrix_event(PROJECT_B_ROOM_ID, "$project-b"))
+
+    assert len(captured) == 1  # exactly one event reached the handler
+    source = captured[0].source
+    assert source.chat_id == PROJECT_B_ROOM_ID
+    assert source.chat_name == PROJECT_B_NAME
+    assert source.chat_topic == PROJECT_B_TOPIC
+    assert source.message_id == "$project-b"
+    assert PROJECT_A_NAME not in repr(source.to_dict())  # no field of the serialized source mentions room A
+
+
+@pytest.mark.asyncio
+async def test_matrix_inbound_handler_keeps_project_a_and_b_distinct():  # One adapter, two rooms in sequence: each event keeps its own room metadata.
+    adapter = _make_adapter()
+    captured = []
+
+    async def capture(event):  # stand-in for handle_message that just records the event
+        captured.append(event)
+
+    adapter.handle_message = capture
+
+    await adapter._on_room_message(_matrix_event(PROJECT_A_ROOM_ID, "$project-a", "A"))
+    await adapter._on_room_message(_matrix_event(PROJECT_B_ROOM_ID, "$project-b", "B"))
+
+    assert [event.source.chat_id for event in captured] == [  # delivery order matches send order
+        PROJECT_A_ROOM_ID,
+        PROJECT_B_ROOM_ID,
+    ]
+    assert [event.source.chat_name for event in captured] == [
+        PROJECT_A_NAME,
+        PROJECT_B_NAME,
+    ]
+    assert build_session_key(captured[0].source) != build_session_key(captured[1].source)
+
+
+def test_matrix_room_scope_group_sessions_per_user_true_separates_users():  # Same room, different user ids: keys differ when per-user is on.
+    alice = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    bob = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    bob.user_id = "@bob:example.org"  # the two sources differ only in user_id
+    alice.thread_id = None
+    bob.thread_id = None
+
+    assert build_session_key(alice, group_sessions_per_user=True) != build_session_key(
+        bob,
+        group_sessions_per_user=True,
+    )
+
+
+def test_matrix_room_scope_group_sessions_per_user_false_shares_room():  # Same room, different user ids: one shared key when per-user is off.
+    alice = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    bob = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    bob.user_id = "@bob:example.org"  # the two sources differ only in user_id
+    alice.thread_id = None
+    bob.thread_id = None
+
+    assert build_session_key(alice, group_sessions_per_user=False) == build_session_key(
+        bob,
+        group_sessions_per_user=False,
+    )
+
+
+def _make_matrix_source(room_id: str, room_name: str, topic: str) -> SessionSource:  # Hand-built group SessionSource for SENDER ("Alice") in the given room.
+    return SessionSource(
+        platform=Platform.MATRIX,
+        chat_id=room_id,
+        chat_name=room_name,
+        chat_type="group",
+        user_id=SENDER,
+        user_name="Alice",
+        chat_topic=topic,
+    )
+
+
+def _entry(source: SessionSource, session_id: str, title: str | None = None) -> SessionEntry:  # SessionEntry whose key and origin come from source.
+    return SessionEntry(
+        session_key=build_session_key(source),
+        session_id=session_id,
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        origin=source,
+        display_name=title or source.chat_name,  # falls back to the room name when title is falsy
+        platform=Platform.MATRIX,
+        chat_type="group",
+    )
+
+
+def _make_runner(current_source: SessionSource, entries: list[SessionEntry]):  # GatewayRunner allocated without __init__, wired to mocked session store and DB.
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)  # skip __init__; instance attributes are assigned by hand below
+    runner.config = GatewayConfig(platforms={Platform.MATRIX: PlatformConfig(enabled=True)})
+    adapter = MagicMock()
+    adapter._matrix_session_scope = "room"
+    runner.adapters = {Platform.MATRIX: adapter}
+    runner.session_store = MagicMock()
+    runner.session_store._entries = {entry.session_key: entry for entry in entries}
+    current = next((e for e in entries if e.origin and e.origin.chat_id == current_source.chat_id), entries[0])  # first entry from the current room, else entries[0]
+    runner.session_store.get_or_create_session.return_value = current
+    runner.session_store.switch_session.return_value = current
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "hello"}]
+    runner._running_agents = {}
+    runner._session_run_generation = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._release_running_agent_state = MagicMock()
+    runner._clear_session_boundary_security_state = MagicMock()
+    runner._evict_cached_agent = MagicMock()
+    runner._queue_depth = MagicMock(return_value=0)
+    runner._session_db = MagicMock()
+    runner._session_db.list_sessions_rich.return_value = [  # one row per entry, with an empty preview
+        {"id": entry.session_id, "title": entry.display_name, "preview": ""}
+        for entry in entries
+    ]
+    runner._session_db.resolve_resume_session_id.side_effect = lambda sid: sid  # identity: ids resolve to themselves
+    runner._session_db.get_session_title.side_effect = lambda sid: {  # display name by session id; None when unknown
+        entry.session_id: entry.display_name for entry in entries
+    }.get(sid)
+    runner._session_db.get_session.return_value = None
+    return runner
+
+
+def _event(text: str, source: SessionSource) -> MessageEvent:  # MessageEvent for a command text, with the fixed message id "$cmd".
+    return MessageEvent(text=text, source=source, message_id="$cmd")
+
+
+@pytest.mark.asyncio
+async def test_matrix_status_reports_current_matrix_room_scope():  # /status names room B and its scope, never room A or the raw session key.
+    source_a = _make_matrix_source(PROJECT_A_ROOM_ID, PROJECT_A_NAME, PROJECT_A_TOPIC)
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    entry_b = _entry(source_b, "session-b", "Project B Plan")
+    runner = _make_runner(source_b, [_entry(source_a, "session-a", "Project A Plan"), entry_b])
+
+    result = await runner._handle_status_command(_event("/status", source_b))
+
+    assert "Matrix scope:" in result
+    assert PROJECT_B_NAME in result
+    assert PROJECT_B_ROOM_ID in result
+    assert "session_scope: room" in result
+    session_key = build_session_key(source_b)
+    assert session_key not in result  # the raw key must not be printed
+    assert session_key[:8] not in result  # nor its first 8 characters
+    assert "session_key: sha256:" in result  # the key is shown under a "sha256:" label instead
+    assert PROJECT_A_NAME not in result
+    assert PROJECT_A_ROOM_ID not in result
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_does_not_cross_rooms_by_default():  # /resume of a room A session from room B is refused without switching.
+    source_a = _make_matrix_source(PROJECT_A_ROOM_ID, PROJECT_A_NAME, PROJECT_A_TOPIC)
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    entry_a = _entry(source_a, "session-a", "Project A Plan")
+    entry_b = _entry(source_b, "session-b", "Project B Plan")
+    runner = _make_runner(source_b, [entry_a, entry_b])
+    runner._session_db.resolve_session_by_title.return_value = "session-a"  # the title resolves to room A's session
+
+    result = await runner._handle_resume_command(_event("/resume Project A Plan", source_b))
+
+    assert "blocked" in result
+    assert PROJECT_A_NAME in result  # the reply names the room that owns the session
+    runner.session_store.switch_session.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_allows_same_room_session():  # /resume of a session that originated in the current room switches once.
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    entry_b = _entry(source_b, "session-b-old", "Project B Plan")
+    runner = _make_runner(source_b, [entry_b])
+    runner.session_store.get_or_create_session.return_value = _entry(  # the current session differs from the target
+        source_b, "session-b-current", "Current Project B"
+    )
+    runner.session_store.switch_session.return_value = entry_b
+    runner._session_db.resolve_session_by_title.return_value = "session-b-old"
+
+    result = await runner._handle_resume_command(_event("/resume Project B Plan", source_b))
+
+    assert "Resumed session" in result
+    runner.session_store.switch_session.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_quoted_title_same_room():  # A double-quoted title reaches the title lookup without its quotes.
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    entry_b = _entry(source_b, "session-b-old", "Project B Plan")
+    runner = _make_runner(source_b, [entry_b])
+    runner.session_store.get_or_create_session.return_value = _entry(  # the current session differs from the target
+        source_b, "session-b-current", "Current Project B"
+    )
+    runner.session_store.switch_session.return_value = entry_b
+    runner._session_db.resolve_session_by_title.return_value = "session-b-old"
+
+    result = await runner._handle_resume_command(
+        _event('/resume "Project B Plan"', source_b)
+    )
+
+    assert "Resumed session" in result
+    runner._session_db.resolve_session_by_title.assert_called_once_with("Project B Plan")  # quotes stripped
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_quoted_title_cross_room_blocked():  # Quoting the title does not bypass the cross-room block.
+    source_a = _make_matrix_source(PROJECT_A_ROOM_ID, PROJECT_A_NAME, PROJECT_A_TOPIC)
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    entry_a = _entry(source_a, "session-a", "Project A Plan")
+    entry_b = _entry(source_b, "session-b", "Project B Plan")
+    runner = _make_runner(source_b, [entry_a, entry_b])
+    runner._session_db.resolve_session_by_title.return_value = "session-a"  # the title resolves to room A's session
+
+    result = await runner._handle_resume_command(
+        _event('/resume "Project A Plan"', source_b)
+    )
+
+    assert "blocked" in result
+    runner.session_store.switch_session.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_malformed_quote_returns_helpful_error():  # An unterminated quote yields a parse error that mentions quotes.
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    runner = _make_runner(source_b, [_entry(source_b, "session-b", "Project B Plan")])
+
+    result = await runner._handle_resume_command(
+        _event('/resume "Project B Plan', source_b)  # note the missing closing quote
+    )
+
+    assert "Could not parse" in result
+    assert "quotes" in result
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_cross_room_requires_explicit_flag_and_warns():  # --cross-room permits the switch and the reply carries a cross-room notice.
+    source_a = _make_matrix_source(PROJECT_A_ROOM_ID, PROJECT_A_NAME, PROJECT_A_TOPIC)
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    entry_a = _entry(source_a, "session-a", "Project A Plan")
+    entry_b = _entry(source_b, "session-b", "Project B Plan")
+    runner = _make_runner(source_b, [entry_a, entry_b])
+    runner.session_store.switch_session.return_value = entry_a
+    runner._session_db.resolve_session_by_title.return_value = "session-a"  # the title resolves to room A's session
+
+    result = await runner._handle_resume_command(
+        _event("/resume --cross-room Project A Plan", source_b)
+    )
+
+    assert "Cross-room resume" in result
+    assert PROJECT_B_NAME in result  # the reply names the room the command was issued from
+    runner.session_store.switch_session.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_lists_only_current_room_by_default():  # Bare /resume lists room B's session title and omits room A's.
+    source_a = _make_matrix_source(PROJECT_A_ROOM_ID, PROJECT_A_NAME, PROJECT_A_TOPIC)
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    runner = _make_runner(
+        source_b,
+        [_entry(source_a, "session-a", "Project A Plan"), _entry(source_b, "session-b", "Project B Plan")],
+    )
+
+    result = await runner._handle_resume_command(_event("/resume", source_b))  # no title argument
+
+    assert "Project B Plan" in result
+    assert "Project A Plan" not in result
+
+
+@pytest.mark.asyncio
+async def test_matrix_resume_all_lists_room_names():  # /resume --all lists both rooms' sessions and shows room A's name.
+    source_a = _make_matrix_source(PROJECT_A_ROOM_ID, PROJECT_A_NAME, PROJECT_A_TOPIC)
+    source_b = _make_matrix_source(PROJECT_B_ROOM_ID, PROJECT_B_NAME, PROJECT_B_TOPIC)
+    runner = _make_runner(
+        source_b,
+        [_entry(source_a, "session-a", "Project A Plan"), _entry(source_b, "session-b", "Project B Plan")],
+    )
+
+    result = await runner._handle_resume_command(_event("/resume --all", source_b))
+
+    assert "Project A Plan" in result
+    assert PROJECT_A_NAME in result  # the other room's name is shown alongside its session
+    assert "Project B Plan" in result
diff --git a/tests/gateway/test_restart_drain.py b/tests/gateway/test_restart_drain.py
index a48e5f73781..15b948a4f79 100644
--- a/tests/gateway/test_restart_drain.py
+++ b/tests/gateway/test_restart_drain.py
@@ -197,8 +197,10 @@ async def test_launch_detached_restart_command_uses_setsid(monkeypatch):
     runner, _adapter = make_restart_runner()
     popen_calls = []
 
+    monkeypatch.setattr(gateway_run.sys, "platform", "linux")
     monkeypatch.setattr(gateway_run, "_resolve_hermes_bin", lambda: ["/usr/bin/hermes"])
     monkeypatch.setattr(gateway_run.os, "getpid", lambda: 321)
+    monkeypatch.setenv("_HERMES_GATEWAY", "1")
     monkeypatch.setattr(shutil, "which", lambda cmd: "/usr/bin/setsid" if cmd == "setsid" else None)
 
     def fake_popen(cmd, **kwargs):
@@ -217,6 +219,72 @@ async def test_launch_detached_restart_command_uses_setsid(monkeypatch):
     assert kwargs["start_new_session"] is True
     assert kwargs["stdout"] is subprocess.DEVNULL
     assert kwargs["stderr"] is subprocess.DEVNULL
+    # The watcher must NOT inherit the gateway marker, or the CLI's
+    # self-restart loop guard refuses to run `hermes gateway restart`.
+    assert kwargs["env"].get("_HERMES_GATEWAY") is None
+
+
+def test_windows_gateway_venv_imports_add_site_packages(monkeypatch, tmp_path):  # win32: venv site-packages and .pth targets land on sys.path and PYTHONPATH.
+    venv_dir = tmp_path / "venv"
+    site_packages = venv_dir / "Lib" / "site-packages"
+    pth_extra = tmp_path / "pywin32_system32"
+    site_packages.mkdir(parents=True)
+    pth_extra.mkdir()
+    (site_packages / "pywin32.pth").write_text(str(pth_extra), encoding="utf-8")  # .pth file naming one extra directory
+    project_root = str(gateway_run.Path(gateway_run.__file__).resolve().parent.parent)  # two levels above gateway/run.py
+
+    monkeypatch.setattr(gateway_run.sys, "platform", "win32")
+    monkeypatch.setattr(gateway_run.sys, "path", ["existing"])
+    monkeypatch.setenv("VIRTUAL_ENV", str(venv_dir))
+    monkeypatch.setenv("PYTHONPATH", "already-there")
+
+    gateway_run._ensure_windows_gateway_venv_imports()
+
+    assert gateway_run.sys.path[:2] == [project_root, str(site_packages)]  # both are placed at the front
+    assert str(pth_extra) in gateway_run.sys.path  # the directory named in the .pth file was added
+    assert gateway_run.os.environ["VIRTUAL_ENV"] == str(venv_dir.resolve())
+    pythonpath = gateway_run.os.environ["PYTHONPATH"].split(gateway_run.os.pathsep)
+    assert pythonpath[:3] == [project_root, str(site_packages), "already-there"]  # new entries precede the old value
+
+
+@pytest.mark.asyncio
+async def test_windows_detached_restart_scrubs_gateway_marker(monkeypatch, tmp_path):  # win32 restart child env: no _HERMES_GATEWAY, venv settings kept.
+    runner, _adapter = make_restart_runner()
+    popen_calls = []
+    venv_dir = tmp_path / "venv"
+    site_packages = venv_dir / "Lib" / "site-packages"
+    site_packages.mkdir(parents=True)
+
+    monkeypatch.setattr(gateway_run.sys, "platform", "win32")
+    monkeypatch.setattr(gateway_run, "_resolve_hermes_bin", lambda: ["hermes"])
+    monkeypatch.setattr(gateway_run.os, "getpid", lambda: 321)
+    monkeypatch.setenv("_HERMES_GATEWAY", "1")  # marker that must not be passed on to the child
+    monkeypatch.setenv("VIRTUAL_ENV", str(venv_dir))
+
+    import hermes_cli._subprocess_compat as subprocess_compat
+
+    monkeypatch.setattr(  # replace the detach-kwargs helper with one returning no extra kwargs
+        subprocess_compat,
+        "windows_detach_popen_kwargs",
+        lambda: {},
+    )
+
+    def fake_popen(cmd, **kwargs):  # record the call instead of spawning a process
+        popen_calls.append((cmd, kwargs))
+        return MagicMock()
+
+    monkeypatch.setattr(subprocess, "Popen", fake_popen)
+
+    await runner._launch_detached_restart_command()
+
+    assert len(popen_calls) == 1
+    cmd, kwargs = popen_calls[0]
+    assert cmd[-3:] == ["hermes", "gateway", "restart"]
+    assert kwargs["env"].get("_HERMES_GATEWAY") is None  # the marker set above was scrubbed
+    assert kwargs["env"]["VIRTUAL_ENV"] == str(venv_dir)
+    assert str(site_packages) in kwargs["env"]["PYTHONPATH"].split(gateway_run.os.pathsep)
+    assert kwargs["stdout"] is subprocess.DEVNULL
+    assert kwargs["stderr"] is subprocess.DEVNULL
 
 
 # ── Shutdown notification tests ──────────────────────────────────────
diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py
index 646ad92976b..9d20ed5b793 100644
--- a/tests/gateway/test_run_progress_topics.py
+++ b/tests/gateway/test_run_progress_topics.py
@@ -1488,3 +1488,72 @@ async def test_terminal_progress_no_bash_block_in_verbose_mode(monkeypatch, tmp_
     all_content = " ".join(call["content"] for call in adapter.sent)
     all_content += " ".join(call["content"] for call in adapter.edits)
     assert "```bash" not in all_content
+
+class MultiTerminalCommandAgent:
+    """Emits several consecutive terminal tool.started events, then a
+    different tool, then terminal again — to exercise header collapsing."""
+
+    def __init__(self, **kwargs):
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")  # all other kwargs are ignored
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):  # arguments are unused; the script is fixed
+        cb = self.tool_progress_callback
+        cb("tool.started", "terminal", "echo one", {"command": "echo one"})
+        cb("tool.started", "terminal", "echo two", {"command": "echo two"})
+        cb("tool.started", "terminal", "echo three", {"command": "echo three"})
+        cb("tool.started", "web_search", "query stuff", {"query": "query stuff"})  # breaks the terminal streak
+        cb("tool.started", "terminal", "echo four", {"command": "echo four"})
+        time.sleep(0.35)  # NOTE(review): presumably gives queued progress edits time to flush; confirm
+        return {"final_response": "done", "messages": [], "api_calls": 1}
+
+
+@pytest.mark.asyncio
+async def test_consecutive_terminal_progress_collapses_headers(monkeypatch, tmp_path):
+    """Back-to-back terminal calls render ONE "terminal" header followed by
+    adjacent code blocks; a different tool in between resets the header so the
+    next terminal call gets a fresh one."""
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
+
+    fake_dotenv = types.ModuleType("dotenv")  # stub module so importing dotenv has no effect
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    fake_run_agent = types.ModuleType("run_agent")  # stub run_agent whose AIAgent is the scripted agent
+    fake_run_agent.AIAgent = MultiTerminalCommandAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+    import tools.terminal_tool  # noqa: F401 - register terminal emoji
+
+    adapter = CodeBlockProgressAdapter(platform=Platform.TELEGRAM)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="12345",
+        chat_type="dm",
+        thread_id=None,
+    )
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-terminal-consecutive",
+        session_key="agent:main:telegram:dm:12345",
+    )
+
+    assert result["final_response"] == "done"
+    contents = [call["content"] for call in adapter.sent] + [  # every sent and every edited message body
+        call["content"] for call in adapter.edits
+    ]
+    final = max(contents, key=len) if contents else ""  # the longest body is taken as the final render
+    # All four commands present as code blocks.
+    for cmd in ("echo one", "echo two", "echo three", "echo four"):
+        assert cmd in final
+    # Exactly TWO terminal headers: one for the first run of three calls,
+    # one for the terminal call after web_search broke the streak.
+    assert final.count("terminal\n```") == 2
diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py
index 9b5fff64214..239dc28c8fc 100644
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -611,6 +611,30 @@ class TestSessionStoreSwitchSession:
         db.close()
 
 
+class TestSessionStoreLookupBySessionId:  # lookup_by_session_id: a hit returns the same entry object, misses return None.
+    @pytest.fixture()
+    def store(self, tmp_path):
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):  # _ensure_loaded is patched out during construction
+            s = SessionStore(sessions_dir=tmp_path, config=config)
+        s._db = None
+        s._loaded = True
+        return s
+
+    def test_returns_active_entry_for_persisted_session_id(self, store):
+        source = SessionSource(
+            platform=Platform.MATRIX,
+            chat_id="!room:example.org",
+            chat_type="group",
+            user_id="@alice:example.org",
+        )
+        entry = store.get_or_create_session(source)
+
+        assert store.lookup_by_session_id(entry.session_id) is entry  # identity, not just equality
+        assert store.lookup_by_session_id("missing") is None
+        assert store.lookup_by_session_id("") is None  # an empty id is treated as a miss
+
+
 class TestWhatsAppSessionKeyConsistency:
     """Regression: WhatsApp session keys must collapse JID/LID aliases to a
     single stable identity for both DM chat_ids and group participant_ids."""
diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py
index 9a445532d0d..af012fb69a7 100644
--- a/tests/gateway/test_stream_consumer.py
+++ b/tests/gateway/test_stream_consumer.py
@@ -794,9 +794,11 @@ class TestSegmentBreakOnToolBoundary:
         )
 
     @pytest.mark.asyncio
-    async def test_fallback_final_deletes_partial_after_chunks_succeed(self):
-        """After fallback chunks land, the frozen partial must be deleted so
-        the user sees only the complete response (#16668)."""
+    async def test_fallback_final_deletes_partial_after_full_resend(self):
+        """After fallback re-sends the COMPLETE response, the frozen partial
+        must be deleted so the user sees only the complete response (#16668).
+        Full resend happens when the visible prefix doesn't match the final
+        text (e.g. post-segment-break content, #10807)."""
         adapter = MagicMock()
         adapter.send = AsyncMock(
             return_value=SimpleNamespace(success=True, message_id="msg_new"),
@@ -810,14 +812,49 @@ class TestSegmentBreakOnToolBoundary:
         config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
         consumer = GatewayStreamConsumer(adapter, "chat_123", config)
 
-        # Seed the consumer as if it already edited a partial message that
-        # later got stuck (flood control etc.) — _message_id is the stale id.
+        # The stale partial shows pre-tool text that is NOT a prefix of the
+        # final response — fallback re-sends the complete final text.
+        consumer._message_id = "msg_partial"
+        consumer._last_sent_text = "Let me check that for you…"
+
+        await consumer._send_fallback_final("Working on it. Done!")
+
+        adapter.delete_message.assert_awaited_once_with("chat_123", "msg_partial")
+        assert consumer._final_response_sent is True
+
+    @pytest.mark.asyncio
+    async def test_fallback_final_keeps_partial_after_tail_only_send(self):
+        """When the fallback sends only the missing TAIL (visible prefix
+        matches the final text), the partial message IS the head of the
+        answer — deleting it would leave the user with only the last part
+        of the response (the 'model sent only the second half' bug)."""
+        adapter = MagicMock()
+        adapter.send = AsyncMock(
+            return_value=SimpleNamespace(success=True, message_id="msg_new"),
+        )
+        adapter.edit_message = AsyncMock(
+            return_value=SimpleNamespace(success=True),
+        )
+        adapter.delete_message = AsyncMock(return_value=None)
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
+
+        # Visible partial is a true prefix of the final response — the
+        # fallback dedup sends only the tail.
         consumer._message_id = "msg_partial"
         consumer._last_sent_text = "Working on i"
 
         await consumer._send_fallback_final("Working on it. Done!")
 
-        adapter.delete_message.assert_awaited_once_with("chat_123", "msg_partial")
+        # Tail was sent...
+        sent_contents = [
+            c.kwargs.get("content", "") for c in adapter.send.call_args_list
+        ]
+        assert any("Done!" in s and "Working on i" not in s for s in sent_contents)
+        # ...and the head-bearing partial was NOT deleted.
+        adapter.delete_message.assert_not_awaited()
         assert consumer._final_response_sent is True
 
     @pytest.mark.asyncio
diff --git a/tests/gateway/test_stream_consumer_fresh_final.py b/tests/gateway/test_stream_consumer_fresh_final.py
index 2ecef4a488b..975c0ada590 100644
--- a/tests/gateway/test_stream_consumer_fresh_final.py
+++ b/tests/gateway/test_stream_consumer_fresh_final.py
@@ -347,6 +347,200 @@ class TestSegmentBreakDoesNotMarkFinalSent:
         assert any("answer is 42" in t for t in self._delivered_texts(adapter))
 
 
+class TestCancelledBestEffortDeliveryFinalizes:
+    """Cancel-path best-effort delivery must go through the finalize path.
+
+    The gateway cancels the consumer shortly after finish(). The
+    CancelledError handler re-delivers the accumulated text; previously it
+    did so with finalize=False, so REQUIRES_EDIT_FINALIZE platforms
+    (Telegram) kept the plain streaming preview — the whole final reply
+    rendered with raw markdown markers — while the success flags still
+    suppressed the gateway's formatted re-send.
+    """
+
+    @pytest.mark.asyncio
+    async def test_cancel_best_effort_edit_is_finalized(self):
+        adapter = _make_adapter()
+        adapter.REQUIRES_EDIT_FINALIZE = True
+        consumer = GatewayStreamConsumer(
+            adapter=adapter,
+            chat_id="chat",
+            config=StreamConsumerConfig(
+                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
+            ),
+        )
+        consumer.on_delta("Reply with **bold** and `code` markers.")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)  # preview lands; message_id set
+        task.cancel()
+        await asyncio.gather(task, return_exceptions=True)
+
+        finalize_edits = [
+            c for c in adapter.edit_message.call_args_list
+            if c.kwargs.get("finalize")
+        ]
+        assert finalize_edits, (
+            "cancel best-effort delivery must use finalize=True so "
+            "REQUIRES_EDIT_FINALIZE platforms apply final formatting"
+        )
+        assert consumer.final_response_sent is True
+        assert consumer.final_content_delivered is True
+
+    @pytest.mark.asyncio
+    async def test_cancel_best_effort_failure_keeps_gateway_resend_possible(self):
+        adapter = _make_adapter()
+        adapter.REQUIRES_EDIT_FINALIZE = True
+        consumer = GatewayStreamConsumer(
+            adapter=adapter,
+            chat_id="chat",
+            config=StreamConsumerConfig(
+                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
+            ),
+        )
+        consumer.on_delta("Reply with **bold** and `code` markers.")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)
+        # Best-effort delivery at cancel time fails.
+        adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
+            success=False, error="boom",
+        ))
+        task.cancel()
+        await asyncio.gather(task, return_exceptions=True)
+
+        assert consumer.final_response_sent is False
+        assert consumer.final_content_delivered is False
+
+    @pytest.mark.asyncio
+    async def test_cancel_without_preview_makes_no_delivery_attempt(self):
+        adapter = _make_adapter()
+        adapter.REQUIRES_EDIT_FINALIZE = True
+        consumer = GatewayStreamConsumer(
+            adapter=adapter,
+            chat_id="chat",
+            config=StreamConsumerConfig(
+                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
+            ),
+        )
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.02)
+        task.cancel()
+        await asyncio.gather(task, return_exceptions=True)
+
+        adapter.edit_message.assert_not_called()
+        assert consumer.final_response_sent is False
+        assert consumer.final_content_delivered is False
+
+    @pytest.mark.asyncio
+    async def test_cancel_with_fresh_final_enabled_delivers_and_flags_via_handler(self):
+        """With fresh_final_after_seconds enabled and an aged preview, the
+        finalized cancel-path delivery is eligible for fresh-final
+        (delete + fresh send). is_turn_final=False keeps _try_fresh_final
+        from setting the flags itself; the cancel handler sets them after
+        the successful delivery."""
+        adapter = _make_adapter()
+        adapter.REQUIRES_EDIT_FINALIZE = True
+        adapter.send.side_effect = [
+            SimpleNamespace(success=True, message_id="initial_preview"),
+            SimpleNamespace(success=True, message_id="fresh_final"),
+        ]
+        consumer = GatewayStreamConsumer(
+            adapter=adapter,
+            chat_id="chat",
+            config=StreamConsumerConfig(
+                edit_interval=0.01, buffer_threshold=5, cursor=" ▉",
+                fresh_final_after_seconds=0.001,
+            ),
+        )
+        consumer.on_delta("Reply with **bold** and `code` markers.")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)
+        consumer._message_created_ts = 0.0  # force the preview stale
+        task.cancel()
+        await asyncio.gather(task, return_exceptions=True)
+
+        # Fresh-final engaged: a second send replaced the stale preview.
+        assert adapter.send.call_count == 2
+        adapter.delete_message.assert_awaited_once_with("chat", "initial_preview")
+        # Flags were set by the cancel handler after successful delivery.
+        assert consumer.final_response_sent is True
+        assert consumer.final_content_delivered is True
+
+
+class TestGotDoneOverflowSplitNotRefinalized:
+    """A got_done finalize edit that split-and-delivered across continuation
+    messages must not be followed by the redundant requires-finalize edit.
+
+    After a split, the consumer adopts the last continuation as the live
+    message and the redundant finalize edit re-submits the FULL accumulated
+    text against it; the adapter pre-flights that into another overflow
+    split, editing chunk 1 over the continuation and re-sending the rest,
+    so the user sees duplicated chunks. The finalize signal was already
+    carried by the split edit itself.
+    """
+
+    def _consumer(self, adapter):
+        # High interval/threshold so the only edit is the got_done finalize.
+        return GatewayStreamConsumer(
+            adapter=adapter,
+            chat_id="chat",
+            config=StreamConsumerConfig(
+                edit_interval=10.0, buffer_threshold=10_000, cursor=" ▉",
+            ),
+        )
+
+    @pytest.mark.asyncio
+    async def test_split_finalize_edit_is_not_refinalized(self):
+        adapter = _make_adapter()
+        adapter.REQUIRES_EDIT_FINALIZE = True
+        adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
+            success=True,
+            message_id="cont_2",
+            continuation_message_ids=("cont_2",),
+        ))
+        consumer = self._consumer(adapter)
+        consumer.on_delta("oversize **markdown** final reply")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)  # preview send lands; no interval edits
+        consumer.finish()
+        await task
+
+        finalize_edits = [
+            c for c in adapter.edit_message.call_args_list
+            if c.kwargs.get("finalize")
+        ]
+        assert len(finalize_edits) == 1, (
+            "split finalize edit must not be re-finalized; the redundant "
+            "edit re-splits the full text into the adopted continuation "
+            "and duplicates chunks on screen"
+        )
+        assert consumer.final_response_sent is True
+        assert consumer.final_content_delivered is True
+
+    @pytest.mark.asyncio
+    async def test_non_split_finalize_edit_still_gets_explicit_refinalize(self):
+        """The narrow fix must not regress the requires-finalize contract:
+        a normal (non-split) got_done edit is still followed by the
+        explicit finalize edit (#25010 semantics unchanged)."""
+        adapter = _make_adapter()
+        adapter.REQUIRES_EDIT_FINALIZE = True
+        adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
+            success=True, message_id="initial_preview",
+        ))
+        consumer = self._consumer(adapter)
+        consumer.on_delta("short final reply")
+        task = asyncio.create_task(consumer.run())
+        await asyncio.sleep(0.05)
+        consumer.finish()
+        await task
+
+        finalize_edits = [
+            c for c in adapter.edit_message.call_args_list
+            if c.kwargs.get("finalize")
+        ]
+        assert len(finalize_edits) == 2
+        assert consumer.final_response_sent is True
+
+
 class TestStreamConsumerConfigFreshFinalField:
     """The dataclass field must exist and default to 0 (disabled)."""
 
diff --git a/tests/gateway/test_telegram_overflow_partial.py b/tests/gateway/test_telegram_overflow_partial.py
new file mode 100644
index 00000000000..76e4d16a617
--- /dev/null
+++ b/tests/gateway/test_telegram_overflow_partial.py
@@ -0,0 +1,140 @@
+"""Regression coverage for partial Telegram overflow delivery."""
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import PlatformConfig
+from gateway.platforms.base import SendResult
+from gateway.platforms.telegram import TelegramAdapter
+from gateway.stream_consumer import GatewayStreamConsumer
+
+
+def _message(message_id: int | str) -> SimpleNamespace:
+    return SimpleNamespace(message_id=message_id)
+
+
+@pytest.fixture
+def telegram_adapter() -> TelegramAdapter:
+    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="fake-token"))
+    adapter._bot = MagicMock()
+    object.__setattr__(adapter, "MAX_MESSAGE_LENGTH", 160)
+    return adapter
+
+
+@pytest.mark.asyncio
+async def test_edit_overflow_split_reports_success_when_all_continuations_land(telegram_adapter):
+    """Complete overflow delivery keeps the existing success contract."""
+    content = "word " * 120
+    telegram_adapter._bot.edit_message_text = AsyncMock(return_value=True)
+    telegram_adapter._bot.send_message = AsyncMock(
+        side_effect=[_message(202), _message(203), _message(204), _message(205)]
+    )
+
+    result = await telegram_adapter._edit_overflow_split(
+        "12345", "201", content, finalize=False, metadata={"thread_id": "77"}
+    )
+
+    assert result.success is True
+    assert result.message_id == result.continuation_message_ids[-1]
+    assert result.raw_response is None
+    assert telegram_adapter._bot.edit_message_text.await_count == 1
+    assert telegram_adapter._bot.send_message.await_count == len(result.continuation_message_ids)
+    for call in telegram_adapter._bot.send_message.await_args_list:
+        assert call.kwargs["message_thread_id"] == 77
+
+
+@pytest.mark.asyncio
+async def test_edit_overflow_split_reports_later_partial_failure_after_some_continuations_land(telegram_adapter):
+    """Partial metadata tracks the last delivered continuation before failure."""
+    content = "word " * 120
+    telegram_adapter._bot.edit_message_text = AsyncMock(return_value=True)
+    telegram_adapter._bot.send_message = AsyncMock(
+        side_effect=[
+            _message(202),
+            RuntimeError("telegram send failed"),
+            RuntimeError("telegram send failed"),
+        ]
+    )
+
+    result = await telegram_adapter._edit_overflow_split(
+        "12345", "201", content, finalize=False, metadata={"thread_id": "77"}
+    )
+
+    assert result.success is False
+    assert result.message_id == "202"
+    assert result.raw_response["partial_overflow"] is True
+    assert result.raw_response["delivered_chunks"] == 2
+    assert result.raw_response["last_message_id"] == "202"
+    assert result.continuation_message_ids == ("202",)
+
+
+@pytest.mark.asyncio
+async def test_edit_overflow_split_reports_partial_failure_when_continuation_fails(telegram_adapter):
+    """A failed continuation must not be reported as final delivery."""
+    content = "word " * 120
+    telegram_adapter._bot.edit_message_text = AsyncMock(return_value=True)
+    telegram_adapter._bot.send_message = AsyncMock(
+        side_effect=[RuntimeError("telegram send failed"), RuntimeError("telegram send failed")]
+    )
+
+    result = await telegram_adapter._edit_overflow_split(
+        "12345", "201", content, finalize=False, metadata={"thread_id": "77"}
+    )
+
+    assert result.success is False
+    assert result.retryable is True
+    assert result.error == "overflow_continuation_failed"
+    assert result.message_id == "201"
+    assert result.raw_response["partial_overflow"] is True
+    assert result.raw_response["delivered_chunks"] == 1
+    assert result.raw_response["total_chunks"] > 1
+    assert result.raw_response["last_message_id"] == "201"
+    assert result.raw_response["delivered_prefix"]
+    assert result.continuation_message_ids == ()
+
+
+@pytest.mark.asyncio
+async def test_stream_consumer_fallback_sends_tail_after_partial_overflow():
+    """A partial overflow edit enters fallback instead of marking the final response delivered."""
+    adapter = MagicMock()
+    adapter.MAX_MESSAGE_LENGTH = 4096
+    adapter.edit_message = AsyncMock(
+        return_value=SendResult(
+            success=False,
+            message_id="preview-1",
+            error="overflow_continuation_failed",
+            retryable=True,
+            raw_response={
+                "partial_overflow": True,
+                "delivered_chunks": 1,
+                "total_chunks": 2,
+                "last_message_id": "preview-1",
+                "delivered_prefix": "hello ",
+            },
+        )
+    )
+    adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="tail-1"))
+    adapter.delete_message = AsyncMock(return_value=True)
+
+    consumer = GatewayStreamConsumer(adapter, "chat-1", metadata={"thread_id": "77"})
+    consumer._message_id = "preview-1"
+    consumer._last_sent_text = "hello "
+
+    ok = await consumer._send_or_edit("hello world", finalize=True)
+
+    assert ok is False
+    assert consumer.final_response_sent is False
+    assert consumer.final_content_delivered is False
+    assert consumer._fallback_final_send is True
+    assert consumer._fallback_prefix == "hello "
+
+    await consumer._send_fallback_final("hello world")
+
+    adapter.send.assert_awaited_once()
+    assert adapter.send.await_args.kwargs["content"] == "world"
+    assert adapter.send.await_args.kwargs["metadata"] == {"thread_id": "77"}
+    adapter.delete_message.assert_not_awaited()
+    assert consumer.final_response_sent is True
+    assert consumer.final_content_delivered is True
diff --git a/tests/gateway/test_telegram_voice_v0_regressions.py b/tests/gateway/test_telegram_voice_v0_regressions.py
new file mode 100644
index 00000000000..b2b8d4d0e8b
--- /dev/null
+++ b/tests/gateway/test_telegram_voice_v0_regressions.py
@@ -0,0 +1,71 @@
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+
+ROOT = Path(__file__).resolve().parents[2]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from gateway.config import Platform
+from gateway.platforms.telegram import TelegramAdapter
+from gateway.run import GatewayRunner
+from gateway.session import SessionSource
+
+
+def _source():
+    return SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+
+
+def _runner(adapter=None):
+    runner = object.__new__(GatewayRunner)
+    runner.config = SimpleNamespace(
+        stt_enabled=True,
+        group_sessions_per_user=True,
+        thread_sessions_per_user=False,
+    )
+    runner.adapters = {Platform.TELEGRAM: adapter} if adapter else {}
+    runner._consume_pending_native_image_paths = lambda _key: []
+    runner._session_key_for_source = lambda _source: "telegram:dm:12345"
+    runner._thread_metadata_for_source = lambda *_args, **_kwargs: {}
+    runner._reply_anchor_for_event = lambda _event: None
+    return runner
+
+
+def test_telegram_audio_size_gate_rejects_oversized_media_before_download():
+    adapter = object.__new__(TelegramAdapter)
+    adapter._max_doc_bytes = 1024
+
+    allowed, note = adapter._telegram_media_size_allowed(
+        SimpleNamespace(file_size=2048),
+        "voice message",
+    )
+
+    assert allowed is False
+    assert "exceeds" in note
+    assert "voice message" in note
+
+
+@pytest.mark.asyncio
+async def test_voice_tts_is_explicit_audio_reply_opt_in():
+    adapter = SimpleNamespace(
+        _auto_tts_disabled_chats=set(),
+        _auto_tts_enabled_chats=set(),
+    )
+    runner = _runner(adapter)
+    runner._voice_mode = {}
+    runner._voice_provider_mode = {}
+    runner._save_voice_modes = lambda: None
+    runner._save_voice_provider_modes = lambda: None
+
+    event = SimpleNamespace(
+        source=_source(),
+        get_command_args=lambda: "tts",
+    )
+    result = await GatewayRunner._handle_voice_command(runner, event)
+
+    assert runner._voice_mode["telegram:12345"] == "all"
+    assert "12345" in adapter._auto_tts_enabled_chats
+    assert result
diff --git a/tests/gateway/test_whatsapp_stale_bridge.py b/tests/gateway/test_whatsapp_stale_bridge.py
new file mode 100644
index 00000000000..d55931ceaf7
--- /dev/null
+++ b/tests/gateway/test_whatsapp_stale_bridge.py
@@ -0,0 +1,341 @@
+"""Tests for the WhatsApp bridge staleness handshake.
+
+Regression tests for the stale-bridge trap: ``connect()`` reused any
+already-running bridge with ``status: connected`` unconditionally, and
+``disconnect()`` only kills bridges the adapter spawned itself.  A
+long-lived bridge process therefore survived gateway restarts AND
+``hermes update``, serving pre-update bridge.js behavior forever (e.g.
+no inbound media download → images/voice notes arrive as placeholders).
+
+The fix: bridge.js reports a hash of its own source in ``/health``
+(``scriptHash``); the adapter compares it against the bridge.js on disk
+and restarts the bridge on mismatch.  Bridges that predate the handshake
+report no hash and are treated as stale by definition.
+
+Also covers the npm dependency-refresh stamp: deps are reinstalled when
+package.json changes, not only when node_modules is missing.
+"""
+
+import asyncio
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform
+
+
+class _AsyncCM:
+    """Minimal async context manager returning a fixed value."""
+
+    def __init__(self, value):
+        self.value = value
+
+    async def __aenter__(self):
+        return self.value
+
+    async def __aexit__(self, *exc):
+        return False
+
+
+def _make_adapter(bridge_script: str = "/tmp/test-bridge.js",
+                  session_path: Path = Path("/tmp/test-wa-session")):
+    """Create a WhatsAppAdapter with test attributes (bypass __init__)."""
+    from gateway.platforms.whatsapp import WhatsAppAdapter
+
+    adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
+    adapter.platform = Platform.WHATSAPP
+    adapter.config = MagicMock()
+    adapter._bridge_port = 19876
+    adapter._bridge_script = bridge_script
+    adapter._session_path = session_path
+    adapter._bridge_log_fh = None
+    adapter._bridge_log = None
+    adapter._bridge_process = None
+    adapter._reply_prefix = None
+    adapter._running = False
+    adapter._message_handler = None
+    adapter._fatal_error_code = None
+    adapter._fatal_error_message = None
+    adapter._fatal_error_retryable = True
+    adapter._fatal_error_handler = None
+    adapter._active_sessions = {}
+    adapter._pending_messages = {}
+    adapter._background_tasks = set()
+    adapter._auto_tts_disabled_chats = set()
+    adapter._message_queue = asyncio.Queue()
+    adapter._http_session = None
+    return adapter
+
+
+def _mock_health(json_data):
+    """Mock aiohttp.ClientSession whose GET returns 200 + *json_data*."""
+    mock_resp = MagicMock()
+    mock_resp.status = 200
+    mock_resp.json = AsyncMock(return_value=json_data)
+    mock_session = MagicMock()
+    mock_session.get = MagicMock(return_value=_AsyncCM(mock_resp))
+    mock_session.close = AsyncMock()
+    return MagicMock(return_value=_AsyncCM(mock_session))
+
+
+def _setup_bridge_dir(tmp_path: Path) -> Path:
+    """Create a real bridge dir with bridge.js + package.json + creds."""
+    bridge_dir = tmp_path / "whatsapp-bridge"
+    bridge_dir.mkdir()
+    (bridge_dir / "bridge.js").write_text("// current bridge code\n")
+    (bridge_dir / "package.json").write_text('{"name": "bridge"}\n')
+    session_path = tmp_path / "session"
+    session_path.mkdir()
+    (session_path / "creds.json").write_text("{}")
+    return bridge_dir
+
+
+def _fresh_node_modules(bridge_dir: Path) -> None:
+    """Create node_modules with a stamp matching the current package.json."""
+    from gateway.platforms.whatsapp import _file_content_hash
+
+    nm = bridge_dir / "node_modules"
+    nm.mkdir()
+    (nm / ".hermes-pkg-hash").write_text(
+        _file_content_hash(bridge_dir / "package.json")
+    )
+
+
+class TestFileContentHash:
+    def test_hashes_file(self, tmp_path):
+        from gateway.platforms.whatsapp import _file_content_hash
+
+        f = tmp_path / "x.js"
+        f.write_text("abc")
+        h = _file_content_hash(f)
+        assert len(h) == 16
+        assert h == _file_content_hash(f)  # deterministic
+
+    def test_changes_with_content(self, tmp_path):
+        from gateway.platforms.whatsapp import _file_content_hash
+
+        f = tmp_path / "x.js"
+        f.write_text("abc")
+        h1 = _file_content_hash(f)
+        f.write_text("def")
+        assert _file_content_hash(f) != h1
+
+    def test_missing_file_returns_empty(self, tmp_path):
+        from gateway.platforms.whatsapp import _file_content_hash
+
+        assert _file_content_hash(tmp_path / "nope.js") == ""
+
+    def test_matches_bridge_js_self_hash_algorithm(self, tmp_path):
+        """Python and Node must compute the same hash for the same bytes."""
+        import hashlib
+
+        from gateway.platforms.whatsapp import _file_content_hash
+
+        f = tmp_path / "bridge.js"
+        f.write_bytes(b"const x = 1;\n")
+        # Node side: createHash('sha256').update(bytes).digest('hex').slice(0, 16)
+        expected = hashlib.sha256(b"const x = 1;\n").hexdigest()[:16]
+        assert _file_content_hash(f) == expected
+
+
+class TestStaleBridgeHandshake:
+    @pytest.mark.asyncio
+    async def test_reuses_bridge_when_hash_matches(self, tmp_path):
+        from gateway.platforms.whatsapp import _file_content_hash
+
+        bridge_dir = _setup_bridge_dir(tmp_path)
+        _fresh_node_modules(bridge_dir)
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        disk_hash = _file_content_hash(bridge_dir / "bridge.js")
+        mock_client = _mock_health({"status": "connected", "scriptHash": disk_hash})
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", mock_client), \
+             patch("gateway.platforms.whatsapp.asyncio.create_task") as mock_task, \
+             patch("subprocess.Popen") as mock_popen, \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True), \
+             patch.object(adapter, "_mark_connected", create=True):
+            result = await adapter.connect()
+
+        assert result is True
+        mock_popen.assert_not_called()  # reused, never spawned
+        mock_task.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_restarts_bridge_on_hash_mismatch(self, tmp_path):
+        bridge_dir = _setup_bridge_dir(tmp_path)
+        _fresh_node_modules(bridge_dir)
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        mock_client = _mock_health(
+            {"status": "connected", "scriptHash": "deadbeefdeadbeef"}
+        )
+        # Spawned bridge dies immediately → connect() returns False, but the
+        # assertion that matters is that the stale bridge was NOT reused and
+        # a new process spawn was attempted.
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 1
+        mock_proc.returncode = 1
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", mock_client), \
+             patch("gateway.platforms.whatsapp.asyncio.sleep", new_callable=AsyncMock), \
+             patch("gateway.platforms.whatsapp._kill_stale_bridge_by_pidfile"), \
+             patch("gateway.platforms.whatsapp._kill_port_process") as mock_kill_port, \
+             patch("subprocess.Popen", return_value=mock_proc) as mock_popen, \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True):
+            result = await adapter.connect()
+
+        assert result is False  # mock proc died; not the point of the test
+        mock_popen.assert_called_once()  # stale bridge replaced, not reused
+        mock_kill_port.assert_called_once_with(adapter._bridge_port)
+
+    @pytest.mark.asyncio
+    async def test_restarts_unversioned_bridge(self, tmp_path):
+        """Bridges predating the handshake report no scriptHash → stale."""
+        bridge_dir = _setup_bridge_dir(tmp_path)
+        _fresh_node_modules(bridge_dir)
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        # Old bridge /health payload: no scriptHash key at all
+        mock_client = _mock_health({"status": "connected"})
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 1
+        mock_proc.returncode = 1
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", mock_client), \
+             patch("gateway.platforms.whatsapp.asyncio.sleep", new_callable=AsyncMock), \
+             patch("gateway.platforms.whatsapp._kill_stale_bridge_by_pidfile"), \
+             patch("gateway.platforms.whatsapp._kill_port_process"), \
+             patch("subprocess.Popen", return_value=mock_proc) as mock_popen, \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True):
+            await adapter.connect()
+
+        mock_popen.assert_called_once()
+
+
+class TestDepRefreshStamp:
+    @pytest.mark.asyncio
+    async def test_skips_install_when_stamp_fresh(self, tmp_path):
+        bridge_dir = _setup_bridge_dir(tmp_path)
+        _fresh_node_modules(bridge_dir)
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 1
+        mock_proc.returncode = 1
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", _mock_health({"status": "disconnected"})), \
+             patch("gateway.platforms.whatsapp.asyncio.sleep", new_callable=AsyncMock), \
+             patch("gateway.platforms.whatsapp._kill_stale_bridge_by_pidfile"), \
+             patch("gateway.platforms.whatsapp._kill_port_process"), \
+             patch("subprocess.run") as mock_run, \
+             patch("subprocess.Popen", return_value=mock_proc), \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True):
+            await adapter.connect()
+
+        mock_run.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_reinstalls_when_package_json_changed(self, tmp_path):
+        bridge_dir = _setup_bridge_dir(tmp_path)
+        _fresh_node_modules(bridge_dir)
+        # Simulate `hermes update` bumping the Baileys pin
+        (bridge_dir / "package.json").write_text('{"name": "bridge", "v": 2}\n')
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 1
+        mock_proc.returncode = 1
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", _mock_health({"status": "disconnected"})), \
+             patch("gateway.platforms.whatsapp.asyncio.sleep", new_callable=AsyncMock), \
+             patch("gateway.platforms.whatsapp._kill_stale_bridge_by_pidfile"), \
+             patch("gateway.platforms.whatsapp._kill_port_process"), \
+             patch("subprocess.run", return_value=MagicMock(returncode=0)) as mock_run, \
+             patch("subprocess.Popen", return_value=mock_proc), \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True):
+            await adapter.connect()
+
+        mock_run.assert_called_once()
+        assert "install" in mock_run.call_args[0][0]
+        # Stamp updated to the new package.json hash
+        from gateway.platforms.whatsapp import _file_content_hash
+        stamp = (bridge_dir / "node_modules" / ".hermes-pkg-hash").read_text().strip()
+        assert stamp == _file_content_hash(bridge_dir / "package.json")
+
+    @pytest.mark.asyncio
+    async def test_installs_when_node_modules_missing(self, tmp_path):
+        bridge_dir = _setup_bridge_dir(tmp_path)  # no node_modules
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 1
+        mock_proc.returncode = 1
+
+        def _npm_install(*args, **kwargs):
+            # npm creates node_modules as a side effect
+            (bridge_dir / "node_modules").mkdir(exist_ok=True)
+            return MagicMock(returncode=0)
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", _mock_health({"status": "disconnected"})), \
+             patch("gateway.platforms.whatsapp.asyncio.sleep", new_callable=AsyncMock), \
+             patch("gateway.platforms.whatsapp._kill_stale_bridge_by_pidfile"), \
+             patch("gateway.platforms.whatsapp._kill_port_process"), \
+             patch("subprocess.run", side_effect=_npm_install) as mock_run, \
+             patch("subprocess.Popen", return_value=mock_proc), \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True):
+            await adapter.connect()
+
+        mock_run.assert_called_once()
+
+
+class TestCacheDirEnvPassthrough:
+    @pytest.mark.asyncio
+    async def test_bridge_spawn_env_has_cache_dirs(self, tmp_path):
+        bridge_dir = _setup_bridge_dir(tmp_path)
+        _fresh_node_modules(bridge_dir)
+        adapter = _make_adapter(
+            bridge_script=str(bridge_dir / "bridge.js"),
+            session_path=tmp_path / "session",
+        )
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 1
+        mock_proc.returncode = 1
+
+        with patch("gateway.platforms.whatsapp.check_whatsapp_requirements", return_value=True), \
+             patch("aiohttp.ClientSession", _mock_health({"status": "disconnected"})), \
+             patch("gateway.platforms.whatsapp.asyncio.sleep", new_callable=AsyncMock), \
+             patch("gateway.platforms.whatsapp._kill_stale_bridge_by_pidfile"), \
+             patch("gateway.platforms.whatsapp._kill_port_process"), \
+             patch("subprocess.Popen", return_value=mock_proc) as mock_popen, \
+             patch.object(adapter, "_acquire_platform_lock", return_value=True, create=True):
+            await adapter.connect()
+
+        env = mock_popen.call_args.kwargs["env"]
+        from gateway.platforms.base import (
+            get_audio_cache_dir,
+            get_document_cache_dir,
+            get_image_cache_dir,
+        )
+        assert env["HERMES_IMAGE_CACHE_DIR"] == str(get_image_cache_dir())
+        assert env["HERMES_AUDIO_CACHE_DIR"] == str(get_audio_cache_dir())
+        assert env["HERMES_DOCUMENT_CACHE_DIR"] == str(get_document_cache_dir())
diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py
index 8c0f2a39874..15a2112ac26 100644
--- a/tests/hermes_cli/test_backup.py
+++ b/tests/hermes_cli/test_backup.py
@@ -146,6 +146,12 @@ class TestShouldExclude:
         from hermes_cli.backup import _should_exclude
         assert not _should_exclude(Path("logs/agent.log"))
 
+    def test_includes_nested_hermes_agent_in_skills(self):
+        """skills/autonomous-ai-agents/hermes-agent/ must NOT be excluded —
+        only the root-level hermes-agent/ repo is skipped."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
+        assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))
 
 # ---------------------------------------------------------------------------
 # Backup tests
@@ -186,6 +192,66 @@ class TestBackup:
             # Skins
             assert "skins/cyber.yaml" in names
 
+    def test_db_snapshots_staged_beside_output_zip(self, tmp_path, monkeypatch):
+        """SQLite staging temp files must be created on the output zip's
+        filesystem (dir=out_path.parent), NOT the system /tmp default — a
+        small tmpfs there silently drops large DBs from the backup (#35376)."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        _make_hermes_tree(hermes_home)
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out_dir = tmp_path / "external-drive"
+        out_dir.mkdir()
+        out_zip = out_dir / "backup.zip"
+        args = Namespace(output=str(out_zip))
+
+        import hermes_cli.backup as backup_mod
+        staged_dirs = []
+        real_ntf = backup_mod.tempfile.NamedTemporaryFile
+
+        def _spy(*a, **kw):
+            staged_dirs.append(kw.get("dir"))
+            return real_ntf(*a, **kw)
+
+        monkeypatch.setattr(backup_mod.tempfile, "NamedTemporaryFile", _spy)
+        backup_mod.run_backup(args)
+
+        # At least one .db was staged, and every staging call targeted the
+        # output zip's directory rather than the system temp default.
+        assert staged_dirs, "no SQLite snapshot was staged"
+        assert all(d == str(out_dir) for d in staged_dirs), staged_dirs
+
+    def test_pre_update_db_snapshots_staged_beside_output_zip(self, tmp_path, monkeypatch):
+        """The pre-update/pre-migration zip path (_write_full_zip_backup) must
+        also stage SQLite snapshots beside its output zip, not in /tmp."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        _make_hermes_tree(hermes_home)
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out_zip = hermes_home / "backups" / "pre-update-test.zip"
+        out_zip.parent.mkdir(parents=True, exist_ok=True)
+
+        import hermes_cli.backup as backup_mod
+        staged_dirs = []
+        real_ntf = backup_mod.tempfile.NamedTemporaryFile
+
+        def _spy(*a, **kw):
+            staged_dirs.append(kw.get("dir"))
+            return real_ntf(*a, **kw)
+
+        monkeypatch.setattr(backup_mod.tempfile, "NamedTemporaryFile", _spy)
+        result = backup_mod._write_full_zip_backup(out_zip, hermes_home)
+
+        assert result is not None
+        assert staged_dirs, "no SQLite snapshot was staged"
+        assert all(d == str(out_zip.parent) for d in staged_dirs), staged_dirs
+
     def test_excludes_hermes_agent(self, tmp_path, monkeypatch):
         """Backup does NOT include hermes-agent/ directory."""
         hermes_home = tmp_path / ".hermes"
@@ -206,6 +272,37 @@ class TestBackup:
             agent_files = [n for n in names if "hermes-agent" in n]
             assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"
 
+    def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
+        """Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        _make_hermes_tree(hermes_home)
+
+        # Add a nested hermes-agent directory inside skills (like the real layout)
+        nested = hermes_home / "skills" / "autonomous-ai-agents" / "hermes-agent"
+        nested.mkdir(parents=True)
+        (nested / "SKILL.md").write_text("# Hermes Agent Skill\n")
+        (nested / "sub").mkdir()
+        (nested / "sub" / "item.txt").write_text("nested content\n")
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out_zip = tmp_path / "backup.zip"
+        args = Namespace(output=str(out_zip))
+
+        from hermes_cli.backup import run_backup
+        run_backup(args)
+
+        with zipfile.ZipFile(out_zip, "r") as zf:
+            names = zf.namelist()
+            # Root hermes-agent must be excluded
+            root_agent = [n for n in names if n.startswith("hermes-agent/")]
+            assert root_agent == [], f"root hermes-agent leaked: {root_agent}"
+            # Nested skill hermes-agent must be included
+            assert "skills/autonomous-ai-agents/hermes-agent/SKILL.md" in names
+            assert "skills/autonomous-ai-agents/hermes-agent/sub/item.txt" in names
+
     def test_excludes_pycache(self, tmp_path, monkeypatch):
         """Backup does NOT include __pycache__ dirs."""
         hermes_home = tmp_path / ".hermes"
diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py
index 6a63ebe73e5..62c2be4ab79 100644
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@@ -691,6 +691,169 @@ class TestSubcommandCompletion:
         completions = _completions(SlashCommandCompleter(), "/help ")
         assert completions == []
 
+    def test_tools_subcommand_completion(self):
+        """`/tools ` should suggest list, disable, enable."""
+        completions = _completions(SlashCommandCompleter(), "/tools ")
+        texts = {c.text for c in completions}
+        assert texts == {"list", "disable", "enable"}
+
+    def test_tools_subcommand_prefix_filters(self):
+        completions = _completions(SlashCommandCompleter(), "/tools en")
+        texts = {c.text for c in completions}
+        assert texts == {"enable"}
+
+    def test_tools_enable_completes_toolset_names(self, monkeypatch):
+        """`/tools enable ` should suggest currently-disabled toolsets."""
+        from hermes_cli import commands as commands_mod
+
+        # `web` is enabled, `spotify` is disabled — enabling should only offer
+        # the disabled ones.
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_platform_tools",
+            lambda *_a, **_k: {"web", "file"},
+        )
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: {})
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_plugin_toolset_keys",
+            lambda: set(),
+        )
+
+        completions = _completions(SlashCommandCompleter(), "/tools enable ")
+        texts = {c.text for c in completions}
+        # Should include disabled toolsets, exclude already-enabled ones.
+        assert "web" not in texts
+        assert "file" not in texts
+        assert "spotify" in texts
+
+    def test_tools_disable_completes_enabled_toolsets_only(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_platform_tools",
+            lambda *_a, **_k: {"web", "file"},
+        )
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: {})
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_plugin_toolset_keys",
+            lambda: set(),
+        )
+
+        completions = _completions(SlashCommandCompleter(), "/tools disable ")
+        texts = {c.text for c in completions}
+        # Should include enabled toolsets, exclude disabled ones.
+        assert texts == {"web", "file"}
+
+    def test_tools_enable_partial_filters(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_platform_tools",
+            lambda *_a, **_k: set(),
+        )
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: {})
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_plugin_toolset_keys",
+            lambda: set(),
+        )
+
+        completions = _completions(SlashCommandCompleter(), "/tools enable sp")
+        texts = {c.text for c in completions}
+        assert texts == {"spotify"}
+
+    def test_tools_enable_skips_already_listed(self, monkeypatch):
+        """If the user already typed a name, don't suggest it again."""
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_platform_tools",
+            lambda *_a, **_k: set(),
+        )
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: {})
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_plugin_toolset_keys",
+            lambda: set(),
+        )
+
+        completions = _completions(SlashCommandCompleter(), "/tools enable spotify ")
+        texts = {c.text for c in completions}
+        assert "spotify" not in texts
+
+    def test_tools_suggests_mcp_server_prefixes(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_platform_tools",
+            lambda *_a, **_k: set(),
+        )
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"mcp_servers": {"github": {}, "linear": {}}},
+        )
+        monkeypatch.setattr(
+            "hermes_cli.tools_config._get_plugin_toolset_keys",
+            lambda: set(),
+        )
+
+        completions = _completions(SlashCommandCompleter(), "/tools enable git")
+        texts = {c.text for c in completions}
+        assert "github:" in texts
+
+    def _fake_gateway(self, monkeypatch, platforms):
+        """Patch load_gateway_config with a fake whose connected platforms are
+        the keys of `platforms` (name -> home as None or a (chat_id, name) tuple).
+        """
+        from types import SimpleNamespace
+
+        enums = {name: SimpleNamespace(value=name) for name in platforms}
+        homes = {
+            name: (None if home is None else SimpleNamespace(chat_id=home[0], name=home[1]))
+            for name, home in platforms.items()
+        }
+        fake = SimpleNamespace(
+            get_connected_platforms=lambda: list(enums.values()),
+            get_home_channel=lambda p: homes[p.value],
+        )
+        monkeypatch.setattr("gateway.config.load_gateway_config", lambda: fake)
+
+    def test_handoff_completes_connected_platforms(self, monkeypatch):
+        """`/handoff ` offers connected platforms, with or without a home channel."""
+        self._fake_gateway(
+            monkeypatch,
+            {
+                "telegram": ("123", "Me"),
+                "discord": None,  # no home channel yet -> still listed
+            },
+        )
+
+        texts = {c.text for c in _completions(SlashCommandCompleter(), "/handoff ")}
+        assert texts == {"telegram", "discord"}
+
+    def test_handoff_filters_by_prefix(self, monkeypatch):
+        self._fake_gateway(
+            monkeypatch,
+            {
+                "telegram": ("1", "H"),
+                "signal": ("2", "H"),
+            },
+        )
+
+        texts = {c.text for c in _completions(SlashCommandCompleter(), "/handoff te")}
+        assert texts == {"telegram"}
+
+    def test_handoff_no_completion_after_platform_chosen(self, monkeypatch):
+        self._fake_gateway(monkeypatch, {"telegram": ("1", "H")})
+        assert _completions(SlashCommandCompleter(), "/handoff telegram ") == []
+
+    def test_handoff_completion_swallows_config_errors(self, monkeypatch):
+        def _boom():
+            raise RuntimeError("no gateway config")
+
+        monkeypatch.setattr("gateway.config.load_gateway_config", _boom)
+        assert _completions(SlashCommandCompleter(), "/handoff ") == []
+
+    def test_personality_completes_configured_personalities(self):
+        """`/personality ` lists real personalities, not just `none`.
+
+        Regression: the completer read load_config().agent.personalities, a path
+        that never exists, so it always came back empty. It must resolve from the
+        CLI config the runtime actually applies (which ships built-ins).
+        """
+        texts = {c.text for c in _completions(SlashCommandCompleter(), "/personality ")}
+        assert "none" in texts
+        assert len(texts) > 1
+
 
 # ── Ghost text (SlashCommandAutoSuggest) ────────────────────────────────
 
diff --git a/tests/hermes_cli/test_cron.py b/tests/hermes_cli/test_cron.py
index aa4f6b116f1..442433f768f 100644
--- a/tests/hermes_cli/test_cron.py
+++ b/tests/hermes_cli/test_cron.py
@@ -55,7 +55,6 @@ class TestCronCommandLifecycle:
                 repeat=None,
                 skill=None,
                 skills=["maps", "blogwatcher"],
-                profile="default",
                 clear_skills=False,
             )
         )
@@ -64,7 +63,6 @@ class TestCronCommandLifecycle:
         assert updated["name"] == "Edited Job"
         assert updated["prompt"] == "Revised prompt"
         assert updated["schedule_display"] == "every 120m"
-        assert updated["profile"] == "default"
 
         cron_command(
             Namespace(
@@ -77,14 +75,12 @@ class TestCronCommandLifecycle:
                 repeat=None,
                 skill=None,
                 skills=None,
-                profile="",
                 clear_skills=True,
             )
         )
         cleared = get_job(job["id"])
         assert cleared["skills"] == []
         assert cleared["skill"] is None
-        assert cleared["profile"] is None
 
         out = capsys.readouterr().out
         assert "Updated job" in out
@@ -100,7 +96,6 @@ class TestCronCommandLifecycle:
                 repeat=None,
                 skill=None,
                 skills=["blogwatcher", "maps"],
-                profile="default",
             )
         )
         out = capsys.readouterr().out
@@ -110,7 +105,6 @@ class TestCronCommandLifecycle:
         assert len(jobs) == 1
         assert jobs[0]["skills"] == ["blogwatcher", "maps"]
         assert jobs[0]["name"] == "Skill combo"
-        assert jobs[0]["profile"] == "default"
 
     def test_list_does_not_crash_when_repeat_is_null(self, tmp_cron_dir, capsys):
         """A one-shot job can be persisted with ``"repeat": null``. `cron
diff --git a/tests/hermes_cli/test_subcommands_cron.py b/tests/hermes_cli/test_cron_parser_builder.py
similarity index 94%
rename from tests/hermes_cli/test_subcommands_cron.py
rename to tests/hermes_cli/test_cron_parser_builder.py
index e51a0bb6409..16be898b1a9 100644
--- a/tests/hermes_cli/test_subcommands_cron.py
+++ b/tests/hermes_cli/test_cron_parser_builder.py
@@ -47,20 +47,19 @@ def test_cron_aliases():
 def test_cron_create_options():
     parser = _build()
     ns = parser.parse_args([
-        "cron", "create", "0 9 * * *", "do the thing",
+        "cron", "create", "0 9 * * *", "daily task prompt",
         "--name", "daily", "--deliver", "origin", "--repeat", "3",
         "--skill", "a", "--skill", "b", "--no-agent",
-        "--workdir", "/tmp/x", "--profile", "work",
+        "--workdir", "/tmp/x",
     ])
     assert ns.schedule == "0 9 * * *"
-    assert ns.prompt == "do the thing"
+    assert ns.prompt == "daily task prompt"
     assert ns.name == "daily"
     assert ns.deliver == "origin"
     assert ns.repeat == 3
     assert ns.skills == ["a", "b"]
     assert ns.no_agent is True
     assert ns.workdir == "/tmp/x"
-    assert ns.profile == "work"
 
 
 def test_cron_edit_no_agent_tristate():
diff --git a/tests/hermes_cli/test_dashboard_admin_endpoints.py b/tests/hermes_cli/test_dashboard_admin_endpoints.py
index 5171f3ade05..60ee50728a1 100644
--- a/tests/hermes_cli/test_dashboard_admin_endpoints.py
+++ b/tests/hermes_cli/test_dashboard_admin_endpoints.py
@@ -201,6 +201,91 @@ class TestWebhookEndpoints:
         r = self.client.post("/api/webhooks", json={"name": "gh", "deliver": "log"})
         assert r.status_code == 400
 
+    def test_enable_platform_starts_gateway_restart(self, monkeypatch):
+        import hermes_cli.web_server as ws
+        from hermes_cli.config import load_config
+
+        ws._ACTION_PROCS.pop("gateway-restart", None)
+        restart_calls = []
+
+        class FakeRestartProc:
+            pid = 4242
+
+        def fake_spawn_action(subcommand, name):
+            restart_calls.append((subcommand, name))
+            return FakeRestartProc()
+
+        monkeypatch.setattr(ws, "_spawn_hermes_action", fake_spawn_action)
+
+        r = self.client.post("/api/webhooks/enable")
+
+        assert r.status_code == 200
+        assert r.json() == {
+            "ok": True,
+            "platform": "webhook",
+            "enabled": True,
+            "needs_restart": False,
+            "restart_started": True,
+            "restart_action": "gateway-restart",
+            "restart_pid": 4242,
+        }
+        assert restart_calls == [(["gateway", "restart"], "gateway-restart")]
+        assert load_config()["platforms"]["webhook"]["enabled"] is True
+        assert self.client.get("/api/webhooks").json()["enabled"] is True
+
+    def test_enable_platform_reports_restart_failure_after_save(self, monkeypatch):
+        import hermes_cli.web_server as ws
+        from hermes_cli.config import load_config
+
+        ws._ACTION_PROCS.pop("gateway-restart", None)
+
+        def fail_spawn_action(subcommand, name):
+            assert subcommand == ["gateway", "restart"]
+            assert name == "gateway-restart"
+            raise RuntimeError("supervisor unavailable")
+
+        monkeypatch.setattr(ws, "_spawn_hermes_action", fail_spawn_action)
+
+        r = self.client.post("/api/webhooks/enable")
+
+        assert r.status_code == 200
+        data = r.json()
+        assert data["ok"] is True
+        assert data["platform"] == "webhook"
+        assert data["enabled"] is True
+        assert data["needs_restart"] is True
+        assert data["restart_started"] is False
+        assert "supervisor unavailable" in data["restart_error"]
+        assert load_config()["platforms"]["webhook"]["enabled"] is True
+
+    def test_enable_platform_reuses_inflight_gateway_restart(self, monkeypatch):
+        import hermes_cli.web_server as ws
+        from hermes_cli.config import load_config
+
+        ws._ACTION_PROCS.pop("gateway-restart", None)
+
+        class FakeRunningProc:
+            pid = 5151
+
+            def poll(self):
+                return None
+
+        monkeypatch.setitem(ws._ACTION_PROCS, "gateway-restart", FakeRunningProc())
+
+        def fail_spawn_action(subcommand, name):
+            raise AssertionError("must not spawn a second concurrent restart")
+
+        monkeypatch.setattr(ws, "_spawn_hermes_action", fail_spawn_action)
+
+        r = self.client.post("/api/webhooks/enable")
+
+        assert r.status_code == 200
+        data = r.json()
+        assert data["needs_restart"] is False
+        assert data["restart_started"] is True
+        assert data["restart_pid"] == 5151
+        assert load_config()["platforms"]["webhook"]["enabled"] is True
+
 
 class TestOpsEndpoints:
     @pytest.fixture(autouse=True)
@@ -622,6 +707,10 @@ class TestAdminEndpointsAuthGate:
         resp = self.client.get(path)
         assert resp.status_code in (401, 403)
 
+    def test_webhooks_enable_post_gated(self):
+        resp = self.client.post("/api/webhooks/enable")
+        assert resp.status_code in (401, 403)
+
 
 class TestUpdateCheckEndpoint:
     """``GET /api/hermes/update/check`` reports availability without applying.
@@ -953,4 +1042,3 @@ class TestToolsConfigEndpoints:
                 kwargs["json"] = payload
             r = fn(path, **kwargs)
             assert r.status_code == 401, f"{method} {path} not gated"
-
diff --git a/tests/hermes_cli/test_dashboard_unified_launch.py b/tests/hermes_cli/test_dashboard_unified_launch.py
new file mode 100644
index 00000000000..232d7a4a394
--- /dev/null
+++ b/tests/hermes_cli/test_dashboard_unified_launch.py
@@ -0,0 +1,130 @@
+"""Tests for the unified profile→machine dashboard launch routing.
+
+`<profile> dashboard` routes to ONE machine-level dashboard instead of
+spawning a per-profile server: attach (open browser at ?profile=<name>) when one
+is already listening, else re-exec as the machine dashboard with the
+launching profile preselected. `--isolated` opts out.
+"""
+import sys
+import types
+import pytest
+
+
+@pytest.fixture
+def main_mod():
+    import hermes_cli.main as main_mod
+    return main_mod
+
+
+def _args(**kw):
+    defaults = dict(
+        status=False, stop=False, host="127.0.0.1", port=9119,
+        no_open=True, insecure=False, skip_build=False,
+        isolated=False, open_profile="",
+    )
+    defaults.update(kw)
+    return types.SimpleNamespace(**defaults)
+
+
+class TestUnifiedDashboardRouting:
+    def test_profile_launch_attaches_to_running_dashboard(self, main_mod, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.profiles.get_active_profile_name", lambda: "worker_x"
+        )
+        monkeypatch.setattr(main_mod, "_dashboard_listening", lambda host, port: True)
+        execs = []
+        monkeypatch.setattr(main_mod.os, "execvpe", lambda *a, **k: execs.append(a))
+
+        with pytest.raises(SystemExit) as exc:
+            main_mod.cmd_dashboard(_args())
+        assert exc.value.code == 0
+        assert execs == []  # attached, never re-exec'd
+
+    def test_profile_launch_attach_opens_scoped_url(self, main_mod, monkeypatch):
+        """The attach path must open the browser at ?profile=<name> — that
+        URL is the entire point of attaching (preselects the switcher)."""
+        monkeypatch.setattr(
+            "hermes_cli.profiles.get_active_profile_name", lambda: "worker_x"
+        )
+        monkeypatch.setattr(main_mod, "_dashboard_listening", lambda host, port: True)
+        opened = []
+        import webbrowser
+        monkeypatch.setattr(webbrowser, "open", lambda url: opened.append(url))
+
+        with pytest.raises(SystemExit) as exc:
+            main_mod.cmd_dashboard(_args(no_open=False))
+        assert exc.value.code == 0
+        assert opened == ["http://127.0.0.1:9119/?profile=worker_x"]
+
+    def test_profile_launch_reexecs_machine_dashboard(self, main_mod, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.profiles.get_active_profile_name", lambda: "worker_x"
+        )
+        monkeypatch.setattr(main_mod, "_dashboard_listening", lambda host, port: False)
+        execs = []
+
+        def fake_exec(exe, argv, env):
+            execs.append((exe, argv, env))
+            raise SystemExit(0)  # execvpe never returns
+
+        monkeypatch.setattr(main_mod.os, "execvpe", fake_exec)
+
+        with pytest.raises(SystemExit):
+            main_mod.cmd_dashboard(_args())
+
+        assert len(execs) == 1
+        exe, argv, env = execs[0]
+        assert exe == sys.executable
+        # Pinned to the default profile + launching profile preselected.
+        assert "-p" in argv and argv[argv.index("-p") + 1] == "default"
+        assert "--open-profile" in argv
+        assert argv[argv.index("--open-profile") + 1] == "worker_x"
+        # Profile HERMES_HOME dropped so the child binds the machine root.
+        assert "HERMES_HOME" not in env
+
+    def test_isolated_flag_skips_routing(self, main_mod, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.profiles.get_active_profile_name", lambda: "worker_x"
+        )
+        listening_calls = []
+        monkeypatch.setattr(
+            main_mod, "_dashboard_listening",
+            lambda host, port: listening_calls.append(1) or True,
+        )
+        # With --isolated the routing block is skipped entirely; the command
+        # proceeds to dependency checks. Make the first post-routing step
+        # bail so the test doesn't actually start a server.
+        monkeypatch.setitem(sys.modules, "fastapi", None)
+
+        with pytest.raises((SystemExit, AttributeError, ImportError, TypeError)):
+            main_mod.cmd_dashboard(_args(isolated=True))
+        assert listening_calls == []
+
+    def test_default_profile_launch_skips_routing(self, main_mod, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.profiles.get_active_profile_name", lambda: "default"
+        )
+        listening_calls = []
+        monkeypatch.setattr(
+            main_mod, "_dashboard_listening",
+            lambda host, port: listening_calls.append(1) or True,
+        )
+        monkeypatch.setitem(sys.modules, "fastapi", None)
+
+        with pytest.raises((SystemExit, AttributeError, ImportError, TypeError)):
+            main_mod.cmd_dashboard(_args())
+        assert listening_calls == []
+
+    def test_reexec_child_does_not_reroute(self, main_mod, monkeypatch):
+        """The re-exec'd child carries --open-profile; the guard must treat
+        that as 'already routed' and never re-exec again (no exec loop)."""
+        monkeypatch.setattr(
+            "hermes_cli.profiles.get_active_profile_name", lambda: "worker_x"
+        )
+        execs = []
+        monkeypatch.setattr(main_mod.os, "execvpe", lambda *a, **k: execs.append(a))
+        monkeypatch.setitem(sys.modules, "fastapi", None)
+
+        with pytest.raises((SystemExit, AttributeError, ImportError, TypeError)):
+            main_mod.cmd_dashboard(_args(open_profile="worker_x"))
+        assert execs == []
diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py
index 0988f8fb64a..30773e1ed13 100644
--- a/tests/hermes_cli/test_gateway.py
+++ b/tests/hermes_cli/test_gateway.py
@@ -369,6 +369,16 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys):
     unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service"
 
     monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)
+    # Synthetic unit with a non-temp home: the real generator bakes the
+    # hermetic test HERMES_HOME (a tmp dir), which the temp-home write
+    # guard correctly refuses.
+    monkeypatch.setattr(
+        gateway,
+        "generate_systemd_unit",
+        lambda system=False, run_as_user=None: (
+            '[Service]\nEnvironment="HERMES_HOME=/home/alice/.hermes"\n'
+        ),
+    )
 
     calls = []
     helper_calls = []
@@ -396,6 +406,15 @@ def test_systemd_install_can_skip_enable_on_startup(monkeypatch, tmp_path, capsy
     unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service"
 
     monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)
+    # Non-temp home so the temp-home write guard (which trips on the
+    # hermetic test HERMES_HOME) stays out of the way.
+    monkeypatch.setattr(
+        gateway,
+        "generate_systemd_unit",
+        lambda system=False, run_as_user=None: (
+            '[Service]\nEnvironment="HERMES_HOME=/home/alice/.hermes"\n'
+        ),
+    )
 
     calls = []
     helper_calls = []
diff --git a/tests/hermes_cli/test_gateway_linger.py b/tests/hermes_cli/test_gateway_linger.py
index 90f8ea3d708..4a34f7ab1b6 100644
--- a/tests/hermes_cli/test_gateway_linger.py
+++ b/tests/hermes_cli/test_gateway_linger.py
@@ -102,6 +102,15 @@ def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys):
     unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service"
 
     monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)
+    # Non-temp home so the temp-home write guard (which trips on the
+    # hermetic test HERMES_HOME) stays out of the way.
+    monkeypatch.setattr(
+        gateway,
+        "generate_systemd_unit",
+        lambda system=False, run_as_user=None: (
+            '[Service]\nEnvironment="HERMES_HOME=/home/alice/.hermes"\n'
+        ),
+    )
 
     calls = []
 
diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py
index 0b897af01f8..0c6d7ca836d 100644
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@@ -289,6 +289,105 @@ class TestSystemdServiceRefresh:
             "daemon-reload" in str(c) for c in ran
         ), "daemon-reload must not run when write was refused"
 
+    def test_refresh_refuses_to_bake_any_tempdir_home_into_real_user_unit(
+        self, tmp_path, monkeypatch
+    ):
+        """Refresh must reject any temp-dir HERMES_HOME, pytest-shaped or not.
+
+        A hand-run E2E home such as ``/tmp/hermes-e2e-41264`` carries no pytest
+        marker, yet poisons the unit identically (seen live 2026-06-11: an E2E
+        probe ran ``hermes gateway restart`` with a /tmp HERMES_HOME exported, the
+        unit refresh baked it into the production unit, and the post-update
+        restart produced a 7-hour zombie gateway). The installed unit must stay
+        untouched and no ``daemon-reload`` may run.
+        """
+        installed_unit = tmp_path / "hermes-gateway.service"
+        installed_unit.write_text("old unit\n", encoding="utf-8")
+
+        temp_home_unit = (
+            "[Service]\n"
+            'Environment="HERMES_HOME=/tmp/hermes-e2e-41264"\n'
+            "WorkingDirectory=/tmp/hermes-e2e-41264\n"
+        )
+
+        executed = []
+
+        def record_run(cmd, check=True, **kwargs):
+            executed.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        # Stub the unit path lookup, the unit generator and subprocess together.
+        monkeypatch.setattr(
+            gateway_cli, "get_systemd_unit_path", lambda system=False: installed_unit
+        )
+        monkeypatch.setattr(
+            gateway_cli,
+            "generate_systemd_unit",
+            lambda system=False, run_as_user=None: temp_home_unit,
+        )
+        monkeypatch.setattr(gateway_cli.subprocess, "run", record_run)
+
+        refreshed = gateway_cli.refresh_systemd_unit_if_needed(system=False)
+
+        assert refreshed is False, "refresh should refuse to write a temp-home unit"
+        unit_text = installed_unit.read_text(encoding="utf-8")
+        assert unit_text == "old unit\n", "installed unit must be left untouched"
+        reload_calls = [c for c in executed if "daemon-reload" in str(c)]
+        assert not reload_calls, "daemon-reload must not run when write was refused"
+
+
+class TestTempHomeServiceDefinitionGuard:
+    """_temp_home_in_service_definition(): systemd-unit and launchd-plist inputs."""
+
+    def test_detects_tmp_home_in_systemd_unit(self):
+        unit = '[Service]\nEnvironment="HERMES_HOME=/tmp/hermes-e2e-41264"\n'
+        assert (
+            gateway_cli._temp_home_in_service_definition(unit)
+            == "/tmp/hermes-e2e-41264"
+        )
+
+    def test_detects_var_tmp_home(self):
+        unit = '[Service]\nEnvironment="HERMES_HOME=/var/tmp/hermes-x"\n'
+        assert gateway_cli._temp_home_in_service_definition(unit) is not None
+
+    def test_detects_tempdir_env_home(self, monkeypatch, tmp_path):
+        import tempfile as _tempfile
+
+        monkeypatch.setattr(_tempfile, "gettempdir", lambda: str(tmp_path))
+        unit = f'[Service]\nEnvironment="HERMES_HOME={tmp_path}/hermes-home"\n'
+        assert gateway_cli._temp_home_in_service_definition(unit) is not None
+
+    def test_detects_tmp_home_in_launchd_plist(self):
+        plist = (
+            "<dict>\n  <key>HERMES_HOME</key>\n"
+            "  <string>/tmp/hermes-e2e-99999</string>\n</dict>\n"
+        )
+        assert (
+            gateway_cli._temp_home_in_service_definition(plist)
+            == "/tmp/hermes-e2e-99999"
+        )
+
+    def test_accepts_real_home(self):
+        unit = '[Service]\nEnvironment="HERMES_HOME=/home/alice/.hermes"\n'
+        assert gateway_cli._temp_home_in_service_definition(unit) is None
+
+    def test_accepts_macos_real_home_plist(self):
+        plist = (
+            "<dict>\n  <key>HERMES_HOME</key>\n"
+            "  <string>/Users/alice/.hermes</string>\n</dict>\n"
+        )
+        assert gateway_cli._temp_home_in_service_definition(plist) is None
+
+    def test_accepts_unit_without_hermes_home(self):
+        unit = "[Service]\nExecStart=/usr/bin/python -m hermes_cli.main gateway run\n"
+        assert gateway_cli._temp_home_in_service_definition(unit) is None
+
+    def test_tmp_prefixed_non_temp_path_is_accepted(self):
+        # /tmpfs-data is NOT under /tmp — prefix matching must be
+        # component-wise, not string startswith.
+        unit = '[Service]\nEnvironment="HERMES_HOME=/tmpfs-data/.hermes"\n'
+        assert gateway_cli._temp_home_in_service_definition(unit) is None
+
 
 class TestRequireServiceInstalled:
     def test_exits_with_install_hint_when_unit_missing(self, tmp_path, monkeypatch, capsys):
@@ -481,6 +580,17 @@ class TestLaunchdServiceRecovery:
         plist_path.write_text("old content", encoding="utf-8")
 
         monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+        # Patch the generator with synthetic content carrying a real-looking
+        # home — the temp-home guard refuses to write plists whose
+        # HERMES_HOME resolves under the (pytest tmp) test HERMES_HOME.
+        monkeypatch.setattr(
+            gateway_cli,
+            "generate_launchd_plist",
+            lambda: (
+                "--replace\nHERMES_HOME"
+                "/Users/alice/.hermes"
+            ),
+        )
 
         calls = []
 
@@ -495,7 +605,10 @@ class TestLaunchdServiceRecovery:
         label = gateway_cli.get_launchd_label()
         domain = gateway_cli._launchd_domain()
         assert "--replace" in plist_path.read_text(encoding="utf-8")
-        assert calls[:2] == [
+        # The calls list includes launchctl print probes from _launchd_domain()
+        # before the bootout/bootstrap calls. Filter to only bootout/bootstrap.
+        service_calls = [c for c in calls if "bootout" in c or "bootstrap" in c]
+        assert service_calls[:2] == [
             ["launchctl", "bootout", f"{domain}/{label}"],
             ["launchctl", "bootstrap", domain, str(plist_path)],
         ]
@@ -679,10 +792,22 @@ class TestLaunchdServiceRecovery:
         assert "stale" in output.lower()
         assert "not loaded" in output.lower()
 
-    def test_launchd_domain_uses_user_domain(self):
+    def test_launchd_domain_uses_user_domain(self, monkeypatch):
         # The user/ domain (not gui/) is the one reachable from
         # non-Aqua/background sessions on macOS 26+ (issue #23387).
-        assert gateway_cli._launchd_domain() == f"user/{os.getuid()}"
+        # When the gui/<uid> probe fails and the user/<uid> probe succeeds,
+        # _launchd_domain() must return user/<uid>.
+        # Clear the cache through monkeypatch so it is restored at teardown.
+        monkeypatch.setattr(gateway_cli, "_resolved_launchd_domain", None, raising=False)
+        monkeypatch.setattr(os, "getuid", lambda: 501)
+
+        def fake_run(cmd, check=False, **kwargs):
+            if "print" in cmd and "gui/" in " ".join(cmd):
+                raise subprocess.CalledProcessError(1, cmd, stderr="Domain error")
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+        assert gateway_cli._launchd_domain() == "user/501"
 
     def test_launchctl_domain_unsupported_recognizes_macos26_codes(self):
         # Codes that persist after a fresh bootstrap → launchd truly unavailable.
@@ -761,6 +886,17 @@ class TestLaunchdServiceRecovery:
         """macOS bootstrap error 5 should spawn a detached gateway, not crash."""
         plist_path = tmp_path / "ai.hermes.gateway.plist"
         monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+        # Synthetic plist with a non-temp home so the temp-home write guard
+        # (which would trip on the pytest-tmp test HERMES_HOME) stays out of
+        # the way — this test exercises the bootstrap-error fallback.
+        monkeypatch.setattr(
+            gateway_cli,
+            "generate_launchd_plist",
+            lambda: (
+                "HERMES_HOME"
+                "/Users/alice/.hermes"
+            ),
+        )
 
         def fake_run(cmd, check=False, **kwargs):
             if cmd[:2] == ["launchctl", "bootstrap"]:
@@ -836,6 +972,114 @@ class TestLaunchdServiceRecovery:
         assert "nohup hermes gateway run" in out
 
 
+class TestLaunchdDomainDetection:
+    """Regression tests for _launchd_domain() probing (#40831).
+
+    The function must detect which launchd domain actually contains (or can
+    manage) the service, rather than hardcoding ``user/<uid>`` or ``gui/<uid>``.
+    """
+
+    def _reset_domain_cache(self, monkeypatch):
+        """Clear the cached domain; monkeypatch restores the old value at teardown."""
+        monkeypatch.setattr(gateway_cli, "_resolved_launchd_domain", None, raising=False)
+
+    def test_prefers_gui_domain_when_service_loaded_there(self, monkeypatch):
+        """In an Aqua session where the service is loaded under gui/<uid>,
+        _launchd_domain() must return ``gui/<uid>`` — not ``user/<uid>``."""
+        self._reset_domain_cache(monkeypatch)
+        monkeypatch.setattr(os, "getuid", lambda: 501)
+        label = gateway_cli.get_launchd_label()
+
+        run_calls = []
+
+        def fake_run(cmd, check=False, **kwargs):
+            run_calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        domain = gateway_cli._launchd_domain()
+        assert domain == "gui/501"
+        # Should have probed gui first
+        assert run_calls[0] == ["launchctl", "print", f"gui/501/{label}"]
+
+    def test_falls_back_to_user_domain_when_gui_fails(self, monkeypatch):
+        """In a Background/SSH session where the gui/<uid> probe fails but
+        user/<uid> works, _launchd_domain() must return ``user/<uid>``."""
+        self._reset_domain_cache(monkeypatch)
+        monkeypatch.setattr(os, "getuid", lambda: 501)
+        label = gateway_cli.get_launchd_label()
+
+        run_calls = []
+
+        def fake_run(cmd, check=False, **kwargs):
+            run_calls.append(cmd)
+            if "print" in cmd and "gui/" in " ".join(cmd):
+                raise subprocess.CalledProcessError(1, cmd, stderr="Domain error")
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        domain = gateway_cli._launchd_domain()
+        assert domain == "user/501"
+        assert run_calls[0] == ["launchctl", "print", f"gui/501/{label}"]  # gui first
+        assert len(run_calls) >= 2  # ...then at least one further probe
+
+    def test_uses_managername_heuristic_when_both_probe_fail(self, monkeypatch):
+        """When neither domain contains a loaded service, use
+        ``launchctl managername`` as a tiebreaker: Aqua -> gui, else -> user."""
+        self._reset_domain_cache(monkeypatch)
+        monkeypatch.setattr(os, "getuid", lambda: 501)
+
+        # Every ``launchctl print`` probe fails, so managername decides.
+        def fake_run(cmd, check=False, **kwargs):
+            if "print" in cmd:
+                raise subprocess.CalledProcessError(1, cmd, stderr="not found")
+            if "managername" in cmd:
+                return SimpleNamespace(returncode=0, stdout="Aqua\n", stderr="")
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        domain = gateway_cli._launchd_domain()
+        assert domain == "gui/501"
+
+    def test_managername_background_selects_user_domain(self, monkeypatch):
+        """When managername is Background (non-Aqua), use user/<uid>."""
+        self._reset_domain_cache(monkeypatch)
+        monkeypatch.setattr(os, "getuid", lambda: 501)
+
+        def fake_run(cmd, check=False, **kwargs):
+            if "print" in cmd:
+                raise subprocess.CalledProcessError(1, cmd, stderr="not found")
+            if "managername" in cmd:
+                return SimpleNamespace(returncode=0, stdout="Background\n", stderr="")
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        domain = gateway_cli._launchd_domain()
+        assert domain == "user/501"
+
+    def test_caches_result_across_calls(self, monkeypatch):
+        """Domain detection should run once and cache the result."""
+        self._reset_domain_cache(monkeypatch)
+        monkeypatch.setattr(os, "getuid", lambda: 501)
+
+        run_count = [0]
+
+        def fake_run(cmd, check=False, **kwargs):
+            run_count[0] += 1
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        d1 = gateway_cli._launchd_domain()
+        d2 = gateway_cli._launchd_domain()
+        assert d1 == d2
+        assert run_count[0] == 1  # Only probed once
+
+
 class TestGatewayServiceDetection:
     def test_supports_systemd_services_requires_systemctl_binary(self, monkeypatch):
         monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
index 2f89be93368..3e788fe3d53 100644
--- a/tests/hermes_cli/test_runtime_provider_resolution.py
+++ b/tests/hermes_cli/test_runtime_provider_resolution.py
@@ -712,6 +712,76 @@ def test_named_custom_provider_uses_saved_credentials(monkeypatch):
     assert resolved["source"] == "custom_provider:Local"
 
 
+def test_bare_custom_resolves_providers_dict_entry_named_custom(monkeypatch):
+    """Bare ``provider="custom"`` resolves a literal ``providers.custom`` entry.
+
+    Such an entry (e.g. a cliproxy endpoint) must win over the global default.
+    Regression: cron jobs stored with ``provider: "custom"`` used to fail with
+    ``auth_unavailable: providers=codex``.
+    """
+    for env_name in ("OPENAI_API_KEY", "OPENROUTER_API_KEY"):
+        monkeypatch.delenv(env_name, raising=False)
+
+    def fake_load_config():
+        return {
+            "providers": {
+                "custom": {
+                    "api": "https://cliproxy.example.com/v1",
+                    "api_key": "cliproxy-key",
+                    "default_model": "gpt-5.4",
+                    "name": "CLIProxy",
+                }
+            }
+        }
+
+    def forbidden_resolve(*_args, **_kwargs):
+        # Getting here means the named-custom path was bypassed — the bug.
+        raise AssertionError(
+            "resolve_provider must not be called; providers.custom should match"
+        )
+
+    monkeypatch.setattr(rp, "load_config", fake_load_config)
+    monkeypatch.setattr(rp, "resolve_provider", forbidden_resolve)
+
+    resolved = rp.resolve_runtime_provider(requested="custom")
+
+    for field, value in (
+        ("provider", "custom"),
+        ("base_url", "https://cliproxy.example.com/v1"),
+        ("api_key", "cliproxy-key"),
+        ("requested_provider", "custom"),
+    ):
+        assert resolved[field] == value
+
+
+def test_bare_custom_without_named_entry_still_falls_through(monkeypatch):
+    """Without a literal ``providers.custom`` entry, bare custom is unchanged:
+    it still follows the legacy ``model.base_url`` trust path."""
+    for stale_var in ("CUSTOM_BASE_URL", "OPENROUTER_BASE_URL"):
+        monkeypatch.delenv(stale_var, raising=False)
+    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+    def fake_model_config():
+        return {
+            "provider": "openrouter",
+            "base_url": "http://127.0.0.1:8082/v1",
+            "default": "my-local-model",
+        }
+
+    def fake_load_config():
+        # Only an unrelated named provider exists — nothing called "custom".
+        return {"providers": {"some-other-proxy": {"api": "https://x.example/v1"}}}
+
+    monkeypatch.setattr(rp, "_get_model_config", fake_model_config)
+    monkeypatch.setattr(rp, "load_config", fake_load_config)
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
+
+    resolved = rp.resolve_runtime_provider(requested="custom")
+
+    assert resolved["provider"] == "custom"
+    assert resolved["base_url"] == "http://127.0.0.1:8082/v1"
+
+
 def test_named_custom_provider_uses_providers_dict_when_list_missing(monkeypatch):
     """After v11→v12 migration deletes custom_providers, resolution should
     still find entries in the providers dict via get_compatible_custom_providers."""
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py
index 5b24d2b6ebd..3a68c975897 100644
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -975,6 +975,19 @@ def test_toolset_has_keys_treats_no_key_providers_as_configured():
     assert _toolset_has_keys("computer_use", config) is True
 
 
+def test_web_no_prompt_when_usable_keyless():
+    """On a fresh install the web toolset is usable without a key (free Parallel
+    MCP), so enabling it must not force provider setup."""
+    with patch("tools.web_tools.check_web_api_key", return_value=True):
+        assert _toolset_needs_configuration_prompt("web", {}) is False
+
+
+def test_web_no_prompt_when_extract_backend_is_extract_capable():
+    parallel_cfg = {"web": {"extract_backend": "parallel"}}  # explicit backend
+    with patch("tools.web_tools.check_web_api_key", return_value=True):
+        assert _toolset_needs_configuration_prompt("web", parallel_cfg) is False
+
+
 def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
     """No-key providers can still need setup when their post_setup is unsatisfied.
 
diff --git a/tests/hermes_cli/test_tui_npm_install.py b/tests/hermes_cli/test_tui_npm_install.py
index 5d52e4276c4..f9b3cf108fc 100644
--- a/tests/hermes_cli/test_tui_npm_install.py
+++ b/tests/hermes_cli/test_tui_npm_install.py
@@ -425,3 +425,43 @@ def test_tui_launch_install_uses_workspace_scope(
     install_cmd = npm_calls[0]
     assert "--workspace" in install_cmd
     assert "ui-tui" in install_cmd
+
+def test_make_tui_argv_omits_workspace_when_tui_has_own_lockfile(
+    tmp_path: Path, main_mod, monkeypatch
+) -> None:
+    """Standalone ``ui-tui/`` layouts must get a plain ``npm install``.
+
+    If ui-tui/ ships its own package-lock.json, _workspace_root resolves to
+    tui_dir itself, and ``npm install --workspace ui-tui`` fails there: npm
+    finds no workspace named "ui-tui" inside ui-tui/. See #42973.
+    """
+    tui_dir = tmp_path / "ui-tui"
+    tui_dir.mkdir()
+    # curl-install layout: ui-tui/ carries its own manifest and lockfile; the
+    # parent has a lockfile too, but _workspace_root prefers tui_dir's own.
+    own_files = [tui_dir / "package.json", tui_dir / "package-lock.json"]
+    for manifest in (*own_files, tmp_path / "package-lock.json"):
+        manifest.write_text("{}")
+
+    recorded = []
+
+    def record_run(*args, **kwargs):
+        recorded.append((args, kwargs))
+        return types.SimpleNamespace(returncode=0, stdout="", stderr="")
+
+    monkeypatch.delenv("TERMUX_VERSION", raising=False)
+    monkeypatch.setenv("PREFIX", "/usr")
+    monkeypatch.setattr(main_mod, "_tui_need_npm_install", lambda _root: True)
+    monkeypatch.setattr(main_mod.shutil, "which", lambda name: f"/bin/{name}")
+    monkeypatch.setattr(main_mod.subprocess, "run", record_run)
+
+    main_mod._make_tui_argv(tui_dir, tui_dev=False)
+
+    run_args, run_kwargs = recorded[0]
+    install_cmd = run_args[0]
+    assert "--workspace" not in install_cmd, (
+        f"npm install should omit --workspace when tui_dir has its own lockfile, got: {install_cmd}"
+    )
+    assert install_cmd[:2] == ["/bin/npm", "install"]
+    # Runs standalone from tui_dir, not from the parent checkout.
+    assert run_kwargs["cwd"] == str(tui_dir)
diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py
index 47c018cbbdb..5c590bff15c 100644
--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -93,7 +93,39 @@ def test_check_for_updates_expired_cache(tmp_path, monkeypatch):
         result = check_for_updates()
 
     assert result == 5
-    assert mock_run.call_count == 2  # git fetch + git rev-list
+    assert mock_run.call_count == 3  # origin probe + git fetch + git rev-list
+
+
+def test_check_for_updates_official_ssh_origin_uses_https_probe(tmp_path):
+    """An official install with an SSH origin is probed over HTTPS, not fetched."""
+    import hermes_cli.banner as banner
+
+    repo_dir = tmp_path / "hermes-agent"
+    (repo_dir / ".git").mkdir(parents=True)
+
+    https_url = "https://github.com/NousResearch/hermes-agent.git"
+    ssh_origin = "git@github.com:NousResearch/hermes-agent.git\n"
+    upstream_ref = "upstream-sha\trefs/heads/main\n"
+    replies = [
+        (["git", "remote", "get-url", "origin"], ssh_origin),
+        (["git", "rev-parse", "HEAD"], "local-sha\n"),
+        (["git", "ls-remote", https_url, "refs/heads/main"], upstream_ref),
+    ]
+
+    seen = []
+
+    def fake_run(cmd, **kwargs):
+        seen.append(cmd)
+        for expected_cmd, stdout in replies:
+            if cmd == expected_cmd:
+                return MagicMock(returncode=0, stdout=stdout)
+        raise AssertionError(f"unexpected git command: {cmd!r}")
+
+    with patch("hermes_cli.banner.subprocess.run", side_effect=fake_run):
+        result = banner._check_via_local_git(repo_dir)
+
+    assert result == banner.UPDATE_AVAILABLE_NO_COUNT
+    assert ["git", "fetch", "origin", "--quiet"] not in seen
 
 
 def test_check_for_updates_no_git_dir(tmp_path, monkeypatch):
diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py
index 76cbd59efdc..dc92f7cefb4 100644
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@@ -1104,6 +1104,113 @@ class TestWebServerEndpoints:
         assert confirmed.status_code == 200
         assert confirmed.json()["ok"] is True
 
+    def test_model_set_normalizes_vendor_slug_for_native_provider(self, monkeypatch):
+        """Native provider + OpenRouter-style slug is normalized before saving.
+
+        Persisting ``anthropic/claude-opus-4.6`` verbatim would 400 against the
+        native API and read as "changing models does nothing".
+        """
+        monkeypatch.setattr(
+            "hermes_cli.model_cost_guard.expensive_model_warning",
+            lambda *_args, **_kwargs: None,
+        )
+        request_body = {
+            "scope": "main",
+            "provider": "anthropic",
+            "model": "anthropic/claude-opus-4.6",
+        }
+        resp = self.client.post("/api/model/set", json=request_body)
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["ok"] is True
+        assert data["provider"] == "anthropic"
+        # Vendor prefix stripped + dots→hyphens for the native Anthropic API.
+        assert data["model"] == "claude-opus-4-6"
+
+        from hermes_cli.config import load_config
+        saved_model = load_config()["model"]
+        assert saved_model["provider"] == "anthropic"
+        assert saved_model["default"] == "claude-opus-4-6"
+
+    def test_model_set_maps_unknown_vendor_to_aggregator(self, monkeypatch):
+        """Analytics rows without billing_provider yield a bare vendor name, which
+        is not a Hermes provider: the user's aggregator is kept rather than
+        writing a provider that can never resolve credentials."""
+        monkeypatch.setattr(
+            "hermes_cli.model_cost_guard.expensive_model_warning",
+            lambda *_args, **_kwargs: None,
+        )
+        from hermes_cli.config import load_config, save_config
+
+        seeded = load_config()
+        seeded["model"] = {"provider": "openrouter", "default": "openai/gpt-5.5"}
+        save_config(seeded)
+
+        vendor_request = {
+            "scope": "main",
+            "provider": "moonshotai",  # vendor prefix, not a provider
+            "model": "moonshotai/kimi-k2.6",
+        }
+        resp = self.client.post("/api/model/set", json=vendor_request)
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["ok"] is True
+        # The aggregator already configured wins over the bare vendor name.
+        assert data["provider"] == "openrouter"
+        assert data["model"] == "moonshotai/kimi-k2.6"
+
+    def test_model_set_keeps_aggregator_slug_unchanged(self, monkeypatch):
+        """Picker happy path: openrouter + vendor/model passes through untouched."""
+        monkeypatch.setattr(
+            "hermes_cli.model_cost_guard.expensive_model_warning",
+            lambda *_args, **_kwargs: None,
+        )
+
+        picker_request = {
+            "scope": "main",
+            "provider": "openrouter",
+            "model": "anthropic/claude-sonnet-4.6",
+        }
+        resp = self.client.post("/api/model/set", json=picker_request)
+
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["ok"] is True
+        assert data["provider"] == "openrouter"
+        assert data["model"] == "anthropic/claude-sonnet-4.6"
+
+    def test_ops_import_passes_force_flag(self, tmp_path, monkeypatch):
+        """``force=True`` appends ``--force`` to the spawned ``hermes import`` so
+        the non-interactive run doesn't auto-abort at the overwrite prompt."""
+        import zipfile
+        from types import SimpleNamespace as NS
+
+        import hermes_cli.web_server as ws
+
+        archive = tmp_path / "backup.zip"
+        with zipfile.ZipFile(archive, "w") as zf:
+            zf.writestr("config.yaml", "model: {}\n")
+        archive_arg = str(archive)
+
+        spawned = {}
+
+        def fake_spawn(subcommand, name):
+            spawned["args"] = subcommand
+            spawned["name"] = name
+            return NS(pid=12345)
+
+        monkeypatch.setattr(ws, "_spawn_hermes_action", fake_spawn)
+
+        # Same archive twice: first forced, then with the flag left out.
+        cases = [
+            ({"archive": archive_arg, "force": True}, ["import", archive_arg, "--force"]),
+            ({"archive": archive_arg}, ["import", archive_arg]),
+        ]
+        for body, expected_args in cases:
+            resp = self.client.post("/api/ops/import", json=body)
+            assert resp.status_code == 200
+            assert spawned["args"] == expected_args
+
 
     def test_reveal_env_var(self, tmp_path):
         """POST /api/env/reveal should return the real unredacted value."""
@@ -4441,7 +4548,7 @@ class TestPtyWebSocket:
         monkeypatch.setattr(
             self.ws_module,
             "_resolve_chat_argv",
-            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+            lambda resume=None, sidecar_url=None, profile=None: (["/bin/cat"], None, None),
         )
         from starlette.websockets import WebSocketDisconnect
 
@@ -4454,7 +4561,7 @@ class TestPtyWebSocket:
         monkeypatch.setattr(
             self.ws_module,
             "_resolve_chat_argv",
-            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+            lambda resume=None, sidecar_url=None, profile=None: (["/bin/cat"], None, None),
         )
         from starlette.websockets import WebSocketDisconnect
 
@@ -4467,7 +4574,7 @@ class TestPtyWebSocket:
         monkeypatch.setattr(
             self.ws_module,
             "_resolve_chat_argv",
-            lambda resume=None, sidecar_url=None: (
+            lambda resume=None, sidecar_url=None, profile=None: (
                 ["/bin/sh", "-c", "printf hermes-ws-ok"],
                 None,
                 None,
@@ -4497,7 +4604,7 @@ class TestPtyWebSocket:
         monkeypatch.setattr(
             self.ws_module,
             "_resolve_chat_argv",
-            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+            lambda resume=None, sidecar_url=None, profile=None: (["/bin/cat"], None, None),
         )
         with self.client.websocket_connect(self._url()) as conn:
             conn.send_bytes(b"round-trip-payload\n")
@@ -4530,7 +4637,7 @@ class TestPtyWebSocket:
             self.ws_module,
             "_resolve_chat_argv",
             # sleep gives the test time to push the resize before the child reads the ioctl.
-            lambda resume=None, sidecar_url=None: (
+            lambda resume=None, sidecar_url=None, profile=None: (
                 [sys.executable, "-c", winsize_script],
                 None,
                 None,
@@ -4566,7 +4673,7 @@ class TestPtyWebSocket:
         monkeypatch.setattr(
             self.ws_module,
             "_resolve_chat_argv",
-            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+            lambda resume=None, sidecar_url=None, profile=None: (["/bin/cat"], None, None),
         )
         # Patch PtyBridge.spawn at the web_server module's binding.
         import hermes_cli.web_server as ws_mod
@@ -4581,7 +4688,7 @@ class TestPtyWebSocket:
     def test_resume_parameter_is_forwarded_to_argv(self, monkeypatch):
         captured: dict = {}
 
-        def fake_resolve(resume=None, sidecar_url=None):
+        def fake_resolve(resume=None, sidecar_url=None, profile=None):
             captured["resume"] = resume
             return (["/bin/sh", "-c", "printf resume-arg-ok"], None, None)
 
@@ -4601,7 +4708,7 @@ class TestPtyWebSocket:
         same channel — which is how tool events reach the dashboard sidebar."""
         captured: dict = {}
 
-        def fake_resolve(resume=None, sidecar_url=None):
+        def fake_resolve(resume=None, sidecar_url=None, profile=None):
             captured["sidecar_url"] = sidecar_url
             return (["/bin/sh", "-c", "printf sidecar-ok"], None, None)
 
diff --git a/tests/hermes_cli/test_web_server_profile_unification.py b/tests/hermes_cli/test_web_server_profile_unification.py
new file mode 100644
index 00000000000..d458348f128
--- /dev/null
+++ b/tests/hermes_cli/test_web_server_profile_unification.py
@@ -0,0 +1,385 @@
+"""Regression tests for the machine-dashboard multi-profile unification.
+
+The dashboard is ONE machine-level management surface: config, env, MCP,
+model, and chat-PTY endpoints accept an optional ``profile`` so the global
+profile switcher can target any profile's HERMES_HOME. These tests pin:
+reads/writes land in the REQUESTED profile, the dashboard's own profile
+stays untouched, and the chat PTY env is scoped via HERMES_HOME.
+"""
+import pytest
+import yaml
+
+
+@pytest.fixture
+def isolated_profiles(tmp_path, monkeypatch, _isolate_hermes_home):
+    """Default home plus a ``worker_beta`` profile: both get config.yaml, worker a .env."""
+    from hermes_constants import get_hermes_home
+    from hermes_cli import profiles
+
+    default_home = get_hermes_home()
+    profiles_root = default_home / "profiles"
+    worker_home = profiles_root / "worker_beta"
+    for home in (default_home, worker_home):
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("{}\n", encoding="utf-8")
+    (worker_home / ".env").write_text("", encoding="utf-8")
+
+    monkeypatch.setattr(profiles, "_get_default_hermes_home", lambda: default_home)
+    monkeypatch.setattr(profiles, "_get_profiles_root", lambda: profiles_root)
+    return {"default": default_home, "worker_beta": worker_home}
+
+
+@pytest.fixture
+def client(monkeypatch, isolated_profiles):
+    """TestClient for the dashboard app with the session-token header preset."""
+    try:
+        from starlette.testclient import TestClient
+    except ImportError:
+        pytest.skip("fastapi/starlette not installed")
+    import hermes_state
+    from hermes_constants import get_hermes_home
+    from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
+
+    monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
+    c = TestClient(app)
+    c.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
+    return c
+
+
+def _cfg(home):  # parsed config.yaml of *home* ({} when the file is empty)
+    return yaml.safe_load((home / "config.yaml").read_text(encoding="utf-8")) or {}
+
+
+class TestProfileScopedConfig:
+    def test_config_put_lands_in_target_profile_only(self, client, isolated_profiles):
+        """A PUT scoped via ``body.profile`` writes only that profile's config."""
+        body = {"config": {"timezone": "Mars/Olympus"}, "profile": "worker_beta"}
+        resp = client.put("/api/config", json=body)
+        assert resp.status_code == 200
+        assert _cfg(isolated_profiles["worker_beta"]).get("timezone") == "Mars/Olympus"
+        # The dashboard's own (default) profile must stay untouched.
+        assert _cfg(isolated_profiles["default"]).get("timezone") != "Mars/Olympus"
+
+    def test_config_get_reads_target_profile(self, client, isolated_profiles):
+        """A GET with ``?profile=`` returns that profile's config."""
+        worker_cfg = isolated_profiles["worker_beta"] / "config.yaml"
+        worker_cfg.write_text("timezone: Venus/Cloud\n", encoding="utf-8")
+        scoped = client.get("/api/config", params={"profile": "worker_beta"})
+        assert scoped.status_code == 200
+        assert scoped.json().get("timezone") == "Venus/Cloud"
+        # Unscoped read sees the dashboard's own config.
+        unscoped = client.get("/api/config")
+        assert unscoped.json().get("timezone") != "Venus/Cloud"
+
+    def test_config_query_param_equivalent_to_body(self, client, isolated_profiles):
+        """The SPA's fetchJSON injects ?profile= — it must scope the write just
+        like ``body.profile`` does."""
+        body = {"config": {"timezone": "Pluto/Far"}}  # no profile in the body
+        resp = client.put("/api/config?profile=worker_beta", json=body)
+        assert resp.status_code == 200
+        assert _cfg(isolated_profiles["worker_beta"]).get("timezone") == "Pluto/Far"
+        assert _cfg(isolated_profiles["default"]).get("timezone") != "Pluto/Far"
+
+    def test_config_raw_round_trip_scoped(self, client, isolated_profiles):
+        """Raw YAML written to a profile is readable there and nowhere else."""
+        body = {"yaml_text": "timezone: Io/Volcano\n", "profile": "worker_beta"}
+        written = client.put("/api/config/raw", json=body)
+        assert written.status_code == 200
+        scoped = client.get("/api/config/raw", params={"profile": "worker_beta"})
+        assert "Io/Volcano" in scoped.json()["yaml"]
+        # The unscoped raw view belongs to the dashboard's own profile.
+        unscoped = client.get("/api/config/raw")
+        assert "Io/Volcano" not in unscoped.json()["yaml"]
+
+    def test_unknown_profile_404(self, client, isolated_profiles):
+        """Requesting a profile that does not exist yields 404."""
+        assert client.get("/api/config", params={"profile": "ghost"}).status_code == 404
+
+
+class TestProfileScopedEnv:
+    """``/api/env`` reads and writes must target the requested profile's ``.env``."""
+    def test_env_set_lands_in_target_profile_only(self, client, isolated_profiles):
+        resp = client.put(
+            "/api/env",
+            json={"key": "FAL_KEY", "value": "test-fal-123", "profile": "worker_beta"},
+        )
+        assert resp.status_code == 200
+        worker_env = (isolated_profiles["worker_beta"] / ".env").read_text(encoding="utf-8")
+        assert "test-fal-123" in worker_env
+        default_env_path = isolated_profiles["default"] / ".env"
+        if default_env_path.exists():
+            assert "test-fal-123" not in default_env_path.read_text(encoding="utf-8")
+
+    def test_env_list_reads_target_profile(self, client, isolated_profiles):
+        (isolated_profiles["worker_beta"] / ".env").write_text(
+            "FAL_KEY=worker-only-value\n", encoding="utf-8"
+        )
+        resp = client.get("/api/env", params={"profile": "worker_beta"})
+        assert resp.status_code == 200
+        assert resp.json()["FAL_KEY"]["is_set"] is True
+        resp = client.get("/api/env")
+        assert resp.json()["FAL_KEY"]["is_set"] is False
+
+    def test_env_delete_scoped(self, client, isolated_profiles):
+        env_path = isolated_profiles["worker_beta"] / ".env"
+        env_path.write_text("FAL_KEY=doomed\n", encoding="utf-8")
+        resp = client.request(
+            "DELETE",
+            "/api/env",
+            json={"key": "FAL_KEY", "profile": "worker_beta"},
+        )
+        assert resp.status_code == 200
+        assert "doomed" not in env_path.read_text(encoding="utf-8")
+
+
+class TestProfileScopedMcp:
+    def test_mcp_add_and_list_scoped(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/mcp/servers",
+            json={"name": "scoped-srv", "url": "http://localhost:1234/sse",
+                  "profile": "worker_beta"},
+        )
+        assert resp.status_code == 200
+
+        worker_cfg = _cfg(isolated_profiles["worker_beta"])
+        assert "scoped-srv" in worker_cfg.get("mcp_servers", {})
+        assert "scoped-srv" not in _cfg(isolated_profiles["default"]).get("mcp_servers", {})
+
+        listing = client.get("/api/mcp/servers", params={"profile": "worker_beta"}).json()
+        assert any(s["name"] == "scoped-srv" for s in listing["servers"])
+        listing = client.get("/api/mcp/servers").json()
+        assert not any(s["name"] == "scoped-srv" for s in listing["servers"])
+
+    def test_mcp_enabled_toggle_scoped(self, client, isolated_profiles):
+        (isolated_profiles["worker_beta"] / "config.yaml").write_text(
+            "mcp_servers:\n  srv1:\n    url: http://x/sse\n", encoding="utf-8"
+        )
+        resp = client.put(
+            "/api/mcp/servers/srv1/enabled",
+            json={"enabled": False, "profile": "worker_beta"},
+        )
+        assert resp.status_code == 200
+        worker_cfg = _cfg(isolated_profiles["worker_beta"])
+        assert worker_cfg["mcp_servers"]["srv1"]["enabled"] is False
+
+    def test_mcp_probe_runs_inside_profile_scope(
+        self, client, isolated_profiles, monkeypatch
+    ):
+        """The test-server probe must execute with the selected profile's
+        scope active so env-placeholder expansion reads the profile's .env,
+        matching the config the server was saved into."""
+        import hermes_cli.mcp_config as mcp_config
+        from hermes_constants import get_hermes_home
+
+        (isolated_profiles["worker_beta"] / "config.yaml").write_text(
+            "mcp_servers:\n  probe-srv:\n    url: http://x/sse\n",
+            encoding="utf-8",
+        )
+        seen = {}
+
+        def fake_probe(name, config, connect_timeout=30):
+            seen["home"] = str(get_hermes_home())
+            return [("tool-a", "desc")]
+
+        monkeypatch.setattr(mcp_config, "_probe_single_server", fake_probe)
+        resp = client.post(
+            "/api/mcp/servers/probe-srv/test", params={"profile": "worker_beta"}
+        )
+        assert resp.status_code == 200
+        assert resp.json()["ok"] is True
+        assert seen["home"] == str(isolated_profiles["worker_beta"])
+
+    def test_mcp_remove_scoped(self, client, isolated_profiles):
+        (isolated_profiles["worker_beta"] / "config.yaml").write_text(
+            "mcp_servers:\n  srv2:\n    url: http://x/sse\n", encoding="utf-8"
+        )
+        # Removing from the DASHBOARD's profile must 404 (srv2 lives in worker).
+        resp = client.delete("/api/mcp/servers/srv2")
+        assert resp.status_code == 404
+        resp = client.delete("/api/mcp/servers/srv2", params={"profile": "worker_beta"})
+        assert resp.status_code == 200
+        assert "srv2" not in _cfg(isolated_profiles["worker_beta"]).get("mcp_servers", {})
+
+
+class TestProfileScopedModel:
+    def test_model_set_main_scoped(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/model/set",
+            json={
+                "scope": "main",
+                "provider": "openrouter",
+                "model": "test/model-1",
+                "confirm_expensive_model": True,
+                "profile": "worker_beta",
+            },
+        )
+        assert resp.status_code == 200
+        worker_cfg = _cfg(isolated_profiles["worker_beta"])
+        model_cfg = worker_cfg.get("model", {})
+        assert isinstance(model_cfg, dict)
+        assert model_cfg.get("provider") == "openrouter"
+        default_model = _cfg(isolated_profiles["default"]).get("model", {})
+        if isinstance(default_model, dict):
+            assert default_model.get("default") != "test/model-1"
+
+    def test_auxiliary_read_scoped_matches_write_target(
+        self, client, isolated_profiles
+    ):
+        """Reads and writes must scope symmetrically: an aux pin written to
+        the worker profile must show up ONLY in the worker-scoped read.
+        (Regression: /api/model/auxiliary used to read unscoped while
+        /api/model/set wrote scoped — the Models page displayed the
+        dashboard profile's pins while editing the selected profile's.)"""
+        (isolated_profiles["worker_beta"] / "config.yaml").write_text(
+            "auxiliary:\n  vision:\n    provider: openrouter\n"
+            "    model: worker/vision-pin\n",
+            encoding="utf-8",
+        )
+        resp = client.get("/api/model/auxiliary", params={"profile": "worker_beta"})
+        assert resp.status_code == 200
+        vision = next(t for t in resp.json()["tasks"] if t["task"] == "vision")
+        assert vision["model"] == "worker/vision-pin"
+
+        # Unscoped read = the dashboard's own profile, which has no pin.
+        resp = client.get("/api/model/auxiliary")
+        assert resp.status_code == 200
+        vision = next(t for t in resp.json()["tasks"] if t["task"] == "vision")
+        assert vision["model"] != "worker/vision-pin"
+
+    def test_auxiliary_unknown_profile_404(self, client, isolated_profiles):
+        resp = client.get("/api/model/auxiliary", params={"profile": "ghost"})
+        assert resp.status_code == 404
+
+    def test_model_options_scoped_to_profile(self, client, isolated_profiles):
+        """The Models picker must read the SAME profile model/set writes —
+        current model/provider in the payload come from the scoped config."""
+        (isolated_profiles["worker_beta"] / "config.yaml").write_text(
+            "model:\n  provider: openrouter\n  default: worker/current-pin\n",
+            encoding="utf-8",
+        )
+        resp = client.get("/api/model/options", params={"profile": "worker_beta"})
+        assert resp.status_code == 200
+        body = resp.json()
+        # The payload carries the current selection somewhere stable; assert
+        # the worker pin appears in the scoped response and not the unscoped.
+        assert "worker/current-pin" in resp.text
+        resp = client.get("/api/model/options")
+        assert resp.status_code == 200
+        assert "worker/current-pin" not in resp.text
+        assert isinstance(body, dict)
+
+    def test_model_options_unknown_profile_404(self, client, isolated_profiles):
+        resp = client.get("/api/model/options", params={"profile": "ghost"})
+        assert resp.status_code == 404
+
+    def test_model_info_unknown_profile_404(self, client, isolated_profiles):
+        """Regression: the broad except used to convert the 404 into a 200
+        with empty model info ("no model set" — silently wrong)."""
+        resp = client.get("/api/model/info", params={"profile": "ghost"})
+        assert resp.status_code == 404
+
+    def test_mcp_catalog_unknown_profile_404(self, client, isolated_profiles):
+        resp = client.get("/api/mcp/catalog", params={"profile": "ghost"})
+        assert resp.status_code == 404
+
+
+class TestProfileScopedPostSetup:
+    def test_post_setup_spawns_with_profile_flag(
+        self, client, isolated_profiles, monkeypatch
+    ):
+        """Post-setup runs in a -p scoped subprocess so hooks that read
+        config / write per-profile state see the same HERMES_HOME the rest
+        of the drawer's writes targeted."""
+        import hermes_cli.web_server as web_server
+
+        calls = []
+
+        class _FakeProc:
+            pid = 777
+
+        monkeypatch.setattr(
+            web_server,
+            "_spawn_hermes_action",
+            lambda subcommand, name: calls.append(list(subcommand)) or _FakeProc(),
+        )
+        monkeypatch.setattr(
+            "hermes_cli.tools_config.valid_post_setup_keys",
+            lambda: {"agent_browser"},
+        )
+        resp = client.post(
+            "/api/tools/toolsets/browser/post-setup",
+            json={"key": "agent_browser", "profile": "worker_beta"},
+        )
+        assert resp.status_code == 200
+        assert calls == [
+            ["-p", "worker_beta", "tools", "post-setup", "agent_browser"]
+        ]
+
+    def test_post_setup_without_profile_keeps_legacy_argv(
+        self, client, isolated_profiles, monkeypatch
+    ):
+        import hermes_cli.web_server as web_server
+
+        calls = []
+
+        class _FakeProc:
+            pid = 777
+
+        monkeypatch.setattr(
+            web_server,
+            "_spawn_hermes_action",
+            lambda subcommand, name: calls.append(list(subcommand)) or _FakeProc(),
+        )
+        monkeypatch.setattr(
+            "hermes_cli.tools_config.valid_post_setup_keys",
+            lambda: {"agent_browser"},
+        )
+        resp = client.post(
+            "/api/tools/toolsets/browser/post-setup",
+            json={"key": "agent_browser"},
+        )
+        assert resp.status_code == 200
+        assert calls == [["tools", "post-setup", "agent_browser"]]
+
+
+class TestProfileScopedChatPty:
+    def test_chat_argv_scopes_hermes_home(self, isolated_profiles, monkeypatch):
+        import hermes_cli.web_server as web_server
+
+        monkeypatch.setattr(
+            "hermes_cli.main._make_tui_argv",
+            lambda root, tui_dev=False: (["cat"], None),
+            raising=False,
+        )
+        argv, cwd, env = web_server._resolve_chat_argv(profile="worker_beta")
+        assert env is not None
+        assert env["HERMES_HOME"] == str(isolated_profiles["worker_beta"])
+        # Scoped chat must NOT attach to the dashboard's in-memory gateway.
+        assert "HERMES_TUI_GATEWAY_URL" not in env
+
+    def test_chat_argv_unscoped_keeps_legacy_env(self, isolated_profiles, monkeypatch):
+        import hermes_cli.web_server as web_server
+
+        monkeypatch.setattr(
+            "hermes_cli.main._make_tui_argv",
+            lambda root, tui_dev=False: (["cat"], None),
+            raising=False,
+        )
+        argv, cwd, env = web_server._resolve_chat_argv()
+        assert env is not None
+        assert env.get("HERMES_HOME") != str(isolated_profiles["worker_beta"])
+
+    def test_chat_argv_unknown_profile_raises(self, isolated_profiles, monkeypatch):
+        import hermes_cli.web_server as web_server
+
+        monkeypatch.setattr(
+            "hermes_cli.main._make_tui_argv",
+            lambda root, tui_dev=False: (["cat"], None),
+            raising=False,
+        )
+        # Reuse the HTTPException class web_server itself raises — avoids a
+        # direct fastapi import (unresolvable in the ty lint environment).
+        with pytest.raises(web_server.HTTPException) as exc:
+            web_server._resolve_chat_argv(profile="ghost")
+        assert exc.value.status_code == 404
diff --git a/tests/hermes_cli/test_web_server_skill_editor.py b/tests/hermes_cli/test_web_server_skill_editor.py
new file mode 100644
index 00000000000..c89142ae0df
--- /dev/null
+++ b/tests/hermes_cli/test_web_server_skill_editor.py
@@ -0,0 +1,259 @@
+"""Tests for the dashboard skill editor endpoints and cron skill attachment.
+
+The Skills page can now create/edit custom skills (SKILL.md) and the Cron
+page can attach skills to jobs — closing the "SSH + nano is the only way"
+gap for headless/VPS users. These tests pin:
+
+- GET /api/skills/content returns raw SKILL.md (and profile-scopes).
+- POST /api/skills creates a skill through the same validated write path
+  as the agent's ``skill_manage`` tool (frontmatter validation enforced).
+- PUT /api/skills/content rewrites an existing SKILL.md (404 on unknown).
+- POST /api/cron/jobs accepts ``skills`` and persists it on the job;
+  PUT /api/cron/jobs/{id} can update the list.
+"""
+import pytest
+
+
+SKILL_MD = """---
+name: {name}
+description: a test skill
+---
+
+# {name}
+
+Do the thing.
+"""
+
+
+def _write_skill(skills_dir, name):
+    d = skills_dir / name
+    d.mkdir(parents=True, exist_ok=True)
+    (d / "SKILL.md").write_text(SKILL_MD.format(name=name), encoding="utf-8")
+
+
+@pytest.fixture
+def isolated_profiles(tmp_path, monkeypatch, _isolate_hermes_home):
+    """Isolated default home + one named profile, each with its own skills."""
+    from hermes_constants import get_hermes_home
+    from hermes_cli import profiles
+
+    default_home = get_hermes_home()
+    profiles_root = default_home / "profiles"
+    worker_home = profiles_root / "worker_alpha"
+    for home in (default_home, worker_home):
+        (home / "skills").mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("{}\n", encoding="utf-8")
+
+    _write_skill(default_home / "skills", "dashboard-skill")
+    _write_skill(worker_home / "skills", "worker-skill")
+
+    monkeypatch.setattr(profiles, "_get_default_hermes_home", lambda: default_home)
+    monkeypatch.setattr(profiles, "_get_profiles_root", lambda: profiles_root)
+    return {"default": default_home, "worker_alpha": worker_home}
+
+
+@pytest.fixture
+def client(monkeypatch, isolated_profiles):
+    try:
+        from starlette.testclient import TestClient
+    except ImportError:
+        pytest.skip("fastapi/starlette not installed")
+
+    import hermes_state
+    from hermes_constants import get_hermes_home
+    from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
+
+    monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
+    c = TestClient(app)
+    c.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
+    return c
+
+
+class TestSkillContent:
+    def test_get_content_returns_raw_skill_md(self, client, isolated_profiles):
+        resp = client.get("/api/skills/content", params={"name": "dashboard-skill"})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["name"] == "dashboard-skill"
+        assert data["content"].startswith("---")
+        assert "Do the thing." in data["content"]
+
+    def test_get_content_scopes_to_profile(self, client, isolated_profiles):
+        resp = client.get(
+            "/api/skills/content",
+            params={"name": "worker-skill", "profile": "worker_alpha"},
+        )
+        assert resp.status_code == 200
+        # ...and the worker skill is invisible without the profile param.
+        resp = client.get("/api/skills/content", params={"name": "worker-skill"})
+        assert resp.status_code == 404
+
+    def test_get_content_unknown_skill_404(self, client, isolated_profiles):
+        resp = client.get("/api/skills/content", params={"name": "nope"})
+        assert resp.status_code == 404
+
+
+class TestSkillCreate:
+    def test_create_writes_skill_md(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills",
+            json={"name": "my-new-skill", "content": SKILL_MD.format(name="my-new-skill")},
+        )
+        assert resp.status_code == 200
+        assert resp.json()["success"] is True
+        skill_md = isolated_profiles["default"] / "skills" / "my-new-skill" / "SKILL.md"
+        assert skill_md.exists()
+        assert "Do the thing." in skill_md.read_text(encoding="utf-8")
+
+    def test_create_with_category(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills",
+            json={
+                "name": "cat-skill",
+                "category": "devops",
+                "content": SKILL_MD.format(name="cat-skill"),
+            },
+        )
+        assert resp.status_code == 200
+        assert (
+            isolated_profiles["default"] / "skills" / "devops" / "cat-skill" / "SKILL.md"
+        ).exists()
+
+    def test_create_scopes_to_profile(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills",
+            json={
+                "name": "worker-new",
+                "content": SKILL_MD.format(name="worker-new"),
+                "profile": "worker_alpha",
+            },
+        )
+        assert resp.status_code == 200
+        assert (
+            isolated_profiles["worker_alpha"] / "skills" / "worker-new" / "SKILL.md"
+        ).exists()
+        # Dashboard's own skills dir stays clean.
+        assert not (
+            isolated_profiles["default"] / "skills" / "worker-new"
+        ).exists()
+
+    def test_create_rejects_missing_frontmatter(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills",
+            json={"name": "bad-skill", "content": "no frontmatter here"},
+        )
+        assert resp.status_code == 400
+        assert "frontmatter" in resp.json()["detail"].lower()
+        assert not (isolated_profiles["default"] / "skills" / "bad-skill").exists()
+
+    def test_create_rejects_duplicate_name(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills",
+            json={
+                "name": "dashboard-skill",
+                "content": SKILL_MD.format(name="dashboard-skill"),
+            },
+        )
+        assert resp.status_code == 400
+        assert "already exists" in resp.json()["detail"]
+
+    def test_create_rejects_invalid_name(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills",
+            json={"name": "../escape", "content": SKILL_MD.format(name="x")},
+        )
+        assert resp.status_code == 400
+
+
+class TestSkillUpdate:
+    def test_update_rewrites_skill_md(self, client, isolated_profiles):
+        new_content = SKILL_MD.format(name="dashboard-skill").replace(
+            "Do the thing.", "Do the NEW thing."
+        )
+        resp = client.put(
+            "/api/skills/content",
+            json={"name": "dashboard-skill", "content": new_content},
+        )
+        assert resp.status_code == 200
+        skill_md = (
+            isolated_profiles["default"] / "skills" / "dashboard-skill" / "SKILL.md"
+        )
+        assert "Do the NEW thing." in skill_md.read_text(encoding="utf-8")
+
+    def test_update_unknown_skill_404(self, client, isolated_profiles):
+        resp = client.put(
+            "/api/skills/content",
+            json={"name": "nope", "content": SKILL_MD.format(name="nope")},
+        )
+        assert resp.status_code == 404
+
+    def test_update_invalid_frontmatter_400(self, client, isolated_profiles):
+        resp = client.put(
+            "/api/skills/content",
+            json={"name": "dashboard-skill", "content": "broken"},
+        )
+        assert resp.status_code == 400
+
+
+class TestEditorEndpointsAuth:
+    @pytest.mark.parametrize(
+        "method,path,kwargs",
+        [
+            ("get", "/api/skills/content?name=dashboard-skill", {}),
+            ("post", "/api/skills", {"json": {"name": "x", "content": "y"}}),
+            ("put", "/api/skills/content", {"json": {"name": "x", "content": "y"}}),
+        ],
+    )
+    def test_endpoints_401_without_token(
+        self, client, isolated_profiles, method, path, kwargs
+    ):
+        from hermes_cli.web_server import _SESSION_HEADER_NAME
+
+        client.headers.pop(_SESSION_HEADER_NAME, None)
+        resp = getattr(client, method)(path, **kwargs)
+        assert resp.status_code == 401
+
+
+class TestCronJobSkills:
+    def test_create_job_with_skills(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/cron/jobs",
+            json={
+                "prompt": "do work",
+                "schedule": "every 1h",
+                "name": "skilled-job",
+                "skills": ["dashboard-skill"],
+            },
+        )
+        assert resp.status_code == 200
+        job = resp.json()
+        assert job["skills"] == ["dashboard-skill"]
+
+        # Round-trip: the list endpoint carries the skills field too.
+        listed = client.get("/api/cron/jobs", params={"profile": "default"}).json()
+        match = [j for j in listed if j["id"] == job["id"]]
+        assert match and match[0]["skills"] == ["dashboard-skill"]
+
+    def test_update_job_skills(self, client, isolated_profiles):
+        job = client.post(
+            "/api/cron/jobs",
+            json={"prompt": "do work", "schedule": "every 1h"},
+        ).json()
+        assert job.get("skills") in (None, [])
+
+        resp = client.put(
+            f"/api/cron/jobs/{job['id']}",
+            json={"updates": {"skills": ["dashboard-skill", "worker-skill"]}},
+            params={"profile": "default"},
+        )
+        assert resp.status_code == 200
+        assert resp.json()["skills"] == ["dashboard-skill", "worker-skill"]
+
+        # Clearing works too.
+        resp = client.put(
+            f"/api/cron/jobs/{job['id']}",
+            json={"updates": {"skills": []}},
+            params={"profile": "default"},
+        )
+        assert resp.status_code == 200
+        assert resp.json()["skills"] == []
diff --git a/tests/hermes_cli/test_web_server_skills_profiles.py b/tests/hermes_cli/test_web_server_skills_profiles.py
new file mode 100644
index 00000000000..9a131bbb246
--- /dev/null
+++ b/tests/hermes_cli/test_web_server_skills_profiles.py
@@ -0,0 +1,210 @@
+"""Regression tests for dashboard profile-scoped skills/toolsets management.
+
+"Set as active" on the Profiles page only flips the sticky ``active_profile``
+file (future CLI/gateway runs) — it never retargets the running dashboard
+process. Before the ``profile`` parameter existed, toggling a skill after
+"activating" a profile silently wrote into the dashboard's own config.
+These tests pin the new behavior: reads and writes land in the REQUESTED
+profile's HERMES_HOME, and the dashboard's own profile stays untouched.
+"""
+import pytest
+import yaml
+
+
+def _write_skill(skills_dir, name, description="test skill"):
+    d = skills_dir / name
+    d.mkdir(parents=True, exist_ok=True)
+    (d / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n",
+        encoding="utf-8",
+    )
+
+
+@pytest.fixture
+def isolated_profiles(tmp_path, monkeypatch, _isolate_hermes_home):
+    """Isolated default home + one named profile, each with its own skills."""
+    from hermes_constants import get_hermes_home
+    from hermes_cli import profiles
+
+    default_home = get_hermes_home()
+    profiles_root = default_home / "profiles"
+    worker_home = profiles_root / "worker_alpha"
+    for home in (default_home, worker_home):
+        (home / "skills").mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("{}\n", encoding="utf-8")
+
+    _write_skill(default_home / "skills", "dashboard-skill")
+    _write_skill(worker_home / "skills", "worker-skill")
+
+    monkeypatch.setattr(profiles, "_get_default_hermes_home", lambda: default_home)
+    monkeypatch.setattr(profiles, "_get_profiles_root", lambda: profiles_root)
+    return {"default": default_home, "worker_alpha": worker_home}
+
+
+@pytest.fixture
+def client(monkeypatch, isolated_profiles):
+    try:
+        from starlette.testclient import TestClient
+    except ImportError:
+        pytest.skip("fastapi/starlette not installed")
+
+    import hermes_state
+    from hermes_constants import get_hermes_home
+    from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
+
+    monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
+    c = TestClient(app)
+    c.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
+    return c
+
+
+def _load_cfg(home):  # parsed config.yaml under ``home``; {} when the file is empty
+    return yaml.safe_load((home / "config.yaml").read_text(encoding="utf-8")) or {}
+
+
+class TestProfileScopedSkills:
+    """Skill reads/toggles must land in the requested profile's HERMES_HOME."""
+
+    def test_skills_list_scopes_to_requested_profile(self, client, isolated_profiles):
+        resp = client.get("/api/skills", params={"profile": "worker_alpha"})
+        assert resp.status_code == 200
+        names = {s["name"] for s in resp.json()}
+        assert "worker-skill" in names
+        assert "dashboard-skill" not in names
+
+    def test_skills_list_without_profile_uses_dashboard_home(self, client, isolated_profiles):
+        resp = client.get("/api/skills")
+        assert resp.status_code == 200
+        names = {s["name"] for s in resp.json()}
+        assert "dashboard-skill" in names
+        assert "worker-skill" not in names
+
+    def test_toggle_writes_into_target_profile_only(self, client, isolated_profiles):
+        resp = client.put(
+            "/api/skills/toggle",
+            json={"name": "worker-skill", "enabled": False, "profile": "worker_alpha"},
+        )
+        assert resp.status_code == 200
+        assert resp.json() == {"ok": True, "name": "worker-skill", "enabled": False}
+
+        worker_cfg = _load_cfg(isolated_profiles["worker_alpha"])
+        assert "worker-skill" in worker_cfg.get("skills", {}).get("disabled", [])
+        # The dashboard's own config must stay untouched — this was the bug.
+        default_cfg = _load_cfg(isolated_profiles["default"])
+        assert "worker-skill" not in default_cfg.get("skills", {}).get("disabled", [])
+
+    def test_toggle_reenable_round_trip(self, client, isolated_profiles):
+        for enabled in (False, True):
+            resp = client.put(
+                "/api/skills/toggle",
+                json={"name": "worker-skill", "enabled": enabled, "profile": "worker_alpha"},
+            )
+            # Config starts empty: unchecked failures would pass vacuously.
+            assert resp.status_code == 200
+            worker_cfg = _load_cfg(isolated_profiles["worker_alpha"])
+            disabled = worker_cfg.get("skills", {}).get("disabled", [])
+            assert ("worker-skill" in disabled) is (not enabled)
+
+    def test_unknown_profile_returns_404(self, client, isolated_profiles):
+        resp = client.get("/api/skills", params={"profile": "no_such_profile"})
+        assert resp.status_code == 404
+
+    def test_invalid_profile_name_returns_400(self, client, isolated_profiles):
+        resp = client.get("/api/skills", params={"profile": "Bad Name!"})
+        assert resp.status_code == 400
+
+    def test_scope_restores_module_globals(self, client, isolated_profiles):
+        """The SKILLS_DIR swap is per-request; the module global must be
+        restored even after a scoped call (cron-style locked swap)."""
+        import tools.skills_tool as skills_tool
+
+        before = skills_tool.SKILLS_DIR
+        resp = client.get("/api/skills", params={"profile": "worker_alpha"})
+        assert resp.status_code == 200  # the scoped code path must actually run
+        assert skills_tool.SKILLS_DIR == before
+
+
+class TestProfileScopedToolsets:
+    def test_toolset_toggle_scopes_to_profile(self, client, isolated_profiles):
+        resp = client.put(
+            "/api/tools/toolsets/x_search",
+            json={"enabled": True, "profile": "worker_alpha"},
+        )
+        assert resp.status_code == 200
+
+        worker_cfg = _load_cfg(isolated_profiles["worker_alpha"])
+        assert "x_search" in worker_cfg.get("platform_toolsets", {}).get("cli", [])
+        default_cfg = _load_cfg(isolated_profiles["default"])
+        assert "x_search" not in default_cfg.get("platform_toolsets", {}).get("cli", [])
+
+        listing = client.get(
+            "/api/tools/toolsets", params={"profile": "worker_alpha"}
+        ).json()
+        assert {t["name"]: t for t in listing}["x_search"]["enabled"] is True
+        # Unscoped listing reflects the dashboard's own (untouched) config.
+        listing = client.get("/api/tools/toolsets").json()
+        assert {t["name"]: t for t in listing}["x_search"]["enabled"] is False
+
+    def test_toolset_toggle_unknown_profile_404(self, client, isolated_profiles):
+        resp = client.put(
+            "/api/tools/toolsets/x_search",
+            json={"enabled": True, "profile": "ghost"},
+        )
+        assert resp.status_code == 404
+
+
+class TestProfileScopedHubActions:
+    def test_hub_install_spawns_with_profile_flag(
+        self, client, isolated_profiles, monkeypatch
+    ):
+        """Hub installs must go through a fresh ``hermes -p PROFILE``
+        subprocess — the in-process scope can't reach skills_hub's
+        import-time SKILLS_DIR binding."""
+        import hermes_cli.web_server as web_server
+
+        calls = []
+
+        class _FakeProc:
+            pid = 4242
+
+        def _fake_spawn(subcommand, name):
+            calls.append((list(subcommand), name))
+            return _FakeProc()
+
+        monkeypatch.setattr(web_server, "_spawn_hermes_action", _fake_spawn)
+        resp = client.post(
+            "/api/skills/hub/install",
+            json={"identifier": "official/demo", "profile": "worker_alpha"},
+        )
+        assert resp.status_code == 200
+        assert calls == [
+            (["-p", "worker_alpha", "skills", "install", "official/demo"], "skills-install")
+        ]
+
+    def test_hub_install_without_profile_keeps_legacy_argv(
+        self, client, isolated_profiles, monkeypatch
+    ):
+        import hermes_cli.web_server as web_server
+
+        calls = []
+
+        class _FakeProc:
+            pid = 4242
+
+        monkeypatch.setattr(
+            web_server,
+            "_spawn_hermes_action",
+            lambda subcommand, name: calls.append(list(subcommand)) or _FakeProc(),
+        )
+        resp = client.post(
+            "/api/skills/hub/install", json={"identifier": "official/demo"}
+        )
+        assert resp.status_code == 200
+        assert calls == [["skills", "install", "official/demo"]]
+
+    def test_hub_install_unknown_profile_404(self, client, isolated_profiles):
+        resp = client.post(
+            "/api/skills/hub/install",
+            json={"identifier": "official/demo", "profile": "ghost"},
+        )
+        assert resp.status_code == 404
diff --git a/tests/hermes_cli/test_web_ui_build.py b/tests/hermes_cli/test_web_ui_build.py
index 0783af22a13..cc20fa61cf3 100644
--- a/tests/hermes_cli/test_web_ui_build.py
+++ b/tests/hermes_cli/test_web_ui_build.py
@@ -142,6 +142,11 @@ class TestBuildWebUISkipsWhenFresh:
 
     def test_npm_install_uses_workspace_web_scope(self, tmp_path):
         web_dir, _ = _make_web_dir(tmp_path)
+        # Real workspace checkout: the single lockfile lives at the root, so
+        # _workspace_root(web_dir) resolves to the parent and --workspace web
+        # scopes the install. (Without a root lockfile, web_dir IS the root and
+        # --workspace would be dropped — see test below and #42973.)
+        (tmp_path / "package-lock.json").write_text("{}", encoding="utf-8")
         mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
         build_ok = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
         with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
@@ -153,6 +158,36 @@ class TestBuildWebUISkipsWhenFresh:
         assert "--workspace" in install_cmd
         assert install_cmd[install_cmd.index("--workspace") + 1] == "web"
 
+    def test_web_install_omits_workspace_when_web_has_own_lockfile(
+        self, tmp_path, monkeypatch
+    ):
+        """web/ with its own lockfile => _workspace_root returns web_dir, so
+        --workspace web would fail (npm can't find that workspace from inside
+        web/). The flag must be dropped and the install run plainly from web_dir.
+        Symmetric to the TUI fix in test_tui_npm_install.py. See #42973.
+
+        With web's own lockfile present at cwd, _run_npm_install_deterministic
+        uses ``npm ci`` (not ``npm install``).
+        """
+        web_dir, _ = _make_web_dir(tmp_path)
+        (web_dir / "package-lock.json").write_text("{}", encoding="utf-8")
+        (tmp_path / "package-lock.json").write_text("{}", encoding="utf-8")
+        monkeypatch.delenv("TERMUX_VERSION", raising=False)
+        monkeypatch.setenv("PREFIX", "/usr")
+
+        install_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
+        build_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
+        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
+             patch("hermes_cli.main.subprocess.run", return_value=install_cp) as mock_run, \
+             patch("hermes_cli.main._run_with_idle_timeout", return_value=build_cp):
+            result = _build_web_ui(web_dir)
+
+        assert result is True
+        args, kwargs = mock_run.call_args
+        assert "--workspace" not in args[0]
+        assert args[0] == ["/usr/bin/npm", "ci", "--silent"]
+        assert kwargs["cwd"] == web_dir
+
     def test_web_build_uses_idle_timeout_helper(self, tmp_path):
         """npm run build now goes through _run_with_idle_timeout (issue #33788).
 
diff --git a/tests/hermes_state/test_session_archiving.py b/tests/hermes_state/test_session_archiving.py
new file mode 100644
index 00000000000..36ecb95a17b
--- /dev/null
+++ b/tests/hermes_state/test_session_archiving.py
@@ -0,0 +1,51 @@
+import time
+
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture
+def db(tmp_path):
+    database = SessionDB(tmp_path / "state.db")
+    try:
+        yield database
+    finally:
+        database.close()
+
+
+def _compression_pair(db: SessionDB):
+    base = time.time() - 100
+    db.create_session("root", source="cli")
+    db.create_session("tip", source="cli", parent_session_id="root")
+    db._conn.execute(
+        "UPDATE sessions SET started_at = ?, ended_at = ?, end_reason = 'compression', message_count = 1 WHERE id = 'root'",
+        (base, base + 10),
+    )
+    db._conn.execute(
+        "UPDATE sessions SET started_at = ?, message_count = 1 WHERE id = 'tip'",
+        (base + 20,),
+    )
+    db._conn.commit()
+
+
+def test_archiving_compression_tip_archives_projected_root(db):
+    _compression_pair(db)
+
+    assert db.set_session_archived("tip", True) is True
+
+    assert db.get_session("root")["archived"] == 1
+    assert db.get_session("tip")["archived"] == 1
+    assert [s["id"] for s in db.list_sessions_rich(order_by_last_active=True)] == []
+    assert [s["id"] for s in db.list_sessions_rich(order_by_last_active=True, archived_only=True)] == ["tip"]
+
+
+def test_unarchiving_compression_tip_unarchives_projected_root(db):
+    _compression_pair(db)
+    db.set_session_archived("tip", True)
+
+    assert db.set_session_archived("tip", False) is True
+
+    assert db.get_session("root")["archived"] == 0
+    assert db.get_session("tip")["archived"] == 0
+    assert [s["id"] for s in db.list_sessions_rich(order_by_last_active=True)] == ["tip"]
diff --git a/tests/plugins/web/test_parallel_keyless_mcp.py b/tests/plugins/web/test_parallel_keyless_mcp.py
new file mode 100644
index 00000000000..8495df144b4
--- /dev/null
+++ b/tests/plugins/web/test_parallel_keyless_mcp.py
@@ -0,0 +1,383 @@
+"""Keyless Parallel search via the free hosted Search MCP.
+
+Covers the transport added in ``plugins/web/parallel/provider.py`` that lets
+``web_search`` work with no ``PARALLEL_API_KEY``:
+
+- ``_mcp_headers``  — Bearer attached only when a key is held
+- ``_mcp_response_envelope`` — plain-JSON, batched-array and SSE (``data:``) response bodies
+- ``_mcp_payload`` — structuredContent preferred, text-block JSON fallback, errors
+- ``_mcp_web_search`` — full handshake (mocked transport) → standard search shape
+- ``ParallelWebSearchProvider.search`` / ``.extract`` — keyless paths route to the MCP (``_mcp_web_search`` / ``_mcp_web_fetch``)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from unittest.mock import patch
+
+import pytest
+
+import plugins.web.parallel.provider as pp
+
+
+# ─── _mcp_headers ──────────────────────────────────────────────────────────
+
+class TestMcpHeaders:
+    def test_anonymous_has_no_authorization(self):
+        h = pp._mcp_headers(session_id=None, api_key=None)
+        assert "Authorization" not in h
+        assert h["Accept"] == "application/json, text/event-stream"
+        assert "Mcp-Session-Id" not in h
+
+    def test_user_agent_is_generic_not_hermes(self):
+        # Telemetry policy: no third-party usage attribution without opt-in.
+        # The UA must be set (not python-httpx default) but must not name
+        # hermes, on both the anonymous and keyed paths.
+        for ua in (
+            pp._mcp_headers(session_id=None, api_key=None)["User-Agent"],
+            pp._mcp_headers(session_id="sid", api_key="pk-live")["User-Agent"],
+        ):
+            assert ua == f"{pp._MCP_CLIENT_NAME}/{pp._MCP_CLIENT_VERSION}"
+            assert "hermes" not in ua.lower()
+
+    def test_session_id_and_bearer_when_present(self):
+        h = pp._mcp_headers(session_id="sid-123", api_key="pk-live")
+        assert h["Mcp-Session-Id"] == "sid-123"
+        assert h["Authorization"] == "Bearer pk-live"
+
+
+# ─── SSE / JSON-RPC parsing ──────────────────────────────────────────────────
+
+class TestMcpResponseParsing:
+    def test_plain_json_matched_by_id(self):
+        body = '{"jsonrpc":"2.0","id":"abc","result":{"ok":true}}'
+        assert pp._mcp_response_envelope(body, "abc")["result"]["ok"] is True
+
+    def test_sse_selects_response_for_request_id_skipping_notifications(self):
+        # A progress notification (no id) precedes the real result; an unrelated
+        # response id is also present. We must pick the one matching our id.
+        body = (
+            'event: message\ndata: {"jsonrpc":"2.0","method":"notifications/progress","params":{"p":1}}\n\n'
+            'event: message\ndata: {"jsonrpc":"2.0","id":"other","result":{"ok":false}}\n\n'
+            'event: message\ndata: {"jsonrpc":"2.0","id":"req-1","result":{"ok":true}}\n\n'
+        )
+        env = pp._mcp_response_envelope(body, "req-1")
+        assert env["result"]["ok"] is True
+
+    def test_sse_multiline_data_concatenated(self):
+        body = 'data: {"jsonrpc":"2.0","id":"x",\ndata: "result":{"n":42}}\n\n'
+        assert pp._mcp_response_envelope(body, "x")["result"]["n"] == 42
+
+    def test_falls_back_to_last_result_when_id_absent(self):
+        body = '{"jsonrpc":"2.0","id":"server-chose","result":{"ok":true}}'
+        # request id doesn't match, but there's a single result → use it
+        assert pp._mcp_response_envelope(body, "mismatch")["result"]["ok"] is True
+
+    def test_empty_body(self):
+        assert pp._mcp_response_envelope("", "x") == {}
+        assert pp._mcp_response_envelope("   ", "x") == {}
+
+    def test_batched_json_array_flattened(self):
+        # Streamable HTTP may batch messages into a JSON array.
+        body = ('[{"jsonrpc":"2.0","method":"notifications/progress"},'
+                '{"jsonrpc":"2.0","id":"req-9","result":{"ok":true}}]')
+        assert pp._mcp_response_envelope(body, "req-9")["result"]["ok"] is True
+
+    def test_batched_sse_data_array_flattened(self):
+        body = 'data: [{"jsonrpc":"2.0","id":"a","result":{"n":1}}]\n\n'
+        assert pp._mcp_response_envelope(body, "a")["result"]["n"] == 1
+
+
+# ─── _mcp_payload ────────────────────────────────────────────────────────────
+
+class TestMcpPayload:
+    def test_prefers_structured_content(self):
+        env = {"result": {"structuredContent": {"results": [{"url": "u"}]},
+                          "content": [{"type": "text", "text": "ignored"}]}}
+        assert pp._mcp_payload(env) == {"results": [{"url": "u"}]}
+
+    def test_parses_text_block_json(self):
+        inner = {"search_id": "s1", "results": [{"url": "u", "title": "t"}]}
+        env = {"result": {"content": [{"type": "text", "text": json.dumps(inner)}]}}
+        assert pp._mcp_payload(env)["search_id"] == "s1"
+
+    def test_raises_on_jsonrpc_error(self):
+        with pytest.raises(RuntimeError, match="Parallel MCP error"):
+            pp._mcp_payload({"error": {"code": -32000, "message": "boom"}})
+
+    def test_raises_on_tool_iserror(self):
+        with pytest.raises(RuntimeError, match="Parallel MCP tool error"):
+            pp._mcp_payload({"result": {"isError": True, "content": []}})
+
+
+# ─── _mcp_web_search (mocked transport) ──────────────────────────────────────
+
+class _FakeResponse:
+    def __init__(self, *, text="", headers=None):
+        self.text = text
+        self.headers = headers or {}
+
+    def raise_for_status(self):
+        return None
+
+
+class _FakeClient:
+    """Stands in for httpx.Client: replays init → ack → tools/call."""
+
+    def __init__(self, search_payload, init_session_id="server-sid"):
+        self._search_payload = search_payload
+        self._init_session_id = init_session_id
+        self.calls = []
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc):
+        return False
+
+    def post(self, url, headers=None, json=None):
+        self.calls.append({"headers": headers, "json": json})
+        req = json or {}
+        method = req.get("method")
+        req_id = req.get("id")
+        if method == "initialize":
+            # Echo the request id, as the real server does.
+            return _FakeResponse(
+                text=json_dumps({"jsonrpc": "2.0", "id": req_id,
+                                 "result": {"protocolVersion": "2099-01-01"}}),
+                headers=(
+                    {"mcp-session-id": self._init_session_id}
+                    if self._init_session_id is not None
+                    else {}
+                ),
+            )
+        if method == "notifications/initialized":
+            return _FakeResponse(text="")
+        # tools/call
+        envelope = {"jsonrpc": "2.0", "id": req_id, "result": {
+            "content": [{"type": "text", "text": json_dumps(self._search_payload)}],
+        }}
+        return _FakeResponse(text=json_dumps(envelope))
+
+
+def json_dumps(obj):
+    return json.dumps(obj)
+
+
+class TestMcpWebSearch:
+    def _payload(self, n):
+        return {"search_id": "s", "results": [
+            {"url": f"https://ex/{i}", "title": f"t{i}",
+             "excerpts": [f"a{i}", f"b{i}"]}
+            for i in range(n)
+        ]}
+
+    def test_returns_standard_shape_and_handshake(self):
+        fake = _FakeClient(self._payload(3))
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_search("hello", limit=5, api_key=None)
+
+        assert out["success"] is True
+        # Free-tier results credit Parallel.
+        assert "Parallel" in out["attribution"]
+        web = out["data"]["web"]
+        assert [r["position"] for r in web] == [1, 2, 3]
+        assert web[0]["url"] == "https://ex/0"
+        assert web[0]["description"] == "a0 b0"  # excerpts joined
+        # handshake order
+        methods = [c["json"].get("method") for c in fake.calls]
+        assert methods == ["initialize", "notifications/initialized", "tools/call"]
+        # session id from the initialize response header is reused
+        assert fake.calls[-1]["headers"]["Mcp-Session-Id"] == "server-sid"
+
+    def test_stateless_server_no_session_header_not_invented(self):
+        # A stateless Streamable-HTTP server may omit mcp-session-id on
+        # initialize; we must NOT invent one (sending an unissued session id can
+        # get follow-up requests rejected). The follow-ups carry no header.
+        fake = _FakeClient(self._payload(1), init_session_id=None)
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_search("hello", limit=5, api_key=None)
+        assert out["success"] is True
+        follow_ups = [c for c in fake.calls if c["json"].get("method") != "initialize"]
+        assert follow_ups, "expected notifications/initialized + tools/call"
+        assert all("Mcp-Session-Id" not in c["headers"] for c in follow_ups)
+        # anonymous → no Authorization on any call
+        assert all("Authorization" not in c["headers"] for c in fake.calls)
+        # tools/call mirrors query into objective + search_queries
+        args = fake.calls[-1]["json"]["params"]["arguments"]
+        assert args["objective"] == "hello"
+        assert args["search_queries"] == ["hello"]
+
+    def test_limit_is_applied_client_side(self):
+        fake = _FakeClient(self._payload(10))
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_search("q", limit=2, api_key=None)
+        assert len(out["data"]["web"]) == 2
+
+    def test_bearer_attached_when_key_present(self):
+        fake = _FakeClient(self._payload(1))
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            pp._mcp_web_search("q", limit=1, api_key="pk-live")
+        assert all(c["headers"]["Authorization"] == "Bearer pk-live" for c in fake.calls)
+
+    def test_negotiated_protocol_version_echoed_post_init(self):
+        fake = _FakeClient(self._payload(1))
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            pp._mcp_web_search("q", limit=1, api_key=None)
+        # initialize request doesn't carry the (not-yet-negotiated) version...
+        assert "MCP-Protocol-Version" not in fake.calls[0]["headers"]
+        # ...but notifications/initialized and tools/call echo the negotiated one.
+        assert fake.calls[1]["headers"]["MCP-Protocol-Version"] == "2099-01-01"
+        assert fake.calls[-1]["headers"]["MCP-Protocol-Version"] == "2099-01-01"
+
+
+# ─── provider.search keyless routing ─────────────────────────────────────────
+
+class TestProviderKeylessSearch:
+    def test_search_without_key_uses_mcp(self, monkeypatch):
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        captured = {}
+
+        def _fake(query, limit, api_key):
+            captured.update(query=query, limit=limit, api_key=api_key)
+            return {"success": True, "data": {"web": []}}
+
+        monkeypatch.setattr(pp, "_mcp_web_search", _fake)
+        out = pp.ParallelWebSearchProvider().search("kittens", limit=4)
+        assert out["success"] is True
+        assert captured == {"query": "kittens", "limit": 4, "api_key": None}
+
+    def test_is_available_reflects_key(self, monkeypatch):
+        # is_available() gates the registry's active-provider walk + picker, so
+        # it's key-based (keyless dispatch is handled by _get_backend, not this).
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        assert pp.ParallelWebSearchProvider().is_available() is False
+        monkeypatch.setenv("PARALLEL_API_KEY", "k")
+        assert pp.ParallelWebSearchProvider().is_available() is True
+
+
+# ─── web_fetch (keyless extract) ─────────────────────────────────────────────
+
+class TestMcpWebFetch:
+    def _payload(self, urls):
+        return {"extract_id": "e1", "results": [
+            {"url": u, "title": f"T{i}", "publish_date": None,
+             "excerpts": [f"chunk-a-{i}", f"chunk-b-{i}"]}
+            for i, u in enumerate(urls)
+        ]}
+
+    def test_maps_to_extract_shape(self):
+        urls = ["https://a.test", "https://b.test"]
+        fake = _FakeClient(self._payload(urls))
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_fetch(urls, api_key=None)
+        assert [r["url"] for r in out] == urls
+        assert out[0]["content"] == "chunk-a-0\n\nchunk-b-0"
+        assert out[0]["raw_content"] == out[0]["content"]
+        assert out[0]["metadata"] == {"sourceURL": "https://a.test", "title": "T0"}
+        # tools/call targeted web_fetch, requesting full page bodies.
+        args = fake.calls[-1]["json"]["params"]
+        assert args["name"] == "web_fetch"
+        assert args["arguments"]["urls"] == urls
+        assert args["arguments"]["full_content"] is True
+        assert args["arguments"]["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")
+
+    def test_prefers_full_content_over_excerpts(self):
+        payload = {"results": [
+            {"url": "https://a.test", "title": "T",
+             "excerpts": ["snippet"], "full_content": "the entire page body"},
+        ]}
+        fake = _FakeClient(payload)
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_fetch(["https://a.test"], api_key=None)
+        assert out[0]["content"] == "the entire page body"
+
+    def test_missing_url_becomes_error_entry(self):
+        # Server returns only one of the two requested URLs.
+        fake = _FakeClient(self._payload(["https://a.test"]))
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_fetch(["https://a.test", "https://missing.test"], api_key=None)
+        assert len(out) == 2
+        missing = [r for r in out if r["url"] == "https://missing.test"][0]
+        assert "error" in missing
+        assert missing["content"] == ""
+
+    def test_preserves_order_and_duplicate_inputs(self):
+        # MCP returns each unique URL once; output must still be one row per
+        # input, in order, including the duplicate.
+        fake = _FakeClient(self._payload(["https://a.test", "https://b.test"]))
+        urls = ["https://b.test", "https://a.test", "https://b.test"]
+        with patch.object(pp.httpx, "Client", return_value=fake):
+            out = pp._mcp_web_fetch(urls, api_key=None)
+        assert [r["url"] for r in out] == urls  # one row per input, in order
+        assert all("error" not in r for r in out)  # all three resolved
+
+    def test_extract_without_key_uses_web_fetch(self, monkeypatch):
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        captured = {}
+
+        def _fake(urls, api_key):
+            captured.update(urls=list(urls), api_key=api_key)
+            return [{"url": urls[0], "title": "", "content": "x",
+                     "raw_content": "x", "metadata": {}}]
+
+        monkeypatch.setattr(pp, "_mcp_web_fetch", _fake)
+        out = asyncio.run(pp.ParallelWebSearchProvider().extract(["https://x.test"]))
+        assert out[0]["content"] == "x"
+        assert captured == {"urls": ["https://x.test"], "api_key": None}
+
+
+# ─── keyed v1 REST search ────────────────────────────────────────────────────
+
+class TestKeyedV1Search:
+    def test_passes_max_results_and_omits_branding(self, monkeypatch):
+        monkeypatch.setenv("PARALLEL_API_KEY", "pk-live")
+        monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
+        captured = {}
+
+        class _Res:
+            def __init__(self, url):
+                self.url, self.title, self.excerpts = url, "T", ["x"]
+
+        class _Resp:
+            results = [_Res(f"https://r/{i}") for i in range(10)]
+
+        class _Client:
+            def search(self, **kw):
+                captured.update(kw)
+                return _Resp()
+
+        monkeypatch.setattr(pp, "_get_sync_client", lambda: _Client())
+        out = pp.ParallelWebSearchProvider().search("q", limit=7)
+
+        assert out["success"] is True
+        # honors the caller's limit via advanced_settings.max_results
+        assert captured["advanced_settings"] == {"max_results": 7}
+        assert captured["mode"] == "advanced"            # v1 default
+        assert captured["session_id"].startswith(f"{pp._MCP_CLIENT_NAME}-")  # per-call id
+        assert len(out["data"]["web"]) == 7              # client-side slice
+        # paid path: no free-tier attribution, no [Parallel] label signal
+        assert "attribution" not in out
+        assert "provider" not in out
+
+
+# ─── v1 search mode mapping ──────────────────────────────────────────────────
+
+class TestResolveSearchMode:
+    @pytest.mark.parametrize("env,expected", [
+        (None, "advanced"),        # default
+        ("advanced", "advanced"),
+        ("basic", "basic"),
+        ("fast", "basic"),         # legacy → basic
+        ("one-shot", "basic"),     # legacy → basic
+        ("agentic", "advanced"),   # legacy → advanced
+        ("garbage", "advanced"),   # invalid → default
+        ("BASIC", "basic"),        # case-insensitive
+    ])
+    def test_mode_mapping(self, monkeypatch, env, expected):
+        if env is None:
+            monkeypatch.delenv("PARALLEL_SEARCH_MODE", raising=False)
+        else:
+            monkeypatch.setenv("PARALLEL_SEARCH_MODE", env)
+        assert pp._resolve_search_mode() == expected
diff --git a/tests/plugins/web/test_web_search_provider_plugins.py b/tests/plugins/web/test_web_search_provider_plugins.py
index 2177d875c4b..2d74b2a1813 100644
--- a/tests/plugins/web/test_web_search_provider_plugins.py
+++ b/tests/plugins/web/test_web_search_provider_plugins.py
@@ -193,11 +193,16 @@ class TestIsAvailable:
         assert p.is_available() is True
 
     def test_parallel_requires_api_key(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        """is_available() is key-based — it gates the registry's active-provider
+        walk/picker. (Keyless search/extract still work via the free MCP through
+        _get_backend's terminal default, independent of this flag.)
+        """
         _ensure_plugins_loaded()
         from agent.web_search_registry import get_provider
 
         p = get_provider("parallel")
         assert p is not None
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
         assert p.is_available() is False
         monkeypatch.setenv("PARALLEL_API_KEY", "real")
         assert p.is_available() is True
@@ -422,17 +427,33 @@ class TestErrorResponseShapes:
         assert result.get("success") is False
         assert "error" in result
 
-    def test_parallel_extract_returns_per_url_errors_when_unconfigured(self) -> None:
+    def test_parallel_extract_keyless_uses_mcp_web_fetch(
+        self, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """Without a key, extract routes to the free MCP web_fetch tool rather
+        than erroring. The MCP transport is mocked so the test stays offline."""
         _ensure_plugins_loaded()
         from agent.web_search_registry import get_provider
+        import plugins.web.parallel.provider as parallel_provider
+
+        monkeypatch.delenv("PARALLEL_API_KEY", raising=False)
+        captured = {}
+
+        def _fake_fetch(urls, api_key):
+            captured["urls"] = list(urls)
+            captured["api_key"] = api_key
+            return [{"url": urls[0], "title": "Example", "content": "body",
+                     "raw_content": "body", "metadata": {"sourceURL": urls[0]}}]
+
+        monkeypatch.setattr(parallel_provider, "_mcp_web_fetch", _fake_fetch)
 
         p = get_provider("parallel")
         assert p is not None
         result = asyncio.run(p.extract(["https://example.com"]))
         assert isinstance(result, list)
-        assert len(result) == 1
-        assert "error" in result[0]
         assert result[0]["url"] == "https://example.com"
+        assert result[0]["content"] == "body"
+        assert captured == {"urls": ["https://example.com"], "api_key": None}
 
     def test_firecrawl_extract_returns_per_url_errors_when_unconfigured(self) -> None:
         _ensure_plugins_loaded()
diff --git a/tests/run_agent/test_streaming.py b/tests/run_agent/test_streaming.py
index 5af349fa859..c7897804737 100644
--- a/tests/run_agent/test_streaming.py
+++ b/tests/run_agent/test_streaming.py
@@ -1573,3 +1573,87 @@ class TestCopilotACPStreamingDecision:
             _use_streaming = False
 
         assert _use_streaming is True
+
+
+class TestBedrockIamStreamingFallback:
+    """bedrock_converse streaming branch: IAM denial of
+    InvokeModelWithResponseStream falls back to converse() inline and sets
+    _disable_streaming for the rest of the session."""
+
+    def _make_bedrock_agent(self):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="anthropic.claude-3-sonnet-20240229-v1:0",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "bedrock_converse"
+        agent._interrupt_requested = False
+        return agent
+
+    def test_iam_denial_falls_back_inline_and_disables_streaming(self):
+        pytest.importorskip("botocore", reason="botocore required for Bedrock tests")
+        from botocore.exceptions import ClientError
+
+        agent = self._make_bedrock_agent()
+
+        client = MagicMock()
+        client.converse_stream.side_effect = ClientError(
+            error_response={
+                "Error": {
+                    "Code": "AccessDeniedException",
+                    "Message": (
+                        "User is not authorized to perform: "
+                        "bedrock:InvokeModelWithResponseStream"
+                    ),
+                }
+            },
+            operation_name="ConverseStream",
+        )
+        client.converse.return_value = {
+            "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}},
+            "stopReason": "end_turn",
+            "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2},
+        }
+
+        with patch(
+            "agent.bedrock_adapter._get_bedrock_runtime_client",
+            return_value=client,
+        ):
+            response = agent._interruptible_streaming_api_call(
+                {"modelId": agent.model, "messages": []}
+            )
+
+        client.converse.assert_called_once()
+        assert response.choices[0].message.content == "hi"
+        assert getattr(agent, "_disable_streaming", False) is True
+
+    def test_other_bedrock_errors_still_propagate(self):
+        pytest.importorskip("botocore", reason="botocore required for Bedrock tests")
+        from botocore.exceptions import ClientError
+
+        agent = self._make_bedrock_agent()
+
+        client = MagicMock()
+        client.converse_stream.side_effect = ClientError(
+            error_response={
+                "Error": {"Code": "ThrottlingException", "Message": "slow down"}
+            },
+            operation_name="ConverseStream",
+        )
+
+        with patch(
+            "agent.bedrock_adapter._get_bedrock_runtime_client",
+            return_value=client,
+        ):
+            with pytest.raises(ClientError):
+                agent._interruptible_streaming_api_call(
+                    {"modelId": agent.model, "messages": []}
+                )
+
+        client.converse.assert_not_called()
+        assert getattr(agent, "_disable_streaming", False) is False
diff --git a/tests/run_agent/test_thinking_sig_recovery_persistence.py b/tests/run_agent/test_thinking_sig_recovery_persistence.py
new file mode 100644
index 00000000000..e518af5145d
--- /dev/null
+++ b/tests/run_agent/test_thinking_sig_recovery_persistence.py
@@ -0,0 +1,93 @@
+"""Regression tests for the thinking-block signature recovery.
+
+The recovery in ``agent/conversation_loop.py`` strips ``reasoning_details``
+from ``api_messages`` (the API-call-time list rebuilt on every retry) and
+leaves ``messages`` (the canonical store) untouched. The previous
+implementation popped from ``messages`` directly, which never reached
+``api_messages`` because each entry in ``api_messages`` was a shallow
+copy of the corresponding entry in ``messages``, and the mutation also
+landed in ``state.db`` on the next ``_persist_session`` call, corrupting
+the conversation.
+
+These tests cover the surface that the recovery touches in isolation:
+shallow copies share inner field references; popping a key from one dict
+does not remove it from the other; and a list of shallow copies behaves
+the same way.
+"""
+
+
+def _shallow_copies(messages):
+    return [m.copy() for m in messages]
+
+
+def test_pop_on_shallow_copy_does_not_affect_source():
+    rd = [{"type": "thinking", "thinking": "r", "signature": "s"}]
+    src = {"role": "assistant", "content": "x", "reasoning_details": rd}
+    cp = src.copy()
+
+    cp.pop("reasoning_details", None)
+
+    assert "reasoning_details" not in cp
+    assert "reasoning_details" in src
+    assert src["reasoning_details"] is rd
+
+
+def test_strip_api_messages_leaves_canonical_messages_intact():
+    """Mirrors the recovery: pop reasoning_details from api_messages only.
+
+    The canonical ``messages`` list keeps its reasoning_details so future
+    persists carry the original signed blocks.
+    """
+    rd_one = [{"type": "thinking", "thinking": "one", "signature": "sig_one"}]
+    rd_two = [{"type": "thinking", "thinking": "two", "signature": "sig_two"}]
+    messages = [
+        {"role": "user", "content": "q1"},
+        {"role": "assistant", "content": "a1", "reasoning_details": rd_one},
+        {"role": "user", "content": "q2"},
+        {"role": "assistant", "content": "a2", "reasoning_details": rd_two},
+    ]
+    api_messages = _shallow_copies(messages)
+
+    stripped = 0
+    for m in api_messages:
+        if isinstance(m, dict) and "reasoning_details" in m:
+            m.pop("reasoning_details", None)
+            stripped += 1
+
+    assert stripped == 2
+    assert all("reasoning_details" not in m for m in api_messages)
+    canonical_rd = [
+        m.get("reasoning_details") for m in messages if m["role"] == "assistant"
+    ]
+    assert canonical_rd == [rd_one, rd_two]
+
+
+def test_strip_is_idempotent_when_run_twice():
+    """A second strip is a no-op when reasoning_details has already been
+    removed from api_messages. Guards against a duplicate firing path.
+    """
+    api_messages = [
+        {"role": "assistant", "content": "a", "reasoning_details": [{"x": 1}]},
+        {"role": "user", "content": "q"},
+    ]
+    for _ in range(2):
+        for m in api_messages:
+            if isinstance(m, dict) and "reasoning_details" in m:
+                m.pop("reasoning_details", None)
+
+    assert all("reasoning_details" not in m for m in api_messages)
+
+
+def test_strip_skips_messages_without_reasoning_details():
+    api_messages = [
+        {"role": "user", "content": "q"},
+        {"role": "assistant", "content": "a"},
+        {"role": "tool", "tool_call_id": "1", "content": "ok"},
+    ]
+    snapshot = [dict(m) for m in api_messages]
+
+    for m in api_messages:
+        if isinstance(m, dict) and "reasoning_details" in m:
+            m.pop("reasoning_details", None)
+
+    assert api_messages == snapshot
diff --git a/tests/test_empty_session_hygiene.py b/tests/test_empty_session_hygiene.py
new file mode 100644
index 00000000000..3576e7dce72
--- /dev/null
+++ b/tests/test_empty_session_hygiene.py
@@ -0,0 +1,161 @@
+"""Tests for empty-session hygiene — gemini-cli#27770 port.
+
+Starting the CLI and immediately quitting (or rotating sessions with /new)
+used to leave empty untitled rows in the session DB that clutter /resume
+and `hermes sessions list`. ``SessionDB.delete_session_if_empty`` removes
+a just-ended session row only when it never gained resumable content:
+no messages, no title, and no child sessions.
+"""
+
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture()
+def db(tmp_path):
+    session_db = SessionDB(db_path=tmp_path / "state.db")
+    yield session_db
+    session_db.close()
+
+
+class TestDeleteSessionIfEmpty:
+    def test_deletes_empty_untitled_session(self, db):
+        db.create_session(session_id="empty", source="cli", model="test")
+        db.end_session("empty", "cli_close")
+
+        assert db.delete_session_if_empty("empty") is True
+        assert db.get_session("empty") is None
+
+    def test_keeps_session_with_messages(self, db):
+        db.create_session(session_id="busy", source="cli", model="test")
+        db.append_message("busy", role="user", content="hello")
+        db.end_session("busy", "cli_close")
+
+        assert db.delete_session_if_empty("busy") is False
+        assert db.get_session("busy") is not None
+
+    def test_keeps_titled_session(self, db):
+        """A user-assigned title is resumable content even without messages."""
+        db.create_session(session_id="titled", source="cli", model="test")
+        db.set_session_title("titled", "Important plans")
+        db.end_session("titled", "cli_close")
+
+        assert db.delete_session_if_empty("titled") is False
+        assert db.get_session("titled") is not None
+
+    def test_keeps_session_with_children(self, db):
+        """A parent that spawned delegate subagent runs is not empty."""
+        db.create_session(session_id="parent", source="cli", model="test")
+        db.create_session(
+            session_id="child",
+            source="tool",
+            model="test",
+            parent_session_id="parent",
+        )
+        db.end_session("parent", "cli_close")
+
+        assert db.delete_session_if_empty("parent") is False
+        assert db.get_session("parent") is not None
+        assert db.get_session("child") is not None
+
+    def test_unknown_session_returns_false(self, db):
+        assert db.delete_session_if_empty("nope") is False
+
+    def test_removes_on_disk_transcripts(self, db, tmp_path):
+        sessions_dir = tmp_path / "sessions"
+        sessions_dir.mkdir()
+        (sessions_dir / "empty.json").write_text("{}", encoding="utf-8")
+        (sessions_dir / "empty.jsonl").write_text("", encoding="utf-8")
+
+        db.create_session(session_id="empty", source="cli", model="test")
+        db.end_session("empty", "cli_close")
+
+        assert db.delete_session_if_empty("empty", sessions_dir=sessions_dir)
+        assert not (sessions_dir / "empty.json").exists()
+        assert not (sessions_dir / "empty.jsonl").exists()
+
+    def test_no_file_cleanup_when_kept(self, db, tmp_path):
+        sessions_dir = tmp_path / "sessions"
+        sessions_dir.mkdir()
+        (sessions_dir / "busy.json").write_text("{}", encoding="utf-8")
+
+        db.create_session(session_id="busy", source="cli", model="test")
+        db.append_message("busy", role="user", content="hello")
+
+        assert not db.delete_session_if_empty("busy", sessions_dir=sessions_dir)
+        assert (sessions_dir / "busy.json").exists()
+
+    def test_empty_session_disappears_from_listing(self, db):
+        """The user-facing symptom: empty rows polluting session lists."""
+        db.create_session(session_id="real", source="cli", model="test")
+        db.append_message("real", role="user", content="do the thing")
+        db.end_session("real", "cli_close")
+
+        db.create_session(session_id="ghost", source="cli", model="test")
+        db.end_session("ghost", "cli_close")
+
+        ids_before = {s["id"] for s in db.list_sessions_rich(source="cli")}
+        assert {"real", "ghost"} <= ids_before
+
+        db.delete_session_if_empty("ghost")
+
+        ids_after = {s["id"] for s in db.list_sessions_rich(source="cli")}
+        assert "real" in ids_after
+        assert "ghost" not in ids_after
+
+
+class TestCLIDiscardSessionIfEmpty:
+    """Wiring tests for HermesCLI._discard_session_if_empty."""
+
+    def _make_cli(self, db):
+        from cli import HermesCLI
+
+        cli = HermesCLI.__new__(HermesCLI)
+        cli._session_db = db
+        cli.conversation_history = []
+        return cli
+
+    def test_discards_empty(self, db):
+        db.create_session(session_id="empty", source="cli", model="test")
+        db.end_session("empty", "cli_close")
+
+        cli = self._make_cli(db)
+        assert cli._discard_session_if_empty("empty") is True
+        assert db.get_session("empty") is None
+
+    def test_keeps_nonempty(self, db):
+        db.create_session(session_id="busy", source="cli", model="test")
+        db.append_message("busy", role="user", content="hi")
+
+        cli = self._make_cli(db)
+        assert cli._discard_session_if_empty("busy") is False
+        assert db.get_session("busy") is not None
+
+    def test_no_db_is_noop(self):
+        cli = self._make_cli(None)
+        assert cli._discard_session_if_empty("anything") is False
+
+    def test_none_session_id_is_noop(self, db):
+        cli = self._make_cli(db)
+        assert cli._discard_session_if_empty(None) is False
+
+    def test_db_error_swallowed(self, db):
+        class Boom:
+            def delete_session_if_empty(self, *a, **k):
+                raise RuntimeError("locked")
+
+        cli = self._make_cli(Boom())
+        assert cli._discard_session_if_empty("x") is False
+
+    def test_in_memory_history_blocks_prune(self, db):
+        """The live transcript is authoritative: even if the DB row has no
+        flushed messages yet, a CLI holding conversation history must not
+        prune the session (covers flush-failed / not-yet-flushed turns)."""
+        db.create_session(session_id="unflushed", source="cli", model="test")
+        db.end_session("unflushed", "new_session")
+
+        cli = self._make_cli(db)
+        cli.conversation_history = [{"role": "user", "content": "hello"}]
+        assert cli._discard_session_if_empty("unflushed") is False
+        assert db.get_session("unflushed") is not None
diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 3b95b8dceb8..c510c4ef230 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -86,6 +86,47 @@ def test_session_context_uses_session_cwd(monkeypatch, tmp_path):
         server._sessions.pop(sid, None)
 
 
+def test_handoff_fail_marks_only_inflight_rows(monkeypatch):
+    class DbContext:
+        def __init__(self, db):
+            self.db = db
+
+        def __enter__(self):
+            return self.db
+
+        def __exit__(self, *_args):
+            return False
+
+    class FakeDb:
+        def __init__(self, state):
+            self.state = state
+            self.failed_with = None
+
+        def get_handoff_state(self, _key):
+            return {"state": self.state, "platform": "telegram", "error": None}
+
+        def fail_handoff(self, _key, error):
+            self.failed_with = error
+            self.state = "failed"
+
+    sid = "rt-handoff"
+    server._sessions[sid] = {"session_key": "stored-handoff"}
+    try:
+        pending = FakeDb("pending")
+        monkeypatch.setattr(server, "_session_db", lambda _session: DbContext(pending))
+        result = server._methods["handoff.fail"]("r1", {"session_id": sid, "error": "timed out"})
+        assert result["result"] == {"failed": True, "state": "failed"}
+        assert pending.failed_with == "timed out"
+
+        completed = FakeDb("completed")
+        monkeypatch.setattr(server, "_session_db", lambda _session: DbContext(completed))
+        result = server._methods["handoff.fail"]("r2", {"session_id": sid, "error": "late timeout"})
+        assert result["result"] == {"failed": False, "state": "completed"}
+        assert completed.failed_with is None
+    finally:
+        server._sessions.pop(sid, None)
+
+
 def test_session_context_explicit_cwd_for_ephemeral_task(monkeypatch, tmp_path):
     """Background/preview tasks use ephemeral ids absent from `_sessions`, so the
     parent workspace is passed explicitly; it must pin instead of clearing back
diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py
index fc03ab1d330..1ca877064a7 100644
--- a/tests/tools/test_cronjob_tools.py
+++ b/tests/tools/test_cronjob_tools.py
@@ -452,3 +452,54 @@ class TestUnifiedCronjobTool:
         assert updated["success"] is True
         stored = get_job(created["job_id"])
         assert stored["deliver"] == "telegram"
+
+
+# =========================================================================
+# Per-job model/provider override resolution
+# =========================================================================
+
+from tools.cronjob_tools import _resolve_model_override  # noqa: E402
+
+
+class TestResolveModelOverride:
+    """`_resolve_model_override` must not silently hijack a job that meant to
+    use a configured custom endpoint (e.g. ``providers.custom`` → cliproxy).
+    Regression for cron jobs with ``provider: "custom"`` falling back to codex.
+    """
+
+    def test_keeps_bare_custom_when_a_named_entry_exists(self, monkeypatch):
+        import hermes_cli.runtime_provider as rp_mod
+
+        monkeypatch.setattr(rp_mod, "has_named_custom_provider", lambda name: True)
+        provider, model = _resolve_model_override(
+            {"provider": "custom", "model": "gpt-5.4"}
+        )
+        assert provider == "custom"
+        assert model == "gpt-5.4"
+
+    def test_pins_main_provider_when_bare_custom_unresolvable(self, monkeypatch):
+        import hermes_cli.config as cfg_mod
+        import hermes_cli.runtime_provider as rp_mod
+
+        monkeypatch.setattr(rp_mod, "has_named_custom_provider", lambda name: False)
+        monkeypatch.setattr(
+            cfg_mod, "load_config", lambda: {"model": {"provider": "openai-codex"}}
+        )
+        provider, model = _resolve_model_override(
+            {"provider": "custom", "model": "gpt-5.4"}
+        )
+        # No matching custom entry → fall back to pinning the main provider.
+        assert provider == "openai-codex"
+        assert model == "gpt-5.4"
+
+    def test_keeps_explicit_custom_name_unchanged(self, monkeypatch):
+        import hermes_cli.runtime_provider as rp_mod
+
+        # Even if the resolver claims no entry, the canonical "custom:<name>"
+        # form is never stripped or pinned.
+        monkeypatch.setattr(rp_mod, "has_named_custom_provider", lambda name: False)
+        provider, model = _resolve_model_override(
+            {"provider": "custom:cliproxy", "model": "gpt-5.4"}
+        )
+        assert provider == "custom:cliproxy"
+        assert model == "gpt-5.4"
diff --git a/tests/tools/test_mcp_loop_profile_override.py b/tests/tools/test_mcp_loop_profile_override.py
new file mode 100644
index 00000000000..2667d995c0b
--- /dev/null
+++ b/tests/tools/test_mcp_loop_profile_override.py
@@ -0,0 +1,139 @@
+"""Regression tests for HERMES_HOME override propagation onto the MCP loop.
+
+Tasks scheduled via run_coroutine_threadsafe are created inside the MCP
+event-loop thread, so they copy THAT thread's context — not the scheduling
+thread's. A per-request profile scope (dashboard ?profile= endpoints, e.g.
+the MCP "Test server" probe) would silently vanish for anything resolving
+get_hermes_home() inside the coroutine, most visibly OAuth token-store
+paths. _run_on_mcp_loop now wraps scheduled coroutines with the caller's
+override (mcp_tool._wrap_with_home_override).
+"""
+import os
+
+import pytest
+
+
+@pytest.fixture
+def mcp_loop():
+    import tools.mcp_tool as mcp_tool
+
+    mcp_tool._ensure_mcp_loop()
+    yield mcp_tool
+    mcp_tool._stop_mcp_loop()
+
+
+def test_override_propagates_to_mcp_loop(tmp_path, monkeypatch, mcp_loop):
+    from hermes_constants import (
+        get_hermes_home,
+        reset_hermes_home_override,
+        set_hermes_home_override,
+    )
+
+    process_home = tmp_path / "proc-home"
+    profile_home = tmp_path / "profile-home"
+    process_home.mkdir()
+    profile_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(process_home))
+
+    async def read_home():
+        return str(get_hermes_home())
+
+    # Unscoped: the loop task sees the process home.
+    assert mcp_loop._run_on_mcp_loop(read_home(), timeout=10) == str(process_home)
+
+    # Scoped: the caller's override must reach the loop task.
+    token = set_hermes_home_override(str(profile_home))
+    try:
+        assert mcp_loop._run_on_mcp_loop(read_home(), timeout=10) == str(profile_home)
+        # A zero-arg coroutine factory (callable) must be wrapped too.
+        assert mcp_loop._run_on_mcp_loop(lambda: read_home(), timeout=10) == str(
+            profile_home
+        )
+    finally:
+        reset_hermes_home_override(token)
+
+    # The loop thread's default context is untouched afterwards.
+    assert mcp_loop._run_on_mcp_loop(read_home(), timeout=10) == str(process_home)
+
+
+def test_oauth_token_paths_follow_override(tmp_path, monkeypatch, mcp_loop):
+    """The actual symptom path: HermesTokenStorage resolving inside the
+    probe's MCP-loop coroutine must land in the selected profile's
+    mcp-tokens dir, not the process home's."""
+    from hermes_constants import (
+        reset_hermes_home_override,
+        set_hermes_home_override,
+    )
+
+    process_home = tmp_path / "proc-home"
+    profile_home = tmp_path / "profile-home"
+    process_home.mkdir()
+    profile_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(process_home))
+
+    async def token_path():
+        from tools.mcp_oauth import HermesTokenStorage
+
+        return str(HermesTokenStorage("probe-srv")._tokens_path())
+
+    token = set_hermes_home_override(str(profile_home))
+    try:
+        path = mcp_loop._run_on_mcp_loop(token_path(), timeout=10)
+    finally:
+        reset_hermes_home_override(token)
+    assert path.startswith(str(profile_home))
+    assert os.path.join("mcp-tokens", "probe-srv.json") in path
+
+
+def test_concurrent_scopes_do_not_interfere(tmp_path, monkeypatch, mcp_loop):
+    """Two threads carrying DIFFERENT overrides scheduling onto the same
+    loop must each see their own home — the wrapper is task-local."""
+    import threading
+
+    from hermes_constants import (
+        get_hermes_home,
+        reset_hermes_home_override,
+        set_hermes_home_override,
+    )
+
+    process_home = tmp_path / "proc-home"
+    home_a = tmp_path / "profile-a"
+    home_b = tmp_path / "profile-b"
+    for h in (process_home, home_a, home_b):
+        h.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(process_home))
+
+    async def read_home():
+        return str(get_hermes_home())
+
+    results: dict = {}
+
+    def scoped_call(key, home):
+        token = set_hermes_home_override(str(home))
+        try:
+            results[key] = mcp_loop._run_on_mcp_loop(read_home(), timeout=10)
+        finally:
+            reset_hermes_home_override(token)
+
+    threads = [
+        threading.Thread(target=scoped_call, args=("a", home_a)),
+        threading.Thread(target=scoped_call, args=("b", home_b)),
+    ]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join(timeout=15)
+
+    assert results == {"a": str(home_a), "b": str(home_b)}
+
+
+def test_wrap_is_noop_without_override(mcp_loop):
+    """No active override → the coroutine passes through unwrapped."""
+
+    async def trivial():
+        return 42
+
+    coro = trivial()
+    wrapped = mcp_loop._wrap_with_home_override(coro)
+    assert wrapped is coro
+    coro.close()
diff --git a/tests/tools/test_terminal_tool.py b/tests/tools/test_terminal_tool.py
index fe2f5e3f514..ea113e63c27 100644
--- a/tests/tools/test_terminal_tool.py
+++ b/tests/tools/test_terminal_tool.py
@@ -90,6 +90,30 @@ def test_cached_sudo_password_is_used_when_env_is_unset(monkeypatch):
     assert sudo_stdin == "cached-pass\n"
 
 
+def test_registered_sudo_callback_is_used_without_interactive_env(monkeypatch):
+    monkeypatch.delenv("SUDO_PASSWORD", raising=False)
+    monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
+    monkeypatch.setattr(terminal_tool, "_sudo_nopasswd_works", lambda: False)
+
+    calls = []
+
+    def sudo_callback():
+        calls.append("called")
+        return "callback-pass"
+
+    terminal_tool.set_sudo_password_callback(sudo_callback)
+    try:
+        transformed, sudo_stdin = terminal_tool._transform_sudo_command(
+            "echo ok | sudo tee /tmp/hermes-test"
+        )
+    finally:
+        terminal_tool.set_sudo_password_callback(None)
+
+    assert calls == ["called"]
+    assert transformed == "echo ok | sudo -S -p '' tee /tmp/hermes-test"
+    assert sudo_stdin == "callback-pass\n"
+
+
 def test_cached_sudo_password_isolated_by_session_key(monkeypatch):
     monkeypatch.delenv("SUDO_PASSWORD", raising=False)
     monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
diff --git a/tests/tools/test_web_providers.py b/tests/tools/test_web_providers.py
index bd3cce8754a..230f909b5fe 100644
--- a/tests/tools/test_web_providers.py
+++ b/tests/tools/test_web_providers.py
@@ -167,6 +167,21 @@ class TestPerCapabilityBackendSelection:
         monkeypatch.setenv("TAVILY_API_KEY", "test-key")
         assert web_tools._get_search_backend() == "tavily"
 
+    def test_explicit_extract_backend_honored_when_unavailable(self, monkeypatch):
+        """An explicit per-capability backend is honored even with no creds, so
+        its setup error surfaces instead of silently rerouting to the keyless
+        Parallel default (which would send user URLs to a different provider)."""
+        from tools import web_tools
+
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
+            "extract_backend": "firecrawl",
+        })
+        for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL"):
+            monkeypatch.delenv(key, raising=False)
+        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False, raising=False)
+        # Resolves to firecrawl (not parallel) despite firecrawl being unavailable.
+        assert web_tools._get_extract_backend() == "firecrawl"
+
     def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
         from tools import web_tools
 
@@ -177,7 +192,7 @@ class TestPerCapabilityBackendSelection:
         monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
         assert web_tools._get_extract_backend() == "parallel"
 
-    def test_search_backend_ignored_when_not_available(self, monkeypatch):
+    def test_explicit_search_backend_honored_when_unavailable(self, monkeypatch):
         from tools import web_tools
 
         monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
@@ -186,8 +201,10 @@ class TestPerCapabilityBackendSelection:
         })
         monkeypatch.delenv("EXA_API_KEY", raising=False)
         monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
-        # Should fall back to firecrawl since exa isn't configured
-        assert web_tools._get_search_backend() == "firecrawl"
+        # The explicit per-capability choice (exa) is honored even though it's
+        # unavailable, so its setup error surfaces — we don't silently reroute
+        # to the shared backend (or the keyless Parallel default).
+        assert web_tools._get_search_backend() == "exa"
 
     def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
         from tools import web_tools
@@ -291,25 +308,55 @@ class TestUnconfiguredErrorEnvelopeParity:
         ):
             monkeypatch.delenv(k, raising=False)
 
-    def test_unconfigured_search_emits_top_level_error(self, monkeypatch):
-        """``web_search_tool`` with no creds returns ``{"error": "Error searching web: ..."}``
-        — matching main's ``tool_error()`` envelope, not a per-result shape.
+    def test_extract_empty_urls_does_not_raise(self, monkeypatch):
+        """Regression: empty (or fully SSRF-blocked) URL sets skip the dispatch
+        branch; the free-Parallel flag must still be initialized so the tool
+        returns an error envelope instead of UnboundLocalError."""
+        import asyncio
+        from tools import web_tools
+        self._clear_web_creds(monkeypatch)
+        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
+        out = asyncio.run(web_tools.web_extract_tool([], "markdown"))
+        # The key assertion is that it returns a normal error envelope (a
+        # string) rather than raising UnboundLocalError.
+        assert isinstance(out, str)
+        result = json.loads(out)
+        assert "error" in result
+
+    def test_unconfigured_search_falls_back_to_free_parallel(self, monkeypatch):
+        """``web_search_tool`` with no creds routes to Parallel's free Search
+        MCP rather than erroring. The MCP transport is mocked so the test
+        stays offline; we assert dispatch landed on parallel and returned the
+        standard search envelope.
         """
         from tools import web_tools
+        import plugins.web.parallel.provider as parallel_provider
 
         self._clear_web_creds(monkeypatch)
-        # Reset firecrawl client cache so the unconfigured state is re-evaluated
         monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
         monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
         monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
 
+        captured = {}
+
+        def _fake_mcp(query, limit, api_key):
+            captured["query"] = query
+            captured["api_key"] = api_key
+            return {
+                "success": True,
+                "data": {"web": [
+                    {"url": "https://example.com", "title": "Example",
+                     "description": "hit", "position": 1},
+                ]},
+            }
+
+        monkeypatch.setattr(parallel_provider, "_mcp_web_search", _fake_mcp)
+
         result = json.loads(web_tools.web_search_tool("hello world", limit=3))
-        assert "error" in result, f"expected top-level 'error' key, got {result}"
-        # ``Error searching web:`` prefix comes from web_tools' top-level except handler
-        assert "Error searching web:" in result["error"]
-        assert "FIRECRAWL_API_KEY" in result["error"]
-        # No per-result burying
-        assert "results" not in result
+        assert result.get("success") is True, f"expected success, got {result}"
+        assert result["data"]["web"][0]["url"] == "https://example.com"
+        # Keyless path: dispatched to parallel with no Bearer token.
+        assert captured == {"query": "hello world", "api_key": None}
 
 
 class TestDispatchersTriggerPluginDiscovery:
diff --git a/tests/tools/test_web_providers_ddgs.py b/tests/tools/test_web_providers_ddgs.py
index 283a25f0a1b..1050a4e554a 100644
--- a/tests/tools/test_web_providers_ddgs.py
+++ b/tests/tools/test_web_providers_ddgs.py
@@ -190,7 +190,11 @@ class TestDDGSBackendWiring:
         monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
         assert web_tools._get_backend() == "exa"
 
-    def test_auto_detect_picks_ddgs_as_last_resort(self, monkeypatch):
+    def test_auto_detect_prefers_keyless_parallel_over_ddgs(self, monkeypatch):
+        # With no credentials, keyless Parallel is the auto-detect default even
+        # when the ddgs package is installed — ddgs is search-only (can't
+        # extract), so Parallel is preferred so both search and extract work.
+        # ddgs remains reachable via an explicit web.backend=ddgs.
         from tools import web_tools
         monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
         for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
@@ -198,7 +202,7 @@ class TestDDGSBackendWiring:
             monkeypatch.delenv(key, raising=False)
         monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
         monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: True)
-        assert web_tools._get_backend() == "ddgs"
+        assert web_tools._get_backend() == "parallel"
 
     def test_check_web_api_key_true_when_ddgs_configured(self, monkeypatch):
         from tools import web_tools
diff --git a/tests/tools/test_web_providers_searxng.py b/tests/tools/test_web_providers_searxng.py
index e093532bf37..2877d56b868 100644
--- a/tests/tools/test_web_providers_searxng.py
+++ b/tests/tools/test_web_providers_searxng.py
@@ -313,7 +313,9 @@ class TestCheckWebApiKey:
         )
         assert web_tools.check_web_api_key() is True
 
-    def test_no_credentials_fails(self, monkeypatch):
+    def test_no_credentials_usable_via_free_parallel(self, monkeypatch):
+        """No credentials → check_web_api_key True: the keyless Parallel free MCP
+        still serves calls, so web is usable out of the box."""
         from tools import web_tools
         monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
         monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
@@ -324,7 +326,8 @@ class TestCheckWebApiKey:
         monkeypatch.delenv("SEARXNG_URL", raising=False)
         monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False)
         monkeypatch.setattr(web_tools, "check_firecrawl_api_key", lambda: False)
-        assert web_tools.check_web_api_key() is False
+        monkeypatch.setattr(web_tools, "_ddgs_package_importable", lambda: False)
+        assert web_tools.check_web_api_key() is True
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/tools/test_web_tools_config.py b/tests/tools/test_web_tools_config.py
index 28323122aca..c9a6b3b31a3 100644
--- a/tests/tools/test_web_tools_config.py
+++ b/tests/tools/test_web_tools_config.py
@@ -384,11 +384,14 @@ class TestBackendSelection:
              patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
             assert _get_backend() == "firecrawl"
 
-    def test_fallback_no_keys_defaults_to_firecrawl(self):
-        """No keys, no config → 'firecrawl' (will fail at client init)."""
+    def test_fallback_no_keys_defaults_to_parallel(self):
+        """No credentials, no config → 'parallel' (free Search MCP works
+        keyless). Selection is purely credential-based."""
         from tools.web_tools import _get_backend
-        with patch("tools.web_tools._load_web_config", return_value={}):
-            assert _get_backend() == "firecrawl"
+        with patch("tools.web_tools._load_web_config", return_value={}), \
+             patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
+             patch("tools.web_tools._ddgs_package_importable", return_value=False):
+            assert _get_backend() == "parallel"
 
     def test_invalid_config_falls_through_to_fallback(self):
         """web.backend=invalid → ignored, uses key-based fallback."""
@@ -623,9 +626,74 @@ class TestCheckWebApiKey:
             from tools.web_tools import check_web_api_key
             assert check_web_api_key() is True
 
-    def test_no_keys_returns_false(self):
+    def test_no_keys_usable_via_free_parallel(self):
+        """No credentials → check_web_api_key True: selection resolves to the
+        keyless Parallel free MCP, which genuinely services calls (web works out
+        of the box). check_web_api_key is a usability probe, not a key check."""
         from tools.web_tools import check_web_api_key
-        assert check_web_api_key() is False
+        with patch("tools.web_tools._load_web_config", return_value={}), \
+             patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
+             patch("tools.web_tools._ddgs_package_importable", return_value=False), \
+             patch.dict(os.environ, {}, clear=False):
+            for k in ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
+                      "TAVILY_API_KEY", "EXA_API_KEY", "SEARXNG_URL", "BRAVE_SEARCH_API_KEY"):
+                os.environ.pop(k, None)
+            assert check_web_api_key() is True
+
+    def test_typo_extract_backend_not_masked_by_parallel(self):
+        """A typo'd per-capability backend is honored, so dispatch reports an error
+        rather than silently falling through to keyless Parallel."""
+        from tools.web_tools import _get_extract_backend, check_web_api_key
+        with patch("tools.web_tools._load_web_config",
+                   return_value={"extract_backend": "parrallel"}):
+            assert _get_extract_backend() == "parrallel"   # not "parallel"
+            assert check_web_api_key() is False            # unknown → unusable
+
+    def test_keyless_parallel_unusable_when_provider_disabled(self):
+        """If the bundled web-parallel provider is disabled/unregistered, the
+        keyless free-MCP path must NOT report web as usable — otherwise setup is
+        skipped but web tools fail at runtime with no provider."""
+        from tools.web_tools import check_web_api_key
+        with patch("tools.web_tools._load_web_config", return_value={}), \
+             patch("tools.web_tools._parallel_provider_registered", return_value=False), \
+             patch("tools.web_tools._is_tool_gateway_ready", return_value=False), \
+             patch("tools.web_tools.check_firecrawl_api_key", return_value=False), \
+             patch("tools.web_tools._ddgs_package_importable", return_value=False), \
+             patch.dict(os.environ, {}, clear=False):
+            for var in (
+                "PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
+                "TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY", "SEARXNG_URL",
+            ):
+                os.environ.pop(var, None)
+            assert check_web_api_key() is False
+
+    def test_extract_autodetect_skips_search_only_for_keyless_parallel(self):
+        """A search-only env credential (SEARXNG_URL) must not shadow the keyless
+        Parallel free-MCP extract fallback: extract auto-detect skips search-only
+        backends, so _get_extract_backend resolves to parallel (which can fetch),
+        while search auto-detect still prefers the configured searxng."""
+        from tools.web_tools import _get_extract_backend, _get_search_backend
+        with patch("tools.web_tools._load_web_config", return_value={}), \
+             patch.dict(os.environ, {}, clear=False):
+            for var in (
+                "PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL",
+                "TAVILY_API_KEY", "EXA_API_KEY", "BRAVE_SEARCH_API_KEY",
+            ):
+                os.environ.pop(var, None)
+            os.environ["SEARXNG_URL"] = "http://localhost:8080"
+            with patch("tools.web_tools._is_tool_gateway_ready", return_value=False):
+                assert _get_search_backend() == "searxng"
+                assert _get_extract_backend() == "parallel"
+
+    def test_configured_but_unavailable_backend_reports_unusable(self):
+        """An explicitly configured backend with no creds (exa, no key) →
+        check_web_api_key False so diagnostics flag the misconfiguration —
+        even though the tools stay registered."""
+        from tools.web_tools import check_web_api_key
+        with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
+             patch.dict(os.environ, {}, clear=False):
+            os.environ.pop("EXA_API_KEY", None)
+            assert check_web_api_key() is False
 
     def test_both_keys_returns_true(self):
         with patch.dict(os.environ, {
@@ -688,12 +756,18 @@ class TestCheckWebApiKey:
 
         assert refresh_calls == []
 
-    def test_configured_backend_must_match_available_provider(self):
-        with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}):
-            with patch("tools.web_tools._read_nous_access_token", return_value="nous-token"):
-                with patch.dict(os.environ, {"FIRECRAWL_GATEWAY_URL": "http://127.0.0.1:3002"}, clear=False):
-                    from tools.web_tools import check_web_api_key
-                    assert check_web_api_key() is False
+    def test_web_tools_registered_even_when_configured_backend_unavailable(self):
+        # Registration is unconditional (web_tools_registered) so an explicitly
+        # configured but unavailable backend (exa without EXA_API_KEY) keeps the
+        # tools registered to surface exa's setup error at call time — while the
+        # readiness probe (check_web_api_key) honestly reports not-configured.
+        from tools.web_tools import web_tools_registered, check_web_api_key
+        assert web_tools_registered() is True
+        with patch("tools.web_tools._load_web_config", return_value={"backend": "exa"}), \
+             patch.dict(os.environ, {}, clear=False):
+            os.environ.pop("EXA_API_KEY", None)
+            assert web_tools_registered() is True
+            assert check_web_api_key() is False
 
     def test_configured_firecrawl_backend_accepts_managed_gateway(self):
         with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}):
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index 3b1c46ec3d7..7ec31b806c4 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -326,15 +326,23 @@ def _resolve_model_override(model_obj: Optional[Dict[str, Any]]) -> tuple:
         return (None, None)
     model_name = (model_obj.get("model") or "").strip() or None
     provider_name = (model_obj.get("provider") or "").strip() or None
-    # Bare "custom" is an incomplete spec — the canonical form is
-    # "custom:<name>" matching a custom_providers entry. LLMs frequently
+    # Bare "custom" is usually an incomplete spec — the canonical form is
+    # "custom:<name>" matching a custom_providers entry, and LLMs frequently
     # supply the bare type because the schema does not advertise the
-    # ":<name>" suffix, which used to bypass the pinning path below and
-    # leave the job stored with an unresolvable "custom" provider. Treat
-    # the bare value as "no provider supplied" so the current main
-    # provider gets pinned instead.
+    # ":<name>" suffix. It is only a problem when it can't resolve at runtime:
+    # a user may literally name a ``providers.custom`` (or custom_providers
+    # "custom") entry, in which case the job should keep ``provider="custom"``
+    # and run against that endpoint. Only when no such entry exists do we treat
+    # the bare value as "no provider supplied" and pin the current main
+    # provider below — otherwise pinning to ``model.provider`` (e.g. codex)
+    # silently hijacks a job that meant to use the configured custom endpoint.
     if provider_name == "custom":
-        provider_name = None
+        try:
+            from hermes_cli.runtime_provider import has_named_custom_provider
+            if not has_named_custom_provider("custom"):
+                provider_name = None
+        except Exception:
+            provider_name = None
     if model_name and not provider_name:
         # Pin to the current main provider so the job is stable
         try:
@@ -451,8 +459,6 @@ def _format_job(job: Dict[str, Any]) -> Dict[str, Any]:
         result["enabled_toolsets"] = job["enabled_toolsets"]
     if job.get("workdir"):
         result["workdir"] = job["workdir"]
-    if job.get("profile"):
-        result["profile"] = job["profile"]
     return result
 
 
@@ -475,7 +481,6 @@ def cronjob(
     context_from: Optional[Union[str, List[str]]] = None,
     enabled_toolsets: Optional[List[str]] = None,
     workdir: Optional[str] = None,
-    profile: Optional[str] = None,
     no_agent: Optional[bool] = None,
     task_id: str = None,
 ) -> str:
@@ -542,7 +547,6 @@ def cronjob(
                 context_from=context_from,
                 enabled_toolsets=enabled_toolsets or None,
                 workdir=_normalize_optional_job_value(workdir),
-                profile=_normalize_optional_job_value(profile),
                 no_agent=_no_agent,
             )
             return json.dumps(
@@ -677,10 +681,6 @@ def cronjob(
                 # Empty string clears the field (restores old behaviour);
                 # otherwise pass raw — update_job() validates / normalizes.
                 updates["workdir"] = _normalize_optional_job_value(workdir) or None
-            if profile is not None:
-                # Empty string clears the field (restores old behaviour);
-                # otherwise pass raw — update_job() validates / normalizes.
-                updates["profile"] = _normalize_optional_job_value(profile) or None
             if no_agent is not None:
                 # Toggling no_agent on/off at update time. If flipping to True,
                 # we need a script to already exist on the job (or be part of
@@ -834,10 +834,6 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
                 "type": "string",
                 "description": "Optional absolute path to run the job from. When set, AGENTS.md / CLAUDE.md / .cursorrules from that directory are injected into the system prompt, and the terminal/file/code_exec tools use it as their working directory — useful for running a job inside a specific project repo. Must be an absolute path that exists. When unset (default), preserves the original behaviour: no project context files, tools use the scheduler's cwd. On update, pass an empty string to clear. Jobs with workdir run sequentially (not parallel) to keep per-job directories isolated."
             },
-            "profile": {
-                "type": "string",
-                "description": "Optional Hermes profile name to run the job under. When set, the scheduler resolves that profile, applies a context-local Hermes home override, loads that profile's config/.env for the run, and bridges HERMES_HOME into subprocesses. Any temporary process-environment changes from profile .env loading are restored after the job exits. Use 'default' for the root Hermes profile. Named profiles must already exist. When unset (default), preserves the scheduler's existing profile. On update, pass an empty string to clear. Jobs with profile run sequentially (not parallel) to keep profile-scoped runtime state isolated."
-            },
         },
         "required": ["action"]
     }
@@ -892,7 +888,6 @@ registry.register(
         context_from=args.get("context_from"),
         enabled_toolsets=args.get("enabled_toolsets"),
         workdir=args.get("workdir"),
-        profile=args.get("profile"),
         no_agent=args.get("no_agent"),
         task_id=kw.get("task_id"),
     ))(),
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index e4b0a9a57f0..76f146c7869 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -90,7 +90,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
     # ─── Web search backends ───────────────────────────────────────────────
     "search.exa": ("exa-py==2.10.2",),
     "search.firecrawl": ("firecrawl-py==4.17.0",),
-    "search.parallel": ("parallel-web==0.4.2",),
+    "search.parallel": ("parallel-web==0.6.0",),
 
     # ─── TTS providers ─────────────────────────────────────────────────────
     # Pinned to exact versions to match pyproject.toml's no-ranges policy
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index 5c3c46c4db4..7287a45dbed 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -90,7 +90,7 @@ import sys
 import threading
 import time
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Coroutine, Dict, List, Optional
 from urllib.parse import urlparse
 
 logger = logging.getLogger(__name__)
@@ -2460,6 +2460,37 @@ def _ensure_mcp_loop():
         _mcp_thread.start()
 
 
+def _wrap_with_home_override(coro: "Coroutine") -> "Coroutine":
+    """Carry the caller's context-local HERMES_HOME override into ``coro``.
+
+    Returns ``coro`` unchanged when no override is active. Otherwise wraps
+    it so the override is set inside the coroutine's own (task-local)
+    context on the MCP loop and reset when it completes — concurrent calls
+    carrying different scopes don't interfere.
+    """
+    try:
+        from hermes_constants import (
+            get_hermes_home_override,
+            reset_hermes_home_override,
+            set_hermes_home_override,
+        )
+
+        home_override = get_hermes_home_override()
+    except Exception:
+        return coro
+    if not home_override:
+        return coro
+
+    async def _scoped():
+        token = set_hermes_home_override(home_override)
+        try:
+            return await coro
+        finally:
+            reset_hermes_home_override(token)
+
+    return _scoped()
+
+
 def _run_on_mcp_loop(coro_or_factory, timeout: float = 30):
     """Schedule a coroutine on the MCP event loop and block until done.
 
@@ -2482,6 +2513,19 @@ def _run_on_mcp_loop(coro_or_factory, timeout: float = 30):
         raise RuntimeError("MCP event loop is not running")
 
     coro = coro_or_factory() if callable(coro_or_factory) else coro_or_factory
+
+    # Propagate the context-local HERMES_HOME override onto the MCP loop.
+    # Tasks scheduled via run_coroutine_threadsafe are created INSIDE the
+    # loop thread, so they copy the loop thread's context — not the
+    # scheduling thread's. A per-request profile scope (the dashboard's
+    # ?profile= endpoints, e.g. the MCP "Test server" probe) would silently
+    # vanish here: OAuth token stores and any other get_hermes_home()
+    # resolution inside the coroutine would read the process home instead
+    # of the selected profile's. Re-establish the override inside the
+    # task's own context (task-local — concurrent calls carrying different
+    # scopes don't interfere). No-op when no override is active.
+    coro = _wrap_with_home_override(coro)
+
     future = safe_schedule_threadsafe(
         coro, loop,
         logger=logger,
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index d9edd7a5d5d..2ad882fba25 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -777,7 +777,8 @@ def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None
     the password in the command string themselves; see their execute()
     methods for how they handle the non-None sudo_stdin case.
 
-    If SUDO_PASSWORD is not set and in interactive mode (HERMES_INTERACTIVE=1):
+    If SUDO_PASSWORD is not set and an interactive UI is available
+    (HERMES_INTERACTIVE=1 or a registered sudo password callback):
       Prompts user for password with 45s timeout, caches for session.
 
     If SUDO_PASSWORD is not set and NOT interactive:
@@ -805,7 +806,11 @@ def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None
     if not has_configured_password and not sudo_password and _sudo_nopasswd_works():
         return command, None
 
-    if not has_configured_password and not sudo_password and env_var_enabled("HERMES_INTERACTIVE"):
+    has_sudo_prompt_callback = _get_sudo_password_callback() is not None
+    should_prompt_for_sudo = (
+        env_var_enabled("HERMES_INTERACTIVE") or has_sudo_prompt_callback
+    )
+    if not has_configured_password and not sudo_password and should_prompt_for_sudo:
         sudo_password = _prompt_for_sudo_password(timeout_seconds=45)
         if sudo_password:
             _set_cached_sudo_password(sudo_password)
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 133489b0a89..6bf522f33ec 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -141,15 +141,35 @@ def _load_web_config() -> dict:
     except (ImportError, Exception):
         return {}
 
-def _get_backend() -> str:
+# Recognized web backend names (config values accepted in ``web.backend`` /
+# ``web.search_backend`` / ``web.extract_backend``). Kept as a single source of
+# truth for config validation across the selection helpers.
+_KNOWN_WEB_BACKENDS = frozenset(
+    {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}
+)
+
+# Backends that only service web_search (their provider's ``supports_extract()``
+# is False). They are skipped during *extract* auto-detect so a search-only
+# credential (e.g. SEARXNG_URL) does not shadow the keyless Parallel free-MCP
+# fallback, which would otherwise leave web_extract broken on a no-key install.
+_SEARCH_ONLY_BACKENDS = frozenset({"searxng", "brave-free", "ddgs", "xai"})
+
+
+def _get_backend(capability: str = "search") -> str:
     """Determine which web backend to use (shared fallback).
 
     Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
     Falls back to whichever API key is present for users who configured
     keys manually without running setup.
+
+    ``capability`` ("search" | "extract") only affects auto-detect: for
+    ``extract`` we skip search-only backends (``_SEARCH_ONLY_BACKENDS``) so a
+    search-only credential never shadows the keyless Parallel free-MCP extract
+    fallback. An explicit ``web.backend`` value is honored as-is (explicit wins,
+    surfacing that backend's own search-only error rather than rerouting).
     """
     configured = (_load_web_config().get("backend") or "").lower().strip()
-    if configured in {"parallel", "firecrawl", "tavily", "exa", "searxng", "brave-free", "ddgs", "xai"}:
+    if configured in _KNOWN_WEB_BACKENDS:
         return configured
 
     # Fallback for manual / legacy config — pick the highest-priority
@@ -158,7 +178,8 @@ def _get_backend() -> str:
     # pre-empted by a Nous OAuth token whose subscription tier may not
     # actually grant web-search access (the gateway then fails at runtime
     # with "no subscription" and the tool returns an error to the agent
-    # without falling back). Free-tier backends trail the paid ones.
+    # without falling back). Free-tier backends (searxng / brave-free /
+    # keyless parallel / ddgs) trail the keyed ones.
     backend_candidates = (
         ("tavily", _has_env("TAVILY_API_KEY")),
         ("exa", _has_env("EXA_API_KEY")),
@@ -167,13 +188,24 @@ def _get_backend() -> str:
         ("firecrawl", _is_tool_gateway_ready()),
         ("searxng", _has_env("SEARXNG_URL")),
         ("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
+        # Keyless Parallel free MCP — always available, the intended no-key
+        # default for both search and extract. Ahead of ddgs (search-only, so it
+        # can't service web_extract); ddgs stays reachable via web.backend=ddgs.
+        ("parallel", True),
         ("ddgs", _ddgs_package_importable()),
     )
     for backend, available in backend_candidates:
-        if available:
-            return backend
+        if not available:
+            continue
+        # For extract, skip search-only backends so the keyless Parallel
+        # free-MCP fallback (which can fetch URLs) is reached instead.
+        if capability == "extract" and backend in _SEARCH_ONLY_BACKENDS:
+            continue
+        return backend
 
-    return "firecrawl"  # default (backward compat)
+    # Defensive terminal (the keyless ``parallel`` candidate above is always
+    # available, so this is effectively unreachable).
+    return "parallel"
 
 
 def _get_search_backend() -> str:
@@ -204,14 +236,19 @@ def _get_extract_backend() -> str:
 def _get_capability_backend(capability: str) -> str:
     """Shared helper for per-capability backend selection.
 
-    Reads ``web.{capability}_backend`` from config; if set and available,
-    uses it. Otherwise falls through to the shared ``_get_backend()``.
+    Reads ``web.{capability}_backend`` from config. Any explicit value is
+    honored **regardless of availability** — including unrecognized typos like
+    ``parrallel`` — so the dispatcher surfaces that backend's own setup/config
+    error rather than silently rerouting to the keyless Parallel default (which
+    would send user queries to a different provider and hide the
+    misconfiguration). This matches ``web_search_registry``'s "explicit config
+    wins" rule. Only an *unset* value falls through to ``_get_backend()``.
     """
     cfg = _load_web_config()
     specific = (cfg.get(f"{capability}_backend") or "").lower().strip()
-    if specific and _is_backend_available(specific):
+    if specific:
         return specific
-    return _get_backend()
+    return _get_backend(capability)
 
 
 def _is_backend_available(backend: str) -> bool:
@@ -219,6 +256,8 @@ def _is_backend_available(backend: str) -> bool:
     if backend == "exa":
         return _has_env("EXA_API_KEY")
     if backend == "parallel":
+        # Credential probe: True only with a real key. The keyless free-MCP
+        # fallback is _get_backend()'s always-available candidate, not this probe.
         return _has_env("PARALLEL_API_KEY")
     if backend == "firecrawl":
         return check_firecrawl_api_key()
@@ -972,11 +1011,19 @@ async def web_extract_tool(
             else:
                 safe_urls.append(url)
 
+        # Tracks the free-tier Parallel extract path (no key → web_fetch via the
+        # hosted Search MCP) so we can credit Parallel in the output/UI. Bound
+        # here so it stays defined for empty/all-blocked inputs (which skip dispatch).
+        _free_parallel_extract = False
+
         # Dispatch only safe URLs to the configured backend
         if not safe_urls:
             results = []
         else:
             backend = _get_extract_backend()
+            _free_parallel_extract = (
+                backend == "parallel" and not _has_env("PARALLEL_API_KEY")
+            )
 
             # All seven providers (brave-free, ddgs, searxng, exa, parallel,
             # tavily, firecrawl) now live as plugins. The dispatcher is a
@@ -1150,6 +1197,14 @@ async def web_extract_tool(
             for r in response.get("results", [])
         ]
         trimmed_response = {"results": trimmed_results}
+        if _free_parallel_extract:
+            # Credit Parallel's free Search MCP (drives the "[Parallel]" UI tag
+            # + lets the model cite the source). Free tier only.
+            trimmed_response["provider"] = "parallel"
+            trimmed_response["attribution"] = (
+                "Extraction powered by the free Parallel Web Search MCP "
+                "(https://parallel.ai)."
+            )
 
         if trimmed_response.get("results") == []:
             result_json = tool_error("Content was inaccessible or not found")
@@ -1181,16 +1236,61 @@ async def web_extract_tool(
         return tool_error(error_msg)
 
 
-# Convenience function to check Firecrawl credentials
+def web_tools_registered() -> bool:
+    """Whether the web tools should be registered. Always True.
+
+    Registration is decoupled from credential readiness: with no credentials,
+    search/extract fall back to Parallel's free hosted Search MCP, and an
+    explicitly configured-but-unavailable backend must stay registered so
+    dispatch surfaces that backend's own setup error rather than the tool
+    silently vanishing. For "is web actually configured?" use
+    :func:`check_web_api_key`.
+    """
+    return True
+
+
+def _parallel_provider_registered() -> bool:
+    """True when the bundled ``web-parallel`` provider is registered/enabled.
+
+    Plugin discovery skips disabled plugins, so a disabled (``plugins.disabled``)
+    or otherwise-unregistered parallel provider looks up as ``None`` → False here.
+    """
+    _ensure_web_plugins_loaded()
+    try:
+        from agent.web_search_registry import get_provider
+
+        return get_provider("parallel") is not None
+    except Exception:  # noqa: BLE001
+        return False
+
+
+def _backend_usable(backend: str) -> bool:
+    """True when *backend* can service calls. Keyless Parallel counts (free MCP).
+
+    Unknown/typo'd backend names are not usable (so an explicit typo is reported
+    as a config problem rather than masked by the keyless fallback).
+    """
+    if backend == "parallel" and not _has_env("PARALLEL_API_KEY"):
+        # Keyless Parallel is only genuinely usable when its provider is actually
+        # registered/enabled. If web-parallel is disabled or discovery failed,
+        # report unusable so setup is not skipped and the user is not left with
+        # web tools that fail at runtime ("No web search provider configured").
+        return _parallel_provider_registered()
+    return _is_backend_available(backend)
+
+
 def check_web_api_key() -> bool:
-    """Check whether the configured web backend is available."""
-    configured = _load_web_config().get("backend", "").lower().strip()
-    if configured in {"exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai"}:
-        return _is_backend_available(configured)
-    return any(
-        _is_backend_available(backend)
-        for backend in ("exa", "parallel", "firecrawl", "tavily", "searxng", "brave-free", "ddgs", "xai")
-    )
+    """Usability probe: True when the selected web backends can service calls.
+
+    Probes the backends that :func:`_get_search_backend` /
+    :func:`_get_extract_backend` actually select (not just shared
+    ``web.backend``), so an explicit per-capability backend with missing
+    credentials — or a typo'd name — reports unusable instead of being masked by
+    the keyless Parallel fallback. Keyless Parallel itself genuinely services
+    calls, so a zero-setup install reports usable. Distinct from
+    :func:`web_tools_registered` (always True — whether the tool is offered).
+    """
+    return _backend_usable(_get_search_backend()) and _backend_usable(_get_extract_backend())
 
 
 def check_auxiliary_model() -> bool:
@@ -1358,7 +1458,7 @@ registry.register(
     toolset="web",
     schema=WEB_SEARCH_SCHEMA,
     handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=args.get("limit", 5)),
-    check_fn=check_web_api_key,
+    check_fn=web_tools_registered,
     requires_env=_web_requires_env(),
     emoji="🔍",
     max_result_size_chars=100_000,
@@ -1369,7 +1469,7 @@ registry.register(
     schema=WEB_EXTRACT_SCHEMA,
     handler=lambda args, **kw: web_extract_tool(
         args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
-    check_fn=check_web_api_key,
+    check_fn=web_tools_registered,
     requires_env=_web_requires_env(),
     is_async=True,
     emoji="📄",
diff --git a/toolsets.py b/toolsets.py
index 901b072f46c..5c67bfb2114 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -339,6 +339,33 @@ TOOLSETS = {
         "tools": [],
         "includes": ["web", "vision", "image_gen"]
     },
+
+    # Coding posture (base Hermes — CLI/TUI/desktop/ACP). Auto-selected in a
+    # code workspace; see agent/coding_context.py. Keeps everything you reach
+    # for while pairing on code and drops the rest (messaging, tts, image_gen,
+    # spotify, home-assistant, cron, computer-use).
+    "coding": {
+        "description": "Coding-focused toolset: files, terminal, search, web docs, skills, todo, delegate, vision, browser",
+        "tools": [
+            "web_search", "web_extract",
+            "terminal", "process", "read_terminal",
+            "read_file", "write_file", "patch", "search_files",
+            "vision_analyze",
+            "skills_list", "skill_view", "skill_manage",
+            "browser_navigate", "browser_snapshot", "browser_click",
+            "browser_type", "browser_scroll", "browser_back",
+            "browser_press", "browser_get_images",
+            "browser_vision", "browser_console", "browser_cdp", "browser_dialog",
+            "todo", "memory",
+            "session_search", "clarify",
+            "execute_code", "delegate_task",
+        ],
+        "includes": [],
+        # Posture toolset: selected per-session by agent/coding_context.py,
+        # never auto-recovered into per-platform tool config (see the
+        # non-configurable-toolset recovery loop in hermes_cli/tools_config.py).
+        "posture": True,
+    },
     
     # ==========================================================================
     # Full Hermes toolsets (CLI + messaging platforms)
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 390c31b092e..d932e98510f 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1,5 +1,6 @@
 import atexit
 import concurrent.futures
+import contextlib
 import contextvars
 import copy
 import inspect
@@ -1171,6 +1172,34 @@ def _ensure_session_db_row(session: dict) -> None:
                 pass
 
 
+@contextlib.contextmanager
+def _session_db(session: dict):
+    """Yield the SessionDB that owns this session's row (profile-aware).
+
+    Mirrors :func:`_ensure_session_db_row`: a remote/profile session persists
+    into its own profile's ``state.db`` (a fresh handle we close on exit);
+    everything else borrows the shared ``_get_db()`` handle (left open). Yields
+    None when the db is unavailable.
+    """
+    db, close_db = None, False
+    profile_home = session.get("profile_home")
+    if profile_home:
+        from hermes_state import SessionDB
+
+        try:
+            db, close_db = SessionDB(db_path=Path(profile_home) / "state.db"), True
+        except Exception:
+            logger.debug("failed to open profile db for session", exc_info=True)
+    else:
+        db = _get_db()
+    try:
+        yield db
+    finally:
+        if close_db and db is not None:
+            with contextlib.suppress(Exception):
+                db.close()
+
+
 def _set_session_cwd(session: dict, cwd: str) -> str:
     resolved = os.path.abspath(os.path.expanduser(str(cwd)))
     if not os.path.isdir(resolved):
@@ -1651,6 +1680,22 @@ def _load_enabled_toolsets() -> list[str] | None:
     cfg = None
     fallback_notice = None
 
+    # Coding posture (base Hermes): with no explicit pin, collapse to the
+    # coding toolset (+ enabled MCP servers) when sitting in a code workspace.
+    # The desktop app and `hermes --tui` both land here. See
+    # agent/coding_context.py. No config is loaded yet at this point, so we let
+    # coding_selection() load it lazily (cli.py passes its already-resolved
+    # CLI_CONFIG instead, purely to avoid a redundant read).
+    if not explicit:
+        try:
+            from agent.coding_context import coding_selection
+
+            selection = coding_selection(platform="tui")
+            if selection is not None:
+                return selection
+        except Exception:
+            pass
+
     try:
         from toolsets import validate_toolset
     except Exception:
@@ -4193,6 +4238,145 @@ def _(rid, params: dict) -> dict:
         return _err(rid, 5007, str(e))
 
 
+@method("handoff.request")
+def _(rid, params: dict) -> dict:
+    """Queue a handoff of this session to a messaging platform.
+
+    Desktop parity with the CLI ``/handoff`` command: we only write
+    ``handoff_state='pending'`` onto the persisted session row. The actual
+    transfer is performed by the separate ``hermes gateway`` process, whose
+    ``_handoff_watcher`` claims the row, re-binds the session to the platform's
+    home channel, and forges a synthetic turn. The desktop then polls
+    ``handoff.state`` for the terminal result.
+    """
+    session, err = _sess_nowait(params, rid)
+    if err:
+        return err
+    if session.get("running"):
+        return _err(
+            rid,
+            4009,
+            "session busy — wait for the current turn to finish, then retry the handoff",
+        )
+
+    platform_name = (params.get("platform", "") or "").strip().lower()
+    if not platform_name:
+        return _err(rid, 4023, "platform required")
+
+    # Validate against the live gateway config — an unconfigured platform or a
+    # missing home channel would leave the handoff pending forever, so reject
+    # up front with a clear, actionable message (mirrors cli.py).
+    try:
+        from gateway.config import Platform, load_gateway_config
+    except Exception as e:  # pragma: no cover — gateway pkg always ships
+        return _err(rid, 5021, f"could not load gateway config: {e}")
+    try:
+        platform = Platform(platform_name)
+    except (ValueError, KeyError):
+        return _err(rid, 4024, f"unknown platform '{platform_name}'")
+    try:
+        gw_config = load_gateway_config()
+    except Exception as e:
+        return _err(rid, 5021, f"could not load gateway config: {e}")
+    pcfg = gw_config.platforms.get(platform)
+    if not pcfg or not pcfg.enabled:
+        return _err(
+            rid,
+            4025,
+            f"platform '{platform_name}' is not configured/enabled in the gateway",
+        )
+    home = gw_config.get_home_channel(platform)
+    if not home or not home.chat_id:
+        return _err(
+            rid,
+            4026,
+            f"no home channel configured for {platform_name} — set one with "
+            "/sethome on the destination chat first",
+        )
+
+    # The watcher transfers a persisted DB row, so make sure one exists even
+    # for a brand-new empty chat (mirrors the CLI's set_session_title stub).
+    _ensure_session_db_row(session)
+
+    with _session_db(session) as db:
+        if db is None:
+            return _db_unavailable_error(rid, code=5007)
+        key = session["session_key"]
+        try:
+            if not db.get_session(key):
+                db.set_session_title(key, f"handoff-{key[:8]}")
+            ok = db.request_handoff(key, platform_name)
+        except Exception as e:
+            return _err(rid, 5007, str(e))
+
+    if not ok:
+        return _err(
+            rid,
+            4027,
+            "session is already in flight for handoff — wait for it to settle, then retry",
+        )
+    return _ok(
+        rid,
+        {
+            "queued": True,
+            "session_key": key,
+            "platform": platform_name,
+            "home_name": home.name,
+        },
+    )
+
+
+@method("handoff.state")
+def _(rid, params: dict) -> dict:
+    """Poll the handoff state for a session.
+
+    Returns ``{state, platform, error}`` where ``state`` is one of
+    ``pending|running|completed|failed`` (or empty when no handoff record
+    exists). Desktop polls this after ``handoff.request``.
+    """
+    session, err = _sess_nowait(params, rid)
+    if err:
+        return err
+    with _session_db(session) as db:
+        if db is None:
+            return _db_unavailable_error(rid, code=5007)
+        record = db.get_handoff_state(session["session_key"])
+
+    record = record or {}
+    return _ok(
+        rid,
+        {
+            "state": record.get("state") or "",
+            "platform": record.get("platform") or "",
+            "error": record.get("error") or "",
+        },
+    )
+
+
+@method("handoff.fail")
+def _(rid, params: dict) -> dict:
+    """Mark an in-flight handoff as failed so the user can retry.
+
+    Desktop calls this when its bounded poll times out. Only pending/running
+    rows are changed so a late success from the gateway watcher is not clobbered.
+    """
+    session, err = _sess_nowait(params, rid)
+    if err:
+        return err
+    reason = str(params.get("error") or "handoff failed").strip()[:500]
+    with _session_db(session) as db:
+        if db is None:
+            return _db_unavailable_error(rid, code=5007)
+        key = session["session_key"]
+        record = db.get_handoff_state(key) or {}
+        state = record.get("state") or ""
+        if state in {"pending", "running"}:
+            db.fail_handoff(key, reason)
+            return _ok(rid, {"failed": True, "state": "failed"})
+
+    return _ok(rid, {"failed": False, "state": state})
+
+
 @method("session.usage")
 def _(rid, params: dict) -> dict:
     session, err = _sess_nowait(params, rid)
diff --git a/ui-tui/src/__tests__/appChromeStatusRule.test.tsx b/ui-tui/src/__tests__/appChromeStatusRule.test.tsx
index 3381b4b8e4e..5bbd14bbdce 100644
--- a/ui-tui/src/__tests__/appChromeStatusRule.test.tsx
+++ b/ui-tui/src/__tests__/appChromeStatusRule.test.tsx
@@ -260,3 +260,71 @@ describe('StatusRule credits notice render priority', () => {
     expect(textContent(element)).toContain('opus 4.8')
   })
 })
+
+describe('StatusRule idle-since read-out', () => {
+  // IdleSince uses hooks, so it can't be invoked outside a renderer. Search the
+  // element tree StatusRule returns for a function component with that name
+  // instead (same reason the duration tests don't check SessionDuration's text).
+  const findComponentByName = (node: ReactNodeLike, name: string): React.ReactElement | null => {
+    if (node === null || node === undefined || typeof node === 'boolean') {
+      return null
+    }
+
+    if (Array.isArray(node)) {
+      for (const child of node) {
+        const found = findComponentByName(child, name)
+
+        if (found) {
+          return found
+        }
+      }
+
+      return null
+    }
+
+    if (!React.isValidElement(node)) {
+      return null // non-element leaves (e.g. text) cannot contain the component
+    }
+
+    if (typeof node.type === 'function' && node.type.name === name) {
+      return node
+    }
+
+    return findComponentByName(node.props.children, name) // only `children` is searched
+  }
+
+  it('shows time since the last final agent response when idle', () => {
+    const endedAt = Date.now() - 42_000
+    const element = StatusRule({
+      ...baseProps,
+      lastTurnEndedAt: endedAt,
+      sessionStartedAt: Date.now() - 60_000
+    })
+
+    const idle = findComponentByName(element, 'IdleSince')
+
+    expect(idle).not.toBeNull()
+    expect(idle!.props.endedAt).toBe(endedAt)
+  })
+
+  it('is hidden while a turn is busy', () => {
+    const element = StatusRule({
+      ...baseProps,
+      busy: true,
+      lastTurnEndedAt: Date.now() - 42_000,
+      turnStartedAt: Date.now()
+    })
+
+    expect(findComponentByName(element, 'IdleSince')).toBeNull()
+  })
+
+  it('is hidden before the first turn completes', () => {
+    const element = StatusRule({
+      ...baseProps,
+      lastTurnEndedAt: null,
+      sessionStartedAt: Date.now() - 60_000
+    })
+
+    expect(findComponentByName(element, 'IdleSince')).toBeNull()
+  })
+})
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index 30c62e03590..f7297c151da 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -368,6 +368,7 @@ export interface AppLayoutProgressProps {
 export interface AppLayoutStatusProps {
   cwdLabel: string
   goodVibesTick: number
+  lastTurnEndedAt: null | number
   sessionStartedAt: null | number
   showStickyPrompt: boolean
   statusColor: string
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index 3bd981b36cf..d11e8e08dba 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -173,6 +173,7 @@ export function useMainApp(gw: GatewayClient) {
   const [voiceRecordKey, setVoiceRecordKey] = useState(DEFAULT_VOICE_RECORD_KEY)
   const [sessionStartedAt, setSessionStartedAt] = useState(() => Date.now())
   const [turnStartedAt, setTurnStartedAt] = useState(null)
+  const [lastTurnEndedAt, setLastTurnEndedAt] = useState(null)
   const [goodVibesTick, setGoodVibesTick] = useState(0)
   const [bellOnComplete, setBellOnComplete] = useState(false)
 
@@ -500,10 +501,14 @@ export function useMainApp(gw: GatewayClient) {
   useEffect(() => {
     if (ui.busy) {
       setTurnStartedAt(prev => prev ?? Date.now())
-    } else {
+    } else if (turnStartedAt != null) {
+      // Only stamp the idle marker when a turn was actually live — busy is
+      // also false on mount and we don't want a phantom "done" timestamp
+      // before the first turn has completed.
+      setLastTurnEndedAt(Date.now())
       setTurnStartedAt(null)
     }
-  }, [ui.busy])
+  }, [ui.busy, turnStartedAt])
 
   useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, setVoiceRecordKey, sid: ui.sid })
 
@@ -1090,6 +1095,7 @@ export function useMainApp(gw: GatewayClient) {
       // essentials and truncates this further on narrow terminals.
       cwdLabel: fmtCwdBranch(cwd, gitBranch, 28),
       goodVibesTick,
+      lastTurnEndedAt: ui.sid ? lastTurnEndedAt : null,
       sessionStartedAt: ui.sid ? sessionStartedAt : null,
       showStickyPrompt: !!stickyPrompt,
       statusColor: statusColorOf(ui.status, ui.theme.color),
@@ -1103,6 +1109,7 @@ export function useMainApp(gw: GatewayClient) {
       cwd,
       gitBranch,
       goodVibesTick,
+      lastTurnEndedAt,
       sessionStartedAt,
       stickyPrompt,
       turnStartedAt,
diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx
index a420d815341..007fd356355 100644
--- a/ui-tui/src/components/appChrome.tsx
+++ b/ui-tui/src/components/appChrome.tsx
@@ -341,6 +341,21 @@ function SessionDuration({ startedAt }: { startedAt: number }) {
   return fmtDuration(now - startedAt)
 }
 
+function IdleSince({ endedAt }: { endedAt: number }) {
+  // Elapsed time since the last final agent response. A one-second interval
+  // keeps the read-out live while the session idles, like SessionDuration.
+  const [now, setNow] = useState(() => Date.now())
+
+  useEffect(() => {
+    const tick = () => setNow(Date.now())
+    tick()
+    const timer = setInterval(tick, 1000)
+    return () => clearInterval(timer)
+  }, [endedAt])
+
+  return `✓ ${fmtDuration(now - endedAt)}`
+}
+
 const effortLabel = (effort?: string) => {
   const value = String(effort ?? '')
     .trim()
@@ -400,6 +415,7 @@ export function StatusRule({
   notice,
   usage,
   bgCount,
+  lastTurnEndedAt,
   liveSessionCount,
   sessionStartedAt,
   showCost,
@@ -488,6 +504,10 @@ export function StatusRule({
 
   const showBar = !!bar && fits(SEP + stringWidth(`[${bar}] ${pct != null ? `${pct}%` : ''}`))
   const showDuration = segs.duration && !!sessionStartedAt && fits(SEP + MAX_DURATION_WIDTH)
+  // Idle clock — time since the last final agent response. Hidden while busy
+  // (the FaceTicker's elapsed tail covers the live turn) and before the first
+  // turn completes. Shares the duration breakpoint and width reservation.
+  const showIdle = segs.duration && !busy && lastTurnEndedAt != null && fits(SEP + stringWidth('✓ ') + MAX_DURATION_WIDTH)
   const showCompressions = segs.compressions && compressions > 0 && fits(SEP + stringWidth(`cmp ${compressions}`))
   const showVoice = segs.voice && !!voiceLabel && fits(SEP + stringWidth(voiceLabel))
   const showSessionCount = !!sessionCountText && fits(SEP + stringWidth(sessionCountText))
@@ -567,6 +587,12 @@ export function StatusRule({
             
           
         ) : null}
+        {showIdle ? (
+          
+            {' │ '}
+            
+          
+        ) : null}
         {showCompressions ? (
           
             {' │ '}
@@ -725,6 +751,7 @@ export function TranscriptScrollbar({ scrollRef, t }: TranscriptScrollbarProps)
 
 interface StatusRuleProps {
   bgCount: number
+  lastTurnEndedAt?: null | number
   liveSessionCount: number
   busy: boolean
   cols: number
diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx
index b93e2045c7e..d54f5c6da90 100644
--- a/ui-tui/src/components/appLayout.tsx
+++ b/ui-tui/src/components/appLayout.tsx
@@ -366,6 +366,7 @@ const StatusRulePane = memo(function StatusRulePane({
         cols={composer.cols}
         cwdLabel={status.cwdLabel}
         indicatorStyle={ui.indicatorStyle}
+        lastTurnEndedAt={status.lastTurnEndedAt}
         liveSessionCount={ui.liveSessionCount}
         model={ui.info?.model ?? ''}
         modelFast={ui.info?.fast || ui.info?.service_tier === 'priority'}
diff --git a/uv.lock b/uv.lock
index 55d7da4a4a8..f90a3a4270c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1653,7 +1653,7 @@ requires-dist = [
     { name = "numpy", marker = "extra == 'voice'", specifier = "==2.4.3" },
     { name = "openai", specifier = "==2.24.0" },
     { name = "packaging", specifier = "==26.0" },
-    { name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.4.2" },
+    { name = "parallel-web", marker = "extra == 'parallel-web'", specifier = "==0.6.0" },
     { name = "pathspec", specifier = "==1.1.1" },
     { name = "pillow", specifier = "==12.2.0" },
     { name = "prompt-toolkit", specifier = "==3.0.52" },
@@ -2690,7 +2690,7 @@ wheels = [
 
 [[package]]
 name = "parallel-web"
-version = "0.4.2"
+version = "0.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2700,9 +2700,9 @@ dependencies = [
     { name = "sniffio" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/24/50/fb9b28a679e01682006b5259abff96de3d16e114e9447a7793fec31715de/parallel_web-0.4.2.tar.gz", hash = "sha256:599b5a8f387dc35c7dc8c81e372eadf6958a40acacea58bf170dfc663c003da7", size = 140026, upload-time = "2026-03-09T22:24:35.448Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/81/101c961fe6665212df01fb39a70ebb379dc33529c7bc9210675c0f525139/parallel_web-0.6.0.tar.gz", hash = "sha256:f8aecd3f1958090090c4516881cefea4f55c40948ba3bb99217ca9a6d4263225", size = 173149, upload-time = "2026-05-06T19:13:09.782Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a0/3e/2218fa29637781b8e7ac35a928108ff2614ddd40879389d3af2caa725af5/parallel_web-0.4.2-py3-none-any.whl", hash = "sha256:aa3a4a9aecc08972c5ce9303271d4917903373dff4dd277d9a3e30f9cff53346", size = 144012, upload-time = "2026-03-09T22:24:33.979Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/7c/7e8b63a0e90efaf567a818fca86c6ad3a85711f8995d2657b51b0cae2351/parallel_web-0.6.0-py3-none-any.whl", hash = "sha256:dc5342ef7262bd2e9f85eb7eace32833bd3d7e3af0bf5fbd780d1ea8c8d9ceb0", size = 199217, upload-time = "2026-05-06T19:13:08.316Z" },
 ]
 
 [[package]]
diff --git a/web/src/App.tsx b/web/src/App.tsx
index 52108a22cec..d3c976358d5 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -64,6 +64,10 @@ import { useBelowBreakpoint } from "@nous-research/ui/hooks/use-below-breakpoint
 import { useSidebarStatus } from "@/hooks/useSidebarStatus";
 import { AuthWidget } from "@/components/AuthWidget";
 import { PageHeaderProvider } from "@/contexts/PageHeaderProvider";
+import { ProfileProvider } from "@/contexts/ProfileProvider";
+import { useProfileScope } from "@/contexts/useProfileScope";
+import { ProfileSwitcher } from "@/components/ProfileSwitcher";
+import { ProfileScopeBanner } from "@/components/ProfileScopeBanner";
 import { useSystemActions } from "@/contexts/useSystemActions";
 import type { SystemAction } from "@/contexts/system-actions-context";
 import ConfigPage from "@/pages/ConfigPage";
@@ -474,6 +478,7 @@ export default function App() {
   }, []);
 
   return (
+    
     
+
@@ -602,6 +608,8 @@ export default function App() {
+ +
+ ); } +/** + * Remounts the entire routed page tree when the global management profile + * changes. Pages load their data on mount; without this, a page opened + * under profile A would keep showing A's state while writes (via the + * fetchJSON ?profile= injection) silently targeted the newly selected + * profile B — the exact stale-target footgun the switcher exists to kill. + * Keying by profile resets every page's local state so it refetches under + * the new scope. The persistent ChatPage host below handles its own + * remount (channel keyed on scopedProfile). + */ +function ProfileKeyedRoutes({ children }: { children: ReactNode }) { + const { profile } = useProfileScope(); + return
{children}
; +} + function SidebarNavLink({ closeMobile, collapsed, diff --git a/web/src/components/ProfileScopeBanner.tsx b/web/src/components/ProfileScopeBanner.tsx new file mode 100644 index 00000000000..9d5adc2fdfd --- /dev/null +++ b/web/src/components/ProfileScopeBanner.tsx @@ -0,0 +1,30 @@ +import { Users } from "lucide-react"; +import { useProfileScope } from "@/contexts/useProfileScope"; +import { useI18n } from "@/i18n"; + +/** + * App-wide amber banner shown while the global switcher targets a profile + * OTHER than the dashboard's own — every management write (config, keys, + * skills, MCPs, model) and new Chat sessions land in that profile. + */ +export function ProfileScopeBanner() { + const { profile, currentProfile } = useProfileScope(); + const { t } = useI18n(); + + if (!profile || profile === currentProfile) return null; + + return ( + // mt-14 on mobile clears the fixed lg:hidden header (h-14, z-40) so the + // scope banner — the main safety signal for scoped writes — is never + // hidden behind it; lg:mt-0 restores desktop flow. +
+ + + {( + t.app.managingProfileBanner ?? + "Managing profile “{name}” — config, keys, skills, MCPs, model, and new chats apply to that profile." + ).replace("{name}", profile)} + +
+ ); +} diff --git a/web/src/components/ProfileSwitcher.tsx b/web/src/components/ProfileSwitcher.tsx new file mode 100644 index 00000000000..827ea881f6f --- /dev/null +++ b/web/src/components/ProfileSwitcher.tsx @@ -0,0 +1,67 @@ +import { Users } from "lucide-react"; +import { useProfileScope } from "@/contexts/useProfileScope"; +import { useI18n } from "@/i18n"; +import { cn } from "@/lib/utils"; + +/** + * The machine dashboard's single write-target selector. + * + * Rendered in the sidebar above the nav. Every management page (Config, + * Keys, Skills, MCP, Models) reads/writes the selected profile via the + * fetchJSON ?profile= injection. Hidden when only one profile exists. + */ +export function ProfileSwitcher({ collapsed }: { collapsed?: boolean }) { + const { profile, currentProfile, profiles, setProfile } = useProfileScope(); + const { t } = useI18n(); + + if (profiles.length < 2) return null; + + const managed = profile || currentProfile || "default"; + const isOther = !!profile && profile !== currentProfile; + + return ( +
+ + + {collapsed && ( + {managed} + )} +
+ ); +} diff --git a/web/src/components/SkillEditorDialog.tsx b/web/src/components/SkillEditorDialog.tsx new file mode 100644 index 00000000000..9981fe79dc8 --- /dev/null +++ b/web/src/components/SkillEditorDialog.tsx @@ -0,0 +1,215 @@ +import { useEffect, useState } from "react"; +import { api } from "@/lib/api"; +import { Button } from "@nous-research/ui/ui/components/button"; +import { Input } from "@nous-research/ui/ui/components/input"; +import { Label } from "@nous-research/ui/ui/components/label"; +import { Spinner } from "@nous-research/ui/ui/components/spinner"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from "@nous-research/ui/ui/components/dialog"; + +/* ------------------------------------------------------------------ */ +/* SkillEditorDialog — create or edit a SKILL.md from the dashboard */ +/* */ +/* Headless/VPS users have no editor besides this: the only other way */ +/* to author a custom skill is SSH + a terminal editor. Create mode */ +/* posts a brand-new skill (name + optional category + SKILL.md); */ +/* edit mode loads the existing SKILL.md raw text and rewrites it. */ +/* Validation (frontmatter, name, size) happens server-side via the */ +/* same path the agent's skill_manage tool uses, so errors come back */ +/* as actionable messages rendered inline. */ +/* ------------------------------------------------------------------ */ + +const CREATE_TEMPLATE = `--- +name: my-skill +description: One-line description of when to use this skill. +--- + +# My Skill + +Numbered steps, exact commands, and pitfalls go here. +`; + +export interface SkillEditorDialogProps { + open: boolean; + /** Skill name to edit, or null for create mode. */ + editName: string | null; + /** Profile to scope reads/writes to ("" = the dashboard's own profile). */ + profile?: string; + onClose: () => void; + /** Called after a successful save so the page can refresh its list. 
*/ + onSaved: (name: string) => void; +} + +export function SkillEditorDialog({ + open, + editName, + profile, + onClose, + onSaved, +}: SkillEditorDialogProps) { + // The body is remounted via `key` every time the dialog opens or the + // target skill changes, so all form state initializes through useState + // initializers — no reset-on-open effect (react-hooks/set-state-in-effect). + return ( + !o && onClose()}> + + {open && ( + + )} + + + ); +} + +function EditorBody({ + editName, + profile, + onClose, + onSaved, +}: Omit) { + const isEdit = editName !== null; + const [name, setName] = useState(""); + const [category, setCategory] = useState(""); + const [content, setContent] = useState(isEdit ? "" : CREATE_TEMPLATE); + const [loading, setLoading] = useState(isEdit); + const [saving, setSaving] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + if (!editName) return; + let cancelled = false; + api + .getSkillContent(editName, profile || undefined) + .then((res) => !cancelled && setContent(res.content)) + .catch((e) => !cancelled && setError(String(e))) + .finally(() => !cancelled && setLoading(false)); + return () => { + cancelled = true; + }; + }, [editName, profile]); + + const handleSave = async () => { + setError(null); + if (!isEdit && !name.trim()) { + setError("Skill name is required."); + return; + } + if (!content.trim()) { + setError("SKILL.md content is required."); + return; + } + setSaving(true); + try { + if (isEdit) { + await api.updateSkillContent(editName, content, profile || undefined); + onSaved(editName); + } else { + const trimmed = name.trim(); + await api.createSkill( + { + name: trimmed, + content, + category: category.trim() || undefined, + }, + profile || undefined, + ); + onSaved(trimmed); + } + onClose(); + } catch (e) { + setError(String(e)); + } finally { + setSaving(false); + } + }; + + return ( + <> + + + {isEdit ? `Edit skill: ${editName}` : "New skill"} + + + {isEdit + ? 
"Rewrite this skill's SKILL.md. Frontmatter (name, description) is validated on save." + : "Author a custom skill — YAML frontmatter plus markdown instructions. It becomes available to the agent and attachable to cron jobs."} + + + +
+ {!isEdit && ( +
+
+ + setName(e.target.value)} + /> +
+
+ + setCategory(e.target.value)} + /> +
+
+ )} + +
+ + {loading ? ( +
+ +
+ ) : ( +