Merge remote-tracking branch 'origin/main' into hermes/salvage-composer-drafts

This commit is contained in:
Brooklyn Nicholson 2026-06-11 00:07:07 -05:00
commit 419c8a98a9
40 changed files with 2678 additions and 840 deletions

View file

@ -1571,6 +1571,15 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
if ptype == "input_text":
block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
elif ptype == "text":
# A stored Anthropic text block. Rebuild from whitelisted fields only —
# SDK response text blocks carry output-only siblings (parsed_output,
# citations=None) that the Messages INPUT schema rejects with HTTP 400
# "Extra inputs are not permitted". Do NOT dict(part) it verbatim.
block = {"type": "text", "text": part.get("text", "")}
cits = part.get("citations")
if isinstance(cits, list) and cits:
block["citations"] = cits
elif ptype in {"image_url", "input_image"}:
image_value = part.get("image_url", {})
url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
@ -1685,6 +1694,58 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
return out
def _sanitize_replay_block(b: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Strip output-only fields from a stored Anthropic content block so it is
valid as REQUEST input on replay.
The SDK response objects carry output-only attributes that the Messages
*input* schema forbids ("Extra inputs are not permitted"): text blocks get
``parsed_output``/``citations`` (when null), tool_use blocks get ``caller``,
etc. ``normalize_response`` captured blocks verbatim via ``_to_plain_data``,
so these leak back as input on the next turn HTTP 400.
Whitelist per type (NOT a blacklist) so future SDK output-only fields can't
reintroduce the bug. Returns a clean block, or None to drop it.
"""
if not isinstance(b, dict):
return None
btype = b.get("type")
if btype == "text":
out: Dict[str, Any] = {"type": "text", "text": b.get("text", "")}
# citations is input-valid ONLY when it's a non-empty list; the SDK
# emits citations=None on responses, which the input schema rejects.
cits = b.get("citations")
if isinstance(cits, list) and cits:
out["citations"] = cits
if isinstance(b.get("cache_control"), dict):
out["cache_control"] = b["cache_control"]
return out
if btype == "thinking":
out = {"type": "thinking", "thinking": b.get("thinking", "")}
if b.get("signature"):
out["signature"] = b["signature"]
return out
if btype == "redacted_thinking":
# Only valid with its data payload; drop if missing.
return {"type": "redacted_thinking", "data": b["data"]} if b.get("data") else None
if btype == "tool_use":
out = {
"type": "tool_use",
"id": _sanitize_tool_id(b.get("id", "")),
"name": b.get("name", ""),
"input": b.get("input", {}),
}
if isinstance(b.get("cache_control"), dict):
out["cache_control"] = b["cache_control"]
return out
if btype == "image":
src = b.get("source")
return {"type": "image", "source": src} if isinstance(src, dict) else None
# Unknown/unsupported block type on the input path — drop rather than risk
# another "Extra inputs are not permitted".
return None
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
"""Convert an assistant message to Anthropic content blocks.
@ -1692,6 +1753,55 @@ def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
reasoning_content injection for Kimi/DeepSeek endpoints.
"""
content = m.get("content", "")
# Anthropic interleaved-thinking fast path: when this turn carries a
# verbatim, order-preserving block list (set by normalize_response only
# for turns that interleave SIGNED thinking with tool_use), replay it.
# Each block is run through _sanitize_replay_block to strip output-only
# SDK fields (parsed_output, caller, citations=None, …) that the Messages
# INPUT schema forbids — replaying them verbatim caused HTTP 400 "Extra
# inputs are not permitted" (text.parsed_output). Block ORDER is preserved
# (the reason this channel exists); only forbidden sibling fields are
# dropped, leaving thinking signatures and tool_use id/name/input intact.
ordered_blocks = m.get("anthropic_content_blocks")
if isinstance(ordered_blocks, list) and ordered_blocks:
# Re-source each tool_use input from the stored tool_calls map rather
# than the captured block. The ordered-blocks list captures tool_use
# input from the RAW API response (normalize_response), which is NOT
# credential-redacted; tool_calls[].function.arguments IS redacted at
# storage time (build_assistant_message, #19798). Replaying the raw
# block input would resurrect a secret the model inlined into a tool
# call (e.g. terminal(command="curl -H 'Authorization: Bearer sk-...'")
# onto the wire, even though the same value is redacted everywhere else
# in history. Keying by sanitized tool id preserves interleave order
# (the reason this channel exists) while swapping in the redacted
# input. Adapted from #36071 (replay-time tool-input re-sourcing).
redacted_input_by_id: Dict[str, Any] = {}
for tc in m.get("tool_calls", []) or []:
if not isinstance(tc, dict):
continue
fn = tc.get("function", {}) or {}
raw_args = fn.get("arguments", "{}")
try:
parsed_args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
except (json.JSONDecodeError, ValueError):
parsed_args = {}
redacted_input_by_id[_sanitize_tool_id(tc.get("id", ""))] = parsed_args
replayed: List[Dict[str, Any]] = []
for b in ordered_blocks:
clean = _sanitize_replay_block(b)
if clean is None:
continue
if clean.get("type") == "tool_use":
# Override raw (un-redacted) input with the redacted copy when
# we have one for this id; fall back to the sanitized block
# input only if the tool_call is missing (shape mismatch).
redacted = redacted_input_by_id.get(clean.get("id", ""))
if redacted is not None:
clean["input"] = redacted
replayed.append(clean)
if replayed:
return {"role": "assistant", "content": replayed}
blocks = _extract_preserved_thinking_blocks(m)
if content:
if isinstance(content, list):

View file

@ -952,6 +952,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
if preserved:
msg["reasoning_details"] = preserved
# Anthropic interleaved-thinking replay: when a turn interleaves signed
# thinking blocks with tool_use, the parallel reasoning_details +
# tool_calls fields lose the cross-type ordering, and reconstruction
# front-loads thinking — reordering signed blocks and triggering HTTP 400
# ("thinking ... blocks in the latest assistant message cannot be
# modified"). Carry the verbatim ordered block list so the adapter can
# replay the latest assistant message unchanged. See
# agent/transports/anthropic.py and agent/anthropic_adapter.py.
ordered_blocks = getattr(assistant_message, "anthropic_content_blocks", None)
if ordered_blocks:
msg["anthropic_content_blocks"] = ordered_blocks
# Codex Responses API: preserve encrypted reasoning items for
# multi-turn continuity. These get replayed as input on the next turn.
codex_items = getattr(assistant_message, "codex_reasoning_items", None)

700
agent/coding_context.py Normal file
View file

@ -0,0 +1,700 @@
"""Coding-context awareness — base Hermes, every interactive surface.

When the user runs Hermes inside a code workspace (CLI, TUI, desktop app, or an
editor over ACP), Hermes shifts into a **coding posture**. This module is the
single place that decides whether we're in that posture and what it implies,
so the rest of the codebase never re-derives "are we coding?" on its own.

Architecture — one seam, many consumers
----------------------------------------
The posture is modelled as a frozen :class:`RuntimeMode` selected from a small
:class:`ContextProfile` registry (today: ``coding`` and ``general``). A profile
is *data* — it declares the toolset to collapse to, the operating brief to
inject, and hints for other domains (model routing, memory, subagents). Every
domain reads the same resolved object instead of probing git/config itself:

* **System prompt** — ``RuntimeMode.system_blocks()``: the operating brief +
  a live git/workspace snapshot (``agent/system_prompt.py``).
* **Toolset** — ``RuntimeMode.toolset_selection()``: the ``coding`` toolset
  plus the user's enabled MCP servers (``cli.py`` / ``tui_gateway``). Only
  under the opt-in ``focus`` mode: the default posture is prompt-only and
  never touches the user's configured toolsets (toolsets like messaging /
  smart-home / music are off-by-default anyway, and someone who explicitly
  enabled image-gen or Spotify shouldn't lose it for being in a git repo).
* **Delegation** — subagents inherit the parent's toolset and run through the
  same prompt builder, so the coding posture propagates to children for free.
* **Model / memory / compression** — declared on the profile
  (``model_hint``, ``memory_policy``) as the extension seam; consumers read
  ``mode.profile`` rather than re-deciding.

Cache safety
------------
The mode is resolved **once** and is immutable. The workspace snapshot is built
once at prompt-build time and baked into the *stable* system-prompt tier — never
re-probed per turn (that would shatter the prompt cache). Branch and dirty state
drift mid-session, so the brief tells the model to re-check with ``git`` before
acting on the snapshot. A ``/coding`` flip therefore only takes effect next
session (deferred), the same contract as ``/skills install`` vs ``--now``.

Activation (config ``agent.coding_context``):

* ``auto`` (default) — posture (brief + snapshot) on an interactive coding
  surface sitting in a code workspace (git repo or recognised project root).
  Prompt-only; toolsets untouched.
* ``focus`` — like ``auto``, but additionally collapses the toolset to the
  ``coding`` set + enabled MCP servers. Explicit opt-in for a lean schema.
* ``on`` — force the posture anywhere (incl. non-workspaces). Prompt-only.
* ``off`` — disable entirely.
"""
from __future__ import annotations
import json
import logging
import os
import re
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional
# Module logger, registered under the fixed name "hermes.coding_context".
logger = logging.getLogger("hermes.coding_context")
# Name of the toolset the coding profile declares (see CODING_PROFILE.toolset).
CODING_TOOLSET = "coding"
# Surfaces where a coding posture makes sense under ``auto``. Messaging
# platforms (telegram, discord, slack, …) are intentionally absent — a chat bot
# in a group is not pair-programming.
# The empty string is a member too; presumably it stands for "no platform
# given" — _detect_profile_name only consults this set for non-empty platforms.
INTERACTIVE_CODING_PLATFORMS = {"cli", "tui", "acp", "desktop", ""}
# Project-root signals that mark a directory as a code workspace even when it
# isn't (yet) a git repo. Cheap filename checks — no parsing.
_PROJECT_MARKERS = (
"pyproject.toml", "setup.py", "setup.cfg", "requirements.txt",
"package.json", "tsconfig.json", "deno.json",
"Cargo.toml", "go.mod", "pom.xml", "build.gradle", "build.gradle.kts",
"Gemfile", "composer.json", "mix.exs", "pubspec.yaml",
"CMakeLists.txt", "Makefile", "Dockerfile",
"AGENTS.md", "CLAUDE.md", ".cursorrules",
)
# Agent-instruction files surfaced separately from manifests in the snapshot.
# These also appear in _PROJECT_MARKERS; _project_facts filters them out of the
# "Project:" line and lists them under "Context files:" instead.
_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")
# Lockfile → package manager, checked in priority order.
_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv"))
_JS_LOCKFILES = (
("pnpm-lock.yaml", "pnpm"), ("bun.lockb", "bun"), ("bun.lock", "bun"),
("yarn.lock", "yarn"), ("package-lock.json", "npm"),
)
# package.json scripts / Makefile targets worth surfacing as verify commands.
_VERIFY_TARGETS = ("test", "tests", "lint", "typecheck", "check", "build", "fmt", "format")
# Cap on how many deduplicated verify commands _project_facts reports.
_MAX_VERIFY_COMMANDS = 8
# _read_small returns "" for any file larger than this many bytes (256 KiB).
_MAX_FACT_FILE_BYTES = 256 * 1024
# Timeout, in seconds, passed to subprocess.run for each git invocation.
_GIT_TIMEOUT = 2.5
# Per-model edit-format steering. Matching the edit tool format to how a model
# was trained reduces mistakes and wasted reasoning (OpenAI/Codex handle
# patch-style diffs best; Anthropic models — and most open-weight coding
# models, whose RL scaffolds use str_replace-style editors — do best with
# string-replacement). Our `patch` tool exposes both: mode="patch" (V4A
# multi-file) and mode="replace" (find-and-swap). We nudge each family toward
# its native format. Unknown families get nothing (the brief's neutral wording
# stands). Substrings match the model id; aligned with TOOL_USE_ENFORCEMENT_MODELS.
#
# Shape: family key -> (needles, guidance line). _model_family lowercases the
# model id and returns the first family (in insertion order, so "patch" is
# tried before "replace") with any needle occurring as a substring;
# _edit_format_line returns that family's guidance line.
_EDIT_FORMAT_GUIDANCE: dict[str, tuple[tuple[str, ...], str]] = {
"patch": (
("gpt", "codex"),
"- Edit format: author new files with `write_file`; for edits to "
"existing code prefer `patch` with `mode='patch'` (V4A multi-file diff) "
"for structured or multi-file changes — it's the diff format you handle "
"most reliably. Use `mode='replace'` for a single small swap.",
),
"replace": (
("claude", "sonnet", "opus", "haiku",
"gemini", "gemma", "deepseek", "qwen", "kimi", "glm", "grok",
"hermes", "llama", "mistral", "devstral", "minimax"),
"- Edit format: author new files with `write_file`; for edits to "
"existing code prefer `patch` in `mode='replace'` — match a unique "
"snippet and swap it. Reach for `mode='patch'` (V4A) only when an edit "
"genuinely spans several files at once.",
),
}
def _model_family(model: Optional[str]) -> Optional[str]:
"""Classify a model id into an edit-format family key, or ``None``.
Used to steer the coding posture toward the edit tool format a model was
trained on. Family-agnostic by design: an unrecognised model gets ``None``
and the operating brief's neutral edit wording applies.
"""
if not model:
return None
lowered = model.lower()
for family, (needles, _line) in _EDIT_FORMAT_GUIDANCE.items():
if any(n in lowered for n in needles):
return family
return None
def _edit_format_line(model: Optional[str]) -> str:
    """Return the edit-format guidance line for ``model``'s family.

    Returns ``""`` when ``_model_family`` does not recognise the model.
    """
    family = _model_family(model)
    if family is not None:
        _needles, guidance_line = _EDIT_FORMAT_GUIDANCE[family]
        return guidance_line
    return ""
# Operating brief for the coding posture. Tool names referenced here (read_file,
# search_files, patch, write_file, terminal, todo) are in the coding toolset and
# in _HERMES_CORE_TOOLS, so they're present on every surface this fires on.
#
# The text is one string built from adjacent literals. It becomes
# CODING_PROFILE.guidance; RuntimeMode.system_blocks() emits it as the first
# system block and, when the model family is recognised, appends that family's
# edit-format line to it after a newline.
CODING_AGENT_GUIDANCE = (
"You are a coding agent pairing with the user inside their codebase. "
"Operate like a careful senior engineer.\n"
"\n"
"Gather context first:\n"
"- Read the relevant files with `read_file` and locate code with "
"`search_files` before changing anything. Trace a symbol to its definition "
"and usages rather than guessing its shape.\n"
"- Batch independent lookups: when several reads/searches don't depend on "
"each other, issue them together in one turn instead of one at a time.\n"
"- Never invent files, symbols, APIs, or imports. If you haven't seen it in "
"the repo, go look. Don't assume a library is available — check the project "
"manifest (pyproject.toml / package.json / Cargo.toml / go.mod) and how "
"neighbouring files import it.\n"
"\n"
"Make changes through the tools, not the chat:\n"
"- Edit with `patch`/`write_file`. Do NOT print code blocks to the user as "
"a substitute for editing — apply the change, then summarise it. Only show "
"code when the user explicitly asks to see it.\n"
"- Match the project's existing style and conventions; AGENTS.md / "
"CLAUDE.md / .cursorrules already in context win over your defaults. Touch "
"only what the task needs — no drive-by refactors, renames, or reformatting "
"— and add any imports/dependencies your code requires.\n"
"- If an edit fails to apply, re-read the file to get the current exact "
"contents before retrying — don't repeat a stale patch. If the same region "
"fails twice, rewrite the enclosing function or file with `write_file` "
"instead of attempting a third patch.\n"
"\n"
"Verify, and know when to stop:\n"
"- Use `terminal` for git, builds, tests, and inspection. Run the relevant "
"tests/linter/build and confirm they pass before claiming the work is done.\n"
"- Fix root causes, not symptoms: when you find a bug, check sibling call "
"paths for the same flaw and fix the class, not just the reported site.\n"
"- When fixing linter/type errors on a file, stop after about three "
"attempts on the same file and ask the user rather than looping.\n"
"- Track multi-step work with `todo`. Reference code as `path:line` instead "
"of pasting whole files.\n"
"\n"
"Respect the user's repo: don't commit, push, or rewrite history unless "
"asked, and never read, print, or commit secrets — leave `.env` and "
"credential files alone unless the user explicitly asks. The Workspace "
"block below is a snapshot from session start — re-run `git status`/"
"`git branch` before relying on it. Be concise: lead with the change or "
"answer, not a preamble."
)
# ── Context profiles (declarative posture definitions) ──────────────────────
@dataclass(frozen=True)
class ContextProfile:
    """Declarative description of one operating posture.

    A profile holds no logic; consumers read its fields.

    Attributes:
        name: Registry key of the profile (e.g. ``"coding"``, ``"general"``).
        toolset: Toolset to collapse to (plus enabled MCP servers) when no
            explicit selection is pinned; ``None`` keeps the platform default.
        guidance: Operating brief injected into the stable system prompt;
            ``""`` injects nothing.
        model_hint: Routing preference key for smart model routing (an
            extension seam; not yet consumed by the router).
        memory_policy: Memory namespace/weighting hint (extension seam).
        hidden_skill_categories: Skill categories pruned from the
            system-prompt skill index while this posture is active. This is
            discovery-only — nothing is disabled: ``skills_list`` still
            returns the full catalog and ``skill_view`` loads anything.
            Deny-list semantics, so unknown/custom categories stay visible.
    """

    # Identity.
    name: str
    # Tooling and prompt content.
    toolset: Optional[str] = None
    guidance: str = ""
    # Hints for other domains.
    model_hint: Optional[str] = None
    memory_policy: str = "default"
    # Skill-index pruning (deny-list).
    hidden_skill_categories: tuple[str, ...] = ()
# Skill categories that are clearly not part of a coding workflow. Hidden from
# the prompt's skill index in the coding posture (deny-list — anything not
# listed here, incl. custom user categories, stays visible). Coding-adjacent
# categories (devops, github, mcp, data-science, diagramming, research,
# security, …) are intentionally absent.
_NON_CODING_SKILL_CATEGORIES = (
"apple", "communication", "cooking", "creative", "email", "finance",
"gaming", "gifs", "health", "media", "music", "note-taking",
"productivity", "shopping", "smart-home", "social-media", "travel",
"yuanbao",
)
# The no-op posture: every field other than the name keeps its default.
GENERAL_PROFILE = ContextProfile(name="general")
# The coding posture: coding toolset, operating brief, and the deny-list above.
CODING_PROFILE = ContextProfile(
name="coding",
toolset=CODING_TOOLSET,
guidance=CODING_AGENT_GUIDANCE,
model_hint="coding",
memory_policy="project",
hidden_skill_categories=_NON_CODING_SKILL_CATEGORIES,
)
# Registry keyed by profile name; get_profile() looks names up here and falls
# back to GENERAL_PROFILE for unknown names.
_PROFILES: dict[str, ContextProfile] = {
GENERAL_PROFILE.name: GENERAL_PROFILE,
CODING_PROFILE.name: CODING_PROFILE,
}
def get_profile(name: str) -> ContextProfile:
    """Look up a profile by name; unknown names yield the ``general`` profile."""
    try:
        return _PROFILES[name]
    except KeyError:
        return GENERAL_PROFILE
# ── Helpers ─────────────────────────────────────────────────────────────────
def _coding_mode(config: Optional[dict[str, Any]]) -> str:
"""Return the normalized ``agent.coding_context`` mode (auto/focus/on/off)."""
if config is None:
try:
from hermes_cli.config import load_config
config = load_config()
except Exception:
config = {}
raw = ((config or {}).get("agent", {}) or {}).get("coding_context", "auto")
mode = str(raw).strip().lower()
if mode in {"focus", "strict", "lean"}:
return "focus"
if mode in {"on", "true", "yes", "1", "always"}:
return "on"
if mode in {"off", "false", "no", "0", "never"}:
return "off"
return "auto"
def _resolve_cwd(cwd: Optional[str | Path]) -> Path:
if cwd:
return Path(cwd).expanduser()
try:
from agent.runtime_cwd import resolve_agent_cwd
return resolve_agent_cwd()
except Exception:
return Path(os.getcwd())
def _git_root(cwd: Path) -> Optional[Path]:
current = cwd.resolve()
for parent in [current, *current.parents]:
if (parent / ".git").exists():
return parent
return None
def _home() -> Optional[Path]:
try:
return Path.home().resolve()
except (OSError, RuntimeError):
return None
def _marker_root(cwd: Path) -> Optional[Path]:
    """Find the nearest ancestor that looks like a project root.

    Checks ``cwd`` and up to six of its parents for any ``_PROJECT_MARKERS``
    file, so a manifest in the workspace root still counts when the user sits
    in a subdirectory. ``$HOME`` itself is skipped — a Makefile or AGENTS.md
    in the home directory is global user config, not a project signal — but
    the walk continues past it.

    Returns:
        The first matching directory, or ``None``.
    """
    start = cwd.resolve()
    home_dir = _home()
    # Depths 0..6 inclusive: the directory itself plus six ancestors.
    for directory in [start, *start.parents][:7]:
        if directory == home_dir:
            continue
        if any((directory / marker).exists() for marker in _PROJECT_MARKERS):
            return directory
    return None
def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str:
    """Decide which profile name applies for this mode, surface and cwd.

    * ``off`` — always ``general``.
    * ``on`` — always ``coding``.
    * ``auto`` / ``focus`` — ``coding`` only when the platform (if given) is an
      interactive coding surface AND the cwd is a code workspace: inside a git
      repo, or under a recognised project root.

    A git repo rooted exactly at ``$HOME`` (the dotfiles pattern) does not
    count as a workspace; otherwise every session under a dotfiles-managed
    home directory would flip to the coding posture.

    Nothing is memoized here: detection is a few ``stat`` calls, and caching
    could pin a stale posture in a long-lived process that serves sessions
    from different working directories.
    """
    if mode == "off":
        return GENERAL_PROFILE.name
    if mode == "on":
        return CODING_PROFILE.name

    if platform:
        surface = platform.strip().lower()
        if surface not in INTERACTIVE_CODING_PLATFORMS:
            return GENERAL_PROFILE.name

    workdir = Path(cwd_str)
    repo_root = _git_root(workdir)
    # A repo rooted at $HOME is the dotfiles pattern, not a code workspace.
    in_repo = repo_root is not None and repo_root != _home()
    if in_repo or _marker_root(workdir) is not None:
        return CODING_PROFILE.name
    return GENERAL_PROFILE.name
# ── RuntimeMode (the seam) ──────────────────────────────────────────────────
@dataclass(frozen=True)
class RuntimeMode:
    """Resolved operating posture for one session (frozen dataclass).

    Produced by :func:`resolve_runtime_mode` and read by every domain that
    distinguishes coding from general operation. Resolve it once and hold it:
    re-resolving or mutating mid-session would break the prompt cache.
    """

    profile: ContextProfile
    surface: str
    cwd: Path
    # Normalized ``agent.coding_context`` mode this posture was resolved under
    # (auto/focus/on/off). Only ``focus`` enables the toolset collapse.
    config_mode: str = "auto"
    # Model id for the session (e.g. "anthropic/claude-opus-4.8"). Only used
    # to pick the edit-format nudge via ``_edit_format_line``; it is fixed for
    # the session, so it is cache-safe.
    model: Optional[str] = None

    @property
    def kind(self) -> str:
        """Name of the active profile."""
        return self.profile.name

    @property
    def is_coding(self) -> bool:
        """True when the active profile is the coding profile."""
        return self.profile.name == CODING_PROFILE.name

    def toolset_selection(self, config: Optional[dict[str, Any]] = None) -> Optional[list[str]]:
        """Return the toolset list for this posture, or ``None`` for the default.

        A list is returned only in ``focus`` mode and only when the profile
        declares a toolset: that toolset followed by the enabled MCP servers.
        In every other mode the posture is prompt-only and the user's
        configured toolsets are left alone.

        Callers apply the result only when the user has not pinned an explicit
        selection (``--toolsets``, ``HERMES_TUI_TOOLSETS``, …); a pin is never
        overridden.
        """
        if self.config_mode == "focus" and self.profile.toolset is not None:
            return [self.profile.toolset, *_enabled_mcp_servers(config)]
        return None

    def system_blocks(self) -> list[str]:
        """Return the stable system-prompt blocks for this posture.

        Empty outside the coding posture. Otherwise: the operating brief (with
        the model family's edit-format line appended after a newline when one
        applies, so it stays a single string) followed by the workspace
        snapshot, each only when non-empty.
        """
        if not self.is_coding:
            return []

        collected: list[str] = []
        guidance = self.profile.guidance
        if guidance:
            nudge = _edit_format_line(self.model)
            collected.append(f"{guidance}\n{nudge}" if nudge else guidance)

        snapshot = build_coding_workspace_block(self.cwd)
        if snapshot:
            collected.append(snapshot)
        return collected

    def hidden_skill_categories(self) -> frozenset[str]:
        """Return the skill categories to prune from the skill index (may be empty)."""
        return frozenset(self.profile.hidden_skill_categories)
def resolve_runtime_mode(
    *,
    platform: Optional[str] = None,
    cwd: Optional[str | Path] = None,
    config: Optional[dict[str, Any]] = None,
    model: Optional[str] = None,
) -> RuntimeMode:
    """Resolve the session's operating posture; the single entry point.

    The returned :class:`RuntimeMode` is frozen, so callers can resolve once
    per session and hold on to it. Detection itself is deliberately not
    memoized (see ``_detect_profile_name``), which keeps a long-lived process
    from pinning a stale posture.

    Args:
        platform: Surface name; normalized (stripped, lowercased) for
            detection but stored as given on ``RuntimeMode.surface``.
        cwd: Directory to inspect; resolved via ``_resolve_cwd`` when omitted.
        config: Parsed config; see ``_coding_mode`` for the ``None`` case.
        model: Model id, recorded only for edit-format guidance — it never
            influences detection.
    """
    surface = platform or ""
    workdir = _resolve_cwd(cwd)
    config_mode = _coding_mode(config)
    profile_name = _detect_profile_name(
        config_mode, surface.strip().lower(), str(workdir)
    )
    return RuntimeMode(
        profile=get_profile(profile_name),
        surface=surface,
        cwd=workdir,
        config_mode=config_mode,
        model=model,
    )
# ── Back-compat surface (thin wrappers over RuntimeMode) ────────────────────
def is_coding_context(
    *,
    platform: Optional[str] = None,
    cwd: Optional[str | Path] = None,
    config: Optional[dict[str, Any]] = None,
) -> bool:
    """Report whether the coding posture applies for these inputs.

    Thin wrapper: resolves a :class:`RuntimeMode` and returns ``is_coding``.
    """
    resolved_mode = resolve_runtime_mode(platform=platform, cwd=cwd, config=config)
    return resolved_mode.is_coding
def coding_selection(
    *,
    platform: Optional[str] = None,
    cwd: Optional[str | Path] = None,
    config: Optional[dict[str, Any]] = None,
) -> Optional[list[str]]:
    """Return the coding posture's toolset selection, if any.

    ``None`` unless the user opted into ``focus`` mode AND the posture is
    active — the default coding posture never overrides configured toolsets.
    Thin wrapper over ``RuntimeMode.toolset_selection``.
    """
    resolved_mode = resolve_runtime_mode(platform=platform, cwd=cwd, config=config)
    return resolved_mode.toolset_selection(config)
def coding_system_blocks(
    *,
    platform: Optional[str] = None,
    cwd: Optional[str | Path] = None,
    config: Optional[dict[str, Any]] = None,
    model: Optional[str] = None,
) -> list[str]:
    """Return the stable system-prompt blocks for the current posture.

    Empty when the general posture applies. ``model`` selects the edit-format
    nudge appended to the brief. Thin wrapper over
    ``RuntimeMode.system_blocks``.
    """
    resolved_mode = resolve_runtime_mode(
        platform=platform, cwd=cwd, config=config, model=model
    )
    return resolved_mode.system_blocks()
def coding_hidden_skill_categories(
    *,
    platform: Optional[str] = None,
    cwd: Optional[str | Path] = None,
    config: Optional[dict[str, Any]] = None,
) -> frozenset[str]:
    """Return the skill categories the active posture hides from the skill index.

    Empty outside the coding posture. Hiding is discovery-only: the skills
    stay loadable through ``skills_list`` / ``skill_view``. Thin wrapper over
    ``RuntimeMode.hidden_skill_categories``.
    """
    resolved_mode = resolve_runtime_mode(platform=platform, cwd=cwd, config=config)
    return resolved_mode.hidden_skill_categories()
def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]:
"""Names of MCP servers the user has enabled — kept in the coding posture.
MCP servers (figma, browser, tophat, ) are explicitly configured and part
of the coding workflow, not noise to strip.
"""
try:
from hermes_cli.config import read_raw_config
from hermes_cli.tools_config import _parse_enabled_flag
servers = read_raw_config().get("mcp_servers") or {}
return [
str(name)
for name, cfg in servers.items()
if isinstance(cfg, dict)
and _parse_enabled_flag(cfg.get("enabled", True), default=True)
]
except Exception:
return []
# ── git/workspace probe ─────────────────────────────────────────────────────
def _git(cwd: Path, *args: str) -> str:
    """Run ``git -C <cwd> <args…>`` and return its stripped stdout.

    Best-effort by design: the workspace snapshot must never break prompt
    construction, so every failure mode collapses to ``""``.

    Args:
        cwd: Directory git should operate in (passed via ``-C``).
        *args: Git sub-command and its arguments.

    Returns:
        Stripped standard output on exit status 0; ``""`` when git is
        missing, cannot be spawned, times out (``_GIT_TIMEOUT`` seconds), or
        exits non-zero.
    """
    try:
        out = subprocess.run(
            ["git", "-C", str(cwd), *args],
            capture_output=True,
            # Decode explicitly instead of relying on the locale codec
            # (text=True): with a non-UTF-8 locale, or a commit subject / path
            # containing undecodable bytes, the implicit decode raises
            # UnicodeDecodeError — a ValueError the except below does not
            # catch. errors="replace" keeps the probe failure-free.
            encoding="utf-8",
            errors="replace",
            timeout=_GIT_TIMEOUT,
        )
    except (OSError, subprocess.SubprocessError):
        return ""
    return out.stdout.strip() if out.returncode == 0 else ""
def _parse_status(porcelain: str) -> tuple[dict[str, str], dict[str, int]]:
"""Parse ``git status --porcelain=2 --branch`` into branch + counts."""
branch: dict[str, str] = {}
counts = {"staged": 0, "modified": 0, "untracked": 0, "conflicts": 0}
for line in porcelain.splitlines():
if line.startswith("# branch.head"):
branch["head"] = line.split(maxsplit=2)[-1]
elif line.startswith("# branch.upstream"):
branch["upstream"] = line.split(maxsplit=2)[-1]
elif line.startswith("# branch.ab"):
parts = line.split()
branch["ahead"], branch["behind"] = parts[2].lstrip("+"), parts[3].lstrip("-")
elif line.startswith(("1 ", "2 ")):
xy = line.split(maxsplit=2)[1]
if xy[0] != ".":
counts["staged"] += 1
if xy[1] != ".":
counts["modified"] += 1
elif line.startswith("u "):
counts["conflicts"] += 1
elif line.startswith("? "):
counts["untracked"] += 1
return branch, counts
def _read_small(path: Path) -> str:
"""Read a small text file, or ``""`` — never raises, never reads huge files."""
try:
if not path.is_file() or path.stat().st_size > _MAX_FACT_FILE_BYTES:
return ""
return path.read_text(encoding="utf-8", errors="replace")
except OSError:
return ""
def _project_facts(root: Path) -> list[str]:
    """Collect project facts for the workspace snapshot.

    The aim is to hand the model its *verify loop* up front — which manifest,
    which package manager, and the exact test/lint/build commands — instead of
    making it rediscover them every session. Cost is a set of ``is_file``
    checks plus reads of a few small files.

    Args:
        root: Project root directory to inspect.

    Returns:
        Zero to three bullet lines, in this order: ``- Project: …`` (manifests,
        at most six, with detected package managers in parentheses),
        ``- Verify: …`` (deduplicated commands, capped at
        ``_MAX_VERIFY_COMMANDS``) and ``- Context files: …``.
    """
    facts: list[str] = []

    # Manifests (agent-instruction files are reported separately below).
    manifests = [
        marker
        for marker in _PROJECT_MARKERS
        if marker not in _CONTEXT_FILES and (root / marker).is_file()
    ]
    all_lockfiles = (*_PY_LOCKFILES, *_JS_LOCKFILES)
    package_managers = [pm for lock, pm in all_lockfiles if (root / lock).is_file()]
    if manifests:
        project_line = f"- Project: {', '.join(manifests[:6])}"
        if package_managers:
            # dict.fromkeys dedupes while keeping first-seen order.
            project_line += f" ({'/'.join(dict.fromkeys(package_managers))})"
        facts.append(project_line)

    # Verify commands, gathered in a fixed order: script, JS, pytest, make.
    verify: list[str] = []
    if (root / "scripts" / "run_tests.sh").is_file():
        verify.append("scripts/run_tests.sh")

    package_json = root / "package.json"
    if package_json.is_file():
        try:
            scripts = json.loads(_read_small(package_json) or "{}").get("scripts") or {}
        except (json.JSONDecodeError, AttributeError):
            scripts = {}
        js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm")
        for target in _VERIFY_TARGETS:
            if target in scripts:
                verify.append(f"{js_pm} run {target}")

    has_pytest_ini = (root / "pytest.ini").is_file()
    if has_pytest_ini or "[tool.pytest" in _read_small(root / "pyproject.toml"):
        verify.append("pytest")

    makefile = _read_small(root / "Makefile")
    if makefile:
        for target in _VERIFY_TARGETS:
            if re.search(rf"^{re.escape(target)}\s*:", makefile, re.MULTILINE):
                verify.append(f"make {target}")

    if verify:
        deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
        facts.append(f"- Verify: {'; '.join(deduped)}")

    # Agent-instruction files present at the root.
    context_files = [name for name in _CONTEXT_FILES if (root / name).is_file()]
    if context_files:
        facts.append(f"- Context files: {', '.join(context_files)}")

    return facts
def _git_state_lines(root: Path) -> list[str]:
    """Build the git bullet lines (branch, worktree, status, commits) for ``root``."""
    lines: list[str] = []

    # With --branch, a successful porcelain-v2 status always emits "# branch.*"
    # header lines, so empty output means git is missing, failed, or timed
    # out. In that case the state is unknown: say nothing instead of
    # reporting a misleading "clean".
    porcelain = _git(root, "status", "--porcelain=2", "--branch")
    counts: Optional[dict[str, int]] = None
    if porcelain:
        branch, counts = _parse_status(porcelain)
        head = branch.get("head", "")
        if head == "(detached)":
            lines.append("- Branch: (detached HEAD)")
        elif head:
            line = f"- Branch: {head}"
            if branch.get("upstream"):
                line += f" \u2192 {branch['upstream']}"
            ahead, behind = branch.get("ahead", "0"), branch.get("behind", "0")
            if ahead != "0" or behind != "0":
                line += f" (ahead {ahead}, behind {behind})"
            lines.append(line)

    # Linked worktree: the per-worktree git dir differs from the shared common
    # dir. git prints these paths relative to ITS working directory (``root``,
    # because of ``-C``), so anchor them to ``root`` — resolving against the
    # process cwd would point at the wrong place whenever the two differ.
    # Joining is a no-op for absolute paths.
    git_dir = _git(root, "rev-parse", "--git-dir")
    common_dir = _git(root, "rev-parse", "--git-common-dir")
    if git_dir and common_dir:
        git_dir_path = (root / git_dir).resolve()
        common_dir_path = (root / common_dir).resolve()
        if git_dir_path != common_dir_path:
            lines.append(f"- Worktree: linked (primary tree at {common_dir_path.parent})")

    if counts is not None:
        dirty = [
            f"{n} {label}"
            for label, n in (
                ("staged", counts["staged"]),
                ("modified", counts["modified"]),
                ("untracked", counts["untracked"]),
                ("conflicts", counts["conflicts"]),
            )
            if n
        ]
        lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}")

    recent = _git(root, "log", "-3", "--pretty=%h %s")
    if recent:
        lines.append("- Recent commits:")
        lines.extend(f"  {c}" for c in recent.splitlines())
    return lines


def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str:
    """Build the workspace snapshot for the system prompt.

    Contains git state (branch, linked-worktree note, status counts, last
    three commits) when the cwd is inside a repo, followed by detected project
    facts (manifest, package manager, verify commands, context files) so
    marker-only (non-git) projects still get a snapshot.

    Args:
        cwd: Directory to inspect; resolved via ``_resolve_cwd`` when omitted.

    Returns:
        The snapshot as newline-joined bullet lines under a header, or ``""``
        when the cwd is neither in a git repo nor under a recognised project
        root. Branch and status lines are omitted when ``git status`` yields
        no output (git unavailable, failed, or timed out).
    """
    resolved = _resolve_cwd(cwd)
    git_root = _git_root(resolved)
    root = git_root or _marker_root(resolved)
    if root is None:
        return ""

    lines = [
        "Workspace (snapshot at session start — re-check with `git` before acting on it):",
        f"- Root: {root}",
    ]
    if git_root is not None:
        lines.extend(_git_state_lines(root))
    lines.extend(_project_facts(root))
    return "\n".join(lines)

View file

@ -1101,11 +1101,12 @@ def _skill_should_show(
def build_skills_system_prompt(
available_tools: "set[str] | None" = None,
available_toolsets: "set[str] | None" = None,
hidden_categories: "frozenset[str] | None" = None,
) -> str:
"""Build a compact skill index for the system prompt.
Two-layer cache:
1. In-process LRU dict keyed by (skills_dir, tools, toolsets)
1. In-process LRU dict keyed by (skills_dir, tools, toolsets, hidden)
2. Disk snapshot (``.skills_prompt_snapshot.json``) validated by
mtime/size manifest — survives process restarts
@ -1115,6 +1116,12 @@ def build_skills_system_prompt(
scanned alongside the local ``~/.hermes/skills/`` directory. External dirs
are read-only — they appear in the index but new skills are always created
in the local dir. Local skills take precedence when names collide.
``hidden_categories`` (e.g. from the coding posture — see
agent/coding_context.py) prunes whole categories from the rendered index.
Discovery-only: the snapshot stores everything, ``skills_list`` /
``skill_view`` still reach every skill, and a footer note tells the model
the full catalog exists.
"""
skills_dir = get_skills_dir()
external_dirs = get_all_skills_dirs()[1:] # skip local (index 0)
@ -1139,6 +1146,7 @@ def build_skills_system_prompt(
tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
_platform_hint,
tuple(sorted(disabled)),
tuple(sorted(hidden_categories or ())),
)
with _SKILLS_PROMPT_CACHE_LOCK:
cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@ -1272,6 +1280,26 @@ def build_skills_system_prompt(
except Exception as e:
logger.debug("Could not read external skill description %s: %s", desc_file, e)
# Posture-driven category pruning (e.g. non-coding skills while pairing on
# code). Match on the top-level category segment so nested categories
# ("social-media/twitter") are pruned with their parent.
hidden_note = ""
if hidden_categories:
before = sum(len(v) for v in skills_by_category.values())
skills_by_category = {
cat: entries
for cat, entries in skills_by_category.items()
if cat.split("/", 1)[0] not in hidden_categories
}
pruned = before - sum(len(v) for v in skills_by_category.values())
if pruned:
hidden_note = (
f"\n(Note: {pruned} skill(s) in categories unrelated to the "
"current coding context are not listed here. The full catalog "
"is available via skills_list if the user asks for something "
"outside this list.)"
)
if not skills_by_category:
result = ""
else:
@ -1320,6 +1348,7 @@ def build_skills_system_prompt(
"</available_skills>\n"
"\n"
"Only proceed without loading a skill if genuinely none are relevant to the task."
+ hidden_note
)
# ── Store in LRU cache ────────────────────────────────────────────

View file

@ -191,9 +191,21 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
)
if toolset
}
# Coding posture prunes non-coding skill categories from the index
# (discovery-only — skills_list/skill_view still reach everything).
_hidden_cats = frozenset()
try:
from agent.coding_context import coding_hidden_skill_categories
_hidden_cats = coding_hidden_skill_categories(
platform=agent.platform, cwd=resolve_context_cwd()
)
except Exception:
_hidden_cats = frozenset()
skills_prompt = _r.build_skills_system_prompt(
available_tools=agent.valid_tool_names,
available_toolsets=avail_toolsets,
hidden_categories=_hidden_cats or None,
)
else:
skills_prompt = ""
@ -221,6 +233,26 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
if _env_hints:
stable_parts.append(_env_hints)
# Coding posture (base Hermes, any interactive coding surface in a code
# workspace — see agent/coding_context.py). The operating brief + the live
# git/workspace snapshot are built once here and cached for the session;
# the snapshot is never re-probed per turn (that would break the prompt
# cache), so the brief tells the model to re-check git before relying on it.
if agent.valid_tool_names:
try:
from agent.coding_context import coding_system_blocks
stable_parts.extend(
coding_system_blocks(
platform=agent.platform,
cwd=resolve_context_cwd(),
model=agent.model,
)
)
except Exception:
# Coding-context probing must never block prompt build.
pass
# Local Python toolchain probe — names python/pip/uv/PEP-668 state when
# something is non-default so the model can pick the right install
# strategy without discovering by failure. Emits a single line; emits

View file

@ -84,7 +84,7 @@ class AnthropicTransport(ProviderTransport):
to OpenAI finish_reason, and collects reasoning_details in provider_data.
"""
import json
from agent.anthropic_adapter import _to_plain_data
from agent.anthropic_adapter import _to_plain_data, _sanitize_replay_block
from agent.transports.types import ToolCall
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
@ -94,14 +94,40 @@ class AnthropicTransport(ProviderTransport):
reasoning_parts = []
reasoning_details = []
tool_calls = []
# Verbatim, order-preserving copy of every content block in the turn.
# Anthropic signs each thinking block against the turn content that
# PRECEDES it at its position; when a turn interleaves thinking and
# tool_use (adaptive/interleaved thinking, Claude 4.6+), the parallel
# reasoning_details + tool_calls lists below lose that cross-type
# ordering. Replaying the latest assistant message in the wrong order
# invalidates the signatures -> HTTP 400 "thinking ... blocks in the
# latest assistant message cannot be modified". Preserve the exact
# block sequence here so the adapter can replay it unchanged. See
# tests/agent/test_anthropic_thinking_block_order.py.
ordered_blocks = []
for block in response.content:
block_dict = _to_plain_data(block)
clean_block = None
if isinstance(block_dict, dict):
# Sanitize at capture so output-only SDK fields (parsed_output,
# caller, citations=None, …) never persist to state.db and leak
# back as request input on replay → HTTP 400 "Extra inputs are
# not permitted". Defence-in-depth with the replay-side sanitize.
clean_block = _sanitize_replay_block(block_dict)
if clean_block is not None:
ordered_blocks.append(clean_block)
if block.type == "text":
text_parts.append(block.text)
elif block.type == "thinking":
reasoning_parts.append(block.thinking)
block_dict = _to_plain_data(block)
if isinstance(block_dict, dict):
elif block.type in ("thinking", "redacted_thinking"):
if block.type == "thinking":
reasoning_parts.append(block.thinking)
# Use the sanitized block (clean_block) for reasoning_details too,
# since _extract_preserved_thinking_blocks replays these on the
# non-ordered path. Falls back to raw only if sanitize dropped it.
if isinstance(clean_block, dict):
reasoning_details.append(clean_block)
elif isinstance(block_dict, dict):
reasoning_details.append(block_dict)
elif block.type == "tool_use":
name = block.name
@ -130,6 +156,23 @@ class AnthropicTransport(ProviderTransport):
provider_data = {}
if reasoning_details:
provider_data["reasoning_details"] = reasoning_details
# Only worth carrying the ordered-blocks channel when the turn
# actually interleaves signed thinking with tool_use — that's the
# only shape the parallel lists reconstruct incorrectly. A turn that
# is purely text, or thinking-then-tools with a single leading
# thinking block, replays correctly without it.
_has_signed_thinking = any(
isinstance(b, dict)
and b.get("type") in ("thinking", "redacted_thinking")
and (b.get("signature") or b.get("data"))
for b in ordered_blocks
)
_has_tool_use = any(
isinstance(b, dict) and b.get("type") == "tool_use"
for b in ordered_blocks
)
if _has_signed_thinking and _has_tool_use:
provider_data["anthropic_content_blocks"] = ordered_blocks
return NormalizedResponse(
content="\n".join(text_parts) if text_parts else None,

View file

@ -121,6 +121,18 @@ class NormalizedResponse:
pd = self.provider_data or {}
return pd.get("reasoning_details")
@property
def anthropic_content_blocks(self):
    """Order-preserving Anthropic content blocks for a turn, or ``None``.

    Reads the ``anthropic_content_blocks`` entry of ``provider_data``. The
    Anthropic transport only stores that entry when a turn contains both
    signed thinking blocks and tool_use blocks — the one shape the parallel
    reasoning_details + tool_calls lists reconstruct in the wrong order,
    invalidating thinking-block signatures on replay. See
    agent/transports/anthropic.py.
    """
    return (self.provider_data or {}).get("anthropic_content_blocks")
@property
def codex_reasoning_items(self):
pd = self.provider_data or {}

18
cli.py
View file

@ -13336,9 +13336,21 @@ def main(
else:
toolsets_list.append(str(t))
else:
# Use the shared resolver so MCP servers are included at runtime
from hermes_cli.tools_config import _get_platform_tools
toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli"))
# Coding posture (base Hermes): with no explicit --toolsets, collapse
# to the coding toolset (+ enabled MCP servers) when sitting in a code
# workspace. See agent/coding_context.py.
_coding = None
try:
from agent.coding_context import coding_selection
_coding = coding_selection(platform="cli", config=CLI_CONFIG)
except Exception:
_coding = None
if _coding is not None:
toolsets_list = _coding
else:
# Use the shared resolver so MCP servers are included at runtime
from hermes_cli.tools_config import _get_platform_tools
toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli"))
parsed_skills = _parse_skills_argument(skills)

View file

@ -150,9 +150,6 @@ def _normalize_job_record(job: Dict[str, Any]) -> Dict[str, Any]:
state = "scheduled" if normalized.get("enabled", True) else "paused"
normalized["state"] = state
profile = _coerce_job_text(normalized.get("profile")).strip()
normalized["profile"] = profile or None
return normalized
@ -523,30 +520,6 @@ def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
return str(resolved)
def _normalize_profile(profile: Optional[str]) -> Optional[str]:
"""Normalize and validate an optional cron job profile name.
Empty / None disables per-job profile selection. Otherwise the profile name
is canonicalized with the same rules as ``hermes -p`` and must refer to an
existing profile at create/update time. ``default`` is the built-in root
profile and is always valid.
"""
if profile is None:
return None
raw = str(profile).strip()
if not raw:
return None
from hermes_cli.profiles import normalize_profile_name, resolve_profile_env
normalized = normalize_profile_name(raw)
# resolve_profile_env validates the canonical name and checks that named
# profiles exist. Store only the stable profile id, not the filesystem path,
# so profile directories can move with the Hermes root.
resolve_profile_env(normalized)
return normalized
def create_job(
prompt: Optional[str],
schedule: str,
@ -563,7 +536,6 @@ def create_job(
context_from: Optional[Union[str, List[str]]] = None,
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
profile: Optional[str] = None,
no_agent: bool = False,
) -> Dict[str, Any]:
"""
@ -605,11 +577,6 @@ def create_job(
With ``no_agent=True``, ``workdir`` is still applied as the
script's cwd so relative paths inside the script behave
predictably.
profile: Optional Hermes profile name. When set, the job runs with
that profile's HERMES_HOME so profile-specific config,
credentials, scripts, skills, and memory paths resolve
consistently. ``default`` selects the root profile; empty /
None preserves the scheduler's existing behaviour.
no_agent: When True, skip the agent entirely — run ``script`` on schedule
and deliver its stdout directly. Empty stdout = silent (no
delivery). Requires ``script`` to be set. Ideal for classic
@ -647,7 +614,6 @@ def create_job(
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
normalized_toolsets = normalized_toolsets or None
normalized_workdir = _normalize_workdir(workdir)
normalized_profile = _normalize_profile(profile)
normalized_no_agent = bool(no_agent)
# no_agent jobs are meaningless without a script — the script IS the job.
@ -702,7 +668,6 @@ def create_job(
"origin": origin, # Tracks where job was created for "origin" delivery
"enabled_toolsets": normalized_toolsets,
"workdir": normalized_workdir,
"profile": normalized_profile,
}
jobs = load_jobs()
@ -792,15 +757,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
else:
updates["workdir"] = _normalize_workdir(_wd)
# Validate / normalize profile if present in updates. Empty string or
# None both mean "clear the field" (restore old behaviour).
if "profile" in updates:
_profile = updates["profile"]
if _profile is None or _profile == "" or _profile is False:
updates["profile"] = None
else:
updates["profile"] = _normalize_profile(_profile)
updated = _apply_skill_fields({**job, **updates})
schedule_changed = "schedule" in updates

View file

@ -19,7 +19,6 @@ import shutil
import subprocess
import sys
import threading
from contextlib import contextmanager
# fcntl is Unix-only; on Windows use msvcrt for file locking
try:
@ -166,7 +165,7 @@ _parallel_pool_max_workers: Optional[int] = None
_running_job_ids: set = set()
_running_lock = threading.Lock()
# Sequential (env/context-mutating) cron jobs — workdir/profile jobs that touch
# Sequential (env-mutating) cron jobs — workdir jobs that touch
# process-global runtime state — must run one at a time, but must NOT block the
# ticker thread. A persistent single-thread executor preserves ordering across
# ticks while keeping dispatch fire-and-forget, the same as the parallel pool.
@ -190,10 +189,10 @@ def _get_parallel_pool(max_workers: Optional[int]) -> concurrent.futures.ThreadP
def _get_sequential_pool() -> concurrent.futures.ThreadPoolExecutor:
"""Return (or create) the persistent single-thread sequential pool.
A single worker guarantees env/context-mutating jobs never overlap, even
A single worker guarantees env-mutating jobs never overlap, even
across ticks: a job queued by a newer tick waits for the previous tick's
sequential jobs to finish rather than corrupting their os.environ /
profile state.
sequential jobs to finish rather than corrupting their os.environ
state.
"""
global _sequential_pool
if _sequential_pool is None:
@ -235,71 +234,6 @@ def _get_lock_paths() -> tuple[Path, Path]:
return lock_dir, lock_dir / ".tick.lock"
@contextmanager
def _job_profile_context(job_id: str, profile: Optional[str]):
"""Temporarily run a job under a specific Hermes profile.
Cron jobs are stored and scheduled by the profile running the scheduler, but
an individual job can opt into a different runtime profile. While active,
the scheduler's test/override hook and a context-local Hermes home override
both point at the resolved profile directory so _get_hermes_home(),
.env/config loading, script resolution, AIAgent construction, and downstream
get_hermes_home() callers agree on the same home.
Some existing provider/config paths still load profile .env values through
os.environ, so profile jobs also snapshot and restore the process
environment on exit. tick() runs profile jobs sequentially to keep that
temporary mutation isolated from other scheduled jobs.
"""
raw_profile = str(profile or "").strip()
if not raw_profile:
yield None
return
global _hermes_home
prior_override = _hermes_home
env_snapshot = os.environ.copy()
from hermes_cli.profiles import normalize_profile_name, resolve_profile_env
from hermes_constants import reset_hermes_home_override, set_hermes_home_override
normalized_profile = normalize_profile_name(raw_profile)
try:
profile_home = Path(resolve_profile_env(normalized_profile)).resolve()
except (FileNotFoundError, ValueError) as exc:
logger.warning(
"Job '%s': configured profile %r no longer valid (%s) — "
"falling back to scheduler default",
job_id, raw_profile, exc,
)
yield None
return
override_token = None
try:
override_token = set_hermes_home_override(profile_home)
_hermes_home = profile_home
logger.info(
"Job '%s': using Hermes profile '%s' (%s)",
job_id,
normalized_profile,
profile_home,
)
yield normalized_profile
finally:
_hermes_home = prior_override
if override_token is not None:
reset_hermes_home_override(override_token)
# Delta-based restore: remove added keys, restore changed keys.
# Avoids a brief window where other threads see an empty env.
added = set(os.environ.keys()) - set(env_snapshot.keys())
for k in added:
os.environ.pop(k, None)
for k, v in env_snapshot.items():
if os.environ.get(k) != v:
os.environ[k] = v
def _resolve_origin(job: dict) -> Optional[dict]:
"""Extract origin info from a job, preserving any extra routing metadata.
@ -1032,17 +966,6 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
else:
argv = [sys.executable, str(path)]
run_env = os.environ.copy()
run_env["HERMES_HOME"] = str(_get_hermes_home())
try:
from hermes_constants import get_subprocess_home
profile_home = get_subprocess_home()
if profile_home:
run_env["HOME"] = profile_home
except Exception:
pass
try:
popen_kwargs = {"creationflags": windows_hide_flags()} if sys.platform == "win32" else {}
result = subprocess.run(
@ -1051,7 +974,6 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
text=True,
timeout=script_timeout,
cwd=str(path.parent),
env=run_env,
**popen_kwargs,
)
stdout = (result.stdout or "").strip()
@ -1381,13 +1303,6 @@ def _scan_assembled_cron_prompt(
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
"""Execute a single cron job, applying any per-job profile override."""
job_id = job["id"]
with _job_profile_context(job_id, job.get("profile")):
return _run_job_impl(job)
def _run_job_impl(job: dict) -> tuple[bool, str, str, Optional[str]]:
"""
Execute a single cron job.
@ -1624,9 +1539,8 @@ def _run_job_impl(job: dict) -> tuple[bool, str, str, Optional[str]]:
# .cursorrules from the job's project dir, AND
# - the terminal, file, and code-exec tools run commands from there.
#
# tick() serializes jobs that mutate process-global runtime state (workdir
# and/or profile jobs) outside the parallel pool, so mutating
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
# tick() serializes workdir-jobs outside the parallel pool, so mutating
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
# jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
# (skip_context_files=True, tools use whatever cwd the scheduler has).
_job_workdir = (job.get("workdir") or "").strip() or None
@ -2173,21 +2087,12 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
mark_job_run(job["id"], False, str(e))
return False
# Partition due jobs: jobs with a per-job workdir and/or profile touch
# process-global runtime state inside run_job. Workdir jobs temporarily
# set os.environ["TERMINAL_CWD"]; profile jobs use a context-local
# Hermes home override, scheduler _hermes_home hook, and temporary
# profile .env load into os.environ with snapshot/restore. They MUST run
# sequentially to avoid corrupting each other. Jobs without either field
# stay parallel-safe.
sequential_jobs = [
j for j in due_jobs
if (j.get("workdir") or "").strip() or (j.get("profile") or "").strip()
]
parallel_jobs = [
j for j in due_jobs
if not ((j.get("workdir") or "").strip() or (j.get("profile") or "").strip())
]
# Partition due jobs: those with a per-job workdir mutate
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
# so they MUST run sequentially to avoid corrupting each other. Jobs
# without a workdir leave env untouched and stay parallel-safe.
sequential_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
_results: list = []
_all_futures: list = []
@ -2216,9 +2121,9 @@ def tick(verbose: bool = True, adapters=None, loop=None, sync: bool = True) -> i
return pool.submit(_run_and_release)
# Sequential pass for env/context-mutating (workdir/profile) jobs.
# Sequential pass for env-mutating (workdir) jobs.
# Queued to a persistent single-thread pool so they run one at a time
# WITHOUT blocking the ticker thread — a long workdir/profile job no
# WITHOUT blocking the ticker thread — a long workdir job no
# longer starves the rest of the schedule (same fix as the parallel
# pass, just serialized). The in-flight guard prevents a still-running
# job from being re-queued on the next tick.

View file

@ -863,6 +863,19 @@ DEFAULT_CONFIG = {
# identity slot (SOUL.md). Empty by default. The HERMES_ENVIRONMENT_HINT
# env var overrides this (build-time/container mechanism).
"environment_hint": "",
# Coding posture — on interactive coding surfaces (CLI, TUI, desktop
# app, ACP) in a code workspace, Hermes adds a coding operating brief
# + a live git/workspace snapshot to the system prompt. See
# agent/coding_context.py.
# "auto" (default) — prompt-only posture when the surface is
# interactive AND cwd is a code workspace.
# Toolsets are never touched; messaging platforms
# unaffected.
# "focus" — auto + collapse the toolset to the lean coding
# set (+ enabled MCP servers). Explicit opt-in.
# "on" — force the prompt posture everywhere.
# "off" — disable entirely.
"coding_context": "auto",
# Staged inactivity warning: send a warning to the user at this
# threshold before escalating to a full timeout. The warning fires
# once per run and does not interrupt the agent. 0 = disable warning.

View file

@ -120,9 +120,6 @@ def cron_list(show_all: bool = False):
workdir = job.get("workdir")
if workdir:
print(f" Workdir: {workdir}")
profile = job.get("profile")
if profile:
print(f" Profile: {profile}")
# Execution history
last_status = job.get("last_status")
@ -221,7 +218,6 @@ def cron_create(args):
skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
script=getattr(args, "script", None),
workdir=getattr(args, "workdir", None),
profile=getattr(args, "profile", None),
no_agent=getattr(args, "no_agent", False) or None,
)
if not result.get("success"):
@ -239,8 +235,6 @@ def cron_create(args):
print(" Mode: no-agent (script stdout delivered directly)")
if job_data.get("workdir"):
print(f" Workdir: {job_data['workdir']}")
if job_data.get("profile"):
print(f" Profile: {job_data['profile']}")
print(f" Next run: {result['next_run_at']}")
return 0
@ -286,7 +280,6 @@ def cron_edit(args):
skills=final_skills,
script=getattr(args, "script", None),
workdir=getattr(args, "workdir", None),
profile=getattr(args, "profile", None),
no_agent=getattr(args, "no_agent", None),
)
if not result.get("success"):
@ -307,8 +300,6 @@ def cron_edit(args):
print(" Mode: no-agent (script stdout delivered directly)")
if updated.get("workdir"):
print(f" Workdir: {updated['workdir']}")
if updated.get("profile"):
print(f" Profile: {updated['profile']}")
return 0

View file

@ -1623,7 +1623,11 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
npm_cwd = _workspace_root(tui_dir)
# --workspace ui-tui avoids resolving apps/desktop (Electron + node-pty).
# See #38772.
npm_workspace_args: tuple[str, ...] = ("--workspace", "ui-tui")
# When ui-tui/ has its own package-lock.json (e.g. curl install),
# _workspace_root() returns tui_dir itself. Passing --workspace in
# that case fails because npm cannot find a workspace named "ui-tui"
# inside ui-tui/. See #42973.
npm_workspace_args: tuple[str, ...] = () if npm_cwd == tui_dir else ("--workspace", "ui-tui")
if termux_startup:
npm_cwd, npm_workspace_args = _termux_workspace_install_context(
tui_dir,
@ -4642,7 +4646,9 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
# graph (including apps/desktop with its Electron + node-pty deps) is never
# resolved here. Without --workspace the root package.json's apps/* glob
# would pull in desktop on every web build. See #38772.
npm_workspace_args: tuple[str, ...] = ("--workspace", "web")
# When web/ has its own package-lock.json, _workspace_root() returns
# web_dir itself and --workspace would fail. See #42973.
npm_workspace_args: tuple[str, ...] = () if npm_cwd == web_dir else ("--workspace", "web")
if _is_termux_startup_environment():
npm_cwd, npm_workspace_args = _termux_workspace_install_context(web_dir)
r1 = _run_npm_install_deterministic(

View file

@ -70,10 +70,6 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None:
"--workdir",
help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).",
)
cron_create.add_argument(
"--profile",
help="Hermes profile name to run the job under. Use 'default' for the root profile. Named profiles must already exist. Omit to preserve the scheduler's existing profile.",
)
# cron edit
cron_edit = cron_subparsers.add_parser(
@ -138,10 +134,6 @@ def build_cron_parser(subparsers, *, cmd_cron: Callable) -> None:
"--workdir",
help="Absolute path for the job to run from (injects AGENTS.md etc. and sets terminal cwd). Pass empty string to clear.",
)
cron_edit.add_argument(
"--profile",
help="Hermes profile name to run the job under. Use 'default' for the root profile. Pass empty string to clear.",
)
# lifecycle actions
cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job")

View file

@ -1437,6 +1437,10 @@ def _get_platform_tools(
continue
if ts_def.get("includes"):
continue
# Posture toolsets (e.g. ``coding``) are session-level selections made
# by agent/coding_context.py — not per-platform capabilities to recover.
if ts_def.get("posture"):
continue
ts_tools = set(resolve_toolset(ts_key))
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
continue

View file

@ -9,7 +9,7 @@ Usage:
python -m hermes_cli.main web --port 8080
"""
from contextlib import asynccontextmanager
from contextlib import asynccontextmanager, contextmanager
import asyncio
import base64
@ -7373,6 +7373,24 @@ async def prune_checkpoints():
class SkillInstallRequest(BaseModel):
    # Request body for POST /api/skills/hub/install.
    # Hub identifier of the skill to install; the endpoint rejects an empty
    # value with HTTP 400.
    identifier: str
    # Optional target profile. None (or ""/"current") leaves the dashboard's
    # own profile in effect — no ``-p`` argument is passed to the subprocess.
    profile: Optional[str] = None
def _profile_cli_args(profile: Optional[str]) -> List[str]:
"""Return ``["-p", <name>]`` for a validated non-default profile.
Hub install/uninstall/update run in a fresh ``hermes`` subprocess, and
``_apply_profile_override()`` reads ``-p`` from argv in the child the
only mechanism that reaches import-time-bound globals like
``skills_hub.SKILLS_DIR``. Empty/"current" means the dashboard's own
profile (no args, legacy behavior).
"""
requested = (profile or "").strip()
if not requested or requested.lower() == "current":
return []
from hermes_cli import profiles as profiles_mod
_resolve_profile_dir(requested)
return ["-p", profiles_mod.normalize_profile_name(requested)]
@app.post("/api/skills/hub/install")
@ -7381,7 +7399,12 @@ async def install_skill_hub(body: SkillInstallRequest):
if not identifier:
raise HTTPException(status_code=400, detail="identifier is required")
try:
proc = _spawn_hermes_action(["skills", "install", identifier], "skills-install")
proc = _spawn_hermes_action(
_profile_cli_args(body.profile) + ["skills", "install", identifier],
"skills-install",
)
except HTTPException:
raise
except Exception as exc:
_log.exception("Failed to spawn skills install")
raise HTTPException(status_code=500, detail=f"Failed to install skill: {exc}")
@ -7390,6 +7413,7 @@ async def install_skill_hub(body: SkillInstallRequest):
class SkillUninstallRequest(BaseModel):
    # Request body for POST /api/skills/hub/uninstall.
    # Name of the installed skill to remove; the endpoint rejects an empty
    # value with HTTP 400.
    name: str
    # Optional target profile. None (or ""/"current") leaves the dashboard's
    # own profile in effect — no ``-p`` argument is passed to the subprocess.
    profile: Optional[str] = None
@app.post("/api/skills/hub/uninstall")
@ -7398,17 +7422,31 @@ async def uninstall_skill_hub(body: SkillUninstallRequest):
if not name:
raise HTTPException(status_code=400, detail="name is required")
try:
proc = _spawn_hermes_action(["skills", "uninstall", name, "--yes"], "skills-uninstall")
proc = _spawn_hermes_action(
_profile_cli_args(body.profile) + ["skills", "uninstall", name, "--yes"],
"skills-uninstall",
)
except HTTPException:
raise
except Exception as exc:
_log.exception("Failed to spawn skills uninstall")
raise HTTPException(status_code=500, detail=f"Failed to uninstall skill: {exc}")
return {"ok": True, "pid": proc.pid, "name": "skills-uninstall"}
class SkillsUpdateRequest(BaseModel):
    # Optional request body for POST /api/skills/hub/update; the endpoint
    # also accepts no body at all.
    # Optional target profile. None (or ""/"current") leaves the dashboard's
    # own profile in effect — no ``-p`` argument is passed to the subprocess.
    profile: Optional[str] = None
@app.post("/api/skills/hub/update")
async def update_skills_hub():
async def update_skills_hub(body: Optional[SkillsUpdateRequest] = None):
try:
proc = _spawn_hermes_action(["skills", "update"], "skills-update")
profile = body.profile if body else None
proc = _spawn_hermes_action(
_profile_cli_args(profile) + ["skills", "update"], "skills-update"
)
except HTTPException:
raise
except Exception as exc:
_log.exception("Failed to spawn skills update")
raise HTTPException(status_code=500, detail=f"Failed to update skills: {exc}")
@ -7443,17 +7481,25 @@ def _skill_meta_to_payload(m) -> dict:
}
def _installed_hub_identifiers() -> dict:
def _installed_hub_identifiers(profile: Optional[str] = None) -> dict:
"""Map identifier -> installed lock entry for hub-installed skills.
Lets the UI mark search results that are already installed. Best-effort:
returns an empty dict if the lock file can't be read.
Lets the UI mark search results that are already installed. Scoped to
``profile``'s skills/.hub/lock.json when provided (HubLockFile takes an
explicit path, sidestepping the import-time LOCK_FILE binding).
Best-effort: returns an empty dict if the lock file can't be read.
"""
try:
from tools.skills_hub import HubLockFile
requested = (profile or "").strip()
if requested and requested.lower() != "current":
profile_dir = _resolve_profile_dir(requested)
lock = HubLockFile(profile_dir / "skills" / ".hub" / "lock.json")
else:
lock = HubLockFile()
out = {}
for entry in HubLockFile().list_installed():
for entry in lock.list_installed():
ident = entry.get("identifier")
if ident:
out[ident] = {
@ -7467,13 +7513,14 @@ def _installed_hub_identifiers() -> dict:
@app.get("/api/skills/hub/sources")
async def list_skills_hub_sources():
async def list_skills_hub_sources(profile: Optional[str] = None):
"""List the configured skill-hub sources and installed-skill provenance.
Gives the dashboard something to show BEFORE a search runs — which hubs
are wired up, their trust tier, and a set of featured skills pulled from
the centralized index (zero extra API calls). Without this the Browse-hub
tab is a blank page with no indication it's even connected to anything.
``profile`` scopes the installed-skill provenance to that profile.
"""
def _run():
@ -7514,18 +7561,22 @@ async def list_skills_hub_sources():
"sources": out,
"index_available": index_available,
"featured": featured,
"installed": _installed_hub_identifiers(),
"installed": _installed_hub_identifiers(profile),
}
try:
return await asyncio.to_thread(_run)
except HTTPException:
raise
except Exception as exc:
_log.exception("skills hub sources listing failed")
raise HTTPException(status_code=502, detail=f"Hub sources failed: {exc}")
@app.get("/api/skills/hub/search")
async def search_skills_hub(q: str = "", source: str = "all", limit: int = 20):
async def search_skills_hub(
q: str = "", source: str = "all", limit: int = 20, profile: Optional[str] = None
):
"""Search the skill hub across all configured sources.
Network-bound (parallel source search); runs in a thread so the FastAPI
@ -7560,11 +7611,13 @@ async def search_skills_hub(q: str = "", source: str = "all", limit: int = 20):
"results": [_skill_meta_to_payload(m) for m in deduped],
"source_counts": source_counts,
"timed_out": timed_out,
"installed": _installed_hub_identifiers(),
"installed": _installed_hub_identifiers(profile),
}
try:
return await asyncio.to_thread(_run)
except HTTPException:
raise
except Exception as exc:
_log.exception("skills hub search failed")
raise HTTPException(status_code=502, detail=f"Hub search failed: {exc}")
@ -8333,21 +8386,75 @@ async def describe_profile_auto_endpoint(name: str, body: ProfileDescribeAuto):
# ---------------------------------------------------------------------------
# Skills & Tools endpoints
#
# Every read/write below accepts an optional ``profile`` query param so the
# dashboard can manage ANY profile's skills/toolsets, not just the profile
# the dashboard process happens to be running under. Without this, "Set as
# active" on the Profiles page (which only flips the sticky ``active_profile``
# file for FUTURE CLI/gateway invocations) misled users into thinking skill
# toggles would land in the activated profile — they silently wrote into the
# dashboard's own config instead. See _profile_scope() for the mechanism.
# ---------------------------------------------------------------------------
_SKILLS_PROFILE_LOCK = threading.RLock()
@contextmanager
def _profile_scope(profile: Optional[str]):
"""Scope config + skill-directory resolution to ``profile`` for one request.
Two seams must be redirected for skills/toolsets endpoints:
1. ``load_config``/``save_config`` resolve ``get_hermes_home()`` at call
time the context-local override from ``set_hermes_home_override``
reaches them (same pattern as ``_write_profile_model``).
2. ``tools.skills_tool`` binds ``SKILLS_DIR`` at import time, so the
override CANNOT reach it. Like ``_call_cron_for_profile`` does for
cron's module globals, temporarily retarget it under a lock and
restore it immediately after.
``profile`` of None/""/"current" means "the dashboard's own profile"
a no-op scope, preserving existing behavior for old clients.
"""
requested = (profile or "").strip()
if not requested or requested.lower() == "current":
yield None
return
profile_dir = _resolve_profile_dir(requested)
from hermes_constants import set_hermes_home_override, reset_hermes_home_override
from tools import skills_tool as _skills_tool
token = set_hermes_home_override(str(profile_dir))
with _SKILLS_PROFILE_LOCK:
old_home = _skills_tool.HERMES_HOME
old_skills_dir = _skills_tool.SKILLS_DIR
_skills_tool.HERMES_HOME = profile_dir
_skills_tool.SKILLS_DIR = profile_dir / "skills"
try:
yield profile_dir
finally:
_skills_tool.HERMES_HOME = old_home
_skills_tool.SKILLS_DIR = old_skills_dir
reset_hermes_home_override(token)
class SkillToggle(BaseModel):
    """Request body for ``PUT /api/skills/toggle``."""

    # Name of the skill whose enabled state is being changed.
    name: str
    # True removes the skill from the disabled set; False adds it.
    enabled: bool
    # Optional profile to apply the change to; handed to ``_profile_scope``.
    profile: Optional[str] = None
@app.get("/api/skills")
async def get_skills():
async def get_skills(profile: Optional[str] = None):
from tools.skills_tool import _find_all_skills
from hermes_cli.skills_config import get_disabled_skills
config = load_config()
disabled = get_disabled_skills(config)
skills = _find_all_skills(skip_disabled=True)
with _profile_scope(profile):
config = load_config()
disabled = get_disabled_skills(config)
skills = _find_all_skills(skip_disabled=True)
for s in skills:
s["enabled"] = s["name"] not in disabled
return skills
@ -8356,18 +8463,19 @@ async def get_skills():
@app.put("/api/skills/toggle")
async def toggle_skill(body: SkillToggle):
from hermes_cli.skills_config import get_disabled_skills, save_disabled_skills
config = load_config()
disabled = get_disabled_skills(config)
if body.enabled:
disabled.discard(body.name)
else:
disabled.add(body.name)
save_disabled_skills(config, disabled)
with _profile_scope(body.profile):
config = load_config()
disabled = get_disabled_skills(config)
if body.enabled:
disabled.discard(body.name)
else:
disabled.add(body.name)
save_disabled_skills(config, disabled)
return {"ok": True, "name": body.name, "enabled": body.enabled}
@app.get("/api/tools/toolsets")
async def get_toolsets():
async def get_toolsets(profile: Optional[str] = None):
from hermes_cli.tools_config import (
_get_effective_configurable_toolsets,
_get_platform_tools,
@ -8376,12 +8484,13 @@ async def get_toolsets():
)
from toolsets import resolve_toolset
config = load_config()
enabled_toolsets = _get_platform_tools(
config,
"cli",
include_default_mcp_servers=False,
)
with _profile_scope(profile):
config = load_config()
enabled_toolsets = _get_platform_tools(
config,
"cli",
include_default_mcp_servers=False,
)
result = []
for name, label, desc in _get_effective_configurable_toolsets():
try:
@ -8403,6 +8512,7 @@ async def get_toolsets():
class ToolsetToggle(BaseModel):
    """Request body for ``PUT /api/tools/toolsets/{name}``."""

    # True adds the toolset to the enabled set; False removes it.
    enabled: bool
    # Optional profile to apply the change to; handed to ``_profile_scope``.
    profile: Optional[str] = None
@app.put("/api/tools/toolsets/{name}")
@ -8411,7 +8521,8 @@ async def toggle_toolset(name: str, body: ToolsetToggle):
Persists to ``platform_toolsets.cli`` via the same ``_save_platform_tools``
helper the CLI ``hermes tools`` picker uses, so the GUI and CLI stay in
lockstep. Returns 400 for unknown toolset keys.
lockstep. Scoped to ``body.profile`` when provided. Returns 400 for
unknown toolset keys.
"""
from hermes_cli.tools_config import (
_get_effective_configurable_toolsets,
@ -8423,20 +8534,21 @@ async def toggle_toolset(name: str, body: ToolsetToggle):
if name not in valid:
raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}")
config = load_config()
enabled = set(
_get_platform_tools(config, "cli", include_default_mcp_servers=False)
)
if body.enabled:
enabled.add(name)
else:
enabled.discard(name)
_save_platform_tools(config, "cli", enabled)
with _profile_scope(body.profile):
config = load_config()
enabled = set(
_get_platform_tools(config, "cli", include_default_mcp_servers=False)
)
if body.enabled:
enabled.add(name)
else:
enabled.discard(name)
_save_platform_tools(config, "cli", enabled)
return {"ok": True, "name": name, "enabled": body.enabled}
@app.get("/api/tools/toolsets/{name}/config")
async def get_toolset_config(name: str):
async def get_toolset_config(name: str, profile: Optional[str] = None):
"""Return the provider matrix + key status for a toolset's config panel.
Surfaces the same provider rows the CLI ``hermes tools`` picker shows
@ -8457,38 +8569,39 @@ async def get_toolset_config(name: str):
if name not in valid:
raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}")
config = load_config()
cat = TOOL_CATEGORIES.get(name)
providers = []
active_provider = None
if cat:
for prov in _visible_providers(cat, config, force_fresh=True):
env_vars = [
{
"key": e["key"],
"prompt": e.get("prompt", e["key"]),
"url": e.get("url"),
"default": e.get("default"),
"is_set": bool(get_env_value(e["key"])),
}
for e in prov.get("env_vars", [])
]
# Surface the same active-provider determination the CLI picker
# uses (``_is_provider_active``) so the GUI highlights the provider
# actually written to config (e.g. web.backend), not just the first
# keyless one in the list.
is_active = _is_provider_active(prov, config, force_fresh=True)
if is_active and active_provider is None:
active_provider = prov["name"]
providers.append({
"name": prov["name"],
"badge": prov.get("badge", ""),
"tag": prov.get("tag", ""),
"env_vars": env_vars,
"post_setup": prov.get("post_setup"),
"requires_nous_auth": bool(prov.get("requires_nous_auth")),
"is_active": is_active,
})
with _profile_scope(profile):
config = load_config()
cat = TOOL_CATEGORIES.get(name)
providers = []
active_provider = None
if cat:
for prov in _visible_providers(cat, config, force_fresh=True):
env_vars = [
{
"key": e["key"],
"prompt": e.get("prompt", e["key"]),
"url": e.get("url"),
"default": e.get("default"),
"is_set": bool(get_env_value(e["key"])),
}
for e in prov.get("env_vars", [])
]
# Surface the same active-provider determination the CLI picker
# uses (``_is_provider_active``) so the GUI highlights the provider
# actually written to config (e.g. web.backend), not just the first
# keyless one in the list.
is_active = _is_provider_active(prov, config, force_fresh=True)
if is_active and active_provider is None:
active_provider = prov["name"]
providers.append({
"name": prov["name"],
"badge": prov.get("badge", ""),
"tag": prov.get("tag", ""),
"env_vars": env_vars,
"post_setup": prov.get("post_setup"),
"requires_nous_auth": bool(prov.get("requires_nous_auth")),
"is_active": is_active,
})
return {
"name": name,
"has_category": cat is not None,
@ -8499,6 +8612,7 @@ async def get_toolset_config(name: str):
class ToolsetProviderSelect(BaseModel):
    """Request body for ``PUT /api/tools/toolsets/{name}/provider``."""

    # Provider name passed to ``apply_provider_selection`` for the toolset.
    provider: str
    # Optional profile to apply the change to; handed to ``_profile_scope``.
    profile: Optional[str] = None
@app.put("/api/tools/toolsets/{name}/provider")
@ -8520,17 +8634,19 @@ async def select_toolset_provider(name: str, body: ToolsetProviderSelect):
if name not in valid:
raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}")
config = load_config()
try:
apply_provider_selection(name, body.provider, config)
except KeyError as exc:
raise HTTPException(status_code=400, detail=str(exc).strip('"'))
save_config(config)
with _profile_scope(body.profile):
config = load_config()
try:
apply_provider_selection(name, body.provider, config)
except KeyError as exc:
raise HTTPException(status_code=400, detail=str(exc).strip('"'))
save_config(config)
return {"ok": True, "name": name, "provider": body.provider}
class ToolsetEnvUpdate(BaseModel):
    """Request body for ``PUT /api/tools/toolsets/{name}/env``."""

    # Env var name -> value. Keys are checked against the toolset's provider
    # env vars; blank values are reported as skipped rather than saved.
    env: Dict[str, str]
    # Optional profile to apply the change to; handed to ``_profile_scope``.
    profile: Optional[str] = None
@app.put("/api/tools/toolsets/{name}/env")
@ -8556,34 +8672,35 @@ async def save_toolset_env(name: str, body: ToolsetEnvUpdate):
if name not in valid_ts:
raise HTTPException(status_code=400, detail=f"Unknown toolset: {name}")
config = load_config()
cat = TOOL_CATEGORIES.get(name)
allowed: set[str] = set()
if cat:
for prov in _visible_providers(cat, config, force_fresh=True):
for e in prov.get("env_vars", []):
allowed.add(e["key"])
with _profile_scope(body.profile):
config = load_config()
cat = TOOL_CATEGORIES.get(name)
allowed: set[str] = set()
if cat:
for prov in _visible_providers(cat, config, force_fresh=True):
for e in prov.get("env_vars", []):
allowed.add(e["key"])
unknown = [k for k in body.env if k not in allowed]
if unknown:
raise HTTPException(
status_code=400,
detail=f"Unknown env var(s) for toolset {name}: {', '.join(sorted(unknown))}",
)
unknown = [k for k in body.env if k not in allowed]
if unknown:
raise HTTPException(
status_code=400,
detail=f"Unknown env var(s) for toolset {name}: {', '.join(sorted(unknown))}",
)
saved: List[str] = []
skipped: List[str] = []
for key, value in body.env.items():
if value and value.strip():
try:
save_env_value(key, value.strip())
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc))
saved.append(key)
else:
skipped.append(key)
saved: List[str] = []
skipped: List[str] = []
for key, value in body.env.items():
if value and value.strip():
try:
save_env_value(key, value.strip())
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc))
saved.append(key)
else:
skipped.append(key)
status = {k: bool(get_env_value(k)) for k in allowed}
status = {k: bool(get_env_value(k)) for k in allowed}
return {"ok": True, "name": name, "saved": saved, "skipped": skipped, "is_set": status}

View file

@ -945,6 +945,7 @@ AUTHOR_MAP = {
"michel.belleau@malaiwah.com": "malaiwah",
"gnanasekaran.sekareee@gmail.com": "gnanam1990",
"jz.pentest@gmail.com": "0xyg3n",
"56406949+RaumfahrerSpiffy@users.noreply.github.com": "Spaceman-Spiffy", # PR #35586 (renamed account)
"ian@culling.ca": "ianculling", # PR #36087
"7093928+0xyg3n@users.noreply.github.com": "0xyg3n",
"nftpoetrist@gmail.com": "nftpoetrist", # PR #18982

View file

@ -0,0 +1,96 @@
"""Regression: output-only SDK fields must not leak into Anthropic request input.
Reproduces HTTP 400 `messages.N.content.M.text.parsed_output: Extra inputs are
not permitted`. Anthropic SDK response blocks carry output-only attributes
(text blocks: `parsed_output`, `citations=None`; tool_use blocks: `caller`)
that the Messages *input* schema forbids. normalize_response captured blocks
verbatim via _to_plain_data and replayed them as input -> HTTP 400.
Fix: whitelist input-permitted fields per block type at three points:
normalize_response capture, _sanitize_replay_block (ordered-blocks replay), and
_convert_content_part_to_anthropic (content-list replay).
"""
import sys, os
sys.path.insert(0, os.path.expanduser("~/.hermes/hermes-agent"))
import pytest
from agent.anthropic_adapter import (
_sanitize_replay_block,
_convert_content_part_to_anthropic,
_convert_assistant_message,
)
FORBIDDEN = {"parsed_output", "caller"}
def _assert_clean(block):
"""No forbidden output-only key, and no null citations, anywhere."""
assert isinstance(block, dict)
for k in FORBIDDEN:
assert k not in block, f"forbidden field {k!r} survived: {block}"
if "citations" in block:
assert isinstance(block["citations"], list) and block["citations"], \
"citations must be a non-empty list if present (None/[] is input-invalid)"
class TestSanitizeReplayBlock:
    """Per-type whitelist behavior of ``_sanitize_replay_block``."""

    def test_text_block_strips_parsed_output_and_null_citations(self):
        # Both output-only siblings must vanish, leaving only type + text.
        cleaned = _sanitize_replay_block(
            {"type": "text", "text": "hi", "parsed_output": None, "citations": None}
        )
        _assert_clean(cleaned)
        assert cleaned == {"type": "text", "text": "hi"}

    def test_tool_use_strips_caller(self):
        # ``caller`` is output-only; the tool name and input must survive.
        raw = {
            "type": "tool_use",
            "id": "toolu_1",
            "name": "read_file",
            "input": {"path": "a"},
            "caller": {"type": "agent"},
        }
        cleaned = _sanitize_replay_block(raw)
        _assert_clean(cleaned)
        assert cleaned["name"] == "read_file"
        assert cleaned["input"] == {"path": "a"}

    def test_thinking_preserves_signature(self):
        # A signed thinking block has nothing to strip and must come back whole.
        cleaned = _sanitize_replay_block(
            {"type": "thinking", "thinking": "x", "signature": "sig-AAA"}
        )
        assert cleaned == {"type": "thinking", "thinking": "x", "signature": "sig-AAA"}

    def test_text_keeps_real_citations(self):
        # A non-empty citations list is valid input and must be kept.
        citations = [{"type": "char_location", "cited_text": "q"}]
        cleaned = _sanitize_replay_block(
            {"type": "text", "text": "t", "citations": citations}
        )
        assert cleaned["citations"] == citations

    def test_unknown_type_dropped(self):
        # Types outside the whitelist are dropped (None), not passed through.
        unknown = {"type": "server_tool_use", "foo": 1}
        assert _sanitize_replay_block(unknown) is None
class TestContentPartConversion:
    """Content-list replay path: ``_convert_content_part_to_anthropic``."""

    def test_stored_text_block_with_parsed_output_cleaned(self):
        # This is the exact shape behind the content.N.text.parsed_output 400.
        stored_part = {
            "type": "text",
            "text": "hello",
            "parsed_output": None,
            "citations": None,
        }
        converted = _convert_content_part_to_anthropic(stored_part)
        _assert_clean(converted)
class TestAssistantReplay:
    """Ordered-blocks replay path through ``_convert_assistant_message``."""

    def test_interleaved_blocks_replayed_clean_and_ordered(self):
        stored_blocks = [
            {"type": "thinking", "thinking": "plan", "signature": "s1"},
            {"type": "text", "text": "doing it", "parsed_output": None, "citations": None},
            {
                "type": "tool_use",
                "id": "toolu_1",
                "name": "read_file",
                "input": {"path": "a"},
                "caller": {"type": "agent"},
            },
        ]
        message = {"role": "assistant", "anthropic_content_blocks": stored_blocks}

        replayed = _convert_assistant_message(message)["content"]

        # Block order must survive the round trip.
        assert [blk["type"] for blk in replayed] == ["thinking", "text", "tool_use"]
        # No block may carry an output-only field.
        for blk in replayed:
            _assert_clean(blk)
        # The signature and the tool fields must be left intact.
        assert replayed[0]["signature"] == "s1"
        assert replayed[2]["name"] == "read_file"
# Allow running this file directly; the process exits with pytest's status code.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))

View file

@ -0,0 +1,314 @@
"""Regression test for the Anthropic interleaved thinking-block 400.
Reproduces: HTTP 400 ``messages.N.content.M: thinking or redacted_thinking
blocks in the latest assistant message cannot be modified. These blocks must
remain as they were in the original response.``
Root cause under test
----------------------
With adaptive / interleaved thinking (Claude 4.6+, e.g. Opus 4.8), a single
assistant turn can emit content blocks in an interleaved order::
thinking_1 (signed) · tool_use_1 · thinking_2 (signed) · tool_use_2
Anthropic signs each thinking block against the turn content that precedes it
at its position. ``thinking_2`` is signed with ``tool_use_1`` before it.
``AnthropicTransport.normalize_response`` (agent/transports/anthropic.py)
splits the turn into two *parallel* lists — ``reasoning_details`` (thinking
blocks) and ``tool_calls`` (tool_use blocks) — discarding the cross-type
ordering. ``run_agent`` stores those as separate fields on the assistant
message. On replay, ``_convert_assistant_message`` (agent/anthropic_adapter.py)
rebuilds the content as ``[all thinking][text][all tool_use]``, which reorders
``thinking_2`` ahead of ``tool_use_1``. The signature no longer matches its
original position, so Anthropic rejects the latest assistant message with the
400 above.
This test asserts that an interleaved turn round-trips through
normalize_response -> stored message -> convert_messages_to_anthropic with its
block order preserved. It FAILED on the pre-fix code (documenting the bug) and
passes once block ordering is preserved on replay.
"""
import json
from types import SimpleNamespace
import pytest
from agent.transports import get_transport
from agent.anthropic_adapter import convert_messages_to_anthropic
def _thinking_block(text: str, signature: str) -> SimpleNamespace:
"""A signed Anthropic thinking block, shaped like the SDK object."""
return SimpleNamespace(type="thinking", thinking=text, signature=signature)
def _tool_use_block(block_id: str, name: str, payload: dict) -> SimpleNamespace:
return SimpleNamespace(type="tool_use", id=block_id, name=name, input=payload)
def _interleaved_response() -> SimpleNamespace:
    """Return a fake assistant turn whose thinking and tool_use blocks alternate.

    The block order is: thinking (sig-AAA), tool_use toolu_1,
    thinking (sig-BBB), tool_use toolu_2.
    """
    blocks = [
        _thinking_block("Plan: inspect file A first.", "sig-AAA"),
        _tool_use_block("toolu_1", "read_file", {"path": "a.py"}),
        _thinking_block("A looked fine; now inspect B.", "sig-BBB"),
        _tool_use_block("toolu_2", "read_file", {"path": "b.py"}),
    ]
    return SimpleNamespace(content=blocks, stop_reason="tool_use", usage=None)
def _stored_assistant_message(normalized) -> dict:
"""Reconstruct the OpenAI-style assistant message the way run_agent stores it.
run_agent.py persists assistant turns as separate fields: content,
reasoning_details (from provider_data), and tool_calls. See
run_agent.py L1513-1516 and hermes_state.py.
"""
provider_data = normalized.provider_data or {}
tool_calls = []
for tc in (normalized.tool_calls or []):
tool_calls.append({
"id": tc.id,
"type": "function",
"function": {"name": tc.name, "arguments": tc.arguments},
})
msg = {
"role": "assistant",
"content": normalized.content or "",
"reasoning_details": provider_data.get("reasoning_details"),
"tool_calls": tool_calls,
}
# build_assistant_message lifts the verbatim ordered-block channel onto
# the stored message; mirror that here.
blocks = provider_data.get("anthropic_content_blocks")
if blocks:
msg["anthropic_content_blocks"] = blocks
return msg
def _original_block_order(response) -> list:
"""The (type, key) sequence of the original interleaved response."""
order = []
for b in response.content:
if b.type == "thinking":
order.append(("thinking", b.signature))
elif b.type == "tool_use":
order.append(("tool_use", b.id))
return order
def _replayed_block_order(assistant_content) -> list:
order = []
for b in assistant_content:
if not isinstance(b, dict):
continue
if b.get("type") in ("thinking", "redacted_thinking"):
order.append(("thinking", b.get("signature")))
elif b.get("type") == "tool_use":
order.append(("tool_use", b.get("id")))
return order
class TestInterleavedThinkingBlockOrder:
    """Round-trip an interleaved thinking/tool_use turn and check block order."""

    def test_normalize_response_loses_interleaving(self):
        """normalize_response exposes thinking and tool_use blocks as two
        parallel fields; neither field records where its blocks sat relative
        to the other's."""
        normalized = get_transport("anthropic_messages").normalize_response(
            _interleaved_response()
        )
        provider_data = normalized.provider_data or {}

        # Both signed thinking blocks are captured in reasoning_details...
        reasoning = provider_data.get("reasoning_details")
        assert reasoning is not None and len(reasoning) == 2

        # ...and both tool calls in tool_calls. These two fields on their own
        # say nothing about thinking_2 having sat between the tool calls,
        # which is the structural precondition for the reorder bug.
        calls = normalized.tool_calls
        assert calls is not None and len(calls) == 2

    def test_interleaved_order_preserved_on_replay(self):
        """The latest assistant message must replay its blocks in their
        ORIGINAL order, or Anthropic rejects the signed thinking blocks
        with a 400.

        A reconstruction that front-loads all thinking blocks produces
            thinking_1 · thinking_2 · tool_use_1 · tool_use_2
        instead of the original
            thinking_1 · tool_use_1 · thinking_2 · tool_use_2
        """
        response = _interleaved_response()
        original_order = _original_block_order(response)

        normalized = get_transport("anthropic_messages").normalize_response(response)

        # The assistant turn is the LATEST assistant message in this
        # conversation, i.e. the one whose signed blocks are sent verbatim.
        conversation = [
            {"role": "user", "content": "Inspect a.py and b.py."},
            _stored_assistant_message(normalized),
            {"role": "tool", "tool_call_id": "toolu_1", "content": "a.py: ok"},
            {"role": "tool", "tool_call_id": "toolu_2", "content": "b.py: ok"},
        ]
        _system, anthropic_messages = convert_messages_to_anthropic(
            conversation,
            base_url=None,  # direct Anthropic
            model="claude-opus-4-8",  # adaptive thinking family
        )

        # Pick out the (latest) assistant message from the converted output.
        assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"]
        assert assistant_out, "no assistant message in converted output"

        replayed_order = _replayed_block_order(assistant_out[-1]["content"])
        assert replayed_order == original_order, (
            "Interleaved thinking/tool_use order was not preserved on replay.\n"
            f" original: {original_order}\n"
            f" replayed: {replayed_order}\n"
            "Anthropic signs thinking blocks against their original position; "
            "reordering invalidates the signature -> HTTP 400 'thinking blocks "
            "in the latest assistant message cannot be modified'."
        )

    def test_replay_falls_back_gracefully_without_ordered_blocks(self):
        """Without the ordered-block channel, conversion must not crash.

        The channel is intentionally NOT persisted to state.db (in-memory
        only): a session reloaded from disk after a crash loses the field
        and falls back to reconstruction. That replay may take one HTTP 400,
        which the thinking-signature recovery (#43667) absorbs by stripping
        reasoning_details and retrying. This test pins the fallback shape:
        conversion still produces a valid assistant message from the
        parallel reasoning_details + tool_calls fields.
        """
        normalized = get_transport("anthropic_messages").normalize_response(
            _interleaved_response()
        )
        reloaded_msg = _stored_assistant_message(normalized)
        # Simulate a disk reload: the in-memory-only channel is gone.
        reloaded_msg.pop("anthropic_content_blocks", None)

        conversation = [
            reloaded_msg,
            {"role": "tool", "tool_call_id": "toolu_1", "content": "a ok"},
            {"role": "tool", "tool_call_id": "toolu_2", "content": "b ok"},
        ]
        _system, anthropic_messages = convert_messages_to_anthropic(
            conversation, base_url=None, model="claude-opus-4-8",
        )

        assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"]
        assert assistant_out, "no assistant message in converted output"

        content = assistant_out[-1]["content"]
        assert isinstance(content, list) and content, "fallback produced empty content"

        # Reconstruction keeps both tool_use blocks (answered by results).
        replayed_ids = [
            blk.get("id")
            for blk in content
            if isinstance(blk, dict) and blk.get("type") == "tool_use"
        ]
        assert set(replayed_ids) == {"toolu_1", "toolu_2"}
class TestInterleavedReplayCredentialRedaction:
    """The verbatim-replay fast path must not leak un-redacted secrets.

    anthropic_content_blocks captures each tool_use ``input`` from the RAW API
    response (normalize_response), which is NOT credential-redacted. The
    parallel tool_calls[].function.arguments IS redacted at storage time
    (build_assistant_message, #19798). Replaying the block's raw input
    verbatim would put a secret the model inlined into a tool call back on
    the wire even though it is redacted everywhere else in history. The fix
    re-sources tool_use input from the redacted tool_calls map by id.
    """

    def test_tool_use_input_resourced_from_redacted_tool_calls(self):
        REDACTED = "[REDACTED_SECRET]"

        # Ordered channel: the first tool_use input still carries the live
        # secret, exactly as captured from the unredacted API response.
        leaky_call = {
            "type": "tool_use",
            "id": "toolu_1",
            "name": "terminal",
            "input": {"command": "curl -H 'Authorization: Bearer sk-LIVE-SECRET-123'"},
        }
        harmless_call = {
            "type": "tool_use",
            "id": "toolu_2",
            "name": "terminal",
            "input": {"command": "echo done"},
        }
        ordered = [
            {"type": "thinking", "thinking": "Call the API.", "signature": "sig-AAA"},
            leaky_call,
            {"type": "thinking", "thinking": "Now the second call.", "signature": "sig-BBB"},
            harmless_call,
        ]

        # Stored tool_calls: arguments already redacted (the #19798 path).
        redacted_args = json.dumps(
            {"command": f"curl -H 'Authorization: Bearer {REDACTED}'"}
        )
        stored_calls = [
            {
                "id": "toolu_1",
                "type": "function",
                "function": {"name": "terminal", "arguments": redacted_args},
            },
            {
                "id": "toolu_2",
                "type": "function",
                "function": {
                    "name": "terminal",
                    "arguments": json.dumps({"command": "echo done"}),
                },
            },
        ]
        assistant_msg = {
            "role": "assistant",
            "content": "",
            "reasoning_details": [b for b in ordered if b["type"] == "thinking"],
            "tool_calls": stored_calls,
            "anthropic_content_blocks": ordered,
        }

        conversation = [
            {"role": "user", "content": "Hit the API twice."},
            assistant_msg,
            {"role": "tool", "tool_call_id": "toolu_1", "content": "200 OK"},
            {"role": "tool", "tool_call_id": "toolu_2", "content": "done"},
        ]
        _system, anthropic_messages = convert_messages_to_anthropic(
            conversation, base_url=None, model="claude-opus-4-8",
        )

        assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"]
        assert assistant_out, "no assistant message in converted output"
        blocks = assistant_out[-1]["content"]

        tool_uses = {b["id"]: b for b in blocks if b.get("type") == "tool_use"}
        assert set(tool_uses) == {"toolu_1", "toolu_2"}, "tool_use blocks missing/renamed"

        # The replayed input must be the REDACTED value, not the live secret.
        replayed_cmd = tool_uses["toolu_1"]["input"]["command"]
        assert "sk-LIVE-SECRET-123" not in replayed_cmd, (
            "Un-redacted secret leaked onto the wire via the verbatim-replay "
            "fast path. tool_use input must be re-sourced from the redacted "
            "tool_calls map, not the raw captured block."
        )
        assert REDACTED in replayed_cmd

        # Interleave order is still preserved (the reason the channel exists).
        order = []
        for b in blocks:
            kind = b.get("type")
            if kind not in ("thinking", "tool_use"):
                continue
            if kind == "thinking":
                order.append(("thinking", b.get("signature")))
            else:
                order.append(("tool_use", b.get("id")))
        assert order == [
            ("thinking", "sig-AAA"),
            ("tool_use", "toolu_1"),
            ("thinking", "sig-BBB"),
            ("tool_use", "toolu_2"),
        ]
# Allow running this file directly; the process exits with pytest's status code.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))

View file

@ -0,0 +1,405 @@
"""Tests for agent.coding_context — RuntimeMode seam, resolver, toolset, git probe."""
import json
import subprocess
from pathlib import Path
import pytest
from agent import coding_context as cc
def _git_init(path):
    """Create a git repo at ``path`` on branch ``main`` with one empty commit.

    git runs with a minimal environment: a fixed author/committer identity
    and ``HOME`` pointed at ``path``, so the developer's own ``~/.gitconfig``
    is not picked up.

    Args:
        path: Existing directory to initialise (typically ``tmp_path``).

    Raises:
        subprocess.CalledProcessError: If either git command exits non-zero.
    """
    import os  # local import: this module does not import os at top level

    env = {
        "GIT_AUTHOR_NAME": "t", "GIT_AUTHOR_EMAIL": "t@t",
        "GIT_COMMITTER_NAME": "t", "GIT_COMMITTER_EMAIL": "t@t",
        "HOME": str(path),
    }
    # ``env=`` replaces the whole environment. Without PATH, ``git`` is only
    # looked up on the platform default search path, which misses installs in
    # e.g. /usr/local/bin or /opt/homebrew/bin; Windows additionally needs
    # SystemRoot to start processes. Pass just those two through.
    for key in ("PATH", "SYSTEMROOT"):
        value = os.environ.get(key)
        if value is not None:
            env[key] = value

    for args in (
        ["init", "-q", "-b", "main"],
        ["commit", "-q", "--allow-empty", "-m", "init commit"],
    ):
        subprocess.run(["git", "-C", str(path), *args], check=True, env=env)
# ── resolver ──────────────────────────────────────────────────────────────
class TestIsCodingContext:
    """How ``agent.coding_context`` mode values gate ``is_coding_context``."""

    def test_off_never_activates(self, tmp_path):
        # Even inside a real repo, "off" wins.
        _git_init(tmp_path)
        active = cc.is_coding_context(
            platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "off"}}
        )
        assert active is False

    def test_on_forces_even_without_git(self, tmp_path):
        # "on" needs neither a repo nor a coding surface.
        active = cc.is_coding_context(
            platform="telegram", cwd=tmp_path, config={"agent": {"coding_context": "on"}}
        )
        assert active is True

    def test_auto_requires_git_repo(self, tmp_path):
        config = {"agent": {"coding_context": "auto"}}
        before = cc.is_coding_context(platform="cli", cwd=tmp_path, config=config)
        assert before is False
        _git_init(tmp_path)
        after = cc.is_coding_context(platform="cli", cwd=tmp_path, config=config)
        assert after is True

    def test_auto_skips_messaging_surfaces(self, tmp_path):
        _git_init(tmp_path)
        config = {"agent": {"coding_context": "auto"}}
        on_discord = cc.is_coding_context(platform="discord", cwd=tmp_path, config=config)
        assert on_discord is False
        on_tui = cc.is_coding_context(platform="tui", cwd=tmp_path, config=config)
        assert on_tui is True

    def test_default_mode_is_auto(self, tmp_path):
        # Unknown/missing value normalizes to auto.
        _git_init(tmp_path)
        active = cc.is_coding_context(platform="cli", cwd=tmp_path, config={})
        assert active is True
# ── toolset substitution ────────────────────────────────────────────────────
class TestCodingSelection:
    """When ``coding_selection`` swaps in the coding toolset and when it doesn't."""

    def test_selects_coding_under_focus(self, tmp_path):
        _git_init(tmp_path)
        selection = cc.coding_selection(
            platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "focus"}}
        )
        assert selection is not None
        assert selection[0] == cc.CODING_TOOLSET

    def test_auto_is_prompt_only(self, tmp_path):
        # Default posture must never override the user's configured toolsets —
        # off-by-default toolsets are already off, and explicit opt-ins
        # (image-gen, spotify, …) survive entering a code workspace.
        _git_init(tmp_path)
        config = {"agent": {"coding_context": "auto"}}
        selection = cc.coding_selection(platform="cli", cwd=tmp_path, config=config)
        assert selection is None
        # …while the prompt posture is still active.
        active = cc.is_coding_context(platform="cli", cwd=tmp_path, config=config)
        assert active is True

    def test_on_is_prompt_only(self, tmp_path):
        config = {"agent": {"coding_context": "on"}}
        selection = cc.coding_selection(platform="cli", cwd=tmp_path, config=config)
        assert selection is None
        active = cc.is_coding_context(platform="cli", cwd=tmp_path, config=config)
        assert active is True

    def test_focus_requires_workspace(self, tmp_path):
        # focus inherits auto's detection gate — bare dir stays general.
        selection = cc.coding_selection(
            platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "focus"}}
        )
        assert selection is None

    def test_none_when_inactive(self, tmp_path):
        selection = cc.coding_selection(
            platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "off"}}
        )
        assert selection is None

    def test_coding_toolset_is_registered(self):
        from toolsets import resolve_toolset

        tools = resolve_toolset(cc.CODING_TOOLSET)

        # Coding essentials present…
        essentials = ("read_file", "write_file", "patch", "search_files", "terminal", "todo")
        missing = [tool for tool in essentials if tool not in tools]
        assert missing == []

        # …and the noise is gone.
        noise = ("send_message", "text_to_speech", "image_generate", "computer_use")
        leaked = [tool for tool in noise if tool in tools]
        assert leaked == []
# ── git/workspace probe ─────────────────────────────────────────────────────
class TestWorkspaceBlock:
    """Git-derived lines of the ``build_coding_workspace_block`` snapshot."""

    def test_empty_outside_repo(self, tmp_path):
        # A bare directory yields no snapshot at all.
        snapshot = cc.build_coding_workspace_block(tmp_path)
        assert snapshot == ""

    def test_reports_branch_and_clean_status(self, tmp_path):
        _git_init(tmp_path)
        snapshot = cc.build_coding_workspace_block(tmp_path)
        assert "Workspace" in snapshot
        # The second alternative keeps this lenient: any "Root:" line passes
        # even when the reported path is spelled differently from resolve().
        assert f"Root: {tmp_path.resolve()}" in snapshot or "Root:" in snapshot
        for expected in ("Branch: main", "Status: clean", "init commit"):
            assert expected in snapshot

    def test_reports_dirty_counts(self, tmp_path):
        _git_init(tmp_path)
        (tmp_path / "untracked.txt").write_text("hi")
        snapshot = cc.build_coding_workspace_block(tmp_path)
        assert "untracked" in snapshot
        # Only the Status line itself is inspected for the word "clean".
        status_line = snapshot.split("Status:")[1].splitlines()[0]
        assert "clean" not in status_line
# ── project facts (verify-loop detection) ───────────────────────────────────
class TestProjectFacts:
    """Project-manifest facts and verify commands surfaced in the snapshot."""

    def test_package_json_scripts_surface_verify_commands(self, tmp_path):
        _git_init(tmp_path)
        manifest = {"scripts": {"test": "vitest", "lint": "eslint .", "dev": "vite"}}
        (tmp_path / "package.json").write_text(json.dumps(manifest))
        (tmp_path / "pnpm-lock.yaml").write_text("")

        snapshot = cc.build_coding_workspace_block(tmp_path)

        assert "Project: package.json (pnpm)" in snapshot
        assert "pnpm run test" in snapshot
        assert "pnpm run lint" in snapshot
        # Non-verify scripts (dev servers, …) stay out of the snapshot.
        assert "run dev" not in snapshot

    def test_pytest_config_and_run_tests_script(self, tmp_path):
        _git_init(tmp_path)
        (tmp_path / "pyproject.toml").write_text("[tool.pytest.ini_options]\n")
        scripts_dir = tmp_path / "scripts"
        scripts_dir.mkdir()
        (scripts_dir / "run_tests.sh").write_text("#!/bin/sh\n")

        snapshot = cc.build_coding_workspace_block(tmp_path)

        assert "scripts/run_tests.sh" in snapshot
        after_verify = snapshot.split("Verify:")[1]
        assert "pytest" in after_verify

    def test_makefile_verify_targets_only(self, tmp_path):
        _git_init(tmp_path)
        makefile = "test:\n\tgo test ./...\n\ndeploy:\n\t./deploy.sh\n"
        (tmp_path / "Makefile").write_text(makefile)

        snapshot = cc.build_coding_workspace_block(tmp_path)

        assert "make test" in snapshot
        assert "make deploy" not in snapshot

    def test_context_files_listed(self, tmp_path):
        _git_init(tmp_path)
        (tmp_path / "AGENTS.md").write_text("# rules")
        snapshot = cc.build_coding_workspace_block(tmp_path)
        assert "Context files: AGENTS.md" in snapshot

    def test_marker_only_project_gets_snapshot_without_git(self, tmp_path):
        # A non-git project (manifest only) still gets a workspace snapshot —
        # just without the git lines.
        (tmp_path / "package.json").write_text("{}")

        snapshot = cc.build_coding_workspace_block(tmp_path)

        assert f"Root: {tmp_path.resolve()}" in snapshot
        assert "package.json" in snapshot
        assert "Branch:" not in snapshot
        assert "Status:" not in snapshot

    def test_malformed_package_json_is_ignored(self, tmp_path):
        # The manifest is still named, but no verify commands come from it.
        _git_init(tmp_path)
        (tmp_path / "package.json").write_text("{not json")

        snapshot = cc.build_coding_workspace_block(tmp_path)

        assert "Project: package.json" in snapshot
        assert "Verify:" not in snapshot
# ── $HOME dotfiles guard ────────────────────────────────────────────────────
class TestHomeDotfilesGuard:
    """Detection must not treat the user's home directory itself as a project."""

    def test_dotfiles_repo_at_home_is_not_coding(self, tmp_path, monkeypatch):
        """A git repo rooted at home, and a plain subdirectory of it, stay general."""
        fake_home = tmp_path / "home"
        fake_home.mkdir()
        _git_init(fake_home)
        monkeypatch.setattr(Path, "home", lambda: fake_home)
        auto_cfg = {"agent": {"coding_context": "auto"}}

        at_home = cc.is_coding_context(platform="cli", cwd=fake_home, config=auto_cfg)
        assert at_home is False

        # …and a plain subdirectory of the dotfiles repo stays general too.
        documents = fake_home / "Documents"
        documents.mkdir()
        in_subdir = cc.is_coding_context(platform="cli", cwd=documents, config=auto_cfg)
        assert in_subdir is False

    def test_marker_at_home_is_not_a_project_signal(self, tmp_path, monkeypatch):
        """A project marker file directly in home does not trigger detection."""
        fake_home = tmp_path / "home"
        fake_home.mkdir()
        (fake_home / "Makefile").write_text("all:\n")
        monkeypatch.setattr(Path, "home", lambda: fake_home)
        auto_cfg = {"agent": {"coding_context": "auto"}}

        detected = cc.is_coding_context(platform="cli", cwd=fake_home, config=auto_cfg)
        assert detected is False

    def test_real_project_under_dotfiles_home_still_detects(self, tmp_path, monkeypatch):
        """A manifest-bearing directory below a dotfiles home is still a project."""
        fake_home = tmp_path / "home"
        fake_home.mkdir()
        _git_init(fake_home)
        monkeypatch.setattr(Path, "home", lambda: fake_home)
        project_dir = fake_home / "www" / "app"
        project_dir.mkdir(parents=True)
        (project_dir / "package.json").write_text("{}")
        auto_cfg = {"agent": {"coding_context": "auto"}}

        detected = cc.is_coding_context(platform="cli", cwd=project_dir, config=auto_cfg)
        assert detected is True

    def test_on_mode_bypasses_the_guard(self, tmp_path, monkeypatch):
        """With ``coding_context: on`` even a bare home directory counts as coding."""
        fake_home = tmp_path / "home"
        fake_home.mkdir()
        monkeypatch.setattr(Path, "home", lambda: fake_home)
        forced_cfg = {"agent": {"coding_context": "on"}}

        detected = cc.is_coding_context(platform="cli", cwd=fake_home, config=forced_cfg)
        assert detected is True
# ── prompt assembly integration ─────────────────────────────────────────────
class TestStatusParsing:
    """Parsing of porcelain status text into branch info and change counts."""

    def test_parse_status_counts_and_branch(self):
        """Header lines fill the branch mapping; entry lines are tallied by kind."""
        status_lines = [
            "# branch.head feature\n",
            "# branch.upstream origin/feature\n",
            "# branch.ab +2 -1\n",
            "1 M. N... 100644 100644 100644 aaa bbb staged.py\n",
            "1 .M N... 100644 100644 100644 ccc ddd modified.py\n",
            "? new.py\n",
            "u UU N... 1 2 3 abc def conflict.py\n",
        ]

        branch_info, tallies = cc._parse_status("".join(status_lines))

        assert branch_info["head"] == "feature"
        assert branch_info["upstream"] == "origin/feature"
        assert branch_info["ahead"] == "2"
        assert branch_info["behind"] == "1"
        assert tallies["staged"] == 1
        assert tallies["modified"] == 1
        assert tallies["untracked"] == 1
        assert tallies["conflicts"] == 1
# ── RuntimeMode seam ────────────────────────────────────────────────────────
class TestRuntimeMode:
    """Behaviour of the object returned by ``cc.resolve_runtime_mode``."""

    def test_resolves_coding_in_repo(self, tmp_path):
        """Inside a git repo the CLI resolves to the coding profile."""
        _git_init(tmp_path)
        resolved = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={})
        assert resolved.is_coding is True
        assert resolved.kind == "coding"
        assert resolved.profile is cc.CODING_PROFILE

    def test_resolves_general_outside_workspace(self, tmp_path):
        """A bare directory resolves to the general posture."""
        resolved = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={})
        assert resolved.is_coding is False
        assert resolved.kind == "general"
        # General posture pins no toolset and injects no blocks.
        assert resolved.toolset_selection() is None
        assert resolved.system_blocks() == []

    def test_is_frozen(self, tmp_path):
        """Assigning to an attribute of the resolved mode raises."""
        resolved = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={})
        with pytest.raises(Exception):
            resolved.profile = cc.CODING_PROFILE  # type: ignore[misc]

    def test_system_blocks_include_brief_and_workspace(self, tmp_path):
        """With coding forced on, the blocks contain the brief and a workspace section."""
        _git_init(tmp_path)
        forced_cfg = {"agent": {"coding_context": "on"}}
        resolved = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config=forced_cfg)
        emitted = resolved.system_blocks()
        assert any("coding agent" in text for text in emitted)
        assert any("Workspace" in text for text in emitted)

    def test_toolset_selection_gated_on_focus(self, tmp_path):
        """Only ``focus`` pins the coding toolset; ``auto``/``on`` leave it unset."""
        _git_init(tmp_path)
        focus_cfg = {"agent": {"coding_context": "focus"}}
        focused = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config=focus_cfg)
        selection = focused.toolset_selection()
        assert selection
        assert selection[0] == cc.CODING_TOOLSET

        # auto/on resolve the coding profile but stay prompt-only.
        for setting in ("auto", "on"):
            prompt_only_cfg = {"agent": {"coding_context": setting}}
            resolved = cc.resolve_runtime_mode(
                platform="cli", cwd=tmp_path, config=prompt_only_cfg
            )
            assert resolved.is_coding is True
            assert resolved.toolset_selection() is None
# ── edit-format steering (per-model harness tuning) ──────────────────────────
class TestEditFormatSteering:
    """Model-family detection and the edit-format line added to the coding brief."""

    def test_family_detection(self):
        """Model ids map to ``patch``, ``replace`` or ``None`` as expected."""
        for model_id in ("openai/gpt-5.4", "openai/codex-mini"):
            assert cc._model_family(model_id) == "patch"

        for model_id in ("anthropic/claude-opus-4.8", "anthropic/claude-sonnet-4"):
            assert cc._model_family(model_id) == "replace"

        # Gemini + open-weight coding models (RL'd on str_replace-style
        # editors) steer to replace, not neutral.
        replace_steered = (
            "google/gemini-3-pro",
            "deepseek-v3.2",
            "qwen3-coder",
            "moonshot/kimi-k2",
            "zai/glm-4.6",
            "nousresearch/hermes-4-405b",
        )
        for model_id in replace_steered:
            assert cc._model_family(model_id) == "replace"

        # Unknown family and no model both fall through to neutral wording.
        for model_id in ("acme/foo-1", None, ""):
            assert cc._model_family(model_id) is None

    def test_openai_family_gets_v4a_nudge(self, tmp_path):
        """An OpenAI model id adds the patch-mode / V4A wording to the brief."""
        _git_init(tmp_path)
        resolved = cc.resolve_runtime_mode(
            platform="cli",
            cwd=tmp_path,
            config={"agent": {"coding_context": "on"}},
            model="openai/gpt-5.4",
        )
        first_block = resolved.system_blocks()[0]
        assert "mode='patch'" in first_block
        assert "V4A" in first_block
        assert "write_file" in first_block  # new files authored, not patched

    def test_anthropic_family_gets_replace_nudge(self, tmp_path):
        """An Anthropic model id adds the replace-mode wording to the brief."""
        _git_init(tmp_path)
        resolved = cc.resolve_runtime_mode(
            platform="cli",
            cwd=tmp_path,
            config={"agent": {"coding_context": "on"}},
            model="anthropic/claude-opus-4.8",
        )
        first_block = resolved.system_blocks()[0]
        assert "mode='replace'" in first_block
        assert "write_file" in first_block  # new files authored, not patched

    def test_unknown_model_keeps_neutral_brief(self, tmp_path):
        """An unrecognised model leaves the brief equal to the bare guidance."""
        # No edit-format line appended — brief equals the bare profile guidance.
        _git_init(tmp_path)
        resolved = cc.resolve_runtime_mode(
            platform="cli",
            cwd=tmp_path,
            config={"agent": {"coding_context": "on"}},
            model="acme/foo-1",
        )
        first_block = resolved.system_blocks()[0]
        assert first_block == cc.CODING_AGENT_GUIDANCE

    def test_no_model_keeps_neutral_brief(self, tmp_path):
        """Omitting the model leaves the brief equal to the bare guidance."""
        _git_init(tmp_path)
        resolved = cc.resolve_runtime_mode(
            platform="cli",
            cwd=tmp_path,
            config={"agent": {"coding_context": "on"}},
        )
        first_block = resolved.system_blocks()[0]
        assert first_block == cc.CODING_AGENT_GUIDANCE

    def test_general_posture_emits_nothing_regardless_of_model(self, tmp_path):
        """Outside the coding posture no blocks are emitted, whatever the model."""
        # Edit steering only fires inside the coding posture.
        resolved = cc.resolve_runtime_mode(
            platform="telegram",
            cwd=tmp_path,
            config={},
            model="openai/gpt-5.4",
        )
        assert resolved.system_blocks() == []
# ── profile registry ────────────────────────────────────────────────────────
class TestProfiles:
    """Profile registry lookups and the shape of the built-in profiles."""

    def test_registered_profiles(self):
        """Both built-in names resolve to their module-level profile objects."""
        expected = {"coding": cc.CODING_PROFILE, "general": cc.GENERAL_PROFILE}
        for profile_name, profile_obj in expected.items():
            assert cc.get_profile(profile_name) is profile_obj

    def test_unknown_profile_falls_back_to_general(self):
        """An unregistered name returns the general profile."""
        fallback = cc.get_profile("nonsense")
        assert fallback is cc.GENERAL_PROFILE

    def test_coding_profile_shape(self):
        """The coding profile carries toolset/guidance/hint; general carries none."""
        # The coding profile declares the seams other domains read.
        coding_profile = cc.CODING_PROFILE
        assert coding_profile.toolset == cc.CODING_TOOLSET
        assert coding_profile.guidance
        assert coding_profile.model_hint == "coding"
        # General is inert.
        general_profile = cc.GENERAL_PROFILE
        assert general_profile.toolset is None
        assert general_profile.guidance == ""

    def test_skill_pruning_scoped_to_coding_posture(self, tmp_path):
        """Coding hides some skill categories; the general posture hides none."""
        # Coding posture hides clearly-non-coding categories; coding-adjacent
        # ones stay visible (deny-list semantics).
        _git_init(tmp_path)
        coding_mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={})
        pruned = coding_mode.hidden_skill_categories()
        assert "social-media" in pruned
        assert "smart-home" in pruned
        for visible in ("github", "devops", "software-development", "data-science"):
            assert visible not in pruned

        # General posture hides nothing.
        general_mode = cc.resolve_runtime_mode(
            platform="telegram", cwd=tmp_path, config={}
        )
        assert general_mode.hidden_skill_categories() == frozenset()
# ── detection signals ───────────────────────────────────────────────────────
class TestDetection:
    """Signals that make ``auto`` mode classify a directory as a coding workspace."""

    @pytest.mark.parametrize("marker", ["pyproject.toml", "package.json", "go.mod", "AGENTS.md"])
    def test_project_manifest_triggers_without_git(self, tmp_path, marker):
        """Each marker file alone is enough, with no git repository present."""
        marker_file = tmp_path / marker
        marker_file.write_text("x")
        auto_cfg = {"agent": {"coding_context": "auto"}}
        detected = cc.is_coding_context(platform="cli", cwd=tmp_path, config=auto_cfg)
        assert detected is True

    def test_marker_in_parent_counts_from_subdir(self, tmp_path):
        """A marker in an ancestor directory is honoured from a nested cwd."""
        (tmp_path / "pyproject.toml").write_text("x")
        nested = tmp_path / "src" / "pkg"
        nested.mkdir(parents=True)
        auto_cfg = {"agent": {"coding_context": "auto"}}
        detected = cc.is_coding_context(platform="cli", cwd=nested, config=auto_cfg)
        assert detected is True

    def test_bare_dir_is_not_coding(self, tmp_path):
        """An empty directory is not a coding workspace."""
        auto_cfg = {"agent": {"coding_context": "auto"}}
        detected = cc.is_coding_context(platform="cli", cwd=tmp_path, config=auto_cfg)
        assert detected is False

View file

@ -276,6 +276,42 @@ class TestBuildSkillsSystemPrompt:
# "search" should appear only once per category
assert result.count("- search") == 1
def test_hidden_categories_pruned_with_note(self, monkeypatch, tmp_path):
    """Posture-driven pruning drops whole categories and discloses it."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    skill_specs = (("social-media", "tweet-stuff"), ("github", "pr-review"))
    for category, name in skill_specs:
        skill_dir = tmp_path / "skills" / category / name
        skill_dir.mkdir(parents=True)
        manifest = f"---\nname: {name}\ndescription: Does {name} things\n---\n"
        (skill_dir / "SKILL.md").write_text(manifest)

    prompt_text = build_skills_system_prompt(
        hidden_categories=frozenset({"social-media"})
    )

    assert "pr-review" in prompt_text
    assert "tweet-stuff" not in prompt_text
    # Disclosure note so the model knows the full catalog exists.
    assert "skills_list" in prompt_text
def test_hidden_categories_prune_nested_and_miss_cache_separately(
    self, monkeypatch, tmp_path
):
    """Hiding a parent category hides nested skills; an unfiltered call still lists them."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    nested_skill_dir = tmp_path / "skills" / "social-media" / "twitter" / "thread-writer"
    nested_skill_dir.mkdir(parents=True)
    manifest = "---\nname: thread-writer\ndescription: Write threads\n---\n"
    (nested_skill_dir / "SKILL.md").write_text(manifest)

    # Nested category ("social-media/twitter") pruned via its parent.
    filtered_prompt = build_skills_system_prompt(
        hidden_categories=frozenset({"social-media"})
    )
    assert "thread-writer" not in filtered_prompt

    # Unfiltered call must not be served from the filtered cache entry.
    unfiltered_prompt = build_skills_system_prompt()
    assert "thread-writer" in unfiltered_prompt
def test_excludes_incompatible_platform_skills(self, monkeypatch, tmp_path):
"""Skills with platforms: [macos] should not appear on Linux."""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))

View file

@ -55,3 +55,44 @@ class TestContextFileCwd:
def test_configured_dir_when_terminal_cwd_set(self, monkeypatch, tmp_path):
    """With ``TERMINAL_CWD`` set, the captured context cwd equals that directory."""
    monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))
    captured_cwd = _captured_context_cwd(_make_agent())
    assert captured_cwd == tmp_path
def _stable_prompt(agent):
    """Return the ``"stable"`` prompt part for *agent* with four builders stubbed.

    The soul, subscription, environment-hint and context-file builders in
    ``run_agent`` are patched to return empty strings for the duration of the
    call.
    """
    with (
        patch("run_agent.load_soul_md", return_value=""),
        patch("run_agent.build_nous_subscription_prompt", return_value=""),
        patch("run_agent.build_environment_hints", return_value=""),
        patch("run_agent.build_context_files_prompt", return_value=""),
    ):
        stable_part = build_system_prompt_parts(agent)["stable"]
    return stable_part
class TestCodingContextBlock:
    """Presence or absence of the coding brief in the stable system prompt."""

    @staticmethod
    def _enter_git_workspace(monkeypatch, tmp_path):
        """Initialise a git repo in *tmp_path* and point ``TERMINAL_CWD`` at it."""
        import subprocess

        subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True)
        monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))

    def test_injected_when_active(self, monkeypatch, tmp_path):
        """A CLI agent with tools in a git repo gets the brief and workspace text."""
        self._enter_git_workspace(monkeypatch, tmp_path)
        agent = _make_agent(valid_tool_names=["read_file"], platform="cli")
        prompt_text = _stable_prompt(agent)
        assert "coding agent" in prompt_text
        assert "Workspace" in prompt_text

    def test_absent_when_off(self, monkeypatch, tmp_path):
        """When the mode resolver reports ``off`` the brief is not injected."""
        self._enter_git_workspace(monkeypatch, tmp_path)
        agent = _make_agent(valid_tool_names=["read_file"], platform="cli")
        # Patch the mode lookup so the rest of the real path runs with "off".
        with patch("agent.coding_context._coding_mode", return_value="off"):
            prompt_text = _stable_prompt(agent)
        assert "coding agent" not in prompt_text

    def test_absent_without_tools(self, monkeypatch, tmp_path):
        """An agent with no valid tools does not get the brief."""
        self._enter_git_workspace(monkeypatch, tmp_path)
        agent = _make_agent(valid_tool_names=[], platform="cli")
        prompt_text = _stable_prompt(agent)
        assert "coding agent" not in prompt_text

View file

@ -1,449 +0,0 @@
"""Tests for per-job profile support in cron jobs.
Covers data-layer validation/storage, cronjob tool plumbing, scheduler runtime
HERMES_HOME scoping, and tick() serialization for profile jobs.
"""
from __future__ import annotations
import json
import os
import pytest
@pytest.fixture()
def isolated_cron_profile_home(tmp_path, monkeypatch):
    """Create an isolated Hermes root with a named profile and temp cron store.

    Returns a ``(root, profile_home)`` tuple.
    """
    root = tmp_path / "hermes-root"
    profile_home = root / "profiles" / "support"
    profile_home.mkdir(parents=True)
    cron_dir = root / "cron"
    cron_dir.mkdir(parents=True)

    monkeypatch.setenv("HERMES_HOME", str(root))
    # Redirect the cron store's module-level paths into the temp root.
    monkeypatch.setattr("cron.jobs.CRON_DIR", cron_dir)
    monkeypatch.setattr("cron.jobs.JOBS_FILE", cron_dir / "jobs.json")
    monkeypatch.setattr("cron.jobs.OUTPUT_DIR", cron_dir / "output")
    return root, profile_home
class TestNormalizeProfile:
    """Validation and normalisation performed by ``cron.jobs._normalize_profile``."""

    def test_none_and_empty_return_none(self, isolated_cron_profile_home):
        """``None``, the empty string and a blank string all normalise to ``None``."""
        from cron.jobs import _normalize_profile

        for blank in (None, "", " "):
            assert _normalize_profile(blank) is None

    def test_default_profile_is_valid_and_normalized(self, isolated_cron_profile_home):
        """``Default`` is accepted and lower-cased."""
        from cron.jobs import _normalize_profile

        normalized = _normalize_profile("Default")
        assert normalized == "default"

    def test_named_profile_must_exist_and_is_normalized(self, isolated_cron_profile_home):
        """The fixture's existing profile is accepted and lower-cased."""
        from cron.jobs import _normalize_profile

        normalized = _normalize_profile("Support")
        assert normalized == "support"

    def test_invalid_profile_name_is_rejected(self, isolated_cron_profile_home):
        """A name with an illegal character raises ``ValueError``."""
        from cron.jobs import _normalize_profile

        with pytest.raises(ValueError):
            _normalize_profile("invalid!")

    def test_missing_named_profile_is_rejected(self, isolated_cron_profile_home):
        """A well-formed name with no profile directory raises ``FileNotFoundError``."""
        from cron.jobs import _normalize_profile

        with pytest.raises(FileNotFoundError):
            _normalize_profile("missing")
class TestCreateAndUpdateJobProfile:
    """How ``create_job`` / ``update_job`` persist the ``profile`` field."""

    def test_create_stores_profile_id(self, isolated_cron_profile_home):
        """A profile given at creation is stored in normalised form."""
        from cron.jobs import create_job, get_job

        created = create_job(prompt="hello", schedule="every 1h", profile="Support")
        persisted = get_job(created["id"])
        assert persisted is not None
        assert persisted["profile"] == "support"

    def test_create_without_profile_preserves_old_behaviour(self, isolated_cron_profile_home):
        """Omitting the profile leaves the stored job without one."""
        from cron.jobs import create_job, get_job

        created = create_job(prompt="hello", schedule="every 1h")
        persisted = get_job(created["id"])
        assert persisted is not None
        assert persisted.get("profile") is None

    def test_create_accepts_explicit_default(self, isolated_cron_profile_home):
        """An explicit ``default`` profile is stored as given."""
        from cron.jobs import create_job, get_job

        created = create_job(prompt="hello", schedule="every 1h", profile="default")
        persisted = get_job(created["id"])
        assert persisted is not None
        assert persisted["profile"] == "default"

    def test_update_sets_and_clears_profile(self, isolated_cron_profile_home):
        """An update can set a profile and an empty string clears it again."""
        from cron.jobs import create_job, get_job, update_job

        job_id = create_job(prompt="x", schedule="every 1h")["id"]

        update_job(job_id, {"profile": "Support"})
        after_set = get_job(job_id)
        assert after_set is not None
        assert after_set["profile"] == "support"

        update_job(job_id, {"profile": ""})
        after_clear = get_job(job_id)
        assert after_clear is not None
        assert after_clear["profile"] is None

    def test_update_rejects_missing_profile(self, isolated_cron_profile_home):
        """Updating to a profile that does not exist raises ``FileNotFoundError``."""
        from cron.jobs import create_job, update_job

        created = create_job(prompt="x", schedule="every 1h")
        with pytest.raises(FileNotFoundError):
            update_job(created["id"], {"profile": "missing"})
class TestCronjobToolProfile:
    """Profile plumbing through the ``cronjob`` tool and its schema."""

    def test_create_and_list_with_profile(self, isolated_cron_profile_home):
        """A created job reports its normalised profile, as does the listing."""
        from tools.cronjob_tools import cronjob

        create_reply = cronjob(
            action="create",
            prompt="hi",
            schedule="every 1h",
            profile="Support",
        )
        created = json.loads(create_reply)
        assert created["success"] is True
        assert created["job"]["profile"] == "support"

        listing = json.loads(cronjob(action="list"))
        first_job = listing["jobs"][0]
        assert first_job["profile"] == "support"

    def test_update_clears_profile_with_empty_string(self, isolated_cron_profile_home):
        """Updating with ``profile=""`` removes the key from the returned job."""
        from tools.cronjob_tools import cronjob

        create_reply = cronjob(
            action="create",
            prompt="hi",
            schedule="every 1h",
            profile="Support",
        )
        created = json.loads(create_reply)

        update_reply = cronjob(action="update", job_id=created["job_id"], profile="")
        updated = json.loads(update_reply)
        assert updated["success"] is True
        assert "profile" not in updated["job"]

    def test_schema_advertises_profile(self):
        """The schema exposes ``profile`` with the expected wording in its description."""
        from tools.cronjob_tools import CRONJOB_SCHEMA

        assert "profile" in CRONJOB_SCHEMA["parameters"]["properties"]
        description = CRONJOB_SCHEMA["parameters"]["properties"]["profile"]["description"]
        desc_lower = description.lower()
        for required_phrase in ("hermes profile", "context-local", "subprocess"):
            assert required_phrase in desc_lower
        assert "temporarily sets hermes_home" not in desc_lower
class TestRunJobProfileContext:
    """Checks what ``cron.scheduler.run_job`` exposes to the agent for profile jobs.

    Each test records the environment and Hermes-home values seen while a
    stub agent is constructed and run, then asserts on those observations and
    on the process state after ``run_job`` returns.
    """

    @staticmethod
    def _install_agent_stubs(monkeypatch, observed: dict):
        """Replace the agent, provider resolution and dotenv loading with stubs.

        ``observed`` is filled in by the stubs: ``FakeAgent`` records
        ``HERMES_HOME``, two test-only environment variables and both
        Hermes-home accessors during ``__init__`` and ``run_conversation``;
        the dotenv stub appends every path it is asked to load.
        """
        import sys
        import cron.scheduler as sched

        class FakeAgent:
            """Stand-in for ``run_agent.AIAgent`` that only records what it sees."""

            def __init__(self, **kwargs):
                from hermes_constants import get_hermes_home
                observed["env_home_during_init"] = os.environ.get("HERMES_HOME")
                observed["profile_env_only_during_init"] = os.environ.get(
                    "HERMES_PROFILE_TEST_ONLY"
                )
                observed["profile_env_shared_during_init"] = os.environ.get(
                    "HERMES_PROFILE_TEST_SHARED"
                )
                observed["hermes_home_during_init"] = str(get_hermes_home())
                observed["scheduler_home_during_init"] = str(sched._get_hermes_home())
                observed["skip_context_files"] = kwargs.get("skip_context_files")

            def run_conversation(self, *_a, **_kw):
                from hermes_constants import get_hermes_home
                observed["env_home_during_run"] = os.environ.get("HERMES_HOME")
                observed["profile_env_only_during_run"] = os.environ.get(
                    "HERMES_PROFILE_TEST_ONLY"
                )
                observed["profile_env_shared_during_run"] = os.environ.get(
                    "HERMES_PROFILE_TEST_SHARED"
                )
                observed["hermes_home_during_run"] = str(get_hermes_home())
                observed["scheduler_home_during_run"] = str(sched._get_hermes_home())
                return {"final_response": "done", "messages": []}

            def get_activity_summary(self):
                return {"seconds_since_activity": 0.0}

            def close(self):
                observed["closed"] = True

        # Register a synthetic ``run_agent`` module exposing the stub agent.
        fake_mod = type(sys)("run_agent")
        fake_mod.AIAgent = FakeAgent
        monkeypatch.setitem(sys.modules, "run_agent", fake_mod)
        from hermes_cli import runtime_provider as runtime_provider
        monkeypatch.setattr(
            runtime_provider,
            "resolve_runtime_provider",
            lambda **_kw: {
                "provider": "test",
                "api_key": "test-key",
                "base_url": "http://test.local",
                "api_mode": "chat_completions",
            },
        )
        # Replace the scheduler helpers with fixed-value stubs.
        monkeypatch.setattr(sched, "_build_job_prompt", lambda job, prerun_script=None: "hi")
        monkeypatch.setattr(sched, "_resolve_origin", lambda job: None)
        monkeypatch.setattr(sched, "_resolve_delivery_target", lambda job: None)
        monkeypatch.setattr(sched, "_resolve_cron_enabled_toolsets", lambda job, cfg: None)
        monkeypatch.setattr(sched, "_hermes_home", None)
        monkeypatch.setenv("HERMES_CRON_TIMEOUT", "0")
        import dotenv

        def fake_load_dotenv(path, *_a, **_kw):
            # Record the requested path instead of reading any file.
            observed.setdefault("dotenv_paths", []).append(str(path))
            return True

        monkeypatch.setattr(dotenv, "load_dotenv", fake_load_dotenv)

    def test_run_job_sets_and_restores_profile_home(
        self, isolated_cron_profile_home, monkeypatch
    ):
        """A profile job sees the profile home via the accessors, not via ``HERMES_HOME``.

        The ``HERMES_HOME`` variable is expected to stay at the root during
        init and run, while ``get_hermes_home()`` and the scheduler's accessor
        report the profile directory; afterwards both are back at the root.
        """
        import cron.scheduler as sched
        root, profile_home = isolated_cron_profile_home
        observed: dict = {}
        self._install_agent_stubs(monkeypatch, observed)
        job = {
            "id": "abc",
            "name": "profile-job",
            "profile": "support",
            "schedule_display": "manual",
        }
        success, _output, response, error = sched.run_job(job)
        assert success is True, f"run_job failed: error={error!r} response={response!r}"
        # Only the profile's own .env is expected to be loaded.
        assert observed["dotenv_paths"] == [str(profile_home / ".env")]
        assert observed["env_home_during_init"] == str(root)
        assert observed["env_home_during_run"] == str(root)
        assert observed["hermes_home_during_init"] == str(profile_home.resolve())
        assert observed["hermes_home_during_run"] == str(profile_home.resolve())
        assert observed["scheduler_home_during_init"] == str(profile_home.resolve())
        assert observed["scheduler_home_during_run"] == str(profile_home.resolve())
        assert observed["skip_context_files"] is True
        # State after the job: everything points at the root again.
        assert os.environ["HERMES_HOME"] == str(root)
        assert sched._get_hermes_home() == root

    def test_profile_dotenv_environment_is_restored(
        self, isolated_cron_profile_home, monkeypatch
    ):
        """Variables set by the profile's dotenv load are visible during the job only.

        The dotenv stub overwrites one pre-existing variable, adds a new one
        and changes ``HERMES_CRON_TIMEOUT``; after ``run_job`` all three are
        expected to be back at their pre-job state.
        """
        import dotenv
        import cron.scheduler as sched
        root, profile_home = isolated_cron_profile_home
        observed: dict = {}
        self._install_agent_stubs(monkeypatch, observed)
        monkeypatch.setenv("HERMES_PROFILE_TEST_SHARED", "outer")
        monkeypatch.delenv("HERMES_PROFILE_TEST_ONLY", raising=False)

        def fake_load_dotenv(path, *_a, **_kw):
            # Mimic a .env load by mutating os.environ directly.
            observed.setdefault("dotenv_paths", []).append(str(path))
            os.environ["HERMES_PROFILE_TEST_SHARED"] = "profile-value"
            os.environ["HERMES_PROFILE_TEST_ONLY"] = "profile-only"
            os.environ["HERMES_CRON_TIMEOUT"] = "123"
            return True

        # Overrides the recording-only stub installed by _install_agent_stubs.
        monkeypatch.setattr(dotenv, "load_dotenv", fake_load_dotenv)
        job = {
            "id": "env-profile",
            "name": "profile-env-job",
            "profile": "support",
            "schedule_display": "manual",
        }
        success, _output, _response, error = sched.run_job(job)
        assert success is True, error
        assert observed["dotenv_paths"] == [str(profile_home / ".env")]
        assert observed["profile_env_only_during_init"] == "profile-only"
        assert observed["profile_env_shared_during_init"] == "profile-value"
        assert observed["profile_env_only_during_run"] == "profile-only"
        assert observed["profile_env_shared_during_run"] == "profile-value"
        # After the job the outer environment is expected to be restored.
        assert os.environ["HERMES_PROFILE_TEST_SHARED"] == "outer"
        assert "HERMES_PROFILE_TEST_ONLY" not in os.environ
        assert os.environ["HERMES_CRON_TIMEOUT"] == "0"
        assert os.environ["HERMES_HOME"] == str(root)
        assert sched._get_hermes_home() == root

    def test_no_agent_profile_uses_profile_scripts_dir_and_restores_env(
        self, isolated_cron_profile_home, monkeypatch
    ):
        """A ``no_agent`` script job runs the script from the profile's ``scripts`` dir.

        The script prints its ``HERMES_HOME``; the response is expected to be
        the resolved profile home, and the caller's environment is unchanged
        afterwards.
        """
        import cron.scheduler as sched
        root, profile_home = isolated_cron_profile_home
        scripts_dir = profile_home / "scripts"
        scripts_dir.mkdir(parents=True)
        (scripts_dir / "print_home.py").write_text(
            "import os\nprint(os.environ.get('HERMES_HOME', ''))\n",
            encoding="utf-8",
        )
        monkeypatch.setattr(sched, "_hermes_home", None)
        job = {
            "id": "script1",
            "name": "profile-script",
            "profile": "support",
            "script": "print_home.py",
            "no_agent": True,
        }
        success, _doc, response, error = sched.run_job(job)
        assert success is True, error
        assert response.strip() == str(profile_home.resolve())
        assert os.environ["HERMES_HOME"] == str(root)
        assert sched._get_hermes_home() == root

    def test_run_job_without_profile_leaves_hermes_home_untouched(
        self, isolated_cron_profile_home, monkeypatch
    ):
        """A job whose profile is ``None`` runs with the root as its Hermes home."""
        import cron.scheduler as sched
        root, _profile_home = isolated_cron_profile_home
        observed: dict = {}
        self._install_agent_stubs(monkeypatch, observed)
        job = {
            "id": "noprof",
            "name": "no-profile-job",
            "profile": None,
            "schedule_display": "manual",
        }
        success, *_ = sched.run_job(job)
        assert success is True
        assert observed["hermes_home_during_init"] == str(root)
        assert os.environ["HERMES_HOME"] == str(root)

    def test_run_job_falls_back_on_missing_runtime_profile(
        self, isolated_cron_profile_home, monkeypatch
    ):
        """A job naming a profile with no directory still succeeds under the root home."""
        import cron.scheduler as sched
        root, _profile_home = isolated_cron_profile_home
        observed: dict = {}
        self._install_agent_stubs(monkeypatch, observed)
        job = {
            "id": "missing-profile",
            "name": "missing-profile-job",
            "profile": "missing",
            "schedule_display": "manual",
        }
        # Should succeed with fallback, not raise
        success, _output, response, error = sched.run_job(job)
        assert success is True, f"run_job should fallback, not fail: error={error!r}"
        # Verify it used the default home, not the missing profile
        assert observed["hermes_home_during_init"] == str(root)
        assert os.environ["HERMES_HOME"] == str(root)
class TestTickProfilePartition:
    """Profile jobs combined with ``workdir``, and how ``tick`` dispatches them."""

    def test_profile_and_workdir_combined(self, isolated_cron_profile_home, monkeypatch):
        """Both profile and workdir set — verify both are applied and restored."""
        import cron.scheduler as sched
        root, profile_home = isolated_cron_profile_home
        observed: dict = {}
        # Reuse the stub installer defined on TestRunJobProfileContext.
        TestRunJobProfileContext._install_agent_stubs(monkeypatch, observed)
        fake_workdir = str(root / "myproject")
        (root / "myproject").mkdir()
        job = {
            "id": "combo",
            "name": "combo-job",
            "profile": "support",
            "workdir": fake_workdir,
            "schedule_display": "manual",
        }
        success, _output, _response, error = sched.run_job(job)
        assert success is True, error
        assert observed["hermes_home_during_init"] == str(profile_home.resolve())
        assert os.environ.get("TERMINAL_CWD", "") != fake_workdir, \
            "TERMINAL_CWD should be restored after job"
        assert os.environ["HERMES_HOME"] == str(root)
        assert sched._get_hermes_home() == root

    def test_profile_jobs_run_sequentially(self, isolated_cron_profile_home, monkeypatch):
        """``tick`` runs profile jobs in order on ``cron-seq`` and others on ``cron-parallel``."""
        import threading
        import cron.scheduler as sched
        # Two profile jobs (both sequential) + one parallel job.
        profile_a = {"id": "a", "name": "A", "profile": "default"}
        profile_b = {"id": "b", "name": "B", "profile": "default"}
        parallel_job = {"id": "c", "name": "C", "profile": None}
        monkeypatch.setattr(sched, "get_due_jobs", lambda: [profile_a, profile_b, parallel_job])
        monkeypatch.setattr(sched, "advance_next_run", lambda *_a, **_kw: None)
        # (job id, executing thread name) pairs, appended under a lock.
        calls: list[tuple[str, str]] = []
        order_lock = threading.Lock()

        def fake_run_job(job):
            with order_lock:
                calls.append((job["id"], threading.current_thread().name))
            return True, "output", "response", None

        monkeypatch.setattr(sched, "run_job", fake_run_job)
        monkeypatch.setattr(sched, "save_job_output", lambda _jid, _o: None)
        monkeypatch.setattr(sched, "mark_job_run", lambda *_a, **_kw: None)
        monkeypatch.setattr(sched, "_deliver_result", lambda *_a, **_kw: None)
        n = sched.tick(verbose=False)
        assert n == 3
        ids = [job_id for job_id, _thread_name in calls]
        # Sequential profile jobs preserve submission order relative to each
        # other (single-thread pool).
        assert ids.index("a") < ids.index("b")
        # Sequential (profile) jobs run on the persistent single-thread
        # cron-seq pool — NOT the main thread — so a long profile job never
        # blocks the ticker. Parallel jobs run on the cron-parallel pool.
        for jid in ("a", "b"):
            seq_thread = next(t for job_id, t in calls if job_id == jid)
            assert seq_thread != threading.current_thread().name
            assert seq_thread.startswith("cron-seq"), seq_thread
        par_thread = next(t for job_id, t in calls if job_id == "c")
        assert par_thread.startswith("cron-parallel"), par_thread

View file

@ -172,10 +172,10 @@ class TestSyncMode:
class TestSequentialPool:
"""Sequential (workdir/profile) jobs use the persistent cron-seq pool.
"""Sequential (workdir) jobs use the persistent cron-seq pool.
Verifies the follow-up fix: env/context-mutating jobs no longer run inline
in the ticker thread, so a long workdir/profile job can't starve the
Verifies the follow-up fix: env-mutating jobs no longer run inline
in the ticker thread, so a long workdir job can't starve the
schedule the same way the parallel path used to.
"""

View file

@ -1487,7 +1487,7 @@ class TestRunJobConfigLogging:
}
# Mock heavy post-yaml work so the test only exercises the warning
# path. Without these mocks, run_job continues into provider
# resolution and MCP discovery, both of which can spawn subprocesses
# / hit the network and have caused this test to time out on CI
# (>30s wall clock) under load. See PR #33661 follow-up.

View file

@ -55,7 +55,6 @@ class TestCronCommandLifecycle:
repeat=None,
skill=None,
skills=["maps", "blogwatcher"],
profile="default",
clear_skills=False,
)
)
@ -64,7 +63,6 @@ class TestCronCommandLifecycle:
assert updated["name"] == "Edited Job"
assert updated["prompt"] == "Revised prompt"
assert updated["schedule_display"] == "every 120m"
assert updated["profile"] == "default"
cron_command(
Namespace(
@ -77,14 +75,12 @@ class TestCronCommandLifecycle:
repeat=None,
skill=None,
skills=None,
profile="",
clear_skills=True,
)
)
cleared = get_job(job["id"])
assert cleared["skills"] == []
assert cleared["skill"] is None
assert cleared["profile"] is None
out = capsys.readouterr().out
assert "Updated job" in out
@ -100,7 +96,6 @@ class TestCronCommandLifecycle:
repeat=None,
skill=None,
skills=["blogwatcher", "maps"],
profile="default",
)
)
out = capsys.readouterr().out
@ -110,7 +105,6 @@ class TestCronCommandLifecycle:
assert len(jobs) == 1
assert jobs[0]["skills"] == ["blogwatcher", "maps"]
assert jobs[0]["name"] == "Skill combo"
assert jobs[0]["profile"] == "default"
def test_list_does_not_crash_when_repeat_is_null(self, tmp_cron_dir, capsys):
"""A one-shot job can be persisted with ``"repeat": null``. `cron

View file

@ -47,20 +47,19 @@ def test_cron_aliases():
def test_cron_create_options():
    """``cron create`` parses the schedule, the prompt and each option flag.

    The block previously held both the superseded and the current lines of
    the change side by side: two positional prompts, a duplicated ``--workdir``
    row and two contradictory ``ns.prompt`` assertions. Only the current side
    is kept, so the argv list is a single valid invocation.
    """
    parser = _build()
    ns = parser.parse_args([
        "cron", "create", "0 9 * * *", "daily task prompt",
        "--name", "daily", "--deliver", "origin", "--repeat", "3",
        "--skill", "a", "--skill", "b", "--no-agent",
        "--workdir", "/tmp/x",
    ])
    assert ns.schedule == "0 9 * * *"
    assert ns.prompt == "daily task prompt"
    assert ns.name == "daily"
    assert ns.deliver == "origin"
    # --repeat is expected as an int; repeated --skill flags form a list.
    assert ns.repeat == 3
    assert ns.skills == ["a", "b"]
    assert ns.no_agent is True
    assert ns.workdir == "/tmp/x"
def test_cron_edit_no_agent_tristate():

View file

@ -425,3 +425,43 @@ def test_tui_launch_install_uses_workspace_scope(
install_cmd = npm_calls[0]
assert "--workspace" in install_cmd
assert "ui-tui" in install_cmd
def test_make_tui_argv_omits_workspace_when_tui_has_own_lockfile(
    tmp_path: Path, main_mod, monkeypatch
) -> None:
    """Plain ``npm install`` is used when ui-tui/ ships its own lockfile.

    When ui-tui/ has its own package-lock.json, _workspace_root returns
    tui_dir itself. ``npm install --workspace ui-tui`` would fail in that
    case because npm cannot find a workspace named "ui-tui" inside ui-tui/.
    The fix omits --workspace and runs plain npm install from tui_dir.

    See #42973.
    """
    tui_dir = tmp_path / "ui-tui"
    tui_dir.mkdir()
    # Simulate curl-install layout: tui_dir has its own manifest and lockfile.
    for filename in ("package.json", "package-lock.json"):
        (tui_dir / filename).write_text("{}")
    # Parent also has lockfile (but _workspace_root prefers tui_dir's own)
    (tmp_path / "package-lock.json").write_text("{}")

    monkeypatch.delenv("TERMUX_VERSION", raising=False)
    monkeypatch.setenv("PREFIX", "/usr")
    monkeypatch.setattr(main_mod, "_tui_need_npm_install", lambda _root: True)
    monkeypatch.setattr(main_mod.shutil, "which", lambda name: f"/bin/{name}")

    recorded = []

    def fake_run(*args, **kwargs):
        recorded.append((args, kwargs))
        return types.SimpleNamespace(returncode=0, stdout="", stderr="")

    monkeypatch.setattr(main_mod.subprocess, "run", fake_run)

    main_mod._make_tui_argv(tui_dir, tui_dev=False)

    first_args, first_kwargs = recorded[0]
    install_cmd = first_args[0]
    # Must NOT contain --workspace when npm_cwd == tui_dir
    assert "--workspace" not in install_cmd, (
        f"npm install should omit --workspace when tui_dir has its own lockfile, got: {install_cmd}"
    )
    assert install_cmd[:2] == ["/bin/npm", "install"]
    # cwd must be tui_dir (standalone), not parent
    assert first_kwargs["cwd"] == str(tui_dir)

View file

@ -0,0 +1,210 @@
"""Regression tests for dashboard profile-scoped skills/toolsets management.
"Set as active" on the Profiles page only flips the sticky ``active_profile``
file (future CLI/gateway runs) it never retargets the running dashboard
process. Before the ``profile`` parameter existed, toggling a skill after
"activating" a profile silently wrote into the dashboard's own config.
These tests pin the new behavior: reads and writes land in the REQUESTED
profile's HERMES_HOME, and the dashboard's own profile stays untouched.
"""
import pytest
import yaml
def _write_skill(skills_dir, name, description="test skill"):
d = skills_dir / name
d.mkdir(parents=True, exist_ok=True)
(d / "SKILL.md").write_text(
f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n",
encoding="utf-8",
)
@pytest.fixture
def isolated_profiles(tmp_path, monkeypatch, _isolate_hermes_home):
    """Isolated default home + one named profile, each with its own skills.

    Returns a mapping from profile name to its home directory.
    """
    from hermes_constants import get_hermes_home
    from hermes_cli import profiles

    default_home = get_hermes_home()
    profiles_root = default_home / "profiles"
    worker_home = profiles_root / "worker_alpha"
    homes = {"default": default_home, "worker_alpha": worker_home}

    # Every home gets an empty config and a skills directory.
    for home_dir in homes.values():
        (home_dir / "skills").mkdir(parents=True, exist_ok=True)
        (home_dir / "config.yaml").write_text("{}\n", encoding="utf-8")

    # One distinct skill per home so the tests can tell them apart.
    _write_skill(default_home / "skills", "dashboard-skill")
    _write_skill(worker_home / "skills", "worker-skill")

    monkeypatch.setattr(profiles, "_get_default_hermes_home", lambda: default_home)
    monkeypatch.setattr(profiles, "_get_profiles_root", lambda: profiles_root)
    return homes
@pytest.fixture
def client(monkeypatch, isolated_profiles):
    """Return a ``TestClient`` for the dashboard app with the session header set.

    The test is skipped when starlette is not importable. The state DB path
    is redirected into the isolated Hermes home before the client is built.
    """
    try:
        from starlette.testclient import TestClient
    except ImportError:
        pytest.skip("fastapi/starlette not installed")

    import hermes_state
    from hermes_constants import get_hermes_home
    from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN

    isolated_db = get_hermes_home() / "state.db"
    monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", isolated_db)

    test_client = TestClient(app)
    test_client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
    return test_client
def _load_cfg(home):
    """Parse ``<home>/config.yaml`` and return it as a dict.

    An empty document (``yaml.safe_load`` returning a falsy value) yields
    ``{}`` so callers can chain ``.get`` without a ``None`` check.
    """
    # The fixtures in this file write config.yaml as UTF-8; read it back with
    # the same explicit encoding instead of the platform's locale default.
    return yaml.safe_load((home / "config.yaml").read_text(encoding="utf-8")) or {}
class TestProfileScopedSkills:
    """Skill listing and toggling must honor the requested ``profile``."""

    def test_skills_list_scopes_to_requested_profile(self, client, isolated_profiles):
        """A scoped listing contains only the requested profile's skill."""
        resp = client.get("/api/skills", params={"profile": "worker_alpha"})
        assert resp.status_code == 200
        names = {s["name"] for s in resp.json()}
        assert "worker-skill" in names
        assert "dashboard-skill" not in names

    def test_skills_list_without_profile_uses_dashboard_home(
        self, client, isolated_profiles
    ):
        """Without ``profile`` the listing comes from the dashboard's own home."""
        resp = client.get("/api/skills")
        assert resp.status_code == 200
        names = {s["name"] for s in resp.json()}
        assert "dashboard-skill" in names
        assert "worker-skill" not in names

    def test_toggle_writes_into_target_profile_only(self, client, isolated_profiles):
        """Disabling a skill with ``profile`` set edits only that profile's config."""
        resp = client.put(
            "/api/skills/toggle",
            json={"name": "worker-skill", "enabled": False, "profile": "worker_alpha"},
        )
        assert resp.status_code == 200
        assert resp.json() == {"ok": True, "name": "worker-skill", "enabled": False}

        worker_cfg = _load_cfg(isolated_profiles["worker_alpha"])
        assert "worker-skill" in worker_cfg.get("skills", {}).get("disabled", [])

        # The dashboard's own config must stay untouched — this was the bug.
        default_cfg = _load_cfg(isolated_profiles["default"])
        assert "worker-skill" not in default_cfg.get("skills", {}).get("disabled", [])

    def test_toggle_reenable_round_trip(self, client, isolated_profiles):
        """Disable then re-enable; the skill must leave the disabled list again."""

        def _toggle(enabled):
            return client.put(
                "/api/skills/toggle",
                json={
                    "name": "worker-skill",
                    "enabled": enabled,
                    "profile": "worker_alpha",
                },
            )

        def _disabled():
            worker_cfg = _load_cfg(isolated_profiles["worker_alpha"])
            return worker_cfg.get("skills", {}).get("disabled", [])

        # Check every response and the intermediate state: without these the
        # final "not in disabled" assertion also holds when both requests
        # fail and nothing was ever written.
        assert _toggle(False).status_code == 200
        assert "worker-skill" in _disabled()

        assert _toggle(True).status_code == 200
        assert "worker-skill" not in _disabled()

    def test_unknown_profile_returns_404(self, client, isolated_profiles):
        """A well-formed but nonexistent profile name yields 404."""
        resp = client.get("/api/skills", params={"profile": "no_such_profile"})
        assert resp.status_code == 404

    def test_invalid_profile_name_returns_400(self, client, isolated_profiles):
        """A malformed profile name is rejected with 400."""
        resp = client.get("/api/skills", params={"profile": "Bad Name!"})
        assert resp.status_code == 400

    def test_scope_restores_module_globals(self, client, isolated_profiles):
        """The SKILLS_DIR swap is per-request; the module global must be
        restored even after a scoped call (cron-style locked swap)."""
        import tools.skills_tool as skills_tool

        before = skills_tool.SKILLS_DIR
        resp = client.get("/api/skills", params={"profile": "worker_alpha"})
        # Make sure the scoped request really succeeded; a rejected request
        # would leave the global untouched and prove nothing about restore.
        assert resp.status_code == 200
        assert skills_tool.SKILLS_DIR == before
class TestProfileScopedToolsets:
    """Toolset toggles and listings must honor the requested ``profile``."""

    def test_toolset_toggle_scopes_to_profile(self, client, isolated_profiles):
        """Enabling a toolset for a profile changes only that profile's config."""

        def _cli_toolsets(profile_key):
            cfg = _load_cfg(isolated_profiles[profile_key])
            return cfg.get("platform_toolsets", {}).get("cli", [])

        def _by_name(entries):
            return {t["name"]: t for t in entries}

        resp = client.put(
            "/api/tools/toolsets/x_search",
            json={"enabled": True, "profile": "worker_alpha"},
        )
        assert resp.status_code == 200

        assert "x_search" in _cli_toolsets("worker_alpha")
        assert "x_search" not in _cli_toolsets("default")

        scoped = client.get(
            "/api/tools/toolsets", params={"profile": "worker_alpha"}
        ).json()
        assert _by_name(scoped)["x_search"]["enabled"] is True

        # Unscoped listing reflects the dashboard's own (untouched) config.
        unscoped = client.get("/api/tools/toolsets").json()
        assert _by_name(unscoped)["x_search"]["enabled"] is False

    def test_toolset_toggle_unknown_profile_404(self, client, isolated_profiles):
        """Toggling a toolset for a nonexistent profile yields 404."""
        payload = {"enabled": True, "profile": "ghost"}
        resp = client.put("/api/tools/toolsets/x_search", json=payload)
        assert resp.status_code == 404
class TestProfileScopedHubActions:
    """Hub install requests are handed to a spawned ``hermes`` action."""

    def test_hub_install_spawns_with_profile_flag(
        self, client, isolated_profiles, monkeypatch
    ):
        """Hub installs must go through a fresh ``hermes -p <profile>``
        subprocess — the in-process scope can't reach skills_hub's
        import-time SKILLS_DIR binding."""
        import hermes_cli.web_server as web_server

        spawned = []

        class _FakeProc:
            pid = 4242

        def _record_spawn(subcommand, name):
            spawned.append((list(subcommand), name))
            return _FakeProc()

        monkeypatch.setattr(web_server, "_spawn_hermes_action", _record_spawn)

        payload = {"identifier": "official/demo", "profile": "worker_alpha"}
        resp = client.post("/api/skills/hub/install", json=payload)

        assert resp.status_code == 200
        expected_argv = ["-p", "worker_alpha", "skills", "install", "official/demo"]
        assert spawned == [(expected_argv, "skills-install")]

    def test_hub_install_without_profile_keeps_legacy_argv(
        self, client, isolated_profiles, monkeypatch
    ):
        """Without ``profile`` the spawned argv carries no ``-p`` flag."""
        import hermes_cli.web_server as web_server

        spawned_argv = []

        class _FakeProc:
            pid = 4242

        def _record_spawn(subcommand, name):
            spawned_argv.append(list(subcommand))
            return _FakeProc()

        monkeypatch.setattr(web_server, "_spawn_hermes_action", _record_spawn)

        resp = client.post(
            "/api/skills/hub/install", json={"identifier": "official/demo"}
        )

        assert resp.status_code == 200
        assert spawned_argv == [["skills", "install", "official/demo"]]

    def test_hub_install_unknown_profile_404(self, client, isolated_profiles):
        """Installing into a nonexistent profile yields 404."""
        payload = {"identifier": "official/demo", "profile": "ghost"}
        resp = client.post("/api/skills/hub/install", json=payload)
        assert resp.status_code == 404

View file

@ -142,6 +142,11 @@ class TestBuildWebUISkipsWhenFresh:
def test_npm_install_uses_workspace_web_scope(self, tmp_path):
web_dir, _ = _make_web_dir(tmp_path)
# Real workspace checkout: the single lockfile lives at the root, so
# _workspace_root(web_dir) resolves to the parent and --workspace web
# scopes the install. (Without a root lockfile, web_dir IS the root and
# --workspace would be dropped — see test below and #42973.)
(tmp_path / "package-lock.json").write_text("{}", encoding="utf-8")
mock_cp = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
build_ok = __import__("subprocess").CompletedProcess([], 0, stdout="", stderr="")
with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
@ -153,6 +158,36 @@ class TestBuildWebUISkipsWhenFresh:
assert "--workspace" in install_cmd
assert install_cmd[install_cmd.index("--workspace") + 1] == "web"
def test_web_install_omits_workspace_when_web_has_own_lockfile(
    self, tmp_path, monkeypatch
):
    """web/ with its own lockfile => _workspace_root returns web_dir, so
    --workspace web would fail (npm can't find that workspace from inside
    web/). The flag must be dropped and the install run plainly from web_dir.

    Symmetric to the TUI fix in test_tui_npm_install.py. See #42973.

    With web's own lockfile present at cwd, _run_npm_install_deterministic
    uses ``npm ci`` (not ``npm install``).
    """
    web_dir, _ = _make_web_dir(tmp_path)
    # Lockfiles in BOTH web/ and the parent directory.
    for lock_dir in (web_dir, tmp_path):
        (lock_dir / "package-lock.json").write_text("{}", encoding="utf-8")

    monkeypatch.delenv("TERMUX_VERSION", raising=False)
    monkeypatch.setenv("PREFIX", "/usr")

    completed = __import__("subprocess").CompletedProcess
    install_cp = completed([], 0, stdout="", stderr="")
    build_cp = completed([], 0, stdout="", stderr="")

    with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
         patch("hermes_cli.main.subprocess.run", return_value=install_cp) as mock_run, \
         patch("hermes_cli.main._run_with_idle_timeout", return_value=build_cp):
        result = _build_web_ui(web_dir)

    assert result is True

    install_argv = mock_run.call_args.args[0]
    install_kwargs = mock_run.call_args.kwargs
    assert "--workspace" not in install_argv
    assert install_argv == ["/usr/bin/npm", "ci", "--silent"]
    assert install_kwargs["cwd"] == web_dir
def test_web_build_uses_idle_timeout_helper(self, tmp_path):
"""npm run build now goes through _run_with_idle_timeout (issue #33788).

View file

@ -459,8 +459,6 @@ def _format_job(job: Dict[str, Any]) -> Dict[str, Any]:
result["enabled_toolsets"] = job["enabled_toolsets"]
if job.get("workdir"):
result["workdir"] = job["workdir"]
if job.get("profile"):
result["profile"] = job["profile"]
return result
@ -483,7 +481,6 @@ def cronjob(
context_from: Optional[Union[str, List[str]]] = None,
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
profile: Optional[str] = None,
no_agent: Optional[bool] = None,
task_id: str = None,
) -> str:
@ -550,7 +547,6 @@ def cronjob(
context_from=context_from,
enabled_toolsets=enabled_toolsets or None,
workdir=_normalize_optional_job_value(workdir),
profile=_normalize_optional_job_value(profile),
no_agent=_no_agent,
)
return json.dumps(
@ -685,10 +681,6 @@ def cronjob(
# Empty string clears the field (restores old behaviour);
# otherwise pass raw — update_job() validates / normalizes.
updates["workdir"] = _normalize_optional_job_value(workdir) or None
if profile is not None:
# Empty string clears the field (restores old behaviour);
# otherwise pass raw — update_job() validates / normalizes.
updates["profile"] = _normalize_optional_job_value(profile) or None
if no_agent is not None:
# Toggling no_agent on/off at update time. If flipping to True,
# we need a script to already exist on the job (or be part of
@ -842,10 +834,6 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
"type": "string",
"description": "Optional absolute path to run the job from. When set, AGENTS.md / CLAUDE.md / .cursorrules from that directory are injected into the system prompt, and the terminal/file/code_exec tools use it as their working directory — useful for running a job inside a specific project repo. Must be an absolute path that exists. When unset (default), preserves the original behaviour: no project context files, tools use the scheduler's cwd. On update, pass an empty string to clear. Jobs with workdir run sequentially (not parallel) to keep per-job directories isolated."
},
"profile": {
"type": "string",
"description": "Optional Hermes profile name to run the job under. When set, the scheduler resolves that profile, applies a context-local Hermes home override, loads that profile's config/.env for the run, and bridges HERMES_HOME into subprocesses. Any temporary process-environment changes from profile .env loading are restored after the job exits. Use 'default' for the root Hermes profile. Named profiles must already exist. When unset (default), preserves the scheduler's existing profile. On update, pass an empty string to clear. Jobs with profile run sequentially (not parallel) to keep profile-scoped runtime state isolated."
},
},
"required": ["action"]
}
@ -900,7 +888,6 @@ registry.register(
context_from=args.get("context_from"),
enabled_toolsets=args.get("enabled_toolsets"),
workdir=args.get("workdir"),
profile=args.get("profile"),
no_agent=args.get("no_agent"),
task_id=kw.get("task_id"),
))(),

View file

@ -339,6 +339,33 @@ TOOLSETS = {
"tools": [],
"includes": ["web", "vision", "image_gen"]
},
# Coding posture (base Hermes — CLI/TUI/desktop/ACP). Auto-selected in a
# code workspace; see agent/coding_context.py. Keeps everything you reach
# for while pairing on code and drops the rest (messaging, tts, image_gen,
# spotify, home-assistant, cron, computer-use).
"coding": {
"description": "Coding-focused toolset: files, terminal, search, web docs, skills, todo, delegate, vision, browser",
"tools": [
"web_search", "web_extract",
"terminal", "process", "read_terminal",
"read_file", "write_file", "patch", "search_files",
"vision_analyze",
"skills_list", "skill_view", "skill_manage",
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_get_images",
"browser_vision", "browser_console", "browser_cdp", "browser_dialog",
"todo", "memory",
"session_search", "clarify",
"execute_code", "delegate_task",
],
"includes": [],
# Posture toolset: selected per-session by agent/coding_context.py,
# never auto-recovered into per-platform tool config (see the
# non-configurable-toolset recovery loop in hermes_cli/tools_config.py).
"posture": True,
},
# ==========================================================================
# Full Hermes toolsets (CLI + messaging platforms)

View file

@ -1680,6 +1680,22 @@ def _load_enabled_toolsets() -> list[str] | None:
cfg = None
fallback_notice = None
# Coding posture (base Hermes): with no explicit pin, collapse to the
# coding toolset (+ enabled MCP servers) when sitting in a code workspace.
# The desktop app and `hermes --tui` both land here. See
# agent/coding_context.py. No config is loaded yet at this point, so we let
# coding_selection() load it lazily (cli.py passes its already-resolved
# CLI_CONFIG instead, purely to avoid a redundant read).
if not explicit:
try:
from agent.coding_context import coding_selection
selection = coding_selection(platform="tui")
if selection is not None:
return selection
except Exception:
pass
try:
from toolsets import validate_toolset
except Exception:

View file

@ -20,6 +20,9 @@ import { cn, themedBody } from "@/lib/utils";
interface Props {
/** The toolset whose backends are being configured. */
toolset: ToolsetInfo;
/** Optional profile to scope config reads/writes to (Skills page profile
* selector). Omitted = the dashboard process's own profile. */
profile?: string;
onClose: () => void;
/** Called after a toggle/provider/key change so the parent grid refreshes. */
onChanged: () => void;
@ -31,7 +34,7 @@ interface Props {
* the toolset on/off, pick a provider, enter API keys, and run a provider's
* post-setup install hook (npm/pip/binary) with a live log tail.
*/
export function ToolsetConfigDrawer({ toolset, onClose, onChanged }: Props) {
export function ToolsetConfigDrawer({ toolset, profile, onClose, onChanged }: Props) {
const { toast, showToast } = useToast();
const [config, setConfig] = useState<ToolsetConfig | null>(null);
const [loading, setLoading] = useState(true);
@ -60,7 +63,7 @@ export function ToolsetConfigDrawer({ toolset, onClose, onChanged }: Props) {
// react-hooks/set-state-in-effect — setState only fires inside the
// async .then/.catch/.finally callbacks.
return api
.getToolsetConfig(toolset.name)
.getToolsetConfig(toolset.name, profile)
.then((cfg) => {
setConfig(cfg);
setActiveProvider(cfg.active_provider);
@ -72,7 +75,7 @@ export function ToolsetConfigDrawer({ toolset, onClose, onChanged }: Props) {
})
.catch(() => showToast("Failed to load toolset config", "error"))
.finally(() => setLoading(false));
}, [toolset.name, showToast]);
}, [toolset.name, profile, showToast]);
useEffect(() => {
void loadConfig();
@ -121,7 +124,7 @@ export function ToolsetConfigDrawer({ toolset, onClose, onChanged }: Props) {
const handleToggle = async (next: boolean) => {
setToggling(true);
try {
await api.toggleToolset(toolset.name, next);
await api.toggleToolset(toolset.name, next, profile);
setEnabled(next);
showToast(
`${toolset.label || toolset.name} ${next ? "enabled" : "disabled"}`,
@ -138,7 +141,7 @@ export function ToolsetConfigDrawer({ toolset, onClose, onChanged }: Props) {
const handleSelectProvider = async (provider: ToolsetProvider) => {
setSelecting(provider.name);
try {
await api.selectToolsetProvider(toolset.name, provider.name);
await api.selectToolsetProvider(toolset.name, provider.name, profile);
setActiveProvider(provider.name);
showToast(`Provider set to ${provider.name}`, "success");
onChanged();
@ -164,7 +167,7 @@ export function ToolsetConfigDrawer({ toolset, onClose, onChanged }: Props) {
}
setSavingProvider(provider.name);
try {
const res = await api.saveToolsetEnv(toolset.name, env);
const res = await api.saveToolsetEnv(toolset.name, env, profile);
setIsSet((prev) => ({ ...prev, ...res.is_set }));
// Clear saved drafts so the inputs reset to the "saved" placeholder.
setDrafts((prev) => {

View file

@ -408,6 +408,10 @@ export const en: Translations = {
setupNeeded: "Setup needed",
disabledForCli: "Disabled for CLI",
more: "+{count} more",
profileSelector: "Profile",
currentProfile: "current ({name})",
managingProfile:
"Managing profile \u201c{name}\u201d — toggles apply to that profile, not this dashboard\u2019s.",
},
config: {

View file

@ -404,6 +404,8 @@ export interface Translations {
modelSaved?: string;
modelSelect?: string;
actions?: string;
manageSkills?: string;
activeSetHint?: string;
};
// ── Skills page ──
@ -425,6 +427,10 @@ export interface Translations {
setupNeeded: string;
disabledForCli: string;
more: string;
/** Optional — fall back to English literals until translated. */
profileSelector?: string;
currentProfile?: string;
managingProfile?: string;
};
// ── Config page ──

View file

@ -249,6 +249,14 @@ export async function buildWsUrl(
return `${proto}//${window.location.host}${BASE}${path}?${qs}`;
}
/**
 * Return a ``?profile=<name>`` query suffix for `profile`, URL-encoding the
 * name, or the empty string when `profile` is undefined or empty.
 *
 * Used by the skills/toolsets endpoints so the dashboard can manage a
 * profile other than the one the server process runs under.
 */
function profileQuery(profile?: string): string {
  if (!profile) {
    return "";
  }
  const encoded = encodeURIComponent(profile);
  return `?profile=${encoded}`;
}
export const api = {
getStatus: () => fetchJSON<StatusResponse>("/api/status"),
/**
@ -542,43 +550,49 @@ export const api = {
),
// Skills & Toolsets
getSkills: () => fetchJSON<SkillInfo[]>("/api/skills"),
toggleSkill: (name: string, enabled: boolean) =>
//
// All calls accept an optional ``profile`` so the Skills page can manage
// any profile's skills/toolsets — not just the one the dashboard process
// runs under. Omitted/empty profile = the dashboard's own profile.
getSkills: (profile?: string) =>
fetchJSON<SkillInfo[]>(`/api/skills${profileQuery(profile)}`),
toggleSkill: (name: string, enabled: boolean, profile?: string) =>
fetchJSON<{ ok: boolean }>("/api/skills/toggle", {
method: "PUT",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name, enabled }),
body: JSON.stringify({ name, enabled, profile: profile || undefined }),
}),
getToolsets: () => fetchJSON<ToolsetInfo[]>("/api/tools/toolsets"),
toggleToolset: (name: string, enabled: boolean) =>
getToolsets: (profile?: string) =>
fetchJSON<ToolsetInfo[]>(`/api/tools/toolsets${profileQuery(profile)}`),
toggleToolset: (name: string, enabled: boolean, profile?: string) =>
fetchJSON<{ ok: boolean; name: string; enabled: boolean }>(
`/api/tools/toolsets/${encodeURIComponent(name)}`,
{
method: "PUT",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ enabled }),
body: JSON.stringify({ enabled, profile: profile || undefined }),
},
),
getToolsetConfig: (name: string) =>
getToolsetConfig: (name: string, profile?: string) =>
fetchJSON<ToolsetConfig>(
`/api/tools/toolsets/${encodeURIComponent(name)}/config`,
`/api/tools/toolsets/${encodeURIComponent(name)}/config${profileQuery(profile)}`,
),
selectToolsetProvider: (name: string, provider: string) =>
selectToolsetProvider: (name: string, provider: string, profile?: string) =>
fetchJSON<{ ok: boolean; name: string; provider: string }>(
`/api/tools/toolsets/${encodeURIComponent(name)}/provider`,
{
method: "PUT",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ provider }),
body: JSON.stringify({ provider, profile: profile || undefined }),
},
),
saveToolsetEnv: (name: string, env: Record<string, string>) =>
saveToolsetEnv: (name: string, env: Record<string, string>, profile?: string) =>
fetchJSON<ToolsetEnvResult>(
`/api/tools/toolsets/${encodeURIComponent(name)}/env`,
{
method: "PUT",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ env }),
body: JSON.stringify({ env, profile: profile || undefined }),
},
),
runToolsetPostSetup: (name: string, key: string) =>
@ -986,26 +1000,34 @@ export const api = {
fetchJSON<ActionResponse>("/api/ops/checkpoints/prune", { method: "POST" }),
// ── Admin: Skills hub ───────────────────────────────────────────────
installSkillFromHub: (identifier: string) =>
// ``profile`` scopes install/uninstall/update and the installed-state
// annotations to that profile (omitted = the dashboard's own profile).
installSkillFromHub: (identifier: string, profile?: string) =>
fetchJSON<ActionResponse>("/api/skills/hub/install", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ identifier }),
body: JSON.stringify({ identifier, profile: profile || undefined }),
}),
uninstallSkillFromHub: (name: string) =>
uninstallSkillFromHub: (name: string, profile?: string) =>
fetchJSON<ActionResponse>("/api/skills/hub/uninstall", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name }),
body: JSON.stringify({ name, profile: profile || undefined }),
}),
updateSkillsFromHub: () =>
fetchJSON<ActionResponse>("/api/skills/hub/update", { method: "POST" }),
searchSkillsHub: (q: string, source = "all", limit = 20) =>
updateSkillsFromHub: (profile?: string) =>
fetchJSON<ActionResponse>("/api/skills/hub/update", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ profile: profile || undefined }),
}),
searchSkillsHub: (q: string, source = "all", limit = 20, profile?: string) =>
fetchJSON<SkillHubSearchResponse>(
`/api/skills/hub/search?q=${encodeURIComponent(q)}&source=${encodeURIComponent(source)}&limit=${limit}`,
`/api/skills/hub/search?q=${encodeURIComponent(q)}&source=${encodeURIComponent(source)}&limit=${limit}${profile ? `&profile=${encodeURIComponent(profile)}` : ""}`,
),
getSkillHubSources: (profile?: string) =>
fetchJSON<SkillHubSourcesResponse>(
`/api/skills/hub/sources${profileQuery(profile)}`,
),
getSkillHubSources: () =>
fetchJSON<SkillHubSourcesResponse>("/api/skills/hub/sources"),
previewSkillFromHub: (identifier: string) =>
fetchJSON<SkillHubPreview>(
`/api/skills/hub/preview?identifier=${encodeURIComponent(identifier)}`,

View file

@ -96,6 +96,7 @@ function ProfileActionsMenu({
onEditDescription,
onEditModel,
onEditSoul,
onManageSkills,
onRename,
onSetActive,
}: ProfileActionsMenuProps) {
@ -201,6 +202,16 @@ function ProfileActionsMenu({
{labels.editSoul}
</button>
<button
type="button"
role="menuitem"
className={itemClass}
onClick={run(onManageSkills)}
>
<Package className="h-4 w-4" />
{labels.manageSkills}
</button>
<button
type="button"
role="menuitem"
@ -241,13 +252,13 @@ function ProfileActionsMenu({
}
export default function ProfilesPage() {
const navigate = useNavigate();
const [profiles, setProfiles] = useState<ProfileInfo[]>([]);
const [activeInfo, setActiveInfo] = useState<ActiveProfileInfo | null>(null);
const [loading, setLoading] = useState(true);
const { toast, showToast } = useToast();
const { t } = useI18n();
const { setEnd } = usePageHeader();
const navigate = useNavigate();
// Locale strings with English fallbacks. The enriched keys are optional in
// the i18n type so untranslated locales don't break the build — they render
@ -291,6 +302,10 @@ export default function ProfilesPage() {
modelSaved: p.modelSaved ?? "Model updated",
modelSelect: p.modelSelect ?? "Select a model",
actions: p.actions ?? "Actions",
manageSkills: p.manageSkills ?? "Manage skills & tools",
activeSetHint:
p.activeSetHint ??
"Applies to new CLI/gateway runs. This dashboard still manages its own profile — use “Manage skills & tools” to edit {name}.",
};
}, [t.profiles]);
@ -480,7 +495,14 @@ export default function ProfilesPage() {
// The backend normalizes/validates the name; trust the canonical
// value it returns rather than the raw input.
const { active } = await api.setActiveProfile(name);
showToast(`${L.activeSet}: ${active}`, "success");
// "Set as active" only flips the sticky default for FUTURE CLI/gateway
// invocations — it does NOT retarget this running dashboard. Say so,
// or users assume skill/tool toggles now apply to the activated
// profile (they don't — that's what "Manage skills & tools" is for).
showToast(
`${L.activeSet}: ${active}${L.activeSetHint.replace("{name}", active)}`,
"success",
);
setActiveInfo((prev) =>
prev ? { ...prev, active } : { active, current: active },
);
@ -1110,6 +1132,7 @@ export default function ProfilesPage() {
editModel: L.editModel,
editDescription: L.editDescription,
editSoul: t.profiles.editSoul,
manageSkills: L.manageSkills,
openInTerminal: t.profiles.openInTerminal,
rename: t.profiles.rename,
delete: t.common.delete,
@ -1121,6 +1144,11 @@ export default function ProfilesPage() {
onEditDescription={() => openDescEditor(p)}
onEditModel={() => openModelEditor(p)}
onEditSoul={() => openSoulEditor(p.name)}
onManageSkills={() =>
navigate(
`/skills?profile=${encodeURIComponent(p.name)}`,
)
}
onRename={() => {
setRenamingFrom(p.name);
setRenameTo(p.name);
@ -1375,6 +1403,7 @@ interface ProfileActionsMenuProps {
editDescription: string;
editModel: string;
editSoul: string;
manageSkills: string;
openInTerminal: string;
rename: string;
setActive: string;
@ -1385,6 +1414,7 @@ interface ProfileActionsMenuProps {
onEditDescription: () => void;
onEditModel: () => void;
onEditSoul: () => void;
onManageSkills: () => void;
onRename: () => void;
onSetActive: () => void;
}

View file

@ -25,6 +25,7 @@ import {
AlertTriangle,
Sparkles,
Loader2,
Users,
} from "lucide-react";
import { api } from "@/lib/api";
import type {
@ -35,7 +36,9 @@ import type {
SkillHubInstalledEntry,
SkillHubPreview,
SkillHubScan,
ProfileInfo,
} from "@/lib/api";
import { useSearchParams } from "react-router-dom";
import { ToolsetConfigDrawer } from "@/components/ToolsetConfigDrawer";
import { useToast } from "@nous-research/ui/hooks/use-toast";
import { Toast } from "@nous-research/ui/ui/components/toast";
@ -133,21 +136,79 @@ export default function SkillsPage() {
const { t } = useI18n();
const { setAfterTitle, setEnd } = usePageHeader();
// ── Profile scoping ──
// The dashboard process runs under ONE profile, but skills/toolsets are
// per-profile state. Without an explicit selector, users who "activated"
// a profile on the Profiles page (which only affects FUTURE CLI/gateway
// runs) toggled skills here and silently wrote into the dashboard's own
// profile. The selector makes the write target explicit and deep-linkable
// via /skills?profile=<name>.
const [searchParams, setSearchParams] = useSearchParams();
const [profiles, setProfiles] = useState<ProfileInfo[]>([]);
const [currentProfile, setCurrentProfile] = useState<string>("");
const urlProfile = searchParams.get("profile") ?? "";
// "" = the dashboard's own profile (legacy behavior).
const selectedProfile = urlProfile;
const setSelectedProfile = useCallback(
(name: string) => {
setSearchParams(
(prev) => {
const next = new URLSearchParams(prev);
if (name) next.set("profile", name);
else next.delete("profile");
return next;
},
{ replace: true },
);
},
[setSearchParams],
);
// The profile actually being managed, for display purposes.
const managedProfile = selectedProfile || currentProfile || "default";
const managingOtherProfile =
!!selectedProfile && selectedProfile !== currentProfile;
useEffect(() => {
Promise.all([api.getSkills(), api.getToolsets()])
// Profile list + the dashboard's own profile, for the selector. Failure
// leaves the selector hidden — the page still works profile-unscoped.
api
.getProfiles()
.then((res) => setProfiles(res.profiles))
.catch(() => {});
api
.getActiveProfile()
.then((info) => setCurrentProfile(info.current || "default"))
.catch(() => setCurrentProfile("default"));
}, []);
useEffect(() => {
// Promise-chain shape: setState fires only inside async callbacks so the
// effect body stays lint-clean (react-hooks/set-state-in-effect). On a
// profile switch the old list stays visible until the new one arrives.
let cancelled = false;
Promise.all([
api.getSkills(selectedProfile || undefined),
api.getToolsets(selectedProfile || undefined),
])
.then(([s, tsets]) => {
if (cancelled) return;
setSkills(s);
setToolsets(tsets);
})
.catch(() => showToast(t.common.loading, "error"))
.finally(() => setLoading(false));
}, []);
.catch(() => !cancelled && showToast(t.common.loading, "error"))
.finally(() => !cancelled && setLoading(false));
return () => {
cancelled = true;
};
}, [selectedProfile]);
/* ---- Toggle skill ---- */
const handleToggleSkill = async (skill: SkillInfo) => {
setTogglingSkills((prev) => new Set(prev).add(skill.name));
try {
await api.toggleSkill(skill.name, !skill.enabled);
await api.toggleSkill(skill.name, !skill.enabled, selectedProfile || undefined);
setSkills((prev) =>
prev.map((s) =>
s.name === skill.name ? { ...s, enabled: !s.enabled } : s,
@ -233,10 +294,37 @@ export default function SkillsPage() {
return;
}
setAfterTitle(
<span className="whitespace-nowrap text-xs text-muted-foreground">
<span className="flex items-center gap-2 whitespace-nowrap text-xs text-muted-foreground">
{t.skills.enabledOf
.replace("{enabled}", String(enabledCount))
.replace("{total}", String(skills.length))}
{profiles.length > 1 && (
<span className="flex items-center gap-1">
<Users className="h-3 w-3" />
<select
aria-label={t.skills.profileSelector ?? "Profile"}
className="h-6 rounded-none border border-border bg-background px-1 text-xs text-foreground"
value={selectedProfile}
onChange={(e: React.ChangeEvent<HTMLSelectElement>) =>
setSelectedProfile(e.target.value)
}
>
<option value="">
{(t.skills.currentProfile ?? "current ({name})").replace(
"{name}",
currentProfile || "default",
)}
</option>
{profiles
.filter((p) => p.name !== currentProfile)
.map((p) => (
<option key={p.name} value={p.name}>
{p.name}
</option>
))}
</select>
</span>
)}
</span>,
);
setEnd(
@ -265,7 +353,19 @@ export default function SkillsPage() {
setAfterTitle(null);
setEnd(null);
};
}, [enabledCount, loading, search, setAfterTitle, setEnd, skills.length, t]);
}, [
enabledCount,
loading,
search,
setAfterTitle,
setEnd,
skills.length,
t,
profiles,
selectedProfile,
currentProfile,
setSelectedProfile,
]);
const filteredToolsets = useMemo(() => {
return toolsets.filter(
@ -291,6 +391,18 @@ export default function SkillsPage() {
<PluginSlot name="skills:top" />
<Toast toast={toast} />
{managingOtherProfile && (
<div className="flex items-center gap-2 border border-amber-500/40 bg-amber-500/10 px-3 py-2 text-xs text-amber-300">
<Users className="h-3.5 w-3.5 shrink-0" />
<span>
{(
t.skills.managingProfile ??
"Managing profile “{name}” — toggles apply to that profile, not this dashboards."
).replace("{name}", managedProfile)}
</span>
</div>
)}
<div className="flex flex-col sm:flex-row sm:items-start gap-4">
<aside aria-label={t.skills.title} className="sm:w-56 sm:shrink-0">
<div className="sm:sticky sm:top-0">
@ -540,13 +652,14 @@ export default function SkillsPage() {
)}
</>
) : (
<HubBrowser showToast={showToast} />
<HubBrowser showToast={showToast} profile={selectedProfile || undefined} />
)}
</div>
</div>
{configToolset && (
<ToolsetConfigDrawer
toolset={configToolset}
profile={selectedProfile || undefined}
onClose={() => setConfigToolset(null)}
onChanged={() => void refreshToolsets()}
/>
@ -668,8 +781,11 @@ const SEVERITY_TONE: Record<string, "destructive" | "warning" | "secondary" | "o
function HubBrowser({
showToast,
profile,
}: {
showToast: (msg: string, kind: "success" | "error") => void;
/** Optional profile scoping installs + installed-state badges. */
profile?: string;
}) {
const [query, setQuery] = useState("");
const [results, setResults] = useState<SkillHubResult[]>([]);
@ -699,7 +815,7 @@ function HubBrowser({
useEffect(() => {
let cancelled = false;
api
.getSkillHubSources()
.getSkillHubSources(profile)
.then((r) => {
if (cancelled) return;
setSources(r.sources);
@ -715,7 +831,7 @@ function HubBrowser({
return () => {
cancelled = true;
};
}, []);
}, [profile]);
/* ---- Search ---- */
const runSearch = useCallback(async () => {
@ -725,7 +841,7 @@ function HubBrowser({
setSearched(true);
const t0 = performance.now();
try {
const r = await api.searchSkillsHub(q);
const r = await api.searchSkillsHub(q, "all", 20, profile);
setResults(r.results);
setSourceCounts(r.source_counts || {});
setTimedOut(r.timed_out || []);
@ -739,7 +855,7 @@ function HubBrowser({
setSearchMs(Math.round(performance.now() - t0));
setSearching(false);
}
}, [query, showToast]);
}, [query, showToast, profile]);
/* ---- Poll a spawned action's log until it exits ---- */
useEffect(() => {
@ -757,7 +873,7 @@ function HubBrowser({
} else {
// Install finished — refresh installed-state so badges update.
api
.getSkillHubSources()
.getSkillHubSources(profile)
.then((r) => !cancelled && setInstalled(r.installed))
.catch(() => {});
}
@ -770,12 +886,12 @@ function HubBrowser({
cancelled = true;
if (timer) clearTimeout(timer);
};
}, [action]);
}, [action, profile]);
const install = useCallback(
async (identifier: string) => {
try {
const res = await api.installSkillFromHub(identifier);
const res = await api.installSkillFromHub(identifier, profile);
showToast(`Installing ${identifier}`, "success");
setActionLog([]);
setActionRunning(true);
@ -785,12 +901,12 @@ function HubBrowser({
showToast(`Install failed: ${e}`, "error");
}
},
[showToast],
[showToast, profile],
);
const updateAll = useCallback(async () => {
try {
const res = await api.updateSkillsFromHub();
const res = await api.updateSkillsFromHub(profile);
showToast("Updating installed skills…", "success");
setActionLog([]);
setActionRunning(true);
@ -798,7 +914,7 @@ function HubBrowser({
} catch (e) {
showToast(`Update failed: ${e}`, "error");
}
}, [showToast]);
}, [showToast, profile]);
const isInstalled = useCallback(
(identifier: string) => Boolean(installed[identifier]),

View file

@ -125,35 +125,6 @@ When `workdir` is set:
Jobs with a `workdir` run sequentially on the scheduler tick, not in the parallel pool. This is deliberate: the cron worker applies the job workdir through process-global terminal state, so two workdir jobs running at the same time would corrupt each other's cwd. Workdir-less jobs still run in parallel as before.
:::
## Running cron jobs in a specific profile
By default a cron job inherits whichever Hermes profile owned the gateway / CLI that created it. Pass `--profile <name>` (CLI) or `profile=` (cronjob tool) to re-target the job at a different profile — the scheduler resolves that profile's `HERMES_HOME`, temporarily switches into it for the duration of the run, loads its `.env` + `config.yaml`, and executes the job there:
```bash
# Pin a job to the `night-ops` profile regardless of where it was scheduled
hermes cron create "every 1d at 03:00" \
"Tail the security log and flag anomalies" \
--profile night-ops
```
```python
# From a chat, via the cronjob tool
cronjob(
action="create",
schedule="every 1d at 03:00",
prompt="Tail the security log and flag anomalies",
profile="night-ops",
)
```
Use `--profile default` to explicitly pin to the root Hermes profile. The named profile must already exist; the scheduler refuses to create profiles on the fly. To clear a profile pin during `cron edit`, pass an empty string (`--profile ""` or `profile=""`) — the job reverts to running in whatever profile the scheduler itself is in.
If the pinned profile is later deleted, the scheduler logs a warning and falls back to running the job in its current profile rather than crashing — so a stale `profile` reference never wedges a job.
:::note Serialization
Jobs with a `profile` set also run sequentially, for the same reason as `workdir`-pinned jobs: switching `HERMES_HOME` is a process-global mutation, so two profile-pinned jobs running in parallel would race each other. Unpinned jobs still run in the normal parallel pool.
:::
## Editing jobs
You do not need to delete and recreate jobs just to change them.
@ -223,7 +194,7 @@ What they do:
- `resume` — re-enable the job and compute the next future run
- `run` — trigger the job on the next scheduler tick
- `remove` — delete it entirely
- `edit` — modify schedule, prompt, profile, delivery, etc.
- `edit` — modify schedule, prompt, delivery, etc.
**Name-based lookup.** All five mutating verbs (`pause`, `resume`, `run`, `remove`, `edit`) plus the agent's `cronjob` tool now accept a job **name** (case-insensitive) in place of the hex ID. The agent and CLI both prefer an exact ID match if one exists; ambiguous name matches (multiple jobs sharing the same name) are refused with the full list of candidate IDs so you can pick one explicitly. Names are not unique, so this guard is load-bearing — it prevents silently mutating the wrong job when two share a name.