From 3e74f75e41ecd5a3b937d692ba7dcffbf77304f6 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Wed, 10 Jun 2026 23:06:44 -0500 Subject: [PATCH] feat(agent): coding-context posture across CLI/TUI/desktop/ACP (#43316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(agent): coding-context posture with per-model edit-format tuning Hermes detects when it's running in a coding context — an interactive surface (CLI, TUI, ACP, desktop) sitting in a code workspace (git repo or recognised project root) — and shifts into a coding posture. Outside that (chat platforms, non-workspaces) nothing changes. The posture is modelled as a frozen RuntimeMode selected from a small ContextProfile registry (coding/general). A profile is data: the toolset to collapse to, the operating brief to inject, and seams for model routing and memory. Every domain reads the same resolved object instead of re-probing git/config on its own: - System prompt — RuntimeMode.system_blocks(): an operating brief (gather context before editing, edit through tools not chat, verify with terminal, cap retry loops) plus a live git/workspace snapshot, built once and baked into the stable prompt tier so per-conversation caching is preserved. - Per-model edit-format tuning — the brief nudges each model family toward the patch mode it handles best: OpenAI/Codex toward mode='patch' (V4A multi-file diffs), Anthropic toward mode='replace' (string replacement). The model id rides on RuntimeMode; unknown families keep neutral wording. - Skill index — non-coding skill categories are pruned from the prompt's skill index (discovery-only; skills_list/skill_view still reach the full catalog, with a disclosure note). - Toolset — only under the opt-in 'focus' mode does the posture collapse to the coding toolset + enabled MCP servers; the default posture is prompt-only and never overrides configured toolsets. Activation via agent.coding_context: auto (default), focus, on, off. Subagents inherit the posture for free via toolset inheritance + the shared prompt builder. Detection is not memoized so a long-lived gateway/TUI process can't pin a stale posture across working directories. * feat(agent): cover new-file authoring in the coding edit-format nudge The per-model edit-format guidance only addressed editing existing code (patch mode='patch' vs 'replace'), but authoring a brand-new file — write_file, not patch — is a large fraction of real coding work and the nudge was silent on it. Surfaced when building a single-file artifact where the dominant operation was write_file and the steering offered no guidance. Both family lines now lead with "author new files with write_file; for edits to existing code prefer ...". Tests assert write_file appears in each family's brief; unknown families still get neutral wording. * docs(agent): correct memoization docstring + clarify TUI config-load asymmetry * feat(agent): sharpen the coding posture — verify-loop facts, wider edit steering, $HOME guard Tuning pass on the coding posture from dogfooding it as a harness: - Workspace snapshot now hands the model its verify loop up front: detected manifests + package manager (lockfile sniff), the exact verify commands (package.json scripts, Makefile targets, scripts/run_tests.sh, pytest config), and which context files (AGENTS.md / CLAUDE.md / .cursorrules) exist at the root. Marker-only (non-git) projects get the snapshot too instead of nothing. The "verify before claiming done" brief line was the highest-value piece in evals — this turns it from advice into an executable loop instead of making the model rediscover the test command every session. Still stat-cheap, size-guarded reads, built once at prompt time. - Edit-format steering covers the families Hermes actually serves: Gemini and open-weight coding models (DeepSeek, Qwen, Kimi, GLM, Grok, Hermes, Llama, Mistral, Devstral, MiniMax) steer to mode='replace' — their RL scaffolds use str_replace-style editors. Previously only GPT/Codex and Claude families got steering; the models Hermes users disproportionately run all fell to neutral. - Operating brief gains four behaviors elite harnesses encode: batch independent reads/searches in one turn; fix root causes and the bug class (sibling call paths), not the reported site; no drive-by refactors/renames/reformatting; never read, print, or commit secrets. Plus a patch-failure escalation ladder: after the same region fails twice, rewrite the enclosing function/file with write_file instead of a third patch attempt. - $HOME dotfiles guard: a git repo rooted exactly at the home directory (or a marker sitting in it, e.g. a global ~/AGENTS.md) is user config, not a code workspace — without the guard, every session anywhere under a dotfiles-managed home silently flipped to the coding posture. Real projects under such a home still detect via their own markers/repos; 'on' mode bypasses the guard. --- agent/coding_context.py | 700 +++++++++++++++++++++++++++++ agent/prompt_builder.py | 31 +- agent/system_prompt.py | 32 ++ cli.py | 18 +- hermes_cli/config.py | 13 + hermes_cli/tools_config.py | 4 + tests/agent/test_coding_context.py | 405 +++++++++++++++++ tests/agent/test_prompt_builder.py | 36 ++ tests/agent/test_system_prompt.py | 41 ++ toolsets.py | 27 ++ tui_gateway/server.py | 16 + 11 files changed, 1319 insertions(+), 4 deletions(-) create mode 100644 agent/coding_context.py create mode 100644 tests/agent/test_coding_context.py diff --git a/agent/coding_context.py b/agent/coding_context.py new file mode 100644 index 0000000000..f0ed3296a0 --- /dev/null +++ b/agent/coding_context.py @@ -0,0 +1,700 @@ +"""Coding-context awareness — base Hermes, every interactive surface. + +When the user runs Hermes inside a code workspace (CLI, TUI, desktop app, or an +editor over ACP), Hermes shifts into a **coding posture**. This module is the +single place that decides whether we're in that posture and what it implies, +so the rest of the codebase never re-derives "are we coding?" on its own. + +Architecture — one seam, many consumers +---------------------------------------- +The posture is modelled as a frozen :class:`RuntimeMode` selected from a small +:class:`ContextProfile` registry (today: ``coding`` and ``general``). A profile +is *data* — it declares the toolset to collapse to, the operating brief to +inject, and hints for other domains (model routing, memory, subagents). Every +domain reads the same resolved object instead of probing git/config itself: + + * **System prompt** — ``RuntimeMode.system_blocks()`` → the operating brief + + a live git/workspace snapshot (``agent/system_prompt.py``). + * **Toolset** — ``RuntimeMode.toolset_selection()`` → the ``coding`` toolset + plus the user's enabled MCP servers (``cli.py`` / ``tui_gateway``). Only + under the opt-in ``focus`` mode: the default posture is prompt-only and + never touches the user's configured toolsets (toolsets like messaging / + smart-home / music are off-by-default anyway, and someone who explicitly + enabled image-gen or Spotify shouldn't lose it for being in a git repo). + * **Delegation** — subagents inherit the parent's toolset and run through the + same prompt builder, so the coding posture propagates to children for free. + * **Model / memory / compression** — declared on the profile + (``model_hint``, ``memory_policy``) as the extension seam; consumers read + ``mode.profile`` rather than re-deciding. + +Cache safety +------------ +The mode is resolved **once** and is immutable. The workspace snapshot is built +once at prompt-build time and baked into the *stable* system-prompt tier — never +re-probed per turn (that would shatter the prompt cache). Branch and dirty state +drift mid-session, so the brief tells the model to re-check with ``git`` before +acting on the snapshot. A ``/coding`` flip therefore only takes effect next +session (deferred), the same contract as ``/skills install`` vs ``--now``. + +Activation (config ``agent.coding_context``): + + * ``auto`` (default) — posture (brief + snapshot) on an interactive coding + surface sitting in a code workspace (git repo or recognised project root). + Prompt-only; toolsets untouched. + * ``focus`` — like ``auto``, but additionally collapses the toolset to the + ``coding`` set + enabled MCP servers. Explicit opt-in for a lean schema. + * ``on`` — force the posture anywhere (incl. non-workspaces). Prompt-only. + * ``off`` — disable entirely. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Optional + +logger = logging.getLogger("hermes.coding_context") + +CODING_TOOLSET = "coding" + +# Surfaces where a coding posture makes sense under ``auto``. Messaging +# platforms (telegram, discord, slack, …) are intentionally absent — a chat bot +# in a group is not pair-programming. +INTERACTIVE_CODING_PLATFORMS = {"cli", "tui", "acp", "desktop", ""} + +# Project-root signals that mark a directory as a code workspace even when it +# isn't (yet) a git repo. Cheap filename checks — no parsing. +_PROJECT_MARKERS = ( + "pyproject.toml", "setup.py", "setup.cfg", "requirements.txt", + "package.json", "tsconfig.json", "deno.json", + "Cargo.toml", "go.mod", "pom.xml", "build.gradle", "build.gradle.kts", + "Gemfile", "composer.json", "mix.exs", "pubspec.yaml", + "CMakeLists.txt", "Makefile", "Dockerfile", + "AGENTS.md", "CLAUDE.md", ".cursorrules", +) + +# Agent-instruction files surfaced separately from manifests in the snapshot. +_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules") + +# Lockfile → package manager, checked in priority order. +_PY_LOCKFILES = (("uv.lock", "uv"), ("poetry.lock", "poetry"), ("Pipfile.lock", "pipenv")) +_JS_LOCKFILES = ( + ("pnpm-lock.yaml", "pnpm"), ("bun.lockb", "bun"), ("bun.lock", "bun"), + ("yarn.lock", "yarn"), ("package-lock.json", "npm"), +) + +# package.json scripts / Makefile targets worth surfacing as verify commands. +_VERIFY_TARGETS = ("test", "tests", "lint", "typecheck", "check", "build", "fmt", "format") +_MAX_VERIFY_COMMANDS = 8 +_MAX_FACT_FILE_BYTES = 256 * 1024 + +_GIT_TIMEOUT = 2.5 + + +# Per-model edit-format steering. Matching the edit tool format to how a model +# was trained reduces mistakes and wasted reasoning (OpenAI/Codex handle +# patch-style diffs best; Anthropic models — and most open-weight coding +# models, whose RL scaffolds use str_replace-style editors — do best with +# string-replacement). Our `patch` tool exposes both: mode="patch" (V4A +# multi-file) and mode="replace" (find-and-swap). We nudge each family toward +# its native format. Unknown families get nothing (the brief's neutral wording +# stands). Substrings match the model id; aligned with TOOL_USE_ENFORCEMENT_MODELS. +_EDIT_FORMAT_GUIDANCE: dict[str, tuple[tuple[str, ...], str]] = { + "patch": ( + ("gpt", "codex"), + "- Edit format: author new files with `write_file`; for edits to " + "existing code prefer `patch` with `mode='patch'` (V4A multi-file diff) " + "for structured or multi-file changes — it's the diff format you handle " + "most reliably. Use `mode='replace'` for a single small swap.", + ), + "replace": ( + ("claude", "sonnet", "opus", "haiku", + "gemini", "gemma", "deepseek", "qwen", "kimi", "glm", "grok", + "hermes", "llama", "mistral", "devstral", "minimax"), + "- Edit format: author new files with `write_file`; for edits to " + "existing code prefer `patch` in `mode='replace'` — match a unique " + "snippet and swap it. Reach for `mode='patch'` (V4A) only when an edit " + "genuinely spans several files at once.", + ), +} + + +def _model_family(model: Optional[str]) -> Optional[str]: + """Classify a model id into an edit-format family key, or ``None``. + + Used to steer the coding posture toward the edit tool format a model was + trained on. Family-agnostic by design: an unrecognised model gets ``None`` + and the operating brief's neutral edit wording applies. + """ + if not model: + return None + lowered = model.lower() + for family, (needles, _line) in _EDIT_FORMAT_GUIDANCE.items(): + if any(n in lowered for n in needles): + return family + return None + + +def _edit_format_line(model: Optional[str]) -> str: + """The edit-format guidance line for this model's family (``""`` if none).""" + family = _model_family(model) + if family is None: + return "" + return _EDIT_FORMAT_GUIDANCE[family][1] + + +# Operating brief for the coding posture. Tool names referenced here (read_file, +# search_files, patch, write_file, terminal, todo) are in the coding toolset and +# in _HERMES_CORE_TOOLS, so they're present on every surface this fires on. +CODING_AGENT_GUIDANCE = ( + "You are a coding agent pairing with the user inside their codebase. " + "Operate like a careful senior engineer.\n" + "\n" + "Gather context first:\n" + "- Read the relevant files with `read_file` and locate code with " + "`search_files` before changing anything. Trace a symbol to its definition " + "and usages rather than guessing its shape.\n" + "- Batch independent lookups: when several reads/searches don't depend on " + "each other, issue them together in one turn instead of one at a time.\n" + "- Never invent files, symbols, APIs, or imports. If you haven't seen it in " + "the repo, go look. Don't assume a library is available — check the project " + "manifest (pyproject.toml / package.json / Cargo.toml / go.mod) and how " + "neighbouring files import it.\n" + "\n" + "Make changes through the tools, not the chat:\n" + "- Edit with `patch`/`write_file`. Do NOT print code blocks to the user as " + "a substitute for editing — apply the change, then summarise it. Only show " + "code when the user explicitly asks to see it.\n" + "- Match the project's existing style and conventions; AGENTS.md / " + "CLAUDE.md / .cursorrules already in context win over your defaults. Touch " + "only what the task needs — no drive-by refactors, renames, or reformatting " + "— and add any imports/dependencies your code requires.\n" + "- If an edit fails to apply, re-read the file to get the current exact " + "contents before retrying — don't repeat a stale patch. If the same region " + "fails twice, rewrite the enclosing function or file with `write_file` " + "instead of attempting a third patch.\n" + "\n" + "Verify, and know when to stop:\n" + "- Use `terminal` for git, builds, tests, and inspection. Run the relevant " + "tests/linter/build and confirm they pass before claiming the work is done.\n" + "- Fix root causes, not symptoms: when you find a bug, check sibling call " + "paths for the same flaw and fix the class, not just the reported site.\n" + "- When fixing linter/type errors on a file, stop after about three " + "attempts on the same file and ask the user rather than looping.\n" + "- Track multi-step work with `todo`. Reference code as `path:line` instead " + "of pasting whole files.\n" + "\n" + "Respect the user's repo: don't commit, push, or rewrite history unless " + "asked, and never read, print, or commit secrets — leave `.env` and " + "credential files alone unless the user explicitly asks. The Workspace " + "block below is a snapshot from session start — re-run `git status`/" + "`git branch` before relying on it. Be concise: lead with the change or " + "answer, not a preamble." +) + + +# ── Context profiles (declarative posture definitions) ────────────────────── + + +@dataclass(frozen=True) +class ContextProfile: + """A named operating posture. Pure data — consumers read these fields. + + ``toolset`` — collapse to this toolset (+ enabled MCP) when no explicit + selection is pinned; ``None`` keeps the platform default. + ``guidance`` — operating brief injected into the stable system prompt; + ``""`` injects nothing. + ``model_hint`` — routing preference key for smart model routing + (extension seam; not yet consumed by the router). + ``memory_policy``— memory namespace/weighting hint (extension seam). + ``hidden_skill_categories`` — skill categories pruned from the system-prompt + skill index while this posture is active. Discovery-only: + nothing is disabled — ``skills_list`` still returns the + full catalog and ``skill_view`` loads anything. Deny-list + semantics so unknown/custom categories stay visible. + """ + + name: str + toolset: Optional[str] = None + guidance: str = "" + model_hint: Optional[str] = None + memory_policy: str = "default" + hidden_skill_categories: tuple[str, ...] = () + + +# Skill categories that are clearly not part of a coding workflow. Hidden from +# the prompt's skill index in the coding posture (deny-list — anything not +# listed here, incl. custom user categories, stays visible). Coding-adjacent +# categories (devops, github, mcp, data-science, diagramming, research, +# security, …) are intentionally absent. +_NON_CODING_SKILL_CATEGORIES = ( + "apple", "communication", "cooking", "creative", "email", "finance", + "gaming", "gifs", "health", "media", "music", "note-taking", + "productivity", "shopping", "smart-home", "social-media", "travel", + "yuanbao", +) + + +GENERAL_PROFILE = ContextProfile(name="general") +CODING_PROFILE = ContextProfile( + name="coding", + toolset=CODING_TOOLSET, + guidance=CODING_AGENT_GUIDANCE, + model_hint="coding", + memory_policy="project", + hidden_skill_categories=_NON_CODING_SKILL_CATEGORIES, +) + +_PROFILES: dict[str, ContextProfile] = { + GENERAL_PROFILE.name: GENERAL_PROFILE, + CODING_PROFILE.name: CODING_PROFILE, +} + + +def get_profile(name: str) -> ContextProfile: + """Return a registered profile, falling back to ``general``.""" + return _PROFILES.get(name, GENERAL_PROFILE) + + +# ── Helpers ───────────────────────────────────────────────────────────────── + + +def _coding_mode(config: Optional[dict[str, Any]]) -> str: + """Return the normalized ``agent.coding_context`` mode (auto/focus/on/off).""" + if config is None: + try: + from hermes_cli.config import load_config + + config = load_config() + except Exception: + config = {} + raw = ((config or {}).get("agent", {}) or {}).get("coding_context", "auto") + mode = str(raw).strip().lower() + if mode in {"focus", "strict", "lean"}: + return "focus" + if mode in {"on", "true", "yes", "1", "always"}: + return "on" + if mode in {"off", "false", "no", "0", "never"}: + return "off" + return "auto" + + +def _resolve_cwd(cwd: Optional[str | Path]) -> Path: + if cwd: + return Path(cwd).expanduser() + try: + from agent.runtime_cwd import resolve_agent_cwd + + return resolve_agent_cwd() + except Exception: + return Path(os.getcwd()) + + +def _git_root(cwd: Path) -> Optional[Path]: + current = cwd.resolve() + for parent in [current, *current.parents]: + if (parent / ".git").exists(): + return parent + return None + + +def _home() -> Optional[Path]: + try: + return Path.home().resolve() + except (OSError, RuntimeError): + return None + + +def _marker_root(cwd: Path) -> Optional[Path]: + """Nearest ancestor that looks like a project root, or ``None``. + + Walks up at most a few levels so a manifest in the workspace root counts + even when the user is in a subdirectory. ``$HOME`` itself is skipped — a + Makefile or AGENTS.md sitting in the home directory is global user config, + not a project-root signal. + """ + current = cwd.resolve() + home = _home() + for depth, parent in enumerate([current, *current.parents]): + if depth > 6: + break + if parent == home: + continue + for marker in _PROJECT_MARKERS: + if (parent / marker).exists(): + return parent + return None + + +def _detect_profile_name(mode: str, platform: str, cwd_str: str) -> str: + """Resolve which profile applies. + + ``auto``/``focus``: coding when the surface is interactive AND the cwd is a + code workspace (a git repo or a recognised project root). ``on``: always + coding. ``off``: always general. + + A git repo rooted at ``$HOME`` (the dotfiles pattern) is NOT a workspace + signal — without the guard, every session anywhere under a dotfiles-managed + home directory would silently flip to the coding posture. + + Detection is intentionally not memoized: it's a handful of ``stat`` calls, + and callers resolve the mode once per session anyway. Caching here would + risk a stale posture if a long-lived process (gateway/TUI) serves sessions + from different working directories. + """ + if mode == "off": + return GENERAL_PROFILE.name + if mode == "on": + return CODING_PROFILE.name + if platform and platform.strip().lower() not in INTERACTIVE_CODING_PLATFORMS: + return GENERAL_PROFILE.name + cwd = Path(cwd_str) + git_root = _git_root(cwd) + if git_root is not None and git_root == _home(): + git_root = None # dotfiles repo at $HOME — not a code workspace + if git_root is not None or _marker_root(cwd) is not None: + return CODING_PROFILE.name + return GENERAL_PROFILE.name + + +# ── RuntimeMode (the seam) ────────────────────────────────────────────────── + + +@dataclass(frozen=True) +class RuntimeMode: + """The resolved operating posture for a session. Immutable by construction. + + Built once via :func:`resolve_runtime_mode` and consumed by every domain + that cares about the coding/general distinction. Never mutate or re-resolve + mid-session — that would break the prompt cache. + """ + + profile: ContextProfile + surface: str + cwd: Path + # The normalized ``agent.coding_context`` mode this posture was resolved + # under (auto/focus/on/off). Toolset collapse is gated on ``focus``. + config_mode: str = "auto" + # The model id this session runs (e.g. "anthropic/claude-opus-4.8"). Used + # only to steer edit-format guidance toward the model's family — see + # ``_edit_format_line``. Fixed for the session, so cache-safe. + model: Optional[str] = None + + @property + def kind(self) -> str: + return self.profile.name + + @property + def is_coding(self) -> bool: + return self.profile.name == CODING_PROFILE.name + + def toolset_selection(self, config: Optional[dict[str, Any]] = None) -> Optional[list[str]]: + """Toolset list for this posture, or ``None`` to keep the platform default. + + Non-``None`` only under the opt-in ``focus`` mode. The default posture + is prompt-only: most strippable toolsets are off-by-default anyway, and + a user who explicitly enabled one (image-gen for frontend/game assets, + messaging for build notifications, …) keeps it while coding. + + Callers apply this only when the user hasn't pinned an explicit + selection (``--toolsets``, ``HERMES_TUI_TOOLSETS``, …); they never + override a pin. Returns the profile's toolset plus enabled MCP servers. + """ + if self.config_mode != "focus": + return None + if self.profile.toolset is None: + return None + return [self.profile.toolset, *_enabled_mcp_servers(config)] + + def system_blocks(self) -> list[str]: + """Stable system-prompt blocks for this posture (brief + workspace). + + The operating brief carries a model-family edit-format nudge appended + to it (one cached string, not a separate block) so the model is steered + toward the `patch` mode it handles best — see ``_edit_format_line``. + """ + if not self.is_coding: + return [] + blocks: list[str] = [] + if self.profile.guidance: + brief = self.profile.guidance + edit_line = _edit_format_line(self.model) + if edit_line: + brief = f"{brief}\n{edit_line}" + blocks.append(brief) + workspace = build_coding_workspace_block(self.cwd) + if workspace: + blocks.append(workspace) + return blocks + + def hidden_skill_categories(self) -> frozenset[str]: + """Skill categories to prune from the prompt's skill index (may be empty).""" + return frozenset(self.profile.hidden_skill_categories) + + +def resolve_runtime_mode( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, + model: Optional[str] = None, +) -> RuntimeMode: + """Resolve the operating posture once. Cheap — a handful of ``stat`` calls. + + This is the single entry point every domain should call. The returned + object is immutable and safe to cache for the session. Detection itself is + intentionally *not* memoized (see ``_detect_profile_name``) so a long-lived + process can't pin a stale posture; callers resolve once per session and + hold the result. ``model`` is recorded only to steer edit-format guidance; + it never affects detection. + """ + resolved_cwd = _resolve_cwd(cwd) + mode = _coding_mode(config) + name = _detect_profile_name( + mode, (platform or "").strip().lower(), str(resolved_cwd) + ) + return RuntimeMode( + profile=get_profile(name), + surface=platform or "", + cwd=resolved_cwd, + config_mode=mode, + model=model, + ) + + +# ── Back-compat surface (thin wrappers over RuntimeMode) ──────────────────── + + +def is_coding_context( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, +) -> bool: + """Whether Hermes should operate in its coding posture right now.""" + return resolve_runtime_mode(platform=platform, cwd=cwd, config=config).is_coding + + +def coding_selection( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, +) -> Optional[list[str]]: + """Toolset selection for the coding posture. + + ``None`` unless the user opted into ``focus`` mode AND the posture is + active — the default coding posture never overrides configured toolsets. + """ + return resolve_runtime_mode( + platform=platform, cwd=cwd, config=config + ).toolset_selection(config) + + +def coding_system_blocks( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, + model: Optional[str] = None, +) -> list[str]: + """Stable system-prompt blocks for the current posture (empty when general). + + ``model`` steers the brief's edit-format nudge toward the model's family. + """ + return resolve_runtime_mode( + platform=platform, cwd=cwd, config=config, model=model + ).system_blocks() + + +def coding_hidden_skill_categories( + *, + platform: Optional[str] = None, + cwd: Optional[str | Path] = None, + config: Optional[dict[str, Any]] = None, +) -> frozenset[str]: + """Skill categories the active posture prunes from the prompt's skill index. + + Empty outside the coding posture. Discovery-only: hidden skills remain + loadable via ``skills_list`` / ``skill_view``. + """ + return resolve_runtime_mode( + platform=platform, cwd=cwd, config=config + ).hidden_skill_categories() + + +def _enabled_mcp_servers(config: Optional[dict[str, Any]]) -> list[str]: + """Names of MCP servers the user has enabled — kept in the coding posture. + + MCP servers (figma, browser, tophat, …) are explicitly configured and part + of the coding workflow, not noise to strip. + """ + try: + from hermes_cli.config import read_raw_config + from hermes_cli.tools_config import _parse_enabled_flag + + servers = read_raw_config().get("mcp_servers") or {} + return [ + str(name) + for name, cfg in servers.items() + if isinstance(cfg, dict) + and _parse_enabled_flag(cfg.get("enabled", True), default=True) + ] + except Exception: + return [] + + +# ── git/workspace probe ───────────────────────────────────────────────────── + + +def _git(cwd: Path, *args: str) -> str: + try: + out = subprocess.run( + ["git", "-C", str(cwd), *args], + capture_output=True, + text=True, + timeout=_GIT_TIMEOUT, + ) + except (OSError, subprocess.SubprocessError): + return "" + return out.stdout.strip() if out.returncode == 0 else "" + + +def _parse_status(porcelain: str) -> tuple[dict[str, str], dict[str, int]]: + """Parse ``git status --porcelain=2 --branch`` into branch + counts.""" + branch: dict[str, str] = {} + counts = {"staged": 0, "modified": 0, "untracked": 0, "conflicts": 0} + for line in porcelain.splitlines(): + if line.startswith("# branch.head"): + branch["head"] = line.split(maxsplit=2)[-1] + elif line.startswith("# branch.upstream"): + branch["upstream"] = line.split(maxsplit=2)[-1] + elif line.startswith("# branch.ab"): + parts = line.split() + branch["ahead"], branch["behind"] = parts[2].lstrip("+"), parts[3].lstrip("-") + elif line.startswith(("1 ", "2 ")): + xy = line.split(maxsplit=2)[1] + if xy[0] != ".": + counts["staged"] += 1 + if xy[1] != ".": + counts["modified"] += 1 + elif line.startswith("u "): + counts["conflicts"] += 1 + elif line.startswith("? "): + counts["untracked"] += 1 + return branch, counts + + +def _read_small(path: Path) -> str: + """Read a small text file, or ``""`` — never raises, never reads huge files.""" + try: + if not path.is_file() or path.stat().st_size > _MAX_FACT_FILE_BYTES: + return "" + return path.read_text(encoding="utf-8", errors="replace") + except OSError: + return "" + + +def _project_facts(root: Path) -> list[str]: + """Detected project facts for the workspace snapshot. + + The point is to hand the model its *verify loop* up front — which manifest, + which package manager, and the exact test/lint/build commands — instead of + making it rediscover them every session. Cheap: stat calls plus reads of a + couple of small files; built once at prompt-build time (cache-safe). + """ + facts: list[str] = [] + + manifests = [m for m in _PROJECT_MARKERS if m not in _CONTEXT_FILES and (root / m).is_file()] + package_managers = [ + pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file() + ] + if manifests: + line = f"- Project: {', '.join(manifests[:6])}" + if package_managers: + line += f" ({'/'.join(dict.fromkeys(package_managers))})" + facts.append(line) + + verify: list[str] = [] + if (root / "scripts" / "run_tests.sh").is_file(): + verify.append("scripts/run_tests.sh") + if (root / "package.json").is_file(): + try: + scripts = json.loads(_read_small(root / "package.json") or "{}").get("scripts") or {} + except (json.JSONDecodeError, AttributeError): + scripts = {} + js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm") + verify.extend(f"{js_pm} run {name}" for name in _VERIFY_TARGETS if name in scripts) + if (root / "pytest.ini").is_file() or "[tool.pytest" in _read_small(root / "pyproject.toml"): + verify.append("pytest") + makefile = _read_small(root / "Makefile") + if makefile: + verify.extend( + f"make {name}" for name in _VERIFY_TARGETS + if re.search(rf"^{re.escape(name)}\s*:", makefile, re.MULTILINE) + ) + if verify: + deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS] + facts.append(f"- Verify: {'; '.join(deduped)}") + + context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()] + if context_files: + facts.append(f"- Context files: {', '.join(context_files)}") + + return facts + + +def build_coding_workspace_block(cwd: Optional[str | Path] = None) -> str: + """Workspace snapshot for the system prompt (empty outside a workspace). + + Git state (branch/status/commits) when the cwd is in a repo, plus detected + project facts (manifest, package manager, verify commands, context files) + — so marker-only (non-git) projects still get a snapshot. + """ + resolved = _resolve_cwd(cwd) + git_root = _git_root(resolved) + root = git_root or _marker_root(resolved) + if root is None: + return "" + + lines = ["Workspace (snapshot at session start — re-check with `git` before acting on it):"] + lines.append(f"- Root: {root}") + + if git_root is not None: + branch, counts = _parse_status(_git(root, "status", "--porcelain=2", "--branch")) + head = branch.get("head", "") + if head and head != "(detached)": + line = f"- Branch: {head}" + if branch.get("upstream"): + line += f" \u2192 {branch['upstream']}" + ahead, behind = branch.get("ahead", "0"), branch.get("behind", "0") + if ahead != "0" or behind != "0": + line += f" (ahead {ahead}, behind {behind})" + lines.append(line) + elif head == "(detached)": + lines.append("- Branch: (detached HEAD)") + + # Linked worktree: the per-worktree git dir differs from the shared common dir. + git_dir, common_dir = _git(root, "rev-parse", "--git-dir"), _git(root, "rev-parse", "--git-common-dir") + if git_dir and common_dir and Path(git_dir).resolve() != Path(common_dir).resolve(): + main_tree = Path(common_dir).resolve().parent + lines.append(f"- Worktree: linked (primary tree at {main_tree})") + + dirty = [f"{n} {label}" for label, n in ( + ("staged", counts["staged"]), ("modified", counts["modified"]), + ("untracked", counts["untracked"]), ("conflicts", counts["conflicts"]), + ) if n] + lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}") + + recent = _git(root, "log", "-3", "--pretty=%h %s") + if recent: + lines.append("- Recent commits:") + lines.extend(f" {c}" for c in recent.splitlines()) + + lines.extend(_project_facts(root)) + return "\n".join(lines) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index cc62c13f9d..202c05cb22 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -1101,11 +1101,12 @@ def _skill_should_show( def build_skills_system_prompt( available_tools: "set[str] | None" = None, available_toolsets: "set[str] | None" = None, + hidden_categories: "frozenset[str] | None" = None, ) -> str: """Build a compact skill index for the system prompt. Two-layer cache: - 1. In-process LRU dict keyed by (skills_dir, tools, toolsets) + 1. In-process LRU dict keyed by (skills_dir, tools, toolsets, hidden) 2. Disk snapshot (``.skills_prompt_snapshot.json``) validated by mtime/size manifest — survives process restarts @@ -1115,6 +1116,12 @@ def build_skills_system_prompt( scanned alongside the local ``~/.hermes/skills/`` directory. External dirs are read-only — they appear in the index but new skills are always created in the local dir. Local skills take precedence when names collide. + + ``hidden_categories`` (e.g. from the coding posture — see + agent/coding_context.py) prunes whole categories from the rendered index. + Discovery-only: the snapshot stores everything, ``skills_list`` / + ``skill_view`` still reach every skill, and a footer note tells the model + the full catalog exists. """ skills_dir = get_skills_dir() external_dirs = get_all_skills_dirs()[1:] # skip local (index 0) @@ -1139,6 +1146,7 @@ def build_skills_system_prompt( tuple(sorted(str(ts) for ts in (available_toolsets or set()))), _platform_hint, tuple(sorted(disabled)), + tuple(sorted(hidden_categories or ())), ) with _SKILLS_PROMPT_CACHE_LOCK: cached = _SKILLS_PROMPT_CACHE.get(cache_key) @@ -1272,6 +1280,26 @@ def build_skills_system_prompt( except Exception as e: logger.debug("Could not read external skill description %s: %s", desc_file, e) + # Posture-driven category pruning (e.g. non-coding skills while pairing on + # code). Match on the top-level category segment so nested categories + # ("social-media/twitter") are pruned with their parent. + hidden_note = "" + if hidden_categories: + before = sum(len(v) for v in skills_by_category.values()) + skills_by_category = { + cat: entries + for cat, entries in skills_by_category.items() + if cat.split("/", 1)[0] not in hidden_categories + } + pruned = before - sum(len(v) for v in skills_by_category.values()) + if pruned: + hidden_note = ( + f"\n(Note: {pruned} skill(s) in categories unrelated to the " + "current coding context are not listed here. The full catalog " + "is available via skills_list if the user asks for something " + "outside this list.)" + ) + if not skills_by_category: result = "" else: @@ -1320,6 +1348,7 @@ def build_skills_system_prompt( "\n" "\n" "Only proceed without loading a skill if genuinely none are relevant to the task." + + hidden_note ) # ── Store in LRU cache ──────────────────────────────────────────── diff --git a/agent/system_prompt.py b/agent/system_prompt.py index 4038716df4..0c6da6c224 100644 --- a/agent/system_prompt.py +++ b/agent/system_prompt.py @@ -191,9 +191,21 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) ) if toolset } + # Coding posture prunes non-coding skill categories from the index + # (discovery-only — skills_list/skill_view still reach everything). + _hidden_cats = frozenset() + try: + from agent.coding_context import coding_hidden_skill_categories + + _hidden_cats = coding_hidden_skill_categories( + platform=agent.platform, cwd=resolve_context_cwd() + ) + except Exception: + _hidden_cats = frozenset() skills_prompt = _r.build_skills_system_prompt( available_tools=agent.valid_tool_names, available_toolsets=avail_toolsets, + hidden_categories=_hidden_cats or None, ) else: skills_prompt = "" @@ -221,6 +233,26 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) if _env_hints: stable_parts.append(_env_hints) + # Coding posture (base Hermes, any interactive coding surface in a code + # workspace — see agent/coding_context.py). The operating brief + the live + # git/workspace snapshot are built once here and cached for the session; + # the snapshot is never re-probed per turn (that would break the prompt + # cache), so the brief tells the model to re-check git before relying on it. + if agent.valid_tool_names: + try: + from agent.coding_context import coding_system_blocks + + stable_parts.extend( + coding_system_blocks( + platform=agent.platform, + cwd=resolve_context_cwd(), + model=agent.model, + ) + ) + except Exception: + # Coding-context probing must never block prompt build. + pass + # Local Python toolchain probe — names python/pip/uv/PEP-668 state when # something is non-default so the model can pick the right install # strategy without discovering by failure. Emits a single line; emits diff --git a/cli.py b/cli.py index 641c200ad3..412374ef29 100644 --- a/cli.py +++ b/cli.py @@ -13336,9 +13336,21 @@ def main( else: toolsets_list.append(str(t)) else: - # Use the shared resolver so MCP servers are included at runtime - from hermes_cli.tools_config import _get_platform_tools - toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli")) + # Coding posture (base Hermes): with no explicit --toolsets, collapse + # to the coding toolset (+ enabled MCP servers) when sitting in a code + # workspace. See agent/coding_context.py. + _coding = None + try: + from agent.coding_context import coding_selection + _coding = coding_selection(platform="cli", config=CLI_CONFIG) + except Exception: + _coding = None + if _coding is not None: + toolsets_list = _coding + else: + # Use the shared resolver so MCP servers are included at runtime + from hermes_cli.tools_config import _get_platform_tools + toolsets_list = sorted(_get_platform_tools(CLI_CONFIG, "cli")) parsed_skills = _parse_skills_argument(skills) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 494c5ddfe3..c88400e05b 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -863,6 +863,19 @@ DEFAULT_CONFIG = { # identity slot (SOUL.md). Empty by default. The HERMES_ENVIRONMENT_HINT # env var overrides this (build-time/container mechanism). "environment_hint": "", + # Coding posture — on interactive coding surfaces (CLI, TUI, desktop + # app, ACP) in a code workspace, Hermes adds a coding operating brief + # + a live git/workspace snapshot to the system prompt. See + # agent/coding_context.py. + # "auto" (default) — prompt-only posture when the surface is + # interactive AND cwd is a code workspace. + # Toolsets are never touched; messaging platforms + # unaffected. + # "focus" — auto + collapse the toolset to the lean coding + # set (+ enabled MCP servers). Explicit opt-in. + # "on" — force the prompt posture everywhere. + # "off" — disable entirely. + "coding_context": "auto", # Staged inactivity warning: send a warning to the user at this # threshold before escalating to a full timeout. The warning fires # once per run and does not interrupt the agent. 0 = disable warning. diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 01d4ba7279..d71fd5edb7 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -1437,6 +1437,10 @@ def _get_platform_tools( continue if ts_def.get("includes"): continue + # Posture toolsets (e.g. ``coding``) are session-level selections made + # by agent/coding_context.py — not per-platform capabilities to recover. + if ts_def.get("posture"): + continue ts_tools = set(resolve_toolset(ts_key)) if not ts_tools or not ts_tools.issubset(platform_tool_universe): continue diff --git a/tests/agent/test_coding_context.py b/tests/agent/test_coding_context.py new file mode 100644 index 0000000000..ab88e391ad --- /dev/null +++ b/tests/agent/test_coding_context.py @@ -0,0 +1,405 @@ +"""Tests for agent.coding_context — RuntimeMode seam, resolver, toolset, git probe.""" + +import json +import subprocess +from pathlib import Path + +import pytest + +from agent import coding_context as cc + + +def _git_init(path): + env = { + "GIT_AUTHOR_NAME": "t", "GIT_AUTHOR_EMAIL": "t@t", + "GIT_COMMITTER_NAME": "t", "GIT_COMMITTER_EMAIL": "t@t", + } + for args in ( + ["init", "-q", "-b", "main"], + ["commit", "-q", "--allow-empty", "-m", "init commit"], + ): + subprocess.run(["git", "-C", str(path), *args], check=True, env={**env, "HOME": str(path)}) + + +# ── resolver ────────────────────────────────────────────────────────────── + +class TestIsCodingContext: + def test_off_never_activates(self, tmp_path): + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "off"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is False + + def test_on_forces_even_without_git(self, tmp_path): + cfg = {"agent": {"coding_context": "on"}} + assert cc.is_coding_context(platform="telegram", cwd=tmp_path, config=cfg) is True + + def test_auto_requires_git_repo(self, tmp_path): + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is False + _git_init(tmp_path) + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_auto_skips_messaging_surfaces(self, tmp_path): + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="discord", cwd=tmp_path, config=cfg) is False + assert cc.is_coding_context(platform="tui", cwd=tmp_path, config=cfg) is True + + def test_default_mode_is_auto(self, tmp_path): + # Unknown/missing value normalizes to auto. + _git_init(tmp_path) + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config={}) is True + + +# ── toolset substitution ──────────────────────────────────────────────────── + +class TestCodingSelection: + def test_selects_coding_under_focus(self, tmp_path): + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "focus"}} + out = cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) + assert out is not None + assert out[0] == cc.CODING_TOOLSET + + def test_auto_is_prompt_only(self, tmp_path): + # Default posture must never override the user's configured toolsets — + # off-by-default toolsets are already off, and explicit opt-ins + # (image-gen, spotify, …) survive entering a code workspace. + _git_init(tmp_path) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + # …while the prompt posture is still active. + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_on_is_prompt_only(self, tmp_path): + cfg = {"agent": {"coding_context": "on"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_focus_requires_workspace(self, tmp_path): + # focus inherits auto's detection gate — bare dir stays general. + cfg = {"agent": {"coding_context": "focus"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + + def test_none_when_inactive(self, tmp_path): + cfg = {"agent": {"coding_context": "off"}} + assert cc.coding_selection(platform="cli", cwd=tmp_path, config=cfg) is None + + def test_coding_toolset_is_registered(self): + from toolsets import resolve_toolset + + tools = resolve_toolset(cc.CODING_TOOLSET) + # Coding essentials present… + for t in ("read_file", "write_file", "patch", "search_files", "terminal", "todo"): + assert t in tools + # …and the noise is gone. + for t in ("send_message", "text_to_speech", "image_generate", "computer_use"): + assert t not in tools + + +# ── git/workspace probe ───────────────────────────────────────────────────── + +class TestWorkspaceBlock: + def test_empty_outside_repo(self, tmp_path): + assert cc.build_coding_workspace_block(tmp_path) == "" + + def test_reports_branch_and_clean_status(self, tmp_path): + _git_init(tmp_path) + block = cc.build_coding_workspace_block(tmp_path) + assert "Workspace" in block + assert f"Root: {tmp_path.resolve()}" in block or "Root:" in block + assert "Branch: main" in block + assert "Status: clean" in block + assert "init commit" in block + + def test_reports_dirty_counts(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "untracked.txt").write_text("hi") + block = cc.build_coding_workspace_block(tmp_path) + assert "untracked" in block + assert "clean" not in block.split("Status:")[1].splitlines()[0] + + +# ── project facts (verify-loop detection) ─────────────────────────────────── + +class TestProjectFacts: + def test_package_json_scripts_surface_verify_commands(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "package.json").write_text( + json.dumps({"scripts": {"test": "vitest", "lint": "eslint .", "dev": "vite"}}) + ) + (tmp_path / "pnpm-lock.yaml").write_text("") + block = cc.build_coding_workspace_block(tmp_path) + assert "Project: package.json (pnpm)" in block + assert "pnpm run test" in block and "pnpm run lint" in block + # Non-verify scripts (dev servers, …) stay out of the snapshot. + assert "run dev" not in block + + def test_pytest_config_and_run_tests_script(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "pyproject.toml").write_text("[tool.pytest.ini_options]\n") + scripts = tmp_path / "scripts" + scripts.mkdir() + (scripts / "run_tests.sh").write_text("#!/bin/sh\n") + block = cc.build_coding_workspace_block(tmp_path) + assert "scripts/run_tests.sh" in block + assert "pytest" in block.split("Verify:")[1] + + def test_makefile_verify_targets_only(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "Makefile").write_text("test:\n\tgo test ./...\n\ndeploy:\n\t./deploy.sh\n") + block = cc.build_coding_workspace_block(tmp_path) + assert "make test" in block + assert "make deploy" not in block + + def test_context_files_listed(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "AGENTS.md").write_text("# rules") + block = cc.build_coding_workspace_block(tmp_path) + assert "Context files: AGENTS.md" in block + + def test_marker_only_project_gets_snapshot_without_git(self, tmp_path): + # A non-git project (manifest only) still gets a workspace snapshot — + # just without the git lines. + (tmp_path / "package.json").write_text("{}") + block = cc.build_coding_workspace_block(tmp_path) + assert f"Root: {tmp_path.resolve()}" in block + assert "package.json" in block + assert "Branch:" not in block and "Status:" not in block + + def test_malformed_package_json_is_ignored(self, tmp_path): + _git_init(tmp_path) + (tmp_path / "package.json").write_text("{not json") + block = cc.build_coding_workspace_block(tmp_path) + assert "Project: package.json" in block + assert "Verify:" not in block + + +# ── $HOME dotfiles guard ──────────────────────────────────────────────────── + +class TestHomeDotfilesGuard: + def test_dotfiles_repo_at_home_is_not_coding(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + _git_init(home) + monkeypatch.setattr(Path, "home", lambda: home) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=home, config=cfg) is False + # …and a plain subdirectory of the dotfiles repo stays general too. + docs = home / "Documents" + docs.mkdir() + assert cc.is_coding_context(platform="cli", cwd=docs, config=cfg) is False + + def test_marker_at_home_is_not_a_project_signal(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + (home / "Makefile").write_text("all:\n") + monkeypatch.setattr(Path, "home", lambda: home) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=home, config=cfg) is False + + def test_real_project_under_dotfiles_home_still_detects(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + _git_init(home) + monkeypatch.setattr(Path, "home", lambda: home) + proj = home / "www" / "app" + proj.mkdir(parents=True) + (proj / "package.json").write_text("{}") + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=proj, config=cfg) is True + + def test_on_mode_bypasses_the_guard(self, tmp_path, monkeypatch): + home = tmp_path / "home" + home.mkdir() + monkeypatch.setattr(Path, "home", lambda: home) + cfg = {"agent": {"coding_context": "on"}} + assert cc.is_coding_context(platform="cli", cwd=home, config=cfg) is True + + +# ── prompt assembly integration ───────────────────────────────────────────── + +class TestStatusParsing: + def test_parse_status_counts_and_branch(self): + porcelain = ( + "# branch.head feature\n" + "# branch.upstream origin/feature\n" + "# branch.ab +2 -1\n" + "1 M. N... 100644 100644 100644 aaa bbb staged.py\n" + "1 .M N... 100644 100644 100644 ccc ddd modified.py\n" + "? new.py\n" + "u UU N... 1 2 3 abc def conflict.py\n" + ) + branch, counts = cc._parse_status(porcelain) + assert branch["head"] == "feature" + assert branch["upstream"] == "origin/feature" + assert branch["ahead"] == "2" and branch["behind"] == "1" + assert counts["staged"] == 1 + assert counts["modified"] == 1 + assert counts["untracked"] == 1 + assert counts["conflicts"] == 1 + + +# ── RuntimeMode seam ──────────────────────────────────────────────────────── + +class TestRuntimeMode: + def test_resolves_coding_in_repo(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + assert mode.is_coding is True + assert mode.kind == "coding" + assert mode.profile is cc.CODING_PROFILE + + def test_resolves_general_outside_workspace(self, tmp_path): + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + assert mode.is_coding is False + assert mode.kind == "general" + # General posture pins no toolset and injects no blocks. + assert mode.toolset_selection() is None + assert mode.system_blocks() == [] + + def test_is_frozen(self, tmp_path): + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + with pytest.raises(Exception): + mode.profile = cc.CODING_PROFILE # type: ignore[misc] + + def test_system_blocks_include_brief_and_workspace(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "on"}}) + blocks = mode.system_blocks() + assert any("coding agent" in b for b in blocks) + assert any("Workspace" in b for b in blocks) + + def test_toolset_selection_gated_on_focus(self, tmp_path): + _git_init(tmp_path) + focus = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={"agent": {"coding_context": "focus"}}) + sel = focus.toolset_selection() + assert sel and sel[0] == cc.CODING_TOOLSET + # auto/on resolve the coding profile but stay prompt-only. + for raw in ("auto", "on"): + mode = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={"agent": {"coding_context": raw}}) + assert mode.is_coding is True + assert mode.toolset_selection() is None + + +# ── edit-format steering (per-model harness tuning) ────────────────────────── + +class TestEditFormatSteering: + def test_family_detection(self): + assert cc._model_family("openai/gpt-5.4") == "patch" + assert cc._model_family("openai/codex-mini") == "patch" + assert cc._model_family("anthropic/claude-opus-4.8") == "replace" + assert cc._model_family("anthropic/claude-sonnet-4") == "replace" + # Gemini + open-weight coding models (RL'd on str_replace-style + # editors) steer to replace, not neutral. + for m in ( + "google/gemini-3-pro", "deepseek-v3.2", "qwen3-coder", + "moonshot/kimi-k2", "zai/glm-4.6", "nousresearch/hermes-4-405b", + ): + assert cc._model_family(m) == "replace" + # Unknown family and no model both fall through to neutral wording. + assert cc._model_family("acme/foo-1") is None + assert cc._model_family(None) is None + assert cc._model_family("") is None + + def test_openai_family_gets_v4a_nudge(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, model="openai/gpt-5.4", + ) + brief = mode.system_blocks()[0] + assert "mode='patch'" in brief + assert "V4A" in brief + assert "write_file" in brief # new files authored, not patched + + def test_anthropic_family_gets_replace_nudge(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, + model="anthropic/claude-opus-4.8", + ) + brief = mode.system_blocks()[0] + assert "mode='replace'" in brief + assert "write_file" in brief # new files authored, not patched + + def test_unknown_model_keeps_neutral_brief(self, tmp_path): + # No edit-format line appended — brief equals the bare profile guidance. + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, model="acme/foo-1", + ) + assert mode.system_blocks()[0] == cc.CODING_AGENT_GUIDANCE + + def test_no_model_keeps_neutral_brief(self, tmp_path): + _git_init(tmp_path) + mode = cc.resolve_runtime_mode( + platform="cli", cwd=tmp_path, + config={"agent": {"coding_context": "on"}}, + ) + assert mode.system_blocks()[0] == cc.CODING_AGENT_GUIDANCE + + def test_general_posture_emits_nothing_regardless_of_model(self, tmp_path): + # Edit steering only fires inside the coding posture. + mode = cc.resolve_runtime_mode( + platform="telegram", cwd=tmp_path, config={}, model="openai/gpt-5.4", + ) + assert mode.system_blocks() == [] + + +# ── profile registry ──────────────────────────────────────────────────────── + +class TestProfiles: + def test_registered_profiles(self): + assert cc.get_profile("coding") is cc.CODING_PROFILE + assert cc.get_profile("general") is cc.GENERAL_PROFILE + + def test_unknown_profile_falls_back_to_general(self): + assert cc.get_profile("nonsense") is cc.GENERAL_PROFILE + + def test_coding_profile_shape(self): + # The coding profile declares the seams other domains read. + assert cc.CODING_PROFILE.toolset == cc.CODING_TOOLSET + assert cc.CODING_PROFILE.guidance + assert cc.CODING_PROFILE.model_hint == "coding" + # General is inert. + assert cc.GENERAL_PROFILE.toolset is None + assert cc.GENERAL_PROFILE.guidance == "" + + def test_skill_pruning_scoped_to_coding_posture(self, tmp_path): + # Coding posture hides clearly-non-coding categories; coding-adjacent + # ones stay visible (deny-list semantics). + _git_init(tmp_path) + coding = cc.resolve_runtime_mode(platform="cli", cwd=tmp_path, config={}) + hidden = coding.hidden_skill_categories() + assert "social-media" in hidden and "smart-home" in hidden + for kept in ("github", "devops", "software-development", "data-science"): + assert kept not in hidden + # General posture hides nothing. + general = cc.resolve_runtime_mode( + platform="telegram", cwd=tmp_path, config={} + ) + assert general.hidden_skill_categories() == frozenset() + + +# ── detection signals ─────────────────────────────────────────────────────── + +class TestDetection: + @pytest.mark.parametrize("marker", ["pyproject.toml", "package.json", "go.mod", "AGENTS.md"]) + def test_project_manifest_triggers_without_git(self, tmp_path, marker): + (tmp_path / marker).write_text("x") + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is True + + def test_marker_in_parent_counts_from_subdir(self, tmp_path): + (tmp_path / "pyproject.toml").write_text("x") + sub = tmp_path / "src" / "pkg" + sub.mkdir(parents=True) + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=sub, config=cfg) is True + + def test_bare_dir_is_not_coding(self, tmp_path): + cfg = {"agent": {"coding_context": "auto"}} + assert cc.is_coding_context(platform="cli", cwd=tmp_path, config=cfg) is False diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 856a047253..744a917802 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -276,6 +276,42 @@ class TestBuildSkillsSystemPrompt: # "search" should appear only once per category assert result.count("- search") == 1 + def test_hidden_categories_pruned_with_note(self, monkeypatch, tmp_path): + """Posture-driven pruning drops whole categories and discloses it.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + for cat, name in (("social-media", "tweet-stuff"), ("github", "pr-review")): + d = tmp_path / "skills" / cat / name + d.mkdir(parents=True) + (d / "SKILL.md").write_text( + f"---\nname: {name}\ndescription: Does {name} things\n---\n" + ) + + result = build_skills_system_prompt( + hidden_categories=frozenset({"social-media"}) + ) + assert "pr-review" in result + assert "tweet-stuff" not in result + # Disclosure note so the model knows the full catalog exists. + assert "skills_list" in result + + def test_hidden_categories_prune_nested_and_miss_cache_separately( + self, monkeypatch, tmp_path + ): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + d = tmp_path / "skills" / "social-media" / "twitter" / "thread-writer" + d.mkdir(parents=True) + (d / "SKILL.md").write_text( + "---\nname: thread-writer\ndescription: Write threads\n---\n" + ) + # Nested category ("social-media/twitter") pruned via its parent. + pruned = build_skills_system_prompt( + hidden_categories=frozenset({"social-media"}) + ) + assert "thread-writer" not in pruned + # Unfiltered call must not be served from the filtered cache entry. + full = build_skills_system_prompt() + assert "thread-writer" in full + def test_excludes_incompatible_platform_skills(self, monkeypatch, tmp_path): """Skills with platforms: [macos] should not appear on Linux.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) diff --git a/tests/agent/test_system_prompt.py b/tests/agent/test_system_prompt.py index 75bf28b54d..b9e1439e1a 100644 --- a/tests/agent/test_system_prompt.py +++ b/tests/agent/test_system_prompt.py @@ -55,3 +55,44 @@ class TestContextFileCwd: def test_configured_dir_when_terminal_cwd_set(self, monkeypatch, tmp_path): monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) assert _captured_context_cwd(_make_agent()) == tmp_path + + +def _stable_prompt(agent): + with ( + patch("run_agent.load_soul_md", return_value=""), + patch("run_agent.build_nous_subscription_prompt", return_value=""), + patch("run_agent.build_environment_hints", return_value=""), + patch("run_agent.build_context_files_prompt", return_value=""), + ): + return build_system_prompt_parts(agent)["stable"] + + +class TestCodingContextBlock: + def test_injected_when_active(self, monkeypatch, tmp_path): + import subprocess + + subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True) + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + agent = _make_agent(valid_tool_names=["read_file"], platform="cli") + stable = _stable_prompt(agent) + assert "coding agent" in stable + assert "Workspace" in stable + + def test_absent_when_off(self, monkeypatch, tmp_path): + import subprocess + + subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True) + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + agent = _make_agent(valid_tool_names=["read_file"], platform="cli") + # Drive the real path: force the resolved mode to "off" via config. + with patch("agent.coding_context._coding_mode", return_value="off"): + stable = _stable_prompt(agent) + assert "coding agent" not in stable + + def test_absent_without_tools(self, monkeypatch, tmp_path): + import subprocess + + subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True) + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + agent = _make_agent(valid_tool_names=[], platform="cli") + assert "coding agent" not in _stable_prompt(agent) diff --git a/toolsets.py b/toolsets.py index 901b072f46..5c67bfb211 100644 --- a/toolsets.py +++ b/toolsets.py @@ -339,6 +339,33 @@ TOOLSETS = { "tools": [], "includes": ["web", "vision", "image_gen"] }, + + # Coding posture (base Hermes — CLI/TUI/desktop/ACP). Auto-selected in a + # code workspace; see agent/coding_context.py. Keeps everything you reach + # for while pairing on code and drops the rest (messaging, tts, image_gen, + # spotify, home-assistant, cron, computer-use). + "coding": { + "description": "Coding-focused toolset: files, terminal, search, web docs, skills, todo, delegate, vision, browser", + "tools": [ + "web_search", "web_extract", + "terminal", "process", "read_terminal", + "read_file", "write_file", "patch", "search_files", + "vision_analyze", + "skills_list", "skill_view", "skill_manage", + "browser_navigate", "browser_snapshot", "browser_click", + "browser_type", "browser_scroll", "browser_back", + "browser_press", "browser_get_images", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", + "todo", "memory", + "session_search", "clarify", + "execute_code", "delegate_task", + ], + "includes": [], + # Posture toolset: selected per-session by agent/coding_context.py, + # never auto-recovered into per-platform tool config (see the + # non-configurable-toolset recovery loop in hermes_cli/tools_config.py). + "posture": True, + }, # ========================================================================== # Full Hermes toolsets (CLI + messaging platforms) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 5af8530abc..d932e98510 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1680,6 +1680,22 @@ def _load_enabled_toolsets() -> list[str] | None: cfg = None fallback_notice = None + # Coding posture (base Hermes): with no explicit pin, collapse to the + # coding toolset (+ enabled MCP servers) when sitting in a code workspace. + # The desktop app and `hermes --tui` both land here. See + # agent/coding_context.py. No config is loaded yet at this point, so we let + # coding_selection() load it lazily (cli.py passes its already-resolved + # CLI_CONFIG instead, purely to avoid a redundant read). + if not explicit: + try: + from agent.coding_context import coding_selection + + selection = coding_selection(platform="tui") + if selection is not None: + return selection + except Exception: + pass + try: from toolsets import validate_toolset except Exception: