mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
* feat(agent): coding-context posture with per-model edit-format tuning

Hermes detects when it's running in a coding context — an interactive surface (CLI, TUI, ACP, desktop) sitting in a code workspace (git repo or recognised project root) — and shifts into a coding posture. Outside that (chat platforms, non-workspaces) nothing changes.

The posture is modelled as a frozen RuntimeMode selected from a small ContextProfile registry (coding/general). A profile is data: the toolset to collapse to, the operating brief to inject, and seams for model routing and memory. Every domain reads the same resolved object instead of re-probing git/config on its own:

- System prompt — RuntimeMode.system_blocks(): an operating brief (gather context before editing, edit through tools not chat, verify with terminal, cap retry loops) plus a live git/workspace snapshot, built once and baked into the stable prompt tier so per-conversation caching is preserved.
- Per-model edit-format tuning — the brief nudges each model family toward the patch mode it handles best: OpenAI/Codex toward mode='patch' (V4A multi-file diffs), Anthropic toward mode='replace' (string replacement). The model id rides on RuntimeMode; unknown families keep neutral wording.
- Skill index — non-coding skill categories are pruned from the prompt's skill index (discovery-only; skills_list/skill_view still reach the full catalog, with a disclosure note).
- Toolset — only under the opt-in 'focus' mode does the posture collapse to the coding toolset + enabled MCP servers; the default posture is prompt-only and never overrides configured toolsets.

Activation via agent.coding_context: auto (default), focus, on, off. Subagents inherit the posture for free via toolset inheritance + the shared prompt builder. Detection is not memoized so a long-lived gateway/TUI process can't pin a stale posture across working directories.
* feat(agent): cover new-file authoring in the coding edit-format nudge

The per-model edit-format guidance only addressed editing existing code (patch mode='patch' vs 'replace'), but authoring a brand-new file — write_file, not patch — is a large fraction of real coding work and the nudge was silent on it. Surfaced when building a single-file artifact where the dominant operation was write_file and the steering offered no guidance.

Both family lines now lead with "author new files with write_file; for edits to existing code prefer ...". Tests assert write_file appears in each family's brief; unknown families still get neutral wording.

* docs(agent): correct memoization docstring + clarify TUI config-load asymmetry

* feat(agent): sharpen the coding posture — verify-loop facts, wider edit steering, $HOME guard

Tuning pass on the coding posture from dogfooding it as a harness:

- Workspace snapshot now hands the model its verify loop up front: detected manifests + package manager (lockfile sniff), the exact verify commands (package.json scripts, Makefile targets, scripts/run_tests.sh, pytest config), and which context files (AGENTS.md / CLAUDE.md / .cursorrules) exist at the root. Marker-only (non-git) projects get the snapshot too instead of nothing. The "verify before claiming done" brief line was the highest-value piece in evals — this turns it from advice into an executable loop instead of making the model rediscover the test command every session. Still stat-cheap, size-guarded reads, built once at prompt time.
- Edit-format steering covers the families Hermes actually serves: Gemini and open-weight coding models (DeepSeek, Qwen, Kimi, GLM, Grok, Hermes, Llama, Mistral, Devstral, MiniMax) steer to mode='replace' — their RL scaffolds use str_replace-style editors. Previously only GPT/Codex and Claude families got steering; the models Hermes users disproportionately run all fell to neutral.
- Operating brief gains four behaviors elite harnesses encode: batch independent reads/searches in one turn; fix root causes and the bug class (sibling call paths), not the reported site; no drive-by refactors/renames/reformatting; never read, print, or commit secrets. Plus a patch-failure escalation ladder: after the same region fails twice, rewrite the enclosing function/file with write_file instead of a third patch attempt.
- $HOME dotfiles guard: a git repo rooted exactly at the home directory (or a marker sitting in it, e.g. a global ~/AGENTS.md) is user config, not a code workspace — without the guard, every session anywhere under a dotfiles-managed home silently flipped to the coding posture. Real projects under such a home still detect via their own markers/repos; 'on' mode bypasses the guard.
98 lines
3.5 KiB
Python
98 lines
3.5 KiB
Python
"""Tests for agent/system_prompt.py — context-file cwd wiring."""
|
|
|
|
from types import SimpleNamespace
|
|
from unittest.mock import patch
|
|
|
|
from agent.system_prompt import build_system_prompt_parts
|
|
|
|
|
|
def _make_agent(**overrides):
|
|
base = dict(
|
|
load_soul_identity=False,
|
|
skip_context_files=False,
|
|
valid_tool_names=[],
|
|
_task_completion_guidance=False,
|
|
_tool_use_enforcement=False,
|
|
_environment_probe=False,
|
|
_kanban_worker_guidance="",
|
|
_memory_store=None,
|
|
_memory_manager=None,
|
|
model="",
|
|
provider="",
|
|
platform="",
|
|
pass_session_id=False,
|
|
session_id="",
|
|
)
|
|
base.update(overrides)
|
|
return SimpleNamespace(**base)
|
|
|
|
|
|
def _captured_context_cwd(agent):
    """Return the cwd that build_system_prompt_parts passes to build_context_files_prompt.

    The context-file builder is swapped for a recorder, and the other
    ``run_agent`` prompt helpers patched here are stubbed to return empty
    strings while the prompt is built.
    """
    seen = {}

    def _record_cwd(cwd=None, skip_soul=False):
        # Stand-in for build_context_files_prompt: remember the cwd it was
        # handed and contribute no text to the prompt.
        seen["cwd"] = cwd
        return ""

    with (
        patch("run_agent.load_soul_md", return_value=""),
        patch("run_agent.build_nous_subscription_prompt", return_value=""),
        patch("run_agent.build_environment_hints", return_value=""),
        patch("run_agent.build_context_files_prompt", side_effect=_record_cwd),
    ):
        build_system_prompt_parts(agent)

    return seen["cwd"]
|
|
|
|
|
|
class TestContextFileCwd:
    """Checks which cwd reaches context-file discovery for each TERMINAL_CWD state."""

    def test_none_when_terminal_cwd_unset(self, monkeypatch):
        # With TERMINAL_CWD absent the builder must pass None, so discovery
        # falls back to the launch dir inside build_context_files_prompt
        # (the local-CLI #19242 contract).
        monkeypatch.delenv("TERMINAL_CWD", raising=False)
        observed = _captured_context_cwd(_make_agent())
        assert observed is None

    def test_configured_dir_when_terminal_cwd_set(self, monkeypatch, tmp_path):
        # A configured TERMINAL_CWD must be forwarded as that directory.
        monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))
        observed = _captured_context_cwd(_make_agent())
        assert observed == tmp_path
|
|
|
|
|
|
def _stable_prompt(agent):
    """Build the prompt parts for ``agent`` and return the ``"stable"`` entry.

    The ``run_agent`` helpers patched below are stubbed to return empty
    strings for the duration of the build.
    """
    with (
        patch("run_agent.load_soul_md", return_value=""),
        patch("run_agent.build_nous_subscription_prompt", return_value=""),
        patch("run_agent.build_environment_hints", return_value=""),
        patch("run_agent.build_context_files_prompt", return_value=""),
    ):
        parts = build_system_prompt_parts(agent)
        return parts["stable"]
|
|
|
|
|
|
class TestCodingContextBlock:
    """Checks when the coding-context text appears in the stable prompt."""

    @staticmethod
    def _enter_git_workspace(monkeypatch, tmp_path):
        """Initialise a git repo in ``tmp_path`` and point TERMINAL_CWD at it."""
        import subprocess

        subprocess.run(["git", "-C", str(tmp_path), "init", "-q"], check=True)
        monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))

    def test_injected_when_active(self, monkeypatch, tmp_path):
        self._enter_git_workspace(monkeypatch, tmp_path)
        cli_agent = _make_agent(valid_tool_names=["read_file"], platform="cli")
        prompt = _stable_prompt(cli_agent)
        for marker in ("coding agent", "Workspace"):
            assert marker in prompt

    def test_absent_when_off(self, monkeypatch, tmp_path):
        self._enter_git_workspace(monkeypatch, tmp_path)
        cli_agent = _make_agent(valid_tool_names=["read_file"], platform="cli")
        # Force agent.coding_context._coding_mode to report "off" for the
        # duration of the prompt build.
        with patch("agent.coding_context._coding_mode", return_value="off"):
            prompt = _stable_prompt(cli_agent)
        assert "coding agent" not in prompt

    def test_absent_without_tools(self, monkeypatch, tmp_path):
        self._enter_git_workspace(monkeypatch, tmp_path)
        # Same workspace and platform, but the agent has no tools.
        toolless_agent = _make_agent(valid_tool_names=[], platform="cli")
        prompt = _stable_prompt(toolless_agent)
        assert "coding agent" not in prompt
|