diff --git a/agent/verification_stop.py b/agent/verification_stop.py index 7aeb0ca74b7..8824267f694 100644 --- a/agent/verification_stop.py +++ b/agent/verification_stop.py @@ -15,6 +15,63 @@ from typing import Any, Iterable _MAX_CHANGED_PATHS_IN_NUDGE = 8 +# Non-code file extensions whose edits carry no verifiable runtime behavior: +# documentation, prose, and data/markup that no test/build exercises. When a +# turn touches ONLY these, verify-on-stop has nothing to check, so the nudge is +# suppressed (this is fix "C" for the doc/markdown/skill false-positive — a +# SKILL.md or README edit must never demand a /tmp verification script). A turn +# that edits any non-listed path (a real source/code/config file) still nudges. +_NON_CODE_VERIFY_EXTENSIONS = frozenset( + { + ".md", + ".markdown", + ".mdx", + ".rst", + ".txt", + ".text", + ".adoc", + ".asciidoc", + ".org", + ".log", + ".csv", + ".tsv", + } +) + +# Filenames (case-insensitive, extension-less or otherwise) that are pure prose +# even without a recognized doc extension. +_NON_CODE_VERIFY_FILENAMES = frozenset( + { + "license", + "licence", + "notice", + "authors", + "contributors", + "changelog", + "codeowners", + } +) + + +def _is_non_code_path(raw: str) -> bool: + """Return True when a changed path is documentation/prose with nothing to verify.""" + try: + p = Path(str(raw)) + except Exception: + return False + suffix = p.suffix.lower() + if suffix in _NON_CODE_VERIFY_EXTENSIONS: + return True + if not suffix and p.name.lower() in _NON_CODE_VERIFY_FILENAMES: + return True + return False + + +def _filter_verifiable_paths(paths: Iterable[str]) -> list[str]: + """Drop documentation/prose paths; keep paths that could have verifiable behavior.""" + return [p for p in paths if p and not _is_non_code_path(p)] + + # Session identities (platform or source) that are NOT human conversational # messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex, # local, gateway) and programmatic callers (API server, webhooks, tools). @@ -79,12 +136,13 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool: """Return whether edit -> verify-before-finish behavior is enabled. Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an - explicit boolean ``agent.verify_on_stop`` config value, then a surface-aware - default. The config default is the sentinel ``"auto"`` (see - ``DEFAULT_CONFIG``), which resolves to ON for interactive coding surfaces + explicit ``agent.verify_on_stop`` config value. The config default is + ``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user + opts in. The legacy ``"auto"`` sentinel is still honored for anyone who + sets it explicitly: it resolves to ON for interactive coding surfaces (CLI, TUI, desktop) and programmatic callers, and OFF for conversational - messaging surfaces (Telegram, Discord, etc.) where the verification - narrative would otherwise reach a human as chat noise. + messaging surfaces (Telegram, Discord, etc.). A missing/unknown value + falls back to OFF. """ env = os.environ.get("HERMES_VERIFY_ON_STOP") if env is not None: @@ -106,8 +164,11 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool: return True if token in {"0", "false", "no", "off"}: return False - # "auto", missing, or any other value -> surface-aware default. - return not _session_is_messaging_surface() + if token == "auto": + # Explicit opt-in to the legacy surface-aware behavior. + return not _session_is_messaging_surface() + # Missing or unknown value -> OFF (the new default). + return False def _candidate_cwds(paths: Iterable[str]) -> list[Path]: @@ -190,7 +251,10 @@ def build_verify_on_stop_nudge( max_attempts: int = 2, ) -> str | None: """Return a synthetic follow-up when edited code lacks fresh verification.""" - paths = sorted({str(p) for p in changed_paths if p}) + # Drop documentation/prose paths (markdown, skills, README, LICENSE, ...) — + # they carry no verifiable behavior, so a turn that touched only those has + # nothing to verify and must not nudge. + paths = sorted({str(p) for p in _filter_verifiable_paths(changed_paths)}) if not paths or attempts >= max_attempts: return None diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 1034816f2e8..45a32e9f8d9 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -987,12 +987,12 @@ DEFAULT_CONFIG = { # Verification closure: after the agent edits files in a code workspace, # do not accept a final answer until fresh verification evidence exists # or the agent explains why it cannot run checks. The loop is bounded - # and uses the passive verification ledger. The default "auto" enables - # it on interactive coding surfaces (CLI, TUI, desktop) and programmatic - # callers, and disables it on conversational messaging surfaces - # (Telegram, Discord, etc.) where the verification summary would reach a - # human as chat noise. Set true or false to force it on or off. - "verify_on_stop": "auto", + # and uses the passive verification ledger. Default is OFF — the + # verification narrative was more noise than signal for most users + # (it fired on doc/markdown/skill edits too). Set true to opt in, or + # "auto" for the legacy surface-aware behavior (on for interactive + # coding surfaces, off for conversational messaging surfaces). + "verify_on_stop": False, # Staged inactivity warning: send a warning to the user at this # threshold before escalating to a full timeout. The warning fires # once per run and does not interrupt the agent. 0 = disable warning. @@ -2974,7 +2974,7 @@ DEFAULT_CONFIG = { # Config schema version - bump this when adding new required fields - "_config_version": 30, + "_config_version": 31, } # ============================================================================= @@ -5296,6 +5296,36 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A "(LLM consolidation is now opt-in; pruning stays on)" ) + # ── Version 30 → 31: switch verify_on_stop OFF (one-time) ── + # verify_on_stop defaulted to the "auto" sentinel (surface-aware: on for + # interactive coding surfaces). In practice the verification narrative was + # more noise than signal — it even fired on doc/markdown/skill edits with + # nothing to verify. The new default is OFF. This migration switches + # existing installs off ONCE, but only when the user never expressed an + # explicit preference: we rewrite the value only if it's missing or still + # the "auto" sentinel. An explicit true/false the user set is preserved. + if current_ver < 31: + config = read_raw_config() + raw_agent = config.get("agent") + if not isinstance(raw_agent, dict): + raw_agent = {} + cur = raw_agent.get("verify_on_stop") + is_auto_sentinel = ( + isinstance(cur, str) and cur.strip().lower() == "auto" + ) + # Only flip the non-committal states; leave explicit bool/on/off alone. + if cur is None or is_auto_sentinel: + raw_agent["verify_on_stop"] = False + config["agent"] = raw_agent + save_config(config) + results["config_added"].append("agent.verify_on_stop=false") + if not quiet: + print( + " ✓ Turned off verify-on-stop (agent.verify_on_stop: false). " + "Set it to true to re-enable, or \"auto\" for the legacy " + "surface-aware behavior." + ) + # ── Post-migration: disable exfiltration-shaped MCP stdio entries ── # Users can hand-edit mcp_servers, and older installs may already contain a # malicious entry. Preserve the stanza for auditability but mark it diff --git a/tests/agent/test_verification_stop.py b/tests/agent/test_verification_stop.py index 535c9c10895..2f47e84bc0f 100644 --- a/tests/agent/test_verification_stop.py +++ b/tests/agent/test_verification_stop.py @@ -44,14 +44,18 @@ def clear_verify_env(monkeypatch): return monkeypatch -def test_verify_on_stop_default_is_on(clear_verify_env): - # No env, no messaging identity, no explicit config -> default ON. - assert verify_on_stop_enabled({"agent": {}}) is True +def test_verify_on_stop_default_is_off(clear_verify_env): + # No env, no explicit config -> default OFF (new default as of v31). + assert verify_on_stop_enabled({"agent": {}}) is False + + +def test_verify_on_stop_missing_agent_section_is_off(clear_verify_env): + assert verify_on_stop_enabled({}) is False def test_verify_on_stop_auto_sentinel_resolves_to_surface_default(clear_verify_env): - # The DEFAULT_CONFIG sentinel must fall through to the surface-aware default, - # not be coerced to a truthy string. + # The legacy "auto" sentinel is still honored when set explicitly: it falls + # through to the surface-aware default (ON interactive, OFF messaging). assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False @@ -62,49 +66,65 @@ def test_verify_on_stop_env_can_disable(clear_verify_env): assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is False +def test_verify_on_stop_env_can_enable(clear_verify_env): + # Env wins over the default-off config. + clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "1") + assert verify_on_stop_enabled({"agent": {}}) is True + + +def test_verify_on_stop_config_true_enables(clear_verify_env): + assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is True + + def test_verify_on_stop_config_can_disable(clear_verify_env): assert verify_on_stop_enabled({"agent": {"verify_on_stop": False}}) is False -def test_verify_on_stop_off_on_gateway_messaging_platform(clear_verify_env): - # The gateway binds the platform value to HERMES_SESSION_PLATFORM and leaves - # HERMES_SESSION_SOURCE empty, so a real Telegram turn must default OFF. +def test_verify_on_stop_auto_off_on_gateway_messaging_platform(clear_verify_env): + # With explicit "auto", a real Telegram turn resolves OFF. clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") - assert verify_on_stop_enabled({"agent": {}}) is False + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False @pytest.mark.parametrize( "platform", ["discord", "whatsapp_cloud", "signal", "slack", "matrix", "email", "sms"], ) -def test_verify_on_stop_off_for_each_messaging_platform(clear_verify_env, platform): +def test_verify_on_stop_auto_off_for_each_messaging_platform(clear_verify_env, platform): clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform) - assert verify_on_stop_enabled({"agent": {}}) is False + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False -def test_verify_on_stop_messaging_platform_is_case_insensitive(clear_verify_env): +def test_verify_on_stop_auto_messaging_platform_is_case_insensitive(clear_verify_env): clear_verify_env.setenv("HERMES_SESSION_PLATFORM", " Telegram ") - assert verify_on_stop_enabled({"agent": {}}) is False + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False -def test_verify_on_stop_uses_hermes_platform_override(clear_verify_env): +def test_verify_on_stop_auto_uses_hermes_platform_override(clear_verify_env): # HERMES_PLATFORM mirrors the sibling platform resolution and also flags a - # messaging surface. + # messaging surface under the "auto" sentinel. clear_verify_env.setenv("HERMES_PLATFORM", "discord") - assert verify_on_stop_enabled({"agent": {}}) is False + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False @pytest.mark.parametrize("source", ["cli", "tui", "desktop", "codex", "local"]) -def test_verify_on_stop_on_for_interactive_surfaces(clear_verify_env, source): - # CLI/TUI/desktop set HERMES_SESSION_SOURCE; these are coding surfaces -> ON. +def test_verify_on_stop_auto_on_for_interactive_surfaces(clear_verify_env, source): + # Under "auto", CLI/TUI/desktop coding surfaces resolve ON. clear_verify_env.setenv("HERMES_SESSION_SOURCE", source) - assert verify_on_stop_enabled({"agent": {}}) is True + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True @pytest.mark.parametrize("platform", ["api_server", "webhook", "msgraph_webhook"]) -def test_verify_on_stop_on_for_programmatic_surfaces(clear_verify_env, platform): +def test_verify_on_stop_auto_on_for_programmatic_surfaces(clear_verify_env, platform): clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform) - assert verify_on_stop_enabled({"agent": {}}) is True + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True + + +def test_default_off_overrides_interactive_surface(clear_verify_env): + # The new default is OFF even on an interactive coding surface — only an + # explicit "auto"/true turns it back on. + clear_verify_env.setenv("HERMES_SESSION_SOURCE", "cli") + assert verify_on_stop_enabled({"agent": {}}) is False def test_env_forces_verify_on_stop_on_for_messaging(clear_verify_env): @@ -120,20 +140,22 @@ def test_config_forces_verify_on_stop_on_for_messaging(clear_verify_env): def test_verify_on_stop_default_path_through_load_config(tmp_path, clear_verify_env): # E2E: the sole production caller passes no config, so verify_on_stop_enabled - # resolves through load_config() + DEFAULT_CONFIG. The "auto" sentinel must - # reach the surface-aware default rather than being shadowed by a static - # True. This is the path the unit-level tests above cannot exercise. + # resolves through load_config() + DEFAULT_CONFIG. The default is now the + # boolean False, so even an interactive surface resolves OFF without an + # explicit opt-in. This is the path the unit-level tests above cannot + # exercise. clear_verify_env.setenv("HERMES_HOME", str(tmp_path / ".hermes")) from hermes_cli.config import load_config merged = load_config() - assert merged["agent"]["verify_on_stop"] == "auto" + assert merged["agent"]["verify_on_stop"] is False - # Interactive (no messaging identity) resolves ON through the real loader. - assert verify_on_stop_enabled() is True + # Interactive surface still resolves OFF through the real loader. + clear_verify_env.setenv("HERMES_SESSION_SOURCE", "cli") + assert verify_on_stop_enabled() is False - # A messaging platform resolves OFF, proving the sentinel flows through. + # A messaging platform also resolves OFF. clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") assert verify_on_stop_enabled() is False @@ -261,3 +283,59 @@ def test_nudge_attempts_are_bounded(tmp_path, monkeypatch): attempts=2, max_attempts=2, ) is None + + +# --------------------------------------------------------------------------- +# Fix C: documentation/prose edits carry no verifiable behavior and must never +# trip the nudge, even on an unverified workspace. +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "doc_name", + [ + "SKILL.md", + "README.md", + "guide.markdown", + "page.mdx", + "manual.rst", + "notes.txt", + "data.csv", + "LICENSE", + "CHANGELOG", + ], +) +def test_doc_only_edit_does_not_nudge(tmp_path, monkeypatch, doc_name): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) + _node_project(tmp_path) + changed = str(tmp_path / doc_name) + mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[changed]) + + # Unverified workspace, but the only edit is a doc — nothing to verify. + assert build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed]) is None + + +def test_mixed_doc_and_code_edit_still_nudges(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) + _node_project(tmp_path) + doc = str(tmp_path / "README.md") + code = str(tmp_path / "src" / "app.ts") + mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[code]) + + nudge = build_verify_on_stop_nudge( + session_id="s1", changed_paths=[doc, code] + ) + assert nudge is not None + # The doc path is filtered out of the reported set; the code path remains. + assert code in nudge + assert doc not in nudge + + +def test_is_non_code_path_classification(): + from agent.verification_stop import _is_non_code_path + + assert _is_non_code_path("docs/SKILL.md") is True + assert _is_non_code_path("README") is False # README has no extension and isn't in the prose-filename set + assert _is_non_code_path("LICENSE") is True + assert _is_non_code_path("src/app.ts") is False + assert _is_non_code_path("config.yaml") is False + assert _is_non_code_path("run_agent.py") is False diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index 17995d1fd9a..929c1bbfde1 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -1289,3 +1289,59 @@ class TestWriteApprovalMigration: # gate ends up off and there's no leftover write_mode key. assert raw["memory"].get("write_approval", False) is False assert "write_mode" not in raw.get("memory", {}) + + +class TestVerifyOnStopMigration: + """v30 → v31: switch verify_on_stop OFF once, preserving explicit choices.""" + + def _write(self, tmp_path, body): + (tmp_path / "config.yaml").write_text(body, encoding="utf-8") + + def test_auto_sentinel_flipped_to_false(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write(tmp_path, "_config_version: 30\nagent:\n verify_on_stop: auto\n") + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert raw["agent"]["verify_on_stop"] is False + + def test_missing_key_seeded_false(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write(tmp_path, "_config_version: 30\nagent:\n max_turns: 5\n") + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert raw["agent"]["verify_on_stop"] is False + assert raw["agent"]["max_turns"] == 5 + + def test_no_agent_section_seeded_false(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write(tmp_path, "_config_version: 30\nmodel:\n provider: openrouter\n") + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert raw["agent"]["verify_on_stop"] is False + + def test_explicit_true_preserved(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write(tmp_path, "_config_version: 30\nagent:\n verify_on_stop: true\n") + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert raw["agent"]["verify_on_stop"] is True + + def test_explicit_false_preserved(self, tmp_path): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write(tmp_path, "_config_version: 30\nagent:\n verify_on_stop: false\n") + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert raw["agent"]["verify_on_stop"] is False + + def test_already_current_version_is_noop(self, tmp_path): + from hermes_cli.config import DEFAULT_CONFIG + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + self._write( + tmp_path, + f"_config_version: {DEFAULT_CONFIG['_config_version']}\n" + "agent:\n verify_on_stop: true\n", + ) + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert raw["agent"]["verify_on_stop"] is True