diff --git a/agent/verification_stop.py b/agent/verification_stop.py index e19cb22bc4e..7aeb0ca74b7 100644 --- a/agent/verification_stop.py +++ b/agent/verification_stop.py @@ -15,9 +15,77 @@ from typing import Any, Iterable _MAX_CHANGED_PATHS_IN_NUDGE = 8 +# Session identities (platform or source) that are NOT human conversational +# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex, +# local, gateway) and programmatic callers (API server, webhooks, tools). +# Verify-on-stop stays ON by default for these. Any other resolved gateway +# platform is a conversational messaging surface (Telegram, Discord, WhatsApp, +# Signal, Slack, etc.) where the verification narrative would reach a human as +# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in +# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a +# local or programmatic surface. Default-deny by design: an unrecognized +# identity is treated as messaging (OFF) so a new chat platform never leaks the +# verification receipt before this set is updated. +_NON_MESSAGING_SESSION_SURFACES = frozenset( + { + "", + "cli", + "codex", + "desktop", + "gateway", + "local", + "tui", + "tool", + "api_server", + "webhook", + "msgraph_webhook", + } +) + + +def _session_is_messaging_surface() -> bool: + """Return whether this turn is delivered over a human messaging channel. + + The gateway binds the platform value (e.g. ``telegram``) to + ``HERMES_SESSION_PLATFORM``; the CLI and TUI set ``HERMES_SESSION_SOURCE`` + (e.g. ``cli``, ``tui``) instead. Both are consulted via the session-context + helper (with an ``os.environ`` fallback), alongside the ``HERMES_PLATFORM`` + override, matching the sibling platform resolution in + ``agent/skill_commands.py`` and ``agent/prompt_builder.py``. A turn is a + messaging surface when a resolved identity is present and is not a known + non-messaging surface. + """ + try: + from gateway.session_context import get_session_env + + platform = ( + os.getenv("HERMES_PLATFORM") + or get_session_env("HERMES_SESSION_PLATFORM", "") + ) + source = get_session_env("HERMES_SESSION_SOURCE", "") + except Exception: + platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get( + "HERMES_SESSION_PLATFORM", "" + ) + source = os.environ.get("HERMES_SESSION_SOURCE", "") + for identity in (platform, source): + identity = str(identity or "").strip().lower() + if identity and identity not in _NON_MESSAGING_SESSION_SURFACES: + return True + return False + def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool: - """Return whether edit -> verify-before-finish behavior is enabled.""" + """Return whether edit -> verify-before-finish behavior is enabled. + + Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an + explicit boolean ``agent.verify_on_stop`` config value, then a surface-aware + default. The config default is the sentinel ``"auto"`` (see + ``DEFAULT_CONFIG``), which resolves to ON for interactive coding surfaces + (CLI, TUI, desktop) and programmatic callers, and OFF for conversational + messaging surfaces (Telegram, Discord, etc.) where the verification + narrative would otherwise reach a human as chat noise. + """ env = os.environ.get("HERMES_VERIFY_ON_STOP") if env is not None: return env.strip().lower() not in {"0", "false", "no", "off"} @@ -29,9 +97,17 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool: except Exception: config = {} agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None - if isinstance(agent_cfg, dict) and "verify_on_stop" in agent_cfg: - return bool(agent_cfg.get("verify_on_stop")) - return True + cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None + if isinstance(cfg_val, bool): + return cfg_val + if isinstance(cfg_val, str): + token = cfg_val.strip().lower() + if token in {"1", "true", "yes", "on"}: + return True + if token in {"0", "false", "no", "off"}: + return False + # "auto", missing, or any other value -> surface-aware default. + return not _session_is_messaging_surface() def _candidate_cwds(paths: Iterable[str]) -> list[Path]: diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 15a586c3378..0985b73ea71 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -637,7 +637,15 @@ agent: # primaries (default 3). The OpenAI SDK does its own low-level retries # underneath this wrapper — this is the Hermes-level loop. # api_max_retries: 3 - + + # After the agent edits code without fresh passing verification, nudge it to + # verify before finishing. The default "auto" enables it on interactive + # coding surfaces (CLI, TUI, desktop) and programmatic callers, and disables + # it on conversational messaging surfaces (Telegram, Discord, etc.) where the + # verification summary would reach a human as chat noise. Set true or false to + # force it on or off; the HERMES_VERIFY_ON_STOP env var (1/0) takes precedence. + # verify_on_stop: auto + # Enable verbose logging verbose: false diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 06e3ad5d7b9..6790ece6086 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -982,8 +982,12 @@ DEFAULT_CONFIG = { # Verification closure: after the agent edits files in a code workspace, # do not accept a final answer until fresh verification evidence exists # or the agent explains why it cannot run checks. The loop is bounded - # and uses the passive verification ledger. Set false to disable. - "verify_on_stop": True, + # and uses the passive verification ledger. The default "auto" enables + # it on interactive coding surfaces (CLI, TUI, desktop) and programmatic + # callers, and disables it on conversational messaging surfaces + # (Telegram, Discord, etc.) where the verification summary would reach a + # human as chat noise. Set true or false to force it on or off. + "verify_on_stop": "auto", # Staged inactivity warning: send a warning to the user at this # threshold before escalating to a full timeout. The warning fires # once per run and does not interrupt the agent. 0 = disable warning. diff --git a/tests/agent/test_verification_stop.py b/tests/agent/test_verification_stop.py index 600fa5bf714..535c9c10895 100644 --- a/tests/agent/test_verification_stop.py +++ b/tests/agent/test_verification_stop.py @@ -2,6 +2,8 @@ import json import tempfile from pathlib import Path +import pytest + from agent.verification_evidence import ( mark_workspace_edited, record_terminal_result, @@ -25,21 +27,117 @@ def _make_project(root: Path) -> None: _node_project(root) -def test_verify_on_stop_default_is_on(monkeypatch): - monkeypatch.delenv("HERMES_VERIFY_ON_STOP", raising=False) +@pytest.fixture +def clear_verify_env(monkeypatch): + """Clear every env signal verify_on_stop_enabled consults. + + Tests then set only the variable they exercise, mirroring how the CLI/TUI + set HERMES_SESSION_SOURCE and the gateway sets HERMES_SESSION_PLATFORM. + """ + for var in ( + "HERMES_VERIFY_ON_STOP", + "HERMES_PLATFORM", + "HERMES_SESSION_PLATFORM", + "HERMES_SESSION_SOURCE", + ): + monkeypatch.delenv(var, raising=False) + return monkeypatch + + +def test_verify_on_stop_default_is_on(clear_verify_env): + # No env, no messaging identity, no explicit config -> default ON. assert verify_on_stop_enabled({"agent": {}}) is True -def test_verify_on_stop_env_can_disable(monkeypatch): - monkeypatch.setenv("HERMES_VERIFY_ON_STOP", "0") +def test_verify_on_stop_auto_sentinel_resolves_to_surface_default(clear_verify_env): + # The DEFAULT_CONFIG sentinel must fall through to the surface-aware default, + # not be coerced to a truthy string. + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") + assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False + + +def test_verify_on_stop_env_can_disable(clear_verify_env): + clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "0") assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is False -def test_verify_on_stop_config_can_disable(monkeypatch): - monkeypatch.delenv("HERMES_VERIFY_ON_STOP", raising=False) +def test_verify_on_stop_config_can_disable(clear_verify_env): assert verify_on_stop_enabled({"agent": {"verify_on_stop": False}}) is False +def test_verify_on_stop_off_on_gateway_messaging_platform(clear_verify_env): + # The gateway binds the platform value to HERMES_SESSION_PLATFORM and leaves + # HERMES_SESSION_SOURCE empty, so a real Telegram turn must default OFF. + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") + assert verify_on_stop_enabled({"agent": {}}) is False + + +@pytest.mark.parametrize( + "platform", + ["discord", "whatsapp_cloud", "signal", "slack", "matrix", "email", "sms"], +) +def test_verify_on_stop_off_for_each_messaging_platform(clear_verify_env, platform): + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform) + assert verify_on_stop_enabled({"agent": {}}) is False + + +def test_verify_on_stop_messaging_platform_is_case_insensitive(clear_verify_env): + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", " Telegram ") + assert verify_on_stop_enabled({"agent": {}}) is False + + +def test_verify_on_stop_uses_hermes_platform_override(clear_verify_env): + # HERMES_PLATFORM mirrors the sibling platform resolution and also flags a + # messaging surface. + clear_verify_env.setenv("HERMES_PLATFORM", "discord") + assert verify_on_stop_enabled({"agent": {}}) is False + + +@pytest.mark.parametrize("source", ["cli", "tui", "desktop", "codex", "local"]) +def test_verify_on_stop_on_for_interactive_surfaces(clear_verify_env, source): + # CLI/TUI/desktop set HERMES_SESSION_SOURCE; these are coding surfaces -> ON. + clear_verify_env.setenv("HERMES_SESSION_SOURCE", source) + assert verify_on_stop_enabled({"agent": {}}) is True + + +@pytest.mark.parametrize("platform", ["api_server", "webhook", "msgraph_webhook"]) +def test_verify_on_stop_on_for_programmatic_surfaces(clear_verify_env, platform): + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform) + assert verify_on_stop_enabled({"agent": {}}) is True + + +def test_env_forces_verify_on_stop_on_for_messaging(clear_verify_env): + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") + clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "1") + assert verify_on_stop_enabled({"agent": {}}) is True + + +def test_config_forces_verify_on_stop_on_for_messaging(clear_verify_env): + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") + assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is True + + +def test_verify_on_stop_default_path_through_load_config(tmp_path, clear_verify_env): + # E2E: the sole production caller passes no config, so verify_on_stop_enabled + # resolves through load_config() + DEFAULT_CONFIG. The "auto" sentinel must + # reach the surface-aware default rather than being shadowed by a static + # True. This is the path the unit-level tests above cannot exercise. + clear_verify_env.setenv("HERMES_HOME", str(tmp_path / ".hermes")) + + from hermes_cli.config import load_config + + merged = load_config() + assert merged["agent"]["verify_on_stop"] == "auto" + + # Interactive (no messaging identity) resolves ON through the real loader. + assert verify_on_stop_enabled() is True + + # A messaging platform resolves OFF, proving the sentinel flows through. + clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram") + assert verify_on_stop_enabled() is False + + def test_no_nudge_after_fresh_pass(tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) _node_project(tmp_path)