Merge pull request #52412 from GodsBoy/fix/verify-on-stop-messaging-surface-leak

fix(agent): gate verify-on-stop nudge off for messaging surfaces
This commit is contained in:
brooklyn! 2026-06-26 02:30:08 -05:00 committed by GitHub
commit a2b49e60b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 199 additions and 13 deletions

View file

@ -15,9 +15,77 @@ from typing import Any, Iterable
_MAX_CHANGED_PATHS_IN_NUDGE = 8
# Session identities (platform or source) that are NOT human conversational
# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
# local, gateway) and programmatic callers (API server, webhooks, tools).
# Verify-on-stop stays ON by default for these. Any other resolved gateway
# platform is a conversational messaging surface (Telegram, Discord, WhatsApp,
# Signal, Slack, etc.) where the verification narrative would reach a human as
# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in
# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a
# local or programmatic surface. Default-deny by design: an unrecognized
# identity is treated as messaging (OFF) so a new chat platform never leaks the
# verification receipt before this set is updated.
_NON_MESSAGING_SESSION_SURFACES = frozenset(
{
"",
"cli",
"codex",
"desktop",
"gateway",
"local",
"tui",
"tool",
"api_server",
"webhook",
"msgraph_webhook",
}
)
def _session_is_messaging_surface() -> bool:
"""Return whether this turn is delivered over a human messaging channel.
The gateway binds the platform value (e.g. ``telegram``) to
``HERMES_SESSION_PLATFORM``; the CLI and TUI set ``HERMES_SESSION_SOURCE``
(e.g. ``cli``, ``tui``) instead. Both are consulted via the session-context
helper (with an ``os.environ`` fallback), alongside the ``HERMES_PLATFORM``
override, matching the sibling platform resolution in
``agent/skill_commands.py`` and ``agent/prompt_builder.py``. A turn is a
messaging surface when a resolved identity is present and is not a known
non-messaging surface.
"""
try:
from gateway.session_context import get_session_env
platform = (
os.getenv("HERMES_PLATFORM")
or get_session_env("HERMES_SESSION_PLATFORM", "")
)
source = get_session_env("HERMES_SESSION_SOURCE", "")
except Exception:
platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get(
"HERMES_SESSION_PLATFORM", ""
)
source = os.environ.get("HERMES_SESSION_SOURCE", "")
for identity in (platform, source):
identity = str(identity or "").strip().lower()
if identity and identity not in _NON_MESSAGING_SESSION_SURFACES:
return True
return False
def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
"""Return whether edit -> verify-before-finish behavior is enabled."""
"""Return whether edit -> verify-before-finish behavior is enabled.
Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
explicit boolean ``agent.verify_on_stop`` config value, then a surface-aware
default. The config default is the sentinel ``"auto"`` (see
``DEFAULT_CONFIG``), which resolves to ON for interactive coding surfaces
(CLI, TUI, desktop) and programmatic callers, and OFF for conversational
messaging surfaces (Telegram, Discord, etc.) where the verification
narrative would otherwise reach a human as chat noise.
"""
env = os.environ.get("HERMES_VERIFY_ON_STOP")
if env is not None:
return env.strip().lower() not in {"0", "false", "no", "off"}
@ -29,9 +97,17 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
except Exception:
config = {}
agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None
if isinstance(agent_cfg, dict) and "verify_on_stop" in agent_cfg:
return bool(agent_cfg.get("verify_on_stop"))
return True
cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
token = cfg_val.strip().lower()
if token in {"1", "true", "yes", "on"}:
return True
if token in {"0", "false", "no", "off"}:
return False
# "auto", missing, or any other value -> surface-aware default.
return not _session_is_messaging_surface()
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:

View file

@ -637,7 +637,15 @@ agent:
# primaries (default 3). The OpenAI SDK does its own low-level retries
# underneath this wrapper — this is the Hermes-level loop.
# api_max_retries: 3
# After the agent edits code without fresh passing verification, nudge it to
# verify before finishing. The default "auto" enables it on interactive
# coding surfaces (CLI, TUI, desktop) and programmatic callers, and disables
# it on conversational messaging surfaces (Telegram, Discord, etc.) where the
# verification summary would reach a human as chat noise. Set true or false to
# force it on or off; the HERMES_VERIFY_ON_STOP env var (1/0) takes precedence.
# verify_on_stop: auto
# Enable verbose logging
verbose: false

View file

@ -987,8 +987,12 @@ DEFAULT_CONFIG = {
# Verification closure: after the agent edits files in a code workspace,
# do not accept a final answer until fresh verification evidence exists
# or the agent explains why it cannot run checks. The loop is bounded
# and uses the passive verification ledger. Set false to disable.
"verify_on_stop": True,
# and uses the passive verification ledger. The default "auto" enables
# it on interactive coding surfaces (CLI, TUI, desktop) and programmatic
# callers, and disables it on conversational messaging surfaces
# (Telegram, Discord, etc.) where the verification summary would reach a
# human as chat noise. Set true or false to force it on or off.
"verify_on_stop": "auto",
# Staged inactivity warning: send a warning to the user at this
# threshold before escalating to a full timeout. The warning fires
# once per run and does not interrupt the agent. 0 = disable warning.

View file

@ -2,6 +2,8 @@ import json
import tempfile
from pathlib import Path
import pytest
from agent.verification_evidence import (
mark_workspace_edited,
record_terminal_result,
@ -25,21 +27,117 @@ def _make_project(root: Path) -> None:
_node_project(root)
def test_verify_on_stop_default_is_on(monkeypatch):
monkeypatch.delenv("HERMES_VERIFY_ON_STOP", raising=False)
@pytest.fixture
def clear_verify_env(monkeypatch):
"""Clear every env signal verify_on_stop_enabled consults.
Tests then set only the variable they exercise, mirroring how the CLI/TUI
set HERMES_SESSION_SOURCE and the gateway sets HERMES_SESSION_PLATFORM.
"""
for var in (
"HERMES_VERIFY_ON_STOP",
"HERMES_PLATFORM",
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_SOURCE",
):
monkeypatch.delenv(var, raising=False)
return monkeypatch
def test_verify_on_stop_default_is_on(clear_verify_env):
# No env, no messaging identity, no explicit config -> default ON.
assert verify_on_stop_enabled({"agent": {}}) is True
def test_verify_on_stop_env_can_disable(monkeypatch):
monkeypatch.setenv("HERMES_VERIFY_ON_STOP", "0")
def test_verify_on_stop_auto_sentinel_resolves_to_surface_default(clear_verify_env):
# The DEFAULT_CONFIG sentinel must fall through to the surface-aware default,
# not be coerced to a truthy string.
assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False
def test_verify_on_stop_env_can_disable(clear_verify_env):
clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "0")
assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is False
def test_verify_on_stop_config_can_disable(monkeypatch):
monkeypatch.delenv("HERMES_VERIFY_ON_STOP", raising=False)
def test_verify_on_stop_config_can_disable(clear_verify_env):
assert verify_on_stop_enabled({"agent": {"verify_on_stop": False}}) is False
def test_verify_on_stop_off_on_gateway_messaging_platform(clear_verify_env):
# The gateway binds the platform value to HERMES_SESSION_PLATFORM and leaves
# HERMES_SESSION_SOURCE empty, so a real Telegram turn must default OFF.
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled({"agent": {}}) is False
@pytest.mark.parametrize(
"platform",
["discord", "whatsapp_cloud", "signal", "slack", "matrix", "email", "sms"],
)
def test_verify_on_stop_off_for_each_messaging_platform(clear_verify_env, platform):
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform)
assert verify_on_stop_enabled({"agent": {}}) is False
def test_verify_on_stop_messaging_platform_is_case_insensitive(clear_verify_env):
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", " Telegram ")
assert verify_on_stop_enabled({"agent": {}}) is False
def test_verify_on_stop_uses_hermes_platform_override(clear_verify_env):
# HERMES_PLATFORM mirrors the sibling platform resolution and also flags a
# messaging surface.
clear_verify_env.setenv("HERMES_PLATFORM", "discord")
assert verify_on_stop_enabled({"agent": {}}) is False
@pytest.mark.parametrize("source", ["cli", "tui", "desktop", "codex", "local"])
def test_verify_on_stop_on_for_interactive_surfaces(clear_verify_env, source):
# CLI/TUI/desktop set HERMES_SESSION_SOURCE; these are coding surfaces -> ON.
clear_verify_env.setenv("HERMES_SESSION_SOURCE", source)
assert verify_on_stop_enabled({"agent": {}}) is True
@pytest.mark.parametrize("platform", ["api_server", "webhook", "msgraph_webhook"])
def test_verify_on_stop_on_for_programmatic_surfaces(clear_verify_env, platform):
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform)
assert verify_on_stop_enabled({"agent": {}}) is True
def test_env_forces_verify_on_stop_on_for_messaging(clear_verify_env):
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "1")
assert verify_on_stop_enabled({"agent": {}}) is True
def test_config_forces_verify_on_stop_on_for_messaging(clear_verify_env):
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is True
def test_verify_on_stop_default_path_through_load_config(tmp_path, clear_verify_env):
# E2E: the sole production caller passes no config, so verify_on_stop_enabled
# resolves through load_config() + DEFAULT_CONFIG. The "auto" sentinel must
# reach the surface-aware default rather than being shadowed by a static
# True. This is the path the unit-level tests above cannot exercise.
clear_verify_env.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
from hermes_cli.config import load_config
merged = load_config()
assert merged["agent"]["verify_on_stop"] == "auto"
# Interactive (no messaging identity) resolves ON through the real loader.
assert verify_on_stop_enabled() is True
# A messaging platform resolves OFF, proving the sentinel flows through.
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled() is False
def test_no_nudge_after_fresh_pass(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
_node_project(tmp_path)