Merge pull request #55449 from NousResearch/bb/verify-on-stop-auto-default

feat(agent): restore surface-aware "auto" default for verify_on_stop
This commit is contained in:
brooklyn! 2026-06-30 01:47:59 -05:00 committed by GitHub
commit d6396e6a41
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 38 additions and 31 deletions

View file

@ -137,12 +137,12 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
explicit ``agent.verify_on_stop`` config value. The config default is
``False`` (see ``DEFAULT_CONFIG``) — verify-on-stop is OFF unless the user
opts in. The legacy ``"auto"`` sentinel is still honored for anyone who
sets it explicitly: it resolves to ON for interactive coding surfaces
(CLI, TUI, desktop) and programmatic callers, and OFF for conversational
messaging surfaces (Telegram, Discord, etc.). A missing/unknown value
falls back to OFF.
``"auto"`` (see ``DEFAULT_CONFIG``) — surface-aware: ON for interactive
coding surfaces (CLI, TUI, desktop) and programmatic callers, OFF for
conversational messaging surfaces (Telegram, Discord, etc.) where the
verification narrative would reach a human as chat noise. An explicit
bool forces the behavior in either direction. A missing or unrecognized
value falls back to the surface-aware ``"auto"`` default.
"""
env = os.environ.get("HERMES_VERIFY_ON_STOP")
if env is not None:
@ -165,10 +165,9 @@ def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
if token in {"0", "false", "no", "off"}:
return False
if token == "auto":
# Explicit opt-in to the legacy surface-aware behavior.
return not _session_is_messaging_surface()
# Missing or unknown value -> OFF (the new default).
return False
# Missing or unrecognized value -> surface-aware "auto" default.
return not _session_is_messaging_surface()
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:

View file

@ -1017,12 +1017,13 @@ DEFAULT_CONFIG = {
# Verification closure: after the agent edits files in a code workspace,
# do not accept a final answer until fresh verification evidence exists
# or the agent explains why it cannot run checks. The loop is bounded
# and uses the passive verification ledger. Default is OFF — the
# verification narrative was more noise than signal for most users
# (it fired on doc/markdown/skill edits too). Set true to opt in, or
# "auto" for the legacy surface-aware behavior (on for interactive
# coding surfaces, off for conversational messaging surfaces).
"verify_on_stop": False,
# and uses the passive verification ledger. Default is "auto" —
# surface-aware: on for interactive coding surfaces (CLI, TUI, desktop)
# and programmatic callers, off for conversational messaging surfaces
# (Telegram, Discord, etc.) where the verification narrative would reach
# a human as chat noise. Doc/markdown/skill-only edits never fire it.
# Set true to force on everywhere, or false to disable.
"verify_on_stop": "auto",
# Staged inactivity warning: send a warning to the user at this
# threshold before escalating to a full timeout. The warning fires
# once per run and does not interrupt the agent. 0 = disable warning.

View file

@ -44,13 +44,20 @@ def clear_verify_env(monkeypatch):
return monkeypatch
def test_verify_on_stop_default_is_off(clear_verify_env):
# No env, no explicit config -> default OFF (new default as of v31).
def test_verify_on_stop_default_is_auto(clear_verify_env):
# No env, no explicit config -> surface-aware "auto" default. With no
# messaging surface bound, an interactive/unknown surface resolves ON.
assert verify_on_stop_enabled({"agent": {}}) is True
def test_verify_on_stop_default_auto_off_on_messaging(clear_verify_env):
# The "auto" default resolves OFF on a conversational messaging surface.
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled({"agent": {}}) is False
def test_verify_on_stop_missing_agent_section_is_off(clear_verify_env):
assert verify_on_stop_enabled({}) is False
def test_verify_on_stop_missing_agent_section_uses_auto(clear_verify_env):
assert verify_on_stop_enabled({}) is True
def test_verify_on_stop_auto_sentinel_resolves_to_surface_default(clear_verify_env):
@ -67,8 +74,9 @@ def test_verify_on_stop_env_can_disable(clear_verify_env):
def test_verify_on_stop_env_can_enable(clear_verify_env):
# Env wins over the default-off config.
# Env "1" forces ON regardless of surface (here a messaging platform).
clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "1")
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled({"agent": {}}) is True
@ -120,11 +128,11 @@ def test_verify_on_stop_auto_on_for_programmatic_surfaces(clear_verify_env, plat
assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True
def test_default_off_overrides_interactive_surface(clear_verify_env):
# The new default is OFF even on an interactive coding surface — only an
# explicit "auto"/true turns it back on.
def test_default_auto_on_for_interactive_surface(clear_verify_env):
# The default is surface-aware "auto": an interactive coding surface
# resolves ON without any explicit opt-in.
clear_verify_env.setenv("HERMES_SESSION_SOURCE", "cli")
assert verify_on_stop_enabled({"agent": {}}) is False
assert verify_on_stop_enabled({"agent": {}}) is True
def test_env_forces_verify_on_stop_on_for_messaging(clear_verify_env):
@ -141,21 +149,20 @@ def test_config_forces_verify_on_stop_on_for_messaging(clear_verify_env):
def test_verify_on_stop_default_path_through_load_config(tmp_path, clear_verify_env):
# E2E: the sole production caller passes no config, so verify_on_stop_enabled
# resolves through load_config() + DEFAULT_CONFIG. The default is now the
# boolean False, so even an interactive surface resolves OFF without an
# explicit opt-in. This is the path the unit-level tests above cannot
# exercise.
# surface-aware "auto" sentinel. This is the path the unit-level tests above
# cannot exercise.
clear_verify_env.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
from hermes_cli.config import load_config
merged = load_config()
assert merged["agent"]["verify_on_stop"] is False
assert merged["agent"]["verify_on_stop"] == "auto"
# Interactive surface still resolves OFF through the real loader.
# Interactive surface resolves ON through the real loader.
clear_verify_env.setenv("HERMES_SESSION_SOURCE", "cli")
assert verify_on_stop_enabled() is False
assert verify_on_stop_enabled() is True
# A messaging platform also resolves OFF.
# A messaging platform resolves OFF.
clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
assert verify_on_stop_enabled() is False