hermes-agent/agent/verification_stop.py
GodsBoy f168631be0 fix(agent): gate verify-on-stop nudge off for messaging surfaces
The verify-on-stop guard (PRs #52296, #52297) defaulted ON for every
session, so on gateway messaging surfaces (Telegram, Discord, etc.) the
model complied with the nudge by writing a hermes-verify temp script and
emitting an ad-hoc verification summary, which the gateway delivered to
the end user as chat noise.

Resolve a surface-aware default instead. The DEFAULT_CONFIG value becomes
the sentinel "auto", which verify_on_stop_enabled() resolves to ON for
interactive coding surfaces (CLI, TUI, desktop) and programmatic callers,
and OFF for conversational messaging surfaces. The surface is read from
HERMES_SESSION_PLATFORM (what the gateway actually binds), with
HERMES_SESSION_SOURCE and HERMES_PLATFORM as fallbacks, matching the
sibling resolution in skill_commands.py and prompt_builder.py. An explicit
HERMES_VERIFY_ON_STOP env var or a boolean agent.verify_on_stop config
still overrides in either direction.

The passive evidence ledger and the call site are untouched.
2026-06-25 10:05:04 +02:00

240 lines
8.7 KiB
Python

"""Turn-end verification guard for coding edits.
This module is intentionally policy-only. It never runs checks itself; it turns
the passive verification ledger into a bounded follow-up when the model tries to
finish immediately after editing code without fresh evidence.
"""
from __future__ import annotations
import os
import tempfile
from pathlib import Path
from typing import Any, Iterable
_MAX_CHANGED_PATHS_IN_NUDGE = 8
# Session identities (platform or source) that are NOT human conversational
# messaging surfaces: interactive coding surfaces (CLI, TUI, desktop, codex,
# local, gateway) and programmatic callers (API server, webhooks, tools).
# Verify-on-stop stays ON by default for these. Any other resolved gateway
# platform is a conversational messaging surface (Telegram, Discord, WhatsApp,
# Signal, Slack, etc.) where the verification narrative would reach a human as
# chat noise, so it defaults OFF. Mirrors LOCAL_SESSION_SOURCE_IDS in
# apps/desktop/src/lib/session-source.ts; keep roughly in sync when adding a
# local or programmatic surface. Default-deny by design: an unrecognized
# identity is treated as messaging (OFF) so a new chat platform never leaks the
# verification receipt before this set is updated.
_NON_MESSAGING_SESSION_SURFACES = frozenset(
{
"",
"cli",
"codex",
"desktop",
"gateway",
"local",
"tui",
"tool",
"api_server",
"webhook",
"msgraph_webhook",
}
)
def _session_is_messaging_surface() -> bool:
"""Return whether this turn is delivered over a human messaging channel.
The gateway binds the platform value (e.g. ``telegram``) to
``HERMES_SESSION_PLATFORM``; the CLI and TUI set ``HERMES_SESSION_SOURCE``
(e.g. ``cli``, ``tui``) instead. Both are consulted via the session-context
helper (with an ``os.environ`` fallback), alongside the ``HERMES_PLATFORM``
override, matching the sibling platform resolution in
``agent/skill_commands.py`` and ``agent/prompt_builder.py``. A turn is a
messaging surface when a resolved identity is present and is not a known
non-messaging surface.
"""
try:
from gateway.session_context import get_session_env
platform = (
os.getenv("HERMES_PLATFORM")
or get_session_env("HERMES_SESSION_PLATFORM", "")
)
source = get_session_env("HERMES_SESSION_SOURCE", "")
except Exception:
platform = os.getenv("HERMES_PLATFORM", "") or os.environ.get(
"HERMES_SESSION_PLATFORM", ""
)
source = os.environ.get("HERMES_SESSION_SOURCE", "")
for identity in (platform, source):
identity = str(identity or "").strip().lower()
if identity and identity not in _NON_MESSAGING_SESSION_SURFACES:
return True
return False
def verify_on_stop_enabled(config: dict[str, Any] | None = None) -> bool:
"""Return whether edit -> verify-before-finish behavior is enabled.
Precedence: an explicit ``HERMES_VERIFY_ON_STOP`` env var wins, then an
explicit boolean ``agent.verify_on_stop`` config value, then a surface-aware
default. The config default is the sentinel ``"auto"`` (see
``DEFAULT_CONFIG``), which resolves to ON for interactive coding surfaces
(CLI, TUI, desktop) and programmatic callers, and OFF for conversational
messaging surfaces (Telegram, Discord, etc.) where the verification
narrative would otherwise reach a human as chat noise.
"""
env = os.environ.get("HERMES_VERIFY_ON_STOP")
if env is not None:
return env.strip().lower() not in {"0", "false", "no", "off"}
if config is None:
try:
from hermes_cli.config import load_config
config = load_config()
except Exception:
config = {}
agent_cfg = (config or {}).get("agent") if isinstance(config, dict) else None
cfg_val = agent_cfg.get("verify_on_stop") if isinstance(agent_cfg, dict) else None
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
token = cfg_val.strip().lower()
if token in {"1", "true", "yes", "on"}:
return True
if token in {"0", "false", "no", "off"}:
return False
# "auto", missing, or any other value -> surface-aware default.
return not _session_is_messaging_surface()
def _candidate_cwds(paths: Iterable[str]) -> list[Path]:
candidates: list[Path] = []
seen: set[str] = set()
for raw in paths:
if not raw:
continue
try:
path = Path(raw).expanduser()
candidate = path if path.is_dir() else path.parent
resolved = str(candidate.resolve())
except Exception:
continue
if resolved not in seen:
seen.add(resolved)
candidates.append(Path(resolved))
return candidates
def _verification_snapshot(
*,
session_id: str | None,
changed_paths: list[str],
) -> tuple[dict[str, Any], dict[str, Any]] | None:
"""Return ``(status, facts)`` for the first edited workspace needing proof."""
try:
from agent.coding_context import project_facts_for
from agent.verification_evidence import verification_status
except Exception:
return None
first_snapshot: tuple[dict[str, Any], dict[str, Any]] | None = None
for cwd in _candidate_cwds(changed_paths):
facts = project_facts_for(cwd)
if not facts:
continue
status = verification_status(session_id=session_id, cwd=cwd)
snapshot = (status, facts)
if first_snapshot is None:
first_snapshot = snapshot
if str(status.get("status") or "unverified") != "passed":
return snapshot
return first_snapshot
def _format_changed_paths(paths: list[str]) -> str:
shown = paths[:_MAX_CHANGED_PATHS_IN_NUDGE]
lines = [f"- `{path}`" for path in shown]
remaining = len(paths) - len(shown)
if remaining > 0:
lines.append(f"- ... and {remaining} more")
return "\n".join(lines)
def _status_detail(status: dict[str, Any]) -> str:
state = str(status.get("status") or "unverified")
evidence = status.get("evidence") if isinstance(status.get("evidence"), dict) else None
if not evidence:
return state
command = evidence.get("canonical_command") or evidence.get("command")
summary = str(evidence.get("output_summary") or "").strip()
parts = [state]
if command:
parts.append(f"last command `{command}`")
if summary:
max_summary = 1200
if len(summary) > max_summary:
summary = summary[:max_summary].rstrip() + "\n... [truncated]"
parts.append(f"last output:\n{summary}")
return "\n".join(parts)
def build_verify_on_stop_nudge(
*,
session_id: str | None,
changed_paths: Iterable[str],
attempts: int = 0,
max_attempts: int = 2,
) -> str | None:
"""Return a synthetic follow-up when edited code lacks fresh verification."""
paths = sorted({str(p) for p in changed_paths if p})
if not paths or attempts >= max_attempts:
return None
snapshot = _verification_snapshot(session_id=session_id, changed_paths=paths)
if snapshot is None:
return None
status, facts = snapshot
verify_commands = [
str(cmd).strip()
for cmd in (facts.get("verifyCommands") or [])
if str(cmd).strip()
]
state = str(status.get("status") or "unverified")
if state == "passed":
return None
if verify_commands:
command_instruction = (
"Run the relevant verification command now ("
+ ", ".join(f"`{cmd}`" for cmd in verify_commands[:3])
+ (", ..." if len(verify_commands) > 3 else "")
+ "), read any failure, repair the code, and summarize what passed."
)
else:
temp_dir = tempfile.gettempdir()
command_instruction = (
"No canonical test/lint/build command was detected. Create a focused "
f"temporary verification script under `{temp_dir}` using an OS-safe "
"`tempfile` path with a `hermes-verify-` filename prefix, run it "
"against the changed behavior, clean it up when possible, and "
"summarize it explicitly as ad-hoc verification rather than suite "
"green."
)
return (
"[System: You edited code in this turn, but the workspace does not have "
"fresh passing verification evidence yet.\n\n"
f"Verification status: {_status_detail(status)}\n\n"
f"Changed paths:\n{_format_changed_paths(paths)}\n\n"
f"{command_instruction} If verification is not possible, explain the "
"concrete blocker instead of claiming the work is fully verified.]"
)
__all__ = ["build_verify_on_stop_nudge", "verify_on_stop_enabled"]