hermes-agent/tests/agent/test_verification_stop.py
Brooklyn Nicholson 2f1a47b90e feat(agent): require verification before finishing edits
Make verification closure the default coding behavior after landed file edits while keeping bounded retries and config/env switches for users who need to disable it.
2026-06-24 23:02:48 -05:00

139 lines
4.2 KiB
Python

import json
from pathlib import Path
from agent.verification_evidence import (
mark_workspace_edited,
record_terminal_result,
)
from agent.verification_stop import (
build_verify_on_stop_nudge,
verify_on_stop_enabled,
)
def _node_project(root: Path) -> None:
(root / "package.json").write_text(
json.dumps({"scripts": {"test": "vitest", "lint": "eslint ."}}),
encoding="utf-8",
)
(root / "pnpm-lock.yaml").write_text("", encoding="utf-8")
def _make_project(root: Path) -> None:
root.mkdir()
_node_project(root)
def test_verify_on_stop_default_is_on(monkeypatch):
monkeypatch.delenv("HERMES_VERIFY_ON_STOP", raising=False)
assert verify_on_stop_enabled({"agent": {}}) is True
def test_verify_on_stop_env_can_disable(monkeypatch):
monkeypatch.setenv("HERMES_VERIFY_ON_STOP", "0")
assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is False
def test_verify_on_stop_config_can_disable(monkeypatch):
monkeypatch.delenv("HERMES_VERIFY_ON_STOP", raising=False)
assert verify_on_stop_enabled({"agent": {"verify_on_stop": False}}) is False
def test_no_nudge_after_fresh_pass(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
_node_project(tmp_path)
changed = str(tmp_path / "src" / "app.ts")
record_terminal_result(
command="pnpm test",
cwd=tmp_path,
session_id="s1",
exit_code=0,
output="green",
)
assert build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed]) is None
def test_nudge_checks_all_edited_workspaces(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
project_a = tmp_path / "a"
project_b = tmp_path / "b"
_make_project(project_a)
_make_project(project_b)
changed_a = str(project_a / "src" / "app.ts")
changed_b = str(project_b / "src" / "app.ts")
record_terminal_result(
command="pnpm test",
cwd=project_a,
session_id="s1",
exit_code=0,
output="green",
)
mark_workspace_edited(session_id="s1", cwd=project_b, paths=[changed_b])
nudge = build_verify_on_stop_nudge(
session_id="s1",
changed_paths=[changed_a, changed_b],
)
assert nudge is not None
assert "fresh passing verification evidence" in nudge
def test_nudge_after_unverified_edit_with_known_command(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
_node_project(tmp_path)
changed = str(tmp_path / "src" / "app.ts")
mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[changed])
nudge = build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed])
assert nudge is not None
assert "fresh passing verification evidence" in nudge
assert "`pnpm run test`" in nudge
assert changed in nudge
def test_nudge_includes_failed_output_summary(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
_node_project(tmp_path)
changed = str(tmp_path / "src" / "app.ts")
record_terminal_result(
command="pnpm test",
cwd=tmp_path,
session_id="s1",
exit_code=1,
output="expected 1 got 2",
)
nudge = build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed])
assert nudge is not None
assert "failed" in nudge
assert "expected 1 got 2" in nudge
assert "repair the code" in nudge
def test_no_nudge_without_canonical_verify_command(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
(tmp_path / "package.json").write_text("{}", encoding="utf-8")
changed = str(tmp_path / "src" / "app.ts")
assert build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed]) is None
def test_nudge_attempts_are_bounded(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
_node_project(tmp_path)
changed = str(tmp_path / "src" / "app.ts")
mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[changed])
assert build_verify_on_stop_nudge(
session_id="s1",
changed_paths=[changed],
attempts=2,
max_attempts=2,
) is None