hermes-agent/tests/agent/test_verification_stop.py

import json
import tempfile
from pathlib import Path

import pytest

from agent.verification_evidence import (
    mark_workspace_edited,
    record_terminal_result,
)
from agent.verification_stop import (
    build_verify_on_stop_nudge,
    verify_on_stop_enabled,
)


def _node_project(root: Path) -> None:
    (root / "package.json").write_text(
        json.dumps({"scripts": {"test": "vitest", "lint": "eslint ."}}),
        encoding="utf-8",
    )
    (root / "pnpm-lock.yaml").write_text("", encoding="utf-8")


def _make_project(root: Path) -> None:
    root.mkdir()
    _node_project(root)


@pytest.fixture
def clear_verify_env(monkeypatch):
    """Clear every env signal verify_on_stop_enabled consults.

    Tests then set only the variable they exercise, mirroring how the CLI/TUI
    set HERMES_SESSION_SOURCE and the gateway sets HERMES_SESSION_PLATFORM.
    """
    for var in (
        "HERMES_VERIFY_ON_STOP",
        "HERMES_PLATFORM",
        "HERMES_SESSION_PLATFORM",
        "HERMES_SESSION_SOURCE",
    ):
        monkeypatch.delenv(var, raising=False)
    return monkeypatch


def test_verify_on_stop_default_is_off(clear_verify_env):
    # No env, no explicit config -> default OFF (new default as of v31).
    assert verify_on_stop_enabled({"agent": {}}) is False


def test_verify_on_stop_missing_agent_section_is_off(clear_verify_env):
    assert verify_on_stop_enabled({}) is False


def test_verify_on_stop_auto_sentinel_resolves_to_surface_default(clear_verify_env):
    # The legacy "auto" sentinel is still honored when set explicitly: it falls
    # through to the surface-aware default (ON interactive, OFF messaging).
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False


def test_verify_on_stop_env_can_disable(clear_verify_env):
    clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "0")
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is False


def test_verify_on_stop_env_can_enable(clear_verify_env):
    # Env wins over the default-off config.
    clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "1")
    assert verify_on_stop_enabled({"agent": {}}) is True


def test_verify_on_stop_config_true_enables(clear_verify_env):
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is True


def test_verify_on_stop_config_can_disable(clear_verify_env):
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": False}}) is False


def test_verify_on_stop_auto_off_on_gateway_messaging_platform(clear_verify_env):
    # With explicit "auto", a real Telegram turn resolves OFF.
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False


@pytest.mark.parametrize(
    "platform",
    ["discord", "whatsapp_cloud", "signal", "slack", "matrix", "email", "sms"],
)
def test_verify_on_stop_auto_off_for_each_messaging_platform(clear_verify_env, platform):
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform)
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False


def test_verify_on_stop_auto_messaging_platform_is_case_insensitive(clear_verify_env):
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "  Telegram  ")
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False


def test_verify_on_stop_auto_uses_hermes_platform_override(clear_verify_env):
    # HERMES_PLATFORM mirrors the sibling platform resolution and also flags a
    # messaging surface under the "auto" sentinel.
    clear_verify_env.setenv("HERMES_PLATFORM", "discord")
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is False


@pytest.mark.parametrize("source", ["cli", "tui", "desktop", "codex", "local"])
def test_verify_on_stop_auto_on_for_interactive_surfaces(clear_verify_env, source):
    # Under "auto", CLI/TUI/desktop coding surfaces resolve ON.
    clear_verify_env.setenv("HERMES_SESSION_SOURCE", source)
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True


@pytest.mark.parametrize("platform", ["api_server", "webhook", "msgraph_webhook"])
def test_verify_on_stop_auto_on_for_programmatic_surfaces(clear_verify_env, platform):
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", platform)
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": "auto"}}) is True


def test_default_off_overrides_interactive_surface(clear_verify_env):
    # The new default is OFF even on an interactive coding surface — only an
    # explicit "auto"/true turns it back on.
    clear_verify_env.setenv("HERMES_SESSION_SOURCE", "cli")
    assert verify_on_stop_enabled({"agent": {}}) is False


def test_env_forces_verify_on_stop_on_for_messaging(clear_verify_env):
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
    clear_verify_env.setenv("HERMES_VERIFY_ON_STOP", "1")
    assert verify_on_stop_enabled({"agent": {}}) is True


def test_config_forces_verify_on_stop_on_for_messaging(clear_verify_env):
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
    assert verify_on_stop_enabled({"agent": {"verify_on_stop": True}}) is True


def test_verify_on_stop_default_path_through_load_config(tmp_path, clear_verify_env):
    # E2E: the sole production caller passes no config, so verify_on_stop_enabled
    # resolves through load_config() + DEFAULT_CONFIG. The default is now the
    # boolean False, so even an interactive surface resolves OFF without an
    # explicit opt-in. This is the path the unit-level tests above cannot
    # exercise.
    clear_verify_env.setenv("HERMES_HOME", str(tmp_path / ".hermes"))

    from hermes_cli.config import load_config

    merged = load_config()
    assert merged["agent"]["verify_on_stop"] is False

    # Interactive surface still resolves OFF through the real loader.
    clear_verify_env.setenv("HERMES_SESSION_SOURCE", "cli")
    assert verify_on_stop_enabled() is False

    # A messaging platform also resolves OFF.
    clear_verify_env.setenv("HERMES_SESSION_PLATFORM", "telegram")
    assert verify_on_stop_enabled() is False


def test_no_nudge_after_fresh_pass(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    _node_project(tmp_path)
    changed = str(tmp_path / "src" / "app.ts")

    record_terminal_result(
        command="pnpm test",
        cwd=tmp_path,
        session_id="s1",
        exit_code=0,
        output="green",
    )

    assert build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed]) is None


def test_nudge_checks_all_edited_workspaces(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    project_a = tmp_path / "a"
    project_b = tmp_path / "b"
    _make_project(project_a)
    _make_project(project_b)
    changed_a = str(project_a / "src" / "app.ts")
    changed_b = str(project_b / "src" / "app.ts")

    record_terminal_result(
        command="pnpm test",
        cwd=project_a,
        session_id="s1",
        exit_code=0,
        output="green",
    )
    mark_workspace_edited(session_id="s1", cwd=project_b, paths=[changed_b])

    nudge = build_verify_on_stop_nudge(
        session_id="s1",
        changed_paths=[changed_a, changed_b],
    )

    assert nudge is not None
    assert "fresh passing verification evidence" in nudge


def test_nudge_after_unverified_edit_with_known_command(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    _node_project(tmp_path)
    changed = str(tmp_path / "src" / "app.ts")
    mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[changed])

    nudge = build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed])

    assert nudge is not None
    assert "fresh passing verification evidence" in nudge
    assert "`pnpm run test`" in nudge
    assert changed in nudge


def test_nudge_includes_failed_output_summary(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    _node_project(tmp_path)
    changed = str(tmp_path / "src" / "app.ts")

    record_terminal_result(
        command="pnpm test",
        cwd=tmp_path,
        session_id="s1",
        exit_code=1,
        output="expected 1 got 2",
    )

    nudge = build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed])

    assert nudge is not None
    assert "failed" in nudge
    assert "expected 1 got 2" in nudge
    assert "repair the code" in nudge


def test_no_suite_nudge_requests_temp_script(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    (tmp_path / "package.json").write_text("{}", encoding="utf-8")
    changed = str(tmp_path / "src" / "app.ts")

    nudge = build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed])

    assert nudge is not None
    assert tempfile.gettempdir() in nudge
    assert "ad-hoc verification" in nudge
    assert "suite green" in nudge


def test_ad_hoc_pass_satisfies_no_suite_stop_loop(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    (tmp_path / "package.json").write_text("{}", encoding="utf-8")
    changed = str(tmp_path / "src" / "app.ts")
    script = Path(tempfile.gettempdir()) / f"hermes-ad-hoc-stop-{tmp_path.name}.py"
    script.write_text("print('ok')\n", encoding="utf-8")
    try:
        record_terminal_result(
            command=f"python {script}",
            cwd=tmp_path,
            session_id="s1",
            exit_code=0,
            output="ok",
        )
    finally:
        script.unlink(missing_ok=True)

    assert build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed]) is None


def test_nudge_attempts_are_bounded(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    _node_project(tmp_path)
    changed = str(tmp_path / "src" / "app.ts")
    mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[changed])

    assert build_verify_on_stop_nudge(
        session_id="s1",
        changed_paths=[changed],
        attempts=2,
        max_attempts=2,
    ) is None


# ---------------------------------------------------------------------------
# Fix C: documentation/prose edits carry no verifiable behavior and must never
# trip the nudge, even on an unverified workspace.
# ---------------------------------------------------------------------------

@pytest.mark.parametrize(
    "doc_name",
    [
        "SKILL.md",
        "README.md",
        "guide.markdown",
        "page.mdx",
        "manual.rst",
        "notes.txt",
        "data.csv",
        "LICENSE",
        "CHANGELOG",
    ],
)
def test_doc_only_edit_does_not_nudge(tmp_path, monkeypatch, doc_name):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    _node_project(tmp_path)
    changed = str(tmp_path / doc_name)
    mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[changed])

    # Unverified workspace, but the only edit is a doc — nothing to verify.
    assert build_verify_on_stop_nudge(session_id="s1", changed_paths=[changed]) is None


def test_mixed_doc_and_code_edit_still_nudges(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
    _node_project(tmp_path)
    doc = str(tmp_path / "README.md")
    code = str(tmp_path / "src" / "app.ts")
    mark_workspace_edited(session_id="s1", cwd=tmp_path, paths=[code])

    nudge = build_verify_on_stop_nudge(
        session_id="s1", changed_paths=[doc, code]
    )
    assert nudge is not None
    # The doc path is filtered out of the reported set; the code path remains.
    assert code in nudge
    assert doc not in nudge


def test_is_non_code_path_classification():
    from agent.verification_stop import _is_non_code_path

    assert _is_non_code_path("docs/SKILL.md") is True
    assert _is_non_code_path("README") is False  # README has no extension and isn't in the prose-filename set
    assert _is_non_code_path("LICENSE") is True
    assert _is_non_code_path("src/app.ts") is False
    assert _is_non_code_path("config.yaml") is False
    assert _is_non_code_path("run_agent.py") is False