feat: shell hooks — wire shell scripts as Hermes hook callbacks

Users can declare shell scripts in config.yaml under a `hooks:` block; they fire on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call, subagent_stop, etc.). Scripts receive JSON on stdin and can return JSON on stdout to block tool calls or to inject context before the LLM call.

Key design:
- Registers closures on the existing PluginManager._hooks dict — zero changes to invoke_hook() call sites
- subprocess.run(shell=False) via shlex.split — no shell injection
- First-use consent per (event, command) pair, persisted to an allowlist JSON file
- Bypass via --accept-hooks, HERMES_ACCEPT_HOOKS=1, or hooks_auto_accept
- hermes hooks list/test/revoke/doctor CLI subcommands
- Adds a subagent_stop hook event fired after delegate_task children exit
- Claude Code compatible response shapes accepted

Cherry-picked from PR #13143 by @pefontana.
2026-04-25 00:51:20 +00:00 · 2026-04-20 20:53:20 -07:00 · 2026-04-20 20:53:20 -07:00 · 3988c3c245
commit 3988c3c245
parent 34c5c2538e
14 changed files with 3241 additions and 9 deletions
--- a/tests/hermes_cli/test_hooks_cli.py
+++ b/tests/hermes_cli/test_hooks_cli.py
@ -0,0 +1,268 @@
+"""Tests for the ``hermes hooks`` CLI subcommand."""
+
+from __future__ import annotations
+
+import io
+import json
+import sys
+from contextlib import redirect_stdout
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+from agent import shell_hooks
+from hermes_cli import hooks as hooks_cli
+
+
+@pytest.fixture(autouse=True)
+def _isolated_home(tmp_path, monkeypatch):
+    """Give every test in this module its own environment.
+
+    Points ``HERMES_HOME`` at ``tmp_path / "home"``, unsets
+    ``HERMES_ACCEPT_HOOKS``, and calls ``shell_hooks.reset_for_tests()``
+    both before and after the test body runs.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+    monkeypatch.delenv("HERMES_ACCEPT_HOOKS", raising=False)
+    shell_hooks.reset_for_tests()
+    yield
+    # Teardown: reset again so nothing set during the test carries over.
+    shell_hooks.reset_for_tests()
+
+
+def _hook_script(tmp_path: Path, body: str, name: str = "hook.sh") -> Path:
+    """Write *body* to ``tmp_path / name``, set its mode to 0o755, and
+    return the resulting path."""
+    p = tmp_path / name
+    p.write_text(body)
+    p.chmod(0o755)
+    return p
+
+
+def _run(sub_args: SimpleNamespace) -> str:
+    """Capture stdout for a hooks_command invocation.
+
+    Calls ``hooks_cli.hooks_command(sub_args)`` with ``sys.stdout``
+    redirected into an in-memory buffer and returns the buffer contents.
+    """
+    buf = io.StringIO()
+    with redirect_stdout(buf):
+        hooks_cli.hooks_command(sub_args)
+    return buf.getvalue()
+
+
+# ── list ──────────────────────────────────────────────────────────────────
+
+
+class TestHooksList:
+    """Output of ``hooks_action="list"`` for empty and populated configs."""
+
+    def test_empty_config(self, tmp_path):
+        """An empty config dict yields the "No shell hooks configured" message."""
+        with patch("hermes_cli.config.load_config", return_value={}):
+            out = _run(SimpleNamespace(hooks_action="list"))
+        assert "No shell hooks configured" in out
+
+    def test_shows_configured_and_consent_status(self, tmp_path):
+        """Both configured events are listed, one marked allowed and the
+        other marked not allowlisted."""
+        script = _hook_script(
+            tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n",
+        )
+        # The same script is registered under two different events.
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(script), "timeout": 30},
+                ],
+                "on_session_start": [
+                    {"command": str(script)},
+                ],
+            }
+        }
+
+        # Approve one of the two so we can see both states in the output
+        shell_hooks._record_approval("pre_tool_call", str(script))
+
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="list"))
+
+        assert "[pre_tool_call]" in out
+        assert "[on_session_start]" in out
+        assert "✓ allowed" in out
+        assert "✗ not allowlisted" in out
+        assert str(script) in out
+
+
+# ── test ──────────────────────────────────────────────────────────────────
+
+
+class TestHooksTest:
+    """Behaviour of ``hooks_action="test"``: payload shape, block parsing,
+    matcher filtering, and unknown-event handling."""
+
+    def test_synthetic_payload_matches_production_shape(self, tmp_path):
+        """`hermes hooks test` must feed the script stdin in the same
+        shape invoke_hook() would at runtime.  Prior to this fix,
+        run_once bypassed _serialize_payload and the two paths diverged —
+        scripts tested with `hermes hooks test` saw different top-level
+        keys than at runtime, silently breaking in production."""
+        capture = tmp_path / "captured.json"
+        # The script copies its stdin to ``capture`` and then prints ``{}``
+        # (``{{}}`` is an escaped brace pair inside the f-string).
+        # NOTE(review): ``capture`` is interpolated unquoted into the bash
+        # script, so a temp path containing whitespace would be word-split.
+        script = _hook_script(
+            tmp_path,
+            f"#!/usr/bin/env bash\ncat - > {capture}\nprintf '{{}}\\n'\n",
+        )
+        cfg = {"hooks": {"subagent_stop": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            _run(SimpleNamespace(
+                hooks_action="test", event="subagent_stop",
+                for_tool=None, payload_file=None,
+            ))
+
+        seen = json.loads(capture.read_text())
+        # Same top-level keys _serialize_payload produces at runtime
+        assert set(seen.keys()) == {
+            "hook_event_name", "tool_name", "tool_input",
+            "session_id", "cwd", "extra",
+        }
+        # parent_session_id was routed to top-level session_id (matches runtime)
+        # NOTE(review): "parent-sess" is presumably the value the CLI puts in
+        # its synthetic subagent_stop payload — confirm against hooks_cli.
+        assert seen["session_id"] == "parent-sess"
+        assert "parent_session_id" not in seen["extra"]
+        # subagent_stop has no tool, so tool_name / tool_input are null
+        assert seen["tool_name"] is None
+        assert seen["tool_input"] is None
+
+    def test_fires_real_subprocess_and_parses_block(self, tmp_path):
+        """A script printing ``{"decision": "block", "reason": "nope"}`` is
+        expected to appear in the CLI output as ``"action": "block"`` with
+        ``"message": "nope"``."""
+        block_script = _hook_script(
+            tmp_path,
+            "#!/usr/bin/env bash\n"
+            'printf \'{"decision": "block", "reason": "nope"}\\n\'\n',
+            name="block.sh",
+        )
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(block_script)},
+                ],
+            },
+        }
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(
+                hooks_action="test", event="pre_tool_call",
+                for_tool="terminal", payload_file=None,
+            ))
+
+        # Parsed block appears in output
+        assert '"action": "block"' in out
+        assert '"message": "nope"' in out
+
+    def test_for_tool_matcher_filters(self, tmp_path):
+        """A hook whose matcher is "terminal" is not selected when the
+        test is run with ``for_tool="web_search"``."""
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(script)},
+                ],
+            }
+        }
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(
+                hooks_action="test", event="pre_tool_call",
+                for_tool="web_search", payload_file=None,
+            ))
+        assert "No shell hooks" in out
+
+    def test_unknown_event(self):
+        """An event name of "bogus_event" produces an "Unknown event" message."""
+        with patch("hermes_cli.config.load_config", return_value={}):
+            out = _run(SimpleNamespace(
+                hooks_action="test", event="bogus_event",
+                for_tool=None, payload_file=None,
+            ))
+        assert "Unknown event" in out
+
+
+# ── revoke ────────────────────────────────────────────────────────────────
+
+
+class TestHooksRevoke:
+    """Behaviour of ``hooks_action="revoke"`` for known and unknown commands."""
+
+    def test_revoke_removes_entry(self, tmp_path):
+        """Revoking an approved command reports "Removed 1" and leaves no
+        allowlist entry for that (event, command) pair."""
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\n")
+        shell_hooks._record_approval("on_session_start", str(script))
+
+        out = _run(SimpleNamespace(hooks_action="revoke", command=str(script)))
+        assert "Removed 1" in out
+        assert shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        ) is None
+
+    def test_revoke_unknown(self, tmp_path):
+        """Revoking a command that was never approved reports
+        "No allowlist entry"."""
+        out = _run(SimpleNamespace(
+            hooks_action="revoke", command=str(tmp_path / "never.sh"),
+        ))
+        assert "No allowlist entry" in out
+
+
+# ── doctor ────────────────────────────────────────────────────────────────
+
+
+class TestHooksDoctor:
+    """Diagnostics printed by ``hooks_action="doctor"`` for a range of
+    hook-script conditions."""
+
+    def test_flags_missing_exec_bit(self, tmp_path):
+        """A script written without chmod is reported as not executable."""
+        script = tmp_path / "hook.sh"
+        script.write_text("#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        # No chmod — intentionally not executable
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "not executable" in out.lower()
+
+    def test_flags_unallowlisted(self, tmp_path):
+        """An executable script with no recorded approval is reported as
+        not allowlisted."""
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "not allowlisted" in out.lower()
+
+    def test_flags_invalid_json(self, tmp_path):
+        """An approved script that echoes non-JSON text is reported with
+        "not valid JSON"."""
+        script = _hook_script(
+            tmp_path,
+            "#!/usr/bin/env bash\necho 'not json!'\n",
+        )
+        shell_hooks._record_approval("on_session_start", str(script))
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "not valid JSON" in out
+
+    def test_flags_mtime_drift(self, tmp_path, monkeypatch):
+        """Allowlist with older mtime than current -> drift warning."""
+        # NOTE(review): ``monkeypatch`` is requested but never used in this
+        # test body.
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+
+        # Manually stash an allowlist entry with an old mtime
+        # NOTE(review): ``agent.shell_hooks`` is already imported at module
+        # level as ``shell_hooks``; this local import is redundant.
+        from agent.shell_hooks import allowlist_path
+        allowlist_path().parent.mkdir(parents=True, exist_ok=True)
+        # The file is written directly with year-2000 timestamps, while the
+        # script itself was created a moment ago.
+        allowlist_path().write_text(json.dumps({
+            "approvals": [
+                {
+                    "event": "on_session_start",
+                    "command": str(script),
+                    "approved_at": "2000-01-01T00:00:00Z",
+                    "script_mtime_at_approval": "2000-01-01T00:00:00Z",
+                }
+            ]
+        }))
+
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "modified since approval" in out
+
+    def test_clean_script_runs(self, tmp_path):
+        """An approved, executable script that prints ``{}`` yields the
+        "All shell hooks look healthy" message."""
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        shell_hooks._record_approval("on_session_start", str(script))
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "All shell hooks look healthy" in out
+
+    def test_unallowlisted_script_is_not_executed(self, tmp_path):
+        """Regression for M4: `hermes hooks doctor` used to run every
+        listed script against a synthetic payload as part of its JSON
+        smoke test, which contradicted the documented workflow of
+        "spot newly-added hooks *before they register*".  An un-allowlisted
+        script must not be executed during `doctor`."""
+        sentinel = tmp_path / "executed"
+        # Script would touch the sentinel if executed; we assert it wasn't.
+        # NOTE(review): ``sentinel`` is interpolated unquoted into the bash
+        # script, so a temp path containing whitespace would be word-split.
+        script = _hook_script(
+            tmp_path,
+            f"#!/usr/bin/env bash\ntouch {sentinel}\nprintf '{{}}\\n'\n",
+        )
+        # No _record_approval call here: the script stays un-allowlisted.
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+
+        assert not sentinel.exists(), (
+            "doctor executed an un-allowlisted script — "
+            "M4 gate regressed"
+        )
+        assert "not allowlisted" in out.lower()
+        assert "skipped JSON smoke test" in out