mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
Follow-ups to #38199/#43354 found in post-merge review: - Inline CLI memory approval never worked: the per-thread approval callback was not passed to prompt_dangerous_approval, so the prompt_toolkit fail-closed guard (#15216) denied every gated foreground write without showing a prompt. Now invokes the registered callback directly; a crashed prompt falls back to staging instead of a silent deny. - Gateway sessions claimed inline support but prompt_dangerous_approval has no gateway round-trip (that lives in the pending-approval queue), so gated gateway memory writes hit the input() fallback and denied. Gateway contexts now stage for /memory pending review. - /skills pending|approve|reject|diff|approval now works on the gateway (gateway_config_gate on skills.write_approval), so skills staged from a messaging session can be reviewed there. Diff output truncated for chat. - memory_tool validates required params before the gate so invalid writes are rejected immediately instead of staged and failing at approve time. - Stale tri-state write_mode docstrings updated to the boolean gate; docs table corrected (inline prompt is interactive-CLI-only). - 6 new tests covering the interactive approve/deny/error paths, gateway staging, skills never-prompt invariant, and pre-gate validation.
384 lines
15 KiB
Python
384 lines
15 KiB
Python
"""Tests for the memory/skill write-approval gate (tools/write_approval.py)
|
|
and the shared slash-command handlers (hermes_cli/write_approval_commands.py).
|
|
|
|
Covers the boolean write_approval gate (off by default = write freely; on =
|
|
require approval) for both subsystems, the foreground-vs-background staging
|
|
split, pending store CRUD, and the list/approve/reject/diff/approval
|
|
subcommand dispatch.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
import shutil
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
|
|
def hermes_home(monkeypatch):
|
|
d = tempfile.mkdtemp(prefix="hermes_wa_test_")
|
|
home = os.path.join(d, ".hermes")
|
|
os.makedirs(home)
|
|
monkeypatch.setenv("HERMES_HOME", home)
|
|
yield home
|
|
shutil.rmtree(d, ignore_errors=True)
|
|
|
|
|
|
def _set_approval(subsystem, enabled):
|
|
import hermes_cli.config as cfg
|
|
c = cfg.load_config()
|
|
c.setdefault(subsystem, {})["write_approval"] = enabled
|
|
cfg.save_config(c)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_default_gate_is_off(hermes_home):
|
|
from tools import write_approval as wa
|
|
# Default: gate off → writes flow freely.
|
|
assert wa.write_approval_enabled("memory") is False
|
|
assert wa.write_approval_enabled("skills") is False
|
|
|
|
|
|
def test_invalid_subsystem_is_off(hermes_home):
|
|
from tools import write_approval as wa
|
|
assert wa.write_approval_enabled("bogus") is False
|
|
|
|
|
|
def test_normalize_enabled_coerces_values():
|
|
from tools import write_approval as wa
|
|
# Real bools pass through.
|
|
assert wa._normalize_enabled(True) is True
|
|
assert wa._normalize_enabled(False) is False
|
|
# Truthy strings → True (incl. legacy 'approve').
|
|
assert wa._normalize_enabled("on") is True
|
|
assert wa._normalize_enabled("approve") is True
|
|
assert wa._normalize_enabled("true") is True
|
|
# Everything else → False (gate off is the safe default).
|
|
assert wa._normalize_enabled("off") is False
|
|
assert wa._normalize_enabled("garbage") is False
|
|
assert wa._normalize_enabled(None) is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Memory gate
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_memory_gate_off_allows_write(hermes_home):
|
|
# Default (gate off) → write straight through, no staging.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "user", "save me", store=store))
|
|
assert r["success"] is True
|
|
assert r["entry_count"] == 1
|
|
assert wa.pending_count("memory") == 0
|
|
|
|
|
|
def test_memory_gate_on_no_interactive_stages(hermes_home):
|
|
# Gate on, no approval callback / not a gateway context → stage.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "stage me", store=store))
|
|
assert r.get("staged") is True
|
|
assert r.get("pending_id")
|
|
# Not written to the live store yet.
|
|
assert store.memory_entries == []
|
|
pend = wa.list_pending("memory")
|
|
assert len(pend) == 1
|
|
assert pend[0]["id"] == r["pending_id"]
|
|
|
|
|
|
def test_memory_gate_on_then_apply(hermes_home):
|
|
from tools.memory_tool import memory_tool, MemoryStore, apply_memory_pending
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "user", "approved entry", store=store))
|
|
pid = r["pending_id"]
|
|
rec = wa.get_pending("memory", pid)
|
|
result = apply_memory_pending(rec["payload"], store)
|
|
assert result["success"] is True
|
|
assert "approved entry" in store.user_entries[0]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Skill gate
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_SKILL = (
|
|
"---\nname: test-skill\ndescription: A test skill\nversion: 1.0.0\n---\n"
|
|
"# Test\nbody\n"
|
|
)
|
|
|
|
|
|
def test_skill_gate_off_allows_create(hermes_home):
|
|
# Default (gate off) → skill is created normally, not staged.
|
|
import importlib
|
|
import tools.skill_manager_tool as smt
|
|
importlib.reload(smt)
|
|
from tools import write_approval as wa
|
|
r = json.loads(smt.skill_manage("create", "free-skill", content=_SKILL))
|
|
assert r.get("success") is True
|
|
assert wa.pending_count("skills") == 0
|
|
|
|
|
|
def test_skill_gate_on_always_stages(hermes_home):
|
|
# Skills stage even in the foreground (too big to review inline).
|
|
from tools.skill_manager_tool import skill_manage
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
r = json.loads(skill_manage("create", "staged-skill", content=_SKILL))
|
|
assert r.get("staged") is True
|
|
assert "staged-skill" in r.get("gist", "")
|
|
assert wa.pending_count("skills") == 1
|
|
|
|
|
|
def test_skill_gate_on_then_apply_writes_file(hermes_home):
|
|
# SKILLS_DIR is resolved at import time, so reload the skill module under
|
|
# this test's HERMES_HOME to exercise the real on-disk write path.
|
|
import importlib
|
|
import tools.skill_manager_tool as smt
|
|
importlib.reload(smt)
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
r = json.loads(smt.skill_manage("create", "applied-skill", content=_SKILL))
|
|
rec = wa.get_pending("skills", r["pending_id"])
|
|
res = json.loads(smt.apply_skill_pending(rec["payload"]))
|
|
assert res["success"] is True
|
|
assert smt._find_skill("applied-skill") is not None
|
|
|
|
|
|
def test_skill_create_diff_is_full_content(hermes_home):
|
|
from tools.skill_manager_tool import skill_manage
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
r = json.loads(skill_manage("create", "diff-skill", content=_SKILL))
|
|
rec = wa.get_pending("skills", r["pending_id"])
|
|
diff = wa.skill_pending_diff(rec)
|
|
assert "name: test-skill" in diff
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Pending store CRUD
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_pending_store_roundtrip(hermes_home):
|
|
from tools import write_approval as wa
|
|
rec = wa.stage_write("memory", {"action": "add", "target": "user", "content": "x"},
|
|
summary="add x", origin="foreground")
|
|
assert wa.pending_count("memory") == 1
|
|
got = wa.get_pending("memory", rec["id"])
|
|
assert got["payload"]["content"] == "x"
|
|
assert wa.discard_pending("memory", rec["id"]) is True
|
|
assert wa.pending_count("memory") == 0
|
|
assert wa.get_pending("memory", rec["id"]) is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Shared command handler
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_handle_pending_list_empty(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
out = handle_pending_subcommand(wa.MEMORY, ["pending"])
|
|
assert "No pending memory" in out
|
|
|
|
|
|
def test_handle_approve_all(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools.memory_tool import MemoryStore
|
|
from tools import write_approval as wa
|
|
store = MemoryStore(); store.load_from_disk()
|
|
wa.stage_write("memory", {"action": "add", "target": "user", "content": "a"},
|
|
summary="a", origin="foreground")
|
|
wa.stage_write("memory", {"action": "add", "target": "user", "content": "b"},
|
|
summary="b", origin="foreground")
|
|
out = handle_pending_subcommand(wa.MEMORY, ["approve", "all"], memory_store=store)
|
|
assert "Approved 2" in out
|
|
assert wa.pending_count("memory") == 0
|
|
assert len(store.user_entries) == 2
|
|
|
|
|
|
def test_handle_reject(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
rec = wa.stage_write("skills", {"action": "create", "name": "s"},
|
|
summary="create s", origin="background_review")
|
|
out = handle_pending_subcommand(wa.SKILLS, ["reject", rec["id"]])
|
|
assert "Rejected" in out
|
|
assert wa.pending_count("skills") == 0
|
|
|
|
|
|
def test_handle_approval_on(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
captured = {}
|
|
out = handle_pending_subcommand(
|
|
wa.MEMORY, ["approval", "on"],
|
|
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
|
|
)
|
|
assert captured["enabled"] is True
|
|
assert "on" in out
|
|
|
|
|
|
def test_handle_approval_off(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
captured = {}
|
|
out = handle_pending_subcommand(
|
|
wa.SKILLS, ["approval", "off"],
|
|
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
|
|
)
|
|
assert captured["enabled"] is False
|
|
assert "off" in out
|
|
|
|
|
|
def test_handle_mode_alias_still_works(hermes_home):
|
|
# 'mode' is kept as a back-compat alias for 'approval'.
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
captured = {}
|
|
out = handle_pending_subcommand(
|
|
wa.MEMORY, ["mode", "on"],
|
|
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
|
|
)
|
|
assert captured["enabled"] is True
|
|
assert "on" in out
|
|
|
|
|
|
def test_handle_approval_invalid(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
out = handle_pending_subcommand(wa.MEMORY, ["approval", "bogus"],
|
|
set_mode_fn=lambda enabled: None)
|
|
assert "Invalid value" in out
|
|
|
|
|
|
def test_handle_unknown_subcommand_returns_none(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
# An unrecognized /skills subcommand (e.g. 'search') must return None so
|
|
# the CLI falls through to the skills hub.
|
|
out = handle_pending_subcommand(wa.SKILLS, ["search", "foo"])
|
|
assert out is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Inline (interactive CLI) approval path — regression for the bug where the
|
|
# per-thread approval callback was never passed to prompt_dangerous_approval,
|
|
# so every gated foreground memory write was silently denied.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.fixture
|
|
def approval_callback_cleanup():
|
|
yield
|
|
from tools.terminal_tool import set_approval_callback
|
|
set_approval_callback(None)
|
|
|
|
|
|
def test_memory_inline_approve_writes(hermes_home, approval_callback_cleanup):
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
|
|
calls = []
|
|
def approve_cb(command, description, **kw):
|
|
calls.append((command, description))
|
|
return "once"
|
|
set_approval_callback(approve_cb)
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "approved fact", store=store))
|
|
assert r["success"] is True
|
|
assert r.get("staged") is None # real write, not staged
|
|
assert store.memory_entries == ["approved fact"]
|
|
assert wa.pending_count("memory") == 0
|
|
# The registered callback must actually be invoked (not the input() path).
|
|
assert len(calls) == 1
|
|
assert "approved fact" in calls[0][0]
|
|
|
|
|
|
def test_memory_inline_deny_blocks(hermes_home, approval_callback_cleanup):
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
set_approval_callback(lambda command, description, **kw: "deny")
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "denied fact", store=store))
|
|
assert r["success"] is False
|
|
assert "denied" in r["error"].lower()
|
|
assert store.memory_entries == []
|
|
assert wa.pending_count("memory") == 0 # denied, not staged
|
|
|
|
|
|
def test_memory_inline_callback_error_stages(hermes_home, approval_callback_cleanup):
|
|
# If the prompt machinery fails, fall back to staging — never drop silently.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
def broken_cb(command, description, **kw):
|
|
raise RuntimeError("boom")
|
|
set_approval_callback(broken_cb)
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "fallback fact", store=store))
|
|
assert r.get("staged") is True
|
|
assert wa.pending_count("memory") == 1
|
|
|
|
|
|
def test_gateway_context_stages_not_prompts(hermes_home, monkeypatch):
|
|
# A gateway session has no per-thread CLI callback; the dangerous-command
|
|
# /approve round-trip lives in the pending-queue machinery which the gate
|
|
# does not use. The gate must stage, never attempt an inline prompt
|
|
# (which would hit the input() fallback and silently deny).
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
monkeypatch.setenv("HERMES_GATEWAY_SESSION", "1")
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "gateway fact", store=store))
|
|
assert r.get("staged") is True
|
|
assert store.memory_entries == []
|
|
assert wa.pending_count("memory") == 1
|
|
|
|
|
|
def test_skills_never_prompt_inline_even_with_callback(hermes_home, approval_callback_cleanup):
|
|
# Skills always stage — even when an interactive callback is registered.
|
|
from tools.skill_manager_tool import skill_manage
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
|
|
calls = []
|
|
set_approval_callback(lambda c, d, **kw: calls.append(1) or "once")
|
|
|
|
r = json.loads(skill_manage(
|
|
action="create", name="test-inline-skill",
|
|
content="---\nname: test-inline-skill\ndescription: x\n---\nbody\n"))
|
|
assert r.get("staged") is True
|
|
assert calls == [] # never prompted
|
|
assert wa.pending_count("skills") == 1
|
|
|
|
|
|
def test_memory_invalid_params_rejected_before_staging(hermes_home):
|
|
# Param validation must run BEFORE the gate so a broken write is rejected
|
|
# immediately instead of staged and failing at approve time.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", None, store=store))
|
|
assert r["success"] is False
|
|
assert wa.pending_count("memory") == 0
|