mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-24 10:52:21 +00:00
Follow-up to the /memory approve fresh-store fix. Both the CLI fallback and the messaging-gateway handler built a bare MemoryStore() with the hardcoded default char limits (2200/1375), ignoring the user's configured memory.memory_char_limit / user_char_limit. A live agent honors those overrides (agent/agent_init.py), so an approval applied without a live agent could accept a write the user's lower cap would reject, or vice versa. Extract a shared tools.memory_tool.load_on_disk_store() factory that reads the configured limits (falling back to defaults if config can't load) and wire both the CLI and gateway handlers to it, closing the gap on both surfaces and de-duplicating the construction block.
441 lines
17 KiB
Python
441 lines
17 KiB
Python
"""Tests for the memory/skill write-approval gate (tools/write_approval.py)
|
|
and the shared slash-command handlers (hermes_cli/write_approval_commands.py).
|
|
|
|
Covers the boolean write_approval gate (off by default = write freely; on =
|
|
require approval) for both subsystems, the foreground-vs-background staging
|
|
split, pending store CRUD, and the list/approve/reject/diff/approval
|
|
subcommand dispatch.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
import shutil
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
|
|
def hermes_home(monkeypatch):
|
|
d = tempfile.mkdtemp(prefix="hermes_wa_test_")
|
|
home = os.path.join(d, ".hermes")
|
|
os.makedirs(home)
|
|
monkeypatch.setenv("HERMES_HOME", home)
|
|
yield home
|
|
shutil.rmtree(d, ignore_errors=True)
|
|
|
|
|
|
def _set_approval(subsystem, enabled):
|
|
import hermes_cli.config as cfg
|
|
c = cfg.load_config()
|
|
c.setdefault(subsystem, {})["write_approval"] = enabled
|
|
cfg.save_config(c)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_default_gate_is_off(hermes_home):
|
|
from tools import write_approval as wa
|
|
# Default: gate off → writes flow freely.
|
|
assert wa.write_approval_enabled("memory") is False
|
|
assert wa.write_approval_enabled("skills") is False
|
|
|
|
|
|
def test_invalid_subsystem_is_off(hermes_home):
|
|
from tools import write_approval as wa
|
|
assert wa.write_approval_enabled("bogus") is False
|
|
|
|
|
|
def test_normalize_enabled_coerces_values():
|
|
from tools import write_approval as wa
|
|
# Real bools pass through.
|
|
assert wa._normalize_enabled(True) is True
|
|
assert wa._normalize_enabled(False) is False
|
|
# Truthy strings → True (incl. legacy 'approve').
|
|
assert wa._normalize_enabled("on") is True
|
|
assert wa._normalize_enabled("approve") is True
|
|
assert wa._normalize_enabled("true") is True
|
|
# Everything else → False (gate off is the safe default).
|
|
assert wa._normalize_enabled("off") is False
|
|
assert wa._normalize_enabled("garbage") is False
|
|
assert wa._normalize_enabled(None) is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Memory gate
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_memory_gate_off_allows_write(hermes_home):
|
|
# Default (gate off) → write straight through, no staging.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "user", "save me", store=store))
|
|
assert r["success"] is True
|
|
assert r["entry_count"] == 1
|
|
assert wa.pending_count("memory") == 0
|
|
|
|
|
|
def test_memory_gate_on_no_interactive_stages(hermes_home):
|
|
# Gate on, no approval callback / not a gateway context → stage.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "stage me", store=store))
|
|
assert r.get("staged") is True
|
|
assert r.get("pending_id")
|
|
# Not written to the live store yet.
|
|
assert store.memory_entries == []
|
|
pend = wa.list_pending("memory")
|
|
assert len(pend) == 1
|
|
assert pend[0]["id"] == r["pending_id"]
|
|
|
|
|
|
def test_memory_gate_on_then_apply(hermes_home):
|
|
from tools.memory_tool import memory_tool, MemoryStore, apply_memory_pending
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "user", "approved entry", store=store))
|
|
pid = r["pending_id"]
|
|
rec = wa.get_pending("memory", pid)
|
|
result = apply_memory_pending(rec["payload"], store)
|
|
assert result["success"] is True
|
|
assert "approved entry" in store.user_entries[0]
|
|
|
|
|
|
def test_cli_memory_approve_without_live_agent_uses_fresh_store(hermes_home, capsys):
|
|
"""#46783: ``/memory approve`` from a context with no live agent (e.g. the
|
|
Desktop GUI) passed ``memory_store=None`` into the shared handler, which
|
|
returned "memory store unavailable" and applied nothing. The CLI handler must
|
|
fall back to a freshly loaded on-disk store, like the gateway path does."""
|
|
import json
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
from hermes_cli.cli_commands_mixin import CLICommandsMixin
|
|
|
|
_set_approval("memory", True)
|
|
staging = MemoryStore(); staging.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "remember the launch date", store=staging))
|
|
assert r.get("pending_id"), r
|
|
assert wa.pending_count("memory") == 1
|
|
|
|
# Bare CLI handler with no live agent → store resolves to None pre-fix.
|
|
handler = CLICommandsMixin.__new__(CLICommandsMixin)
|
|
handler.agent = None
|
|
handler._handle_memory_command("/memory approve all")
|
|
|
|
out = capsys.readouterr().out
|
|
assert "memory store unavailable" not in out, out
|
|
assert "Approved 1" in out, out
|
|
assert wa.pending_count("memory") == 0
|
|
# The approved write landed in a freshly loaded on-disk store (MEMORY.md).
|
|
reloaded = MemoryStore(); reloaded.load_from_disk()
|
|
assert any("remember the launch date" in e for e in reloaded.memory_entries)
|
|
|
|
|
|
def test_load_on_disk_store_honors_configured_char_limits(hermes_home, monkeypatch):
|
|
"""load_on_disk_store() must read memory.memory_char_limit /
|
|
user_char_limit from config so approvals applied without a live agent
|
|
enforce the SAME caps as the live agent (agent_init.py). Falls back to
|
|
defaults when config can't be loaded.
|
|
"""
|
|
from tools.memory_tool import load_on_disk_store
|
|
|
|
# Config override path: helper picks up the configured limits.
|
|
monkeypatch.setattr(
|
|
"hermes_cli.config.load_config",
|
|
lambda: {"memory": {"memory_char_limit": 999, "user_char_limit": 444}},
|
|
)
|
|
store = load_on_disk_store()
|
|
assert store.memory_char_limit == 999
|
|
assert store.user_char_limit == 444
|
|
|
|
# Failure path: config raises → defaults, never blows up.
|
|
def _boom():
|
|
raise RuntimeError("no config")
|
|
|
|
monkeypatch.setattr("hermes_cli.config.load_config", _boom)
|
|
fallback = load_on_disk_store()
|
|
assert fallback.memory_char_limit == 2200
|
|
assert fallback.user_char_limit == 1375
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Skill gate
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_SKILL = (
|
|
"---\nname: test-skill\ndescription: A test skill\nversion: 1.0.0\n---\n"
|
|
"# Test\nbody\n"
|
|
)
|
|
|
|
|
|
def test_skill_gate_off_allows_create(hermes_home):
|
|
# Default (gate off) → skill is created normally, not staged.
|
|
import importlib
|
|
import tools.skill_manager_tool as smt
|
|
importlib.reload(smt)
|
|
from tools import write_approval as wa
|
|
r = json.loads(smt.skill_manage("create", "free-skill", content=_SKILL))
|
|
assert r.get("success") is True
|
|
assert wa.pending_count("skills") == 0
|
|
|
|
|
|
def test_skill_gate_on_always_stages(hermes_home):
|
|
# Skills stage even in the foreground (too big to review inline).
|
|
from tools.skill_manager_tool import skill_manage
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
r = json.loads(skill_manage("create", "staged-skill", content=_SKILL))
|
|
assert r.get("staged") is True
|
|
assert "staged-skill" in r.get("gist", "")
|
|
assert wa.pending_count("skills") == 1
|
|
|
|
|
|
def test_skill_gate_on_then_apply_writes_file(hermes_home):
|
|
# SKILLS_DIR is resolved at import time, so reload the skill module under
|
|
# this test's HERMES_HOME to exercise the real on-disk write path.
|
|
import importlib
|
|
import tools.skill_manager_tool as smt
|
|
importlib.reload(smt)
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
r = json.loads(smt.skill_manage("create", "applied-skill", content=_SKILL))
|
|
rec = wa.get_pending("skills", r["pending_id"])
|
|
res = json.loads(smt.apply_skill_pending(rec["payload"]))
|
|
assert res["success"] is True
|
|
assert smt._find_skill("applied-skill") is not None
|
|
|
|
|
|
def test_skill_create_diff_is_full_content(hermes_home):
|
|
from tools.skill_manager_tool import skill_manage
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
r = json.loads(skill_manage("create", "diff-skill", content=_SKILL))
|
|
rec = wa.get_pending("skills", r["pending_id"])
|
|
diff = wa.skill_pending_diff(rec)
|
|
assert "name: test-skill" in diff
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Pending store CRUD
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_pending_store_roundtrip(hermes_home):
|
|
from tools import write_approval as wa
|
|
rec = wa.stage_write("memory", {"action": "add", "target": "user", "content": "x"},
|
|
summary="add x", origin="foreground")
|
|
assert wa.pending_count("memory") == 1
|
|
got = wa.get_pending("memory", rec["id"])
|
|
assert got["payload"]["content"] == "x"
|
|
assert wa.discard_pending("memory", rec["id"]) is True
|
|
assert wa.pending_count("memory") == 0
|
|
assert wa.get_pending("memory", rec["id"]) is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Shared command handler
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_handle_pending_list_empty(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
out = handle_pending_subcommand(wa.MEMORY, ["pending"])
|
|
assert "No pending memory" in out
|
|
|
|
|
|
def test_handle_approve_all(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools.memory_tool import MemoryStore
|
|
from tools import write_approval as wa
|
|
store = MemoryStore(); store.load_from_disk()
|
|
wa.stage_write("memory", {"action": "add", "target": "user", "content": "a"},
|
|
summary="a", origin="foreground")
|
|
wa.stage_write("memory", {"action": "add", "target": "user", "content": "b"},
|
|
summary="b", origin="foreground")
|
|
out = handle_pending_subcommand(wa.MEMORY, ["approve", "all"], memory_store=store)
|
|
assert "Approved 2" in out
|
|
assert wa.pending_count("memory") == 0
|
|
assert len(store.user_entries) == 2
|
|
|
|
|
|
def test_handle_reject(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
rec = wa.stage_write("skills", {"action": "create", "name": "s"},
|
|
summary="create s", origin="background_review")
|
|
out = handle_pending_subcommand(wa.SKILLS, ["reject", rec["id"]])
|
|
assert "Rejected" in out
|
|
assert wa.pending_count("skills") == 0
|
|
|
|
|
|
def test_handle_approval_on(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
captured = {}
|
|
out = handle_pending_subcommand(
|
|
wa.MEMORY, ["approval", "on"],
|
|
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
|
|
)
|
|
assert captured["enabled"] is True
|
|
assert "on" in out
|
|
|
|
|
|
def test_handle_approval_off(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
captured = {}
|
|
out = handle_pending_subcommand(
|
|
wa.SKILLS, ["approval", "off"],
|
|
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
|
|
)
|
|
assert captured["enabled"] is False
|
|
assert "off" in out
|
|
|
|
|
|
def test_handle_mode_alias_still_works(hermes_home):
|
|
# 'mode' is kept as a back-compat alias for 'approval'.
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
captured = {}
|
|
out = handle_pending_subcommand(
|
|
wa.MEMORY, ["mode", "on"],
|
|
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
|
|
)
|
|
assert captured["enabled"] is True
|
|
assert "on" in out
|
|
|
|
|
|
def test_handle_approval_invalid(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
out = handle_pending_subcommand(wa.MEMORY, ["approval", "bogus"],
|
|
set_mode_fn=lambda enabled: None)
|
|
assert "Invalid value" in out
|
|
|
|
|
|
def test_handle_unknown_subcommand_returns_none(hermes_home):
|
|
from hermes_cli.write_approval_commands import handle_pending_subcommand
|
|
from tools import write_approval as wa
|
|
# An unrecognized /skills subcommand (e.g. 'search') must return None so
|
|
# the CLI falls through to the skills hub.
|
|
out = handle_pending_subcommand(wa.SKILLS, ["search", "foo"])
|
|
assert out is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Inline (interactive CLI) approval path — regression for the bug where the
|
|
# per-thread approval callback was never passed to prompt_dangerous_approval,
|
|
# so every gated foreground memory write was silently denied.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.fixture
|
|
def approval_callback_cleanup():
|
|
yield
|
|
from tools.terminal_tool import set_approval_callback
|
|
set_approval_callback(None)
|
|
|
|
|
|
def test_memory_inline_approve_writes(hermes_home, approval_callback_cleanup):
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
|
|
calls = []
|
|
def approve_cb(command, description, **kw):
|
|
calls.append((command, description))
|
|
return "once"
|
|
set_approval_callback(approve_cb)
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "approved fact", store=store))
|
|
assert r["success"] is True
|
|
assert r.get("staged") is None # real write, not staged
|
|
assert store.memory_entries == ["approved fact"]
|
|
assert wa.pending_count("memory") == 0
|
|
# The registered callback must actually be invoked (not the input() path).
|
|
assert len(calls) == 1
|
|
assert "approved fact" in calls[0][0]
|
|
|
|
|
|
def test_memory_inline_deny_blocks(hermes_home, approval_callback_cleanup):
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
set_approval_callback(lambda command, description, **kw: "deny")
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "denied fact", store=store))
|
|
assert r["success"] is False
|
|
assert "denied" in r["error"].lower()
|
|
assert store.memory_entries == []
|
|
assert wa.pending_count("memory") == 0 # denied, not staged
|
|
|
|
|
|
def test_memory_inline_callback_error_stages(hermes_home, approval_callback_cleanup):
|
|
# If the prompt machinery fails, fall back to staging — never drop silently.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
def broken_cb(command, description, **kw):
|
|
raise RuntimeError("boom")
|
|
set_approval_callback(broken_cb)
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "fallback fact", store=store))
|
|
assert r.get("staged") is True
|
|
assert wa.pending_count("memory") == 1
|
|
|
|
|
|
def test_gateway_context_stages_not_prompts(hermes_home, monkeypatch):
|
|
# A gateway session has no per-thread CLI callback; the dangerous-command
|
|
# /approve round-trip lives in the pending-queue machinery which the gate
|
|
# does not use. The gate must stage, never attempt an inline prompt
|
|
# (which would hit the input() fallback and silently deny).
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
monkeypatch.setenv("HERMES_GATEWAY_SESSION", "1")
|
|
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", "gateway fact", store=store))
|
|
assert r.get("staged") is True
|
|
assert store.memory_entries == []
|
|
assert wa.pending_count("memory") == 1
|
|
|
|
|
|
def test_skills_never_prompt_inline_even_with_callback(hermes_home, approval_callback_cleanup):
|
|
# Skills always stage — even when an interactive callback is registered.
|
|
from tools.skill_manager_tool import skill_manage
|
|
from tools.terminal_tool import set_approval_callback
|
|
from tools import write_approval as wa
|
|
_set_approval("skills", True)
|
|
|
|
calls = []
|
|
set_approval_callback(lambda c, d, **kw: calls.append(1) or "once")
|
|
|
|
r = json.loads(skill_manage(
|
|
action="create", name="test-inline-skill",
|
|
content="---\nname: test-inline-skill\ndescription: x\n---\nbody\n"))
|
|
assert r.get("staged") is True
|
|
assert calls == [] # never prompted
|
|
assert wa.pending_count("skills") == 1
|
|
|
|
|
|
def test_memory_invalid_params_rejected_before_staging(hermes_home):
|
|
# Param validation must run BEFORE the gate so a broken write is rejected
|
|
# immediately instead of staged and failing at approve time.
|
|
from tools.memory_tool import memory_tool, MemoryStore
|
|
from tools import write_approval as wa
|
|
_set_approval("memory", True)
|
|
store = MemoryStore(); store.load_from_disk()
|
|
r = json.loads(memory_tool("add", "memory", None, store=store))
|
|
assert r["success"] is False
|
|
assert wa.pending_count("memory") == 0
|