hermes-agent/tests/tools/test_write_approval.py
kshitijk4poor 0e69cd4b37 fix(memory): honor configured char limits in the no-agent on-disk store
Follow-up to the /memory approve fresh-store fix. Both the CLI fallback and
the messaging-gateway handler built a bare MemoryStore() with the hardcoded
default char limits (2200/1375), ignoring the user's configured
memory.memory_char_limit / user_char_limit. A live agent honors those
overrides (agent/agent_init.py), so an approval applied without a live agent
could accept a write the user's lower cap would reject, or vice versa.

Extract a shared tools.memory_tool.load_on_disk_store() factory that reads
the configured limits (falling back to defaults if config can't load) and
wire both the CLI and gateway handlers to it, closing the gap on both
surfaces and de-duplicating the construction block.
2026-06-23 03:10:53 +05:30

441 lines
17 KiB
Python

"""Tests for the memory/skill write-approval gate (tools/write_approval.py)
and the shared slash-command handlers (hermes_cli/write_approval_commands.py).
Covers the boolean write_approval gate (off by default = write freely; on =
require approval) for both subsystems, the foreground-vs-background staging
split, pending store CRUD, and the list/approve/reject/diff/approval
subcommand dispatch.
"""
import json
import os
import tempfile
import shutil
import pytest
@pytest.fixture
def hermes_home(monkeypatch):
d = tempfile.mkdtemp(prefix="hermes_wa_test_")
home = os.path.join(d, ".hermes")
os.makedirs(home)
monkeypatch.setenv("HERMES_HOME", home)
yield home
shutil.rmtree(d, ignore_errors=True)
def _set_approval(subsystem, enabled):
import hermes_cli.config as cfg
c = cfg.load_config()
c.setdefault(subsystem, {})["write_approval"] = enabled
cfg.save_config(c)
# ---------------------------------------------------------------------------
# Config resolution
# ---------------------------------------------------------------------------
def test_default_gate_is_off(hermes_home):
from tools import write_approval as wa
# Default: gate off → writes flow freely.
assert wa.write_approval_enabled("memory") is False
assert wa.write_approval_enabled("skills") is False
def test_invalid_subsystem_is_off(hermes_home):
from tools import write_approval as wa
assert wa.write_approval_enabled("bogus") is False
def test_normalize_enabled_coerces_values():
from tools import write_approval as wa
# Real bools pass through.
assert wa._normalize_enabled(True) is True
assert wa._normalize_enabled(False) is False
# Truthy strings → True (incl. legacy 'approve').
assert wa._normalize_enabled("on") is True
assert wa._normalize_enabled("approve") is True
assert wa._normalize_enabled("true") is True
# Everything else → False (gate off is the safe default).
assert wa._normalize_enabled("off") is False
assert wa._normalize_enabled("garbage") is False
assert wa._normalize_enabled(None) is False
# ---------------------------------------------------------------------------
# Memory gate
# ---------------------------------------------------------------------------
def test_memory_gate_off_allows_write(hermes_home):
# Default (gate off) → write straight through, no staging.
from tools.memory_tool import memory_tool, MemoryStore
from tools import write_approval as wa
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "user", "save me", store=store))
assert r["success"] is True
assert r["entry_count"] == 1
assert wa.pending_count("memory") == 0
def test_memory_gate_on_no_interactive_stages(hermes_home):
# Gate on, no approval callback / not a gateway context → stage.
from tools.memory_tool import memory_tool, MemoryStore
from tools import write_approval as wa
_set_approval("memory", True)
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "memory", "stage me", store=store))
assert r.get("staged") is True
assert r.get("pending_id")
# Not written to the live store yet.
assert store.memory_entries == []
pend = wa.list_pending("memory")
assert len(pend) == 1
assert pend[0]["id"] == r["pending_id"]
def test_memory_gate_on_then_apply(hermes_home):
from tools.memory_tool import memory_tool, MemoryStore, apply_memory_pending
from tools import write_approval as wa
_set_approval("memory", True)
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "user", "approved entry", store=store))
pid = r["pending_id"]
rec = wa.get_pending("memory", pid)
result = apply_memory_pending(rec["payload"], store)
assert result["success"] is True
assert "approved entry" in store.user_entries[0]
def test_cli_memory_approve_without_live_agent_uses_fresh_store(hermes_home, capsys):
"""#46783: ``/memory approve`` from a context with no live agent (e.g. the
Desktop GUI) passed ``memory_store=None`` into the shared handler, which
returned "memory store unavailable" and applied nothing. The CLI handler must
fall back to a freshly loaded on-disk store, like the gateway path does."""
import json
from tools.memory_tool import memory_tool, MemoryStore
from tools import write_approval as wa
from hermes_cli.cli_commands_mixin import CLICommandsMixin
_set_approval("memory", True)
staging = MemoryStore(); staging.load_from_disk()
r = json.loads(memory_tool("add", "memory", "remember the launch date", store=staging))
assert r.get("pending_id"), r
assert wa.pending_count("memory") == 1
# Bare CLI handler with no live agent → store resolves to None pre-fix.
handler = CLICommandsMixin.__new__(CLICommandsMixin)
handler.agent = None
handler._handle_memory_command("/memory approve all")
out = capsys.readouterr().out
assert "memory store unavailable" not in out, out
assert "Approved 1" in out, out
assert wa.pending_count("memory") == 0
# The approved write landed in a freshly loaded on-disk store (MEMORY.md).
reloaded = MemoryStore(); reloaded.load_from_disk()
assert any("remember the launch date" in e for e in reloaded.memory_entries)
def test_load_on_disk_store_honors_configured_char_limits(hermes_home, monkeypatch):
"""load_on_disk_store() must read memory.memory_char_limit /
user_char_limit from config so approvals applied without a live agent
enforce the SAME caps as the live agent (agent_init.py). Falls back to
defaults when config can't be loaded.
"""
from tools.memory_tool import load_on_disk_store
# Config override path: helper picks up the configured limits.
monkeypatch.setattr(
"hermes_cli.config.load_config",
lambda: {"memory": {"memory_char_limit": 999, "user_char_limit": 444}},
)
store = load_on_disk_store()
assert store.memory_char_limit == 999
assert store.user_char_limit == 444
# Failure path: config raises → defaults, never blows up.
def _boom():
raise RuntimeError("no config")
monkeypatch.setattr("hermes_cli.config.load_config", _boom)
fallback = load_on_disk_store()
assert fallback.memory_char_limit == 2200
assert fallback.user_char_limit == 1375
# ---------------------------------------------------------------------------
# Skill gate
# ---------------------------------------------------------------------------
_SKILL = (
"---\nname: test-skill\ndescription: A test skill\nversion: 1.0.0\n---\n"
"# Test\nbody\n"
)
def test_skill_gate_off_allows_create(hermes_home):
# Default (gate off) → skill is created normally, not staged.
import importlib
import tools.skill_manager_tool as smt
importlib.reload(smt)
from tools import write_approval as wa
r = json.loads(smt.skill_manage("create", "free-skill", content=_SKILL))
assert r.get("success") is True
assert wa.pending_count("skills") == 0
def test_skill_gate_on_always_stages(hermes_home):
# Skills stage even in the foreground (too big to review inline).
from tools.skill_manager_tool import skill_manage
from tools import write_approval as wa
_set_approval("skills", True)
r = json.loads(skill_manage("create", "staged-skill", content=_SKILL))
assert r.get("staged") is True
assert "staged-skill" in r.get("gist", "")
assert wa.pending_count("skills") == 1
def test_skill_gate_on_then_apply_writes_file(hermes_home):
# SKILLS_DIR is resolved at import time, so reload the skill module under
# this test's HERMES_HOME to exercise the real on-disk write path.
import importlib
import tools.skill_manager_tool as smt
importlib.reload(smt)
from tools import write_approval as wa
_set_approval("skills", True)
r = json.loads(smt.skill_manage("create", "applied-skill", content=_SKILL))
rec = wa.get_pending("skills", r["pending_id"])
res = json.loads(smt.apply_skill_pending(rec["payload"]))
assert res["success"] is True
assert smt._find_skill("applied-skill") is not None
def test_skill_create_diff_is_full_content(hermes_home):
from tools.skill_manager_tool import skill_manage
from tools import write_approval as wa
_set_approval("skills", True)
r = json.loads(skill_manage("create", "diff-skill", content=_SKILL))
rec = wa.get_pending("skills", r["pending_id"])
diff = wa.skill_pending_diff(rec)
assert "name: test-skill" in diff
# ---------------------------------------------------------------------------
# Pending store CRUD
# ---------------------------------------------------------------------------
def test_pending_store_roundtrip(hermes_home):
from tools import write_approval as wa
rec = wa.stage_write("memory", {"action": "add", "target": "user", "content": "x"},
summary="add x", origin="foreground")
assert wa.pending_count("memory") == 1
got = wa.get_pending("memory", rec["id"])
assert got["payload"]["content"] == "x"
assert wa.discard_pending("memory", rec["id"]) is True
assert wa.pending_count("memory") == 0
assert wa.get_pending("memory", rec["id"]) is None
# ---------------------------------------------------------------------------
# Shared command handler
# ---------------------------------------------------------------------------
def test_handle_pending_list_empty(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
out = handle_pending_subcommand(wa.MEMORY, ["pending"])
assert "No pending memory" in out
def test_handle_approve_all(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools.memory_tool import MemoryStore
from tools import write_approval as wa
store = MemoryStore(); store.load_from_disk()
wa.stage_write("memory", {"action": "add", "target": "user", "content": "a"},
summary="a", origin="foreground")
wa.stage_write("memory", {"action": "add", "target": "user", "content": "b"},
summary="b", origin="foreground")
out = handle_pending_subcommand(wa.MEMORY, ["approve", "all"], memory_store=store)
assert "Approved 2" in out
assert wa.pending_count("memory") == 0
assert len(store.user_entries) == 2
def test_handle_reject(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
rec = wa.stage_write("skills", {"action": "create", "name": "s"},
summary="create s", origin="background_review")
out = handle_pending_subcommand(wa.SKILLS, ["reject", rec["id"]])
assert "Rejected" in out
assert wa.pending_count("skills") == 0
def test_handle_approval_on(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
captured = {}
out = handle_pending_subcommand(
wa.MEMORY, ["approval", "on"],
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
)
assert captured["enabled"] is True
assert "on" in out
def test_handle_approval_off(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
captured = {}
out = handle_pending_subcommand(
wa.SKILLS, ["approval", "off"],
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
)
assert captured["enabled"] is False
assert "off" in out
def test_handle_mode_alias_still_works(hermes_home):
# 'mode' is kept as a back-compat alias for 'approval'.
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
captured = {}
out = handle_pending_subcommand(
wa.MEMORY, ["mode", "on"],
set_mode_fn=lambda enabled: captured.update(enabled=enabled),
)
assert captured["enabled"] is True
assert "on" in out
def test_handle_approval_invalid(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
out = handle_pending_subcommand(wa.MEMORY, ["approval", "bogus"],
set_mode_fn=lambda enabled: None)
assert "Invalid value" in out
def test_handle_unknown_subcommand_returns_none(hermes_home):
from hermes_cli.write_approval_commands import handle_pending_subcommand
from tools import write_approval as wa
# An unrecognized /skills subcommand (e.g. 'search') must return None so
# the CLI falls through to the skills hub.
out = handle_pending_subcommand(wa.SKILLS, ["search", "foo"])
assert out is None
# ---------------------------------------------------------------------------
# Inline (interactive CLI) approval path — regression for the bug where the
# per-thread approval callback was never passed to prompt_dangerous_approval,
# so every gated foreground memory write was silently denied.
# ---------------------------------------------------------------------------
@pytest.fixture
def approval_callback_cleanup():
yield
from tools.terminal_tool import set_approval_callback
set_approval_callback(None)
def test_memory_inline_approve_writes(hermes_home, approval_callback_cleanup):
from tools.memory_tool import memory_tool, MemoryStore
from tools.terminal_tool import set_approval_callback
from tools import write_approval as wa
_set_approval("memory", True)
calls = []
def approve_cb(command, description, **kw):
calls.append((command, description))
return "once"
set_approval_callback(approve_cb)
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "memory", "approved fact", store=store))
assert r["success"] is True
assert r.get("staged") is None # real write, not staged
assert store.memory_entries == ["approved fact"]
assert wa.pending_count("memory") == 0
# The registered callback must actually be invoked (not the input() path).
assert len(calls) == 1
assert "approved fact" in calls[0][0]
def test_memory_inline_deny_blocks(hermes_home, approval_callback_cleanup):
from tools.memory_tool import memory_tool, MemoryStore
from tools.terminal_tool import set_approval_callback
from tools import write_approval as wa
_set_approval("memory", True)
set_approval_callback(lambda command, description, **kw: "deny")
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "memory", "denied fact", store=store))
assert r["success"] is False
assert "denied" in r["error"].lower()
assert store.memory_entries == []
assert wa.pending_count("memory") == 0 # denied, not staged
def test_memory_inline_callback_error_stages(hermes_home, approval_callback_cleanup):
# If the prompt machinery fails, fall back to staging — never drop silently.
from tools.memory_tool import memory_tool, MemoryStore
from tools.terminal_tool import set_approval_callback
from tools import write_approval as wa
_set_approval("memory", True)
def broken_cb(command, description, **kw):
raise RuntimeError("boom")
set_approval_callback(broken_cb)
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "memory", "fallback fact", store=store))
assert r.get("staged") is True
assert wa.pending_count("memory") == 1
def test_gateway_context_stages_not_prompts(hermes_home, monkeypatch):
# A gateway session has no per-thread CLI callback; the dangerous-command
# /approve round-trip lives in the pending-queue machinery which the gate
# does not use. The gate must stage, never attempt an inline prompt
# (which would hit the input() fallback and silently deny).
from tools.memory_tool import memory_tool, MemoryStore
from tools import write_approval as wa
_set_approval("memory", True)
monkeypatch.setenv("HERMES_GATEWAY_SESSION", "1")
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "memory", "gateway fact", store=store))
assert r.get("staged") is True
assert store.memory_entries == []
assert wa.pending_count("memory") == 1
def test_skills_never_prompt_inline_even_with_callback(hermes_home, approval_callback_cleanup):
# Skills always stage — even when an interactive callback is registered.
from tools.skill_manager_tool import skill_manage
from tools.terminal_tool import set_approval_callback
from tools import write_approval as wa
_set_approval("skills", True)
calls = []
set_approval_callback(lambda c, d, **kw: calls.append(1) or "once")
r = json.loads(skill_manage(
action="create", name="test-inline-skill",
content="---\nname: test-inline-skill\ndescription: x\n---\nbody\n"))
assert r.get("staged") is True
assert calls == [] # never prompted
assert wa.pending_count("skills") == 1
def test_memory_invalid_params_rejected_before_staging(hermes_home):
# Param validation must run BEFORE the gate so a broken write is rejected
# immediately instead of staged and failing at approve time.
from tools.memory_tool import memory_tool, MemoryStore
from tools import write_approval as wa
_set_approval("memory", True)
store = MemoryStore(); store.load_from_disk()
r = json.loads(memory_tool("add", "memory", None, store=store))
assert r["success"] is False
assert wa.pending_count("memory") == 0