feat(sessions): opt-in per-session JSON snapshot writer

PR #29182 deleted the per-session JSON snapshot writer outright because
state.db is canonical and the snapshots had no in-tree consumer.  Some
users have external tooling that reads `~/.hermes/sessions/session_{sid}.json`
directly, so reintroduce the writer behind a config flag that defaults
to off.

- Add `sessions.write_json_snapshots` (default False) to DEFAULT_CONFIG
- Restore `AIAgent._save_session_log` + `_clean_session_content` as
  gated methods.  When the flag is off the call is a fast no-op; when
  on, the writer behaves as before (atomic write, truncation guard
  preserved, REASONING_SCRATCHPAD → think tag normalization)
- Re-derive the target path from `agent.session_id` on each call so
  `/branch` and `/compress` re-points happen automatically — no need
  to restore the explicit re-point bookkeeping at call sites
- Wire the single call site in `_persist_session` (the cleanup-on-exit
  hook).  Did NOT restore the 7 intra-turn calls the original PR deleted
  — those were redundant writes within the same turn that doubled disk
  I/O without adding any persistence guarantee that `_persist_session`
  does not already provide
- Read the flag once at agent init via `load_config()`, cache as
  `agent._session_json_enabled`
- Update `TestNoSessionJsonSnapshot` → `TestSessionJsonSnapshotOptIn`
  to pin behavior: default off (no file), opt-in true (file written),
  no-op method on default agents, logs_dir retained unconditionally
- Update CONTRIBUTING.md and the bundled `hermes-agent` skill to
  document the flag and its default
This commit is contained in:
Teknium 2026-05-20 03:26:00 -07:00
parent 6fc1989a5d
commit eeb747de25
6 changed files with 149 additions and 19 deletions

View file

@ -554,28 +554,49 @@ class TestExtractReasoning:
assert result == "from structured field"
class TestNoSessionJsonSnapshot:
"""Regression: agent must not write session_{sid}.json snapshots.
class TestSessionJsonSnapshotOptIn:
"""Regression: per-session JSON snapshot writer is opt-in via config.
state.db is the canonical message store after #29182. The legacy snapshot
writer was removed; this test pins that contract so a future refactor
can't silently reintroduce the file (and the ~500MB/950-file disk usage
that came with it).
state.db is canonical (PR #29182). ``sessions.write_json_snapshots``
defaults to False, so the agent must NOT write ``session_{sid}.json``
files by default — that behavior caused multi-GB sessions directories
for heavy users. Users can opt back in for external tooling that reads
the JSON files directly.
"""
def test_session_log_file_attribute_not_set(self, agent):
assert not hasattr(agent, "session_log_file"), (
"session_log_file attribute removed in #29182 — state.db is canonical"
def test_session_json_disabled_by_default(self, agent):
# Default config: writer is gated off.
assert getattr(agent, "_session_json_enabled", False) is False, (
"sessions.write_json_snapshots must default to False"
)
def test_no_session_log_writer_method(self, agent):
assert not hasattr(agent, "_save_session_log"), (
"_save_session_log method removed in #29182"
def test_save_session_log_noops_when_disabled(self, agent, tmp_path):
# When disabled, calling the method must not write any file even
# if logs_dir is writable and messages are non-empty.
agent._session_json_enabled = False
agent.logs_dir = tmp_path
agent._session_messages = [{"role": "user", "content": "hello"}]
agent._save_session_log()
# No session_*.json must appear under logs_dir.
assert list(tmp_path.glob("session_*.json")) == []
def test_save_session_log_writes_when_enabled(self, agent, tmp_path):
# Opt-in path: with the flag on and a session_id, the writer must
# produce ``session_{sid}.json`` under logs_dir.
agent._session_json_enabled = True
agent.logs_dir = tmp_path
messages = [{"role": "user", "content": "hello"}]
agent._save_session_log(messages)
expected = tmp_path / f"session_{agent.session_id}.json"
assert expected.exists(), (
"Opt-in writer must produce session_{sid}.json under logs_dir"
)
def test_logs_dir_retained_for_request_dumps(self, agent):
# logs_dir is kept because agent_runtime_helpers.dump_api_request_debug
# still writes request_dump_*.json there (debug breadcrumb path).
# logs_dir is kept unconditionally because
# agent_runtime_helpers.dump_api_request_debug still writes
# request_dump_*.json there (debug breadcrumb path), independent of
# the session JSON opt-in.
assert hasattr(agent, "logs_dir")