From eeb747de25b0e3909bfeda7f2bbd5fadc1e3c790 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 20 May 2026 03:26:00 -0700 Subject: [PATCH] feat(sessions): opt-in per-session JSON snapshot writer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #29182 deleted the per-session JSON snapshot writer outright because state.db is canonical and the snapshots had no in-tree consumer. Some users have external tooling that reads `~/.hermes/sessions/session_{sid}.json` directly, so reintroduce the writer behind a config flag that defaults to off. - Add `sessions.write_json_snapshots` (default False) to DEFAULT_CONFIG - Restore `AIAgent._save_session_log` + `_clean_session_content` as gated methods. When the flag is off the call is a fast no-op; when on, the writer behaves as before (atomic write, truncation guard preserved, REASONING_SCRATCHPAD → think tag normalization) - Re-derive the target path from `agent.session_id` on each call so `/branch` and `/compress` re-points happen automatically — no need to restore the explicit re-point bookkeeping at call sites - Wire the single call site in `_persist_session` (the cleanup-on-exit hook). Did NOT restore the 7 intra-turn calls the original PR deleted — those were redundant writes within the same turn that doubled disk I/O without adding any persistence guarantee `_persist_session` does not already provide - Read the flag once at agent init via `load_config()`, cache as `agent._session_json_enabled` - Update `TestNoSessionJsonSnapshot` → `TestSessionJsonSnapshotOptIn` to pin behavior: default off (no file), opt-in true (file written), no-op method on default agents, logs_dir retained unconditionally - Update CONTRIBUTING.md and the bundled `hermes-agent` skill to document the flag and its default --- CONTRIBUTING.md | 4 +- agent/agent_init.py | 15 +++- hermes_cli/config.py | 9 ++ run_agent.py | 89 +++++++++++++++++++ .../hermes-agent/SKILL.md | 2 +- tests/run_agent/test_run_agent.py | 49 +++++++--- 6 files changed, 149 insertions(+), 19 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8b140e35c09..5b1ae34aa07 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -210,7 +210,7 @@ hermes-agent/ | `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) | | `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) | | `~/.hermes/state.db` | SQLite session database | -| `~/.hermes/sessions/` | Legacy session artifacts (no longer written; state.db is canonical). Holds the gateway routing index (`sessions.json`) and request-dump breadcrumbs. | +| `~/.hermes/sessions/` | Gateway routing index (`sessions.json`), request-dump breadcrumbs, gateway `*.jsonl` transcripts, and (optionally) per-session JSON snapshots when `sessions.write_json_snapshots: true` is set. The per-session snapshots are off by default; state.db is canonical. | | `~/.hermes/cron/` | Scheduled job data | | `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials | @@ -239,7 +239,7 @@ User message → AIAgent._run_agent_loop() - **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules. - **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform. -- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. Per-session JSON snapshots in `~/.hermes/sessions/` were superseded by the SQLite store and are no longer written. +- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. Per-session JSON snapshots in `~/.hermes/sessions/` were superseded by the SQLite store and are off by default; opt back in with `sessions.write_json_snapshots: true` if you have external tooling that consumes the JSON files directly. - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs. - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint). - **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests. diff --git a/agent/agent_init.py b/agent/agent_init.py index fa9d8f64d0a..c39712d4d02 100644 --- a/agent/agent_init.py +++ b/agent/agent_init.py @@ -901,8 +901,19 @@ def init_agent( hermes_home = get_hermes_home() agent.logs_dir = hermes_home / "sessions" agent.logs_dir.mkdir(parents=True, exist_ok=True) - # session_log_file removed — state.db is the canonical message store. - # logs_dir retained for request_dump_*.json (debug breadcrumb path). + # Per-session JSON snapshot writer (~/.hermes/sessions/session_{sid}.json) + # is opt-in via sessions.write_json_snapshots (default False). state.db + # is canonical — the snapshot is only useful for external tooling that + # reads the JSON files directly. See run_agent._save_session_log. + agent._session_json_enabled = False + try: + from hermes_cli.config import load_config as _load_sess_cfg + _sess_cfg = (_load_sess_cfg().get("sessions") or {}) + agent._session_json_enabled = bool(_sess_cfg.get("write_json_snapshots", False)) + except Exception: + pass + # logs_dir is retained unconditionally for request_dump_*.json (debug + # breadcrumb path written by agent_runtime_helpers.dump_api_request_debug). # Track conversation messages for session logging agent._session_messages: List[Dict[str, Any]] = [] diff --git a/hermes_cli/config.py b/hermes_cli/config.py index dd470bdbbf3..de8ca79cd88 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1648,6 +1648,15 @@ DEFAULT_CONFIG = { # the sweep on every CLI invocation). Tracked via state_meta in # state.db itself, so it's shared across all processes. "min_interval_hours": 24, + # Legacy per-session JSON snapshot writer. When true, the agent + # rewrites ``~/.hermes/sessions/session_{sid}.json`` on every turn + # boundary with the full message list. state.db is canonical and + # has every field the snapshot stored (plus per-message timestamps + # and token counts), so this is off by default — the snapshots had + # no consumer outside their own overwrite guard and accumulated + # GBs of disk on heavy users. Opt in only if you have an external + # tool that consumes the JSON files directly. + "write_json_snapshots": False, }, # Contextual first-touch onboarding hints (see agent/onboarding.py). diff --git a/run_agent.py b/run_agent.py index 0f85bf690f0..6c4d54d7581 100644 --- a/run_agent.py +++ b/run_agent.py @@ -168,6 +168,7 @@ from agent.tool_result_classification import ( file_mutation_result_landed, ) from agent.trajectory import ( + convert_scratchpad_to_think, save_trajectory as _save_trajectory_to_file, ) from agent.message_sanitization import ( @@ -1175,6 +1176,7 @@ class AIAgent: self._drop_trailing_empty_response_scaffolding(messages) self._apply_persist_user_message_override(messages) self._session_messages = messages + self._save_session_log(messages) self._flush_messages_to_session_db(messages, conversation_history) def _drop_trailing_empty_response_scaffolding(self, messages: List[Dict]) -> None: @@ -1504,6 +1506,93 @@ class AIAgent: from agent.agent_runtime_helpers import dump_api_request_debug return dump_api_request_debug(self, api_kwargs, reason=reason, error=error) + @staticmethod + def _clean_session_content(content: str) -> str: + """Convert REASONING_SCRATCHPAD to think tags and clean up whitespace.""" + if not content: + return content + content = convert_scratchpad_to_think(content) + content = re.sub(r'\n+()', r'\n\1', content) + content = re.sub(r'()\n+', r'\1\n', content) + return content.strip() + + def _save_session_log(self, messages: List[Dict[str, Any]] = None): + """Optional per-session JSON snapshot writer. + + Gated by ``sessions.write_json_snapshots`` (default False). state.db + is the canonical message store; this writer exists only for users + whose external tooling consumes ``~/.hermes/sessions/session_{sid}.json`` + directly. When the flag is off this is a fast no-op. + + When enabled, rewrites the snapshot after every persistence point with + the full message list (assistant content normalized via + ``_clean_session_content`` to convert REASONING_SCRATCHPAD to think + tags). The truncation guard ("don't overwrite a larger log with + fewer messages") is preserved so resume + branch don't clobber a + fuller existing snapshot. + """ + if not getattr(self, "_session_json_enabled", False): + return + messages = messages or self._session_messages + if not messages: + return + + # Re-derive the target path each call so /branch and /compress + # session-id changes land in the right file without any re-point + # bookkeeping at the call sites. + try: + log_file = self.logs_dir / f"session_{self.session_id}.json" + except Exception: + return + + try: + cleaned = [] + for msg in messages: + if msg.get("role") == "assistant" and msg.get("content"): + msg = dict(msg) + msg["content"] = self._clean_session_content(msg["content"]) + cleaned.append(msg) + + # Guard: never overwrite a larger session log with fewer messages. + # Protects against data loss when a resumed agent starts with + # partial history and would otherwise clobber the full JSON log. + if log_file.exists(): + try: + existing = json.loads(log_file.read_text(encoding="utf-8")) + existing_count = existing.get("message_count", len(existing.get("messages", []))) + if existing_count > len(cleaned): + logging.debug( + "Skipping session log overwrite: existing has %d messages, current has %d", + existing_count, len(cleaned), + ) + return + except Exception: + pass # corrupted existing file — allow the overwrite + + entry = { + "session_id": self.session_id, + "model": self.model, + "base_url": self.base_url, + "platform": self.platform, + "session_start": self.session_start.isoformat(), + "last_updated": datetime.now().isoformat(), + "system_prompt": self._cached_system_prompt or "", + "tools": self.tools or [], + "message_count": len(cleaned), + "messages": cleaned, + } + + atomic_json_write( + log_file, + entry, + indent=2, + default=str, + ) + + except Exception as e: + if self.verbose_logging: + logging.warning(f"Failed to save session log: {e}") + def interrupt(self, message: str = None) -> None: """ diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 03d31777fd9..2177c9c6a5c 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -336,7 +336,7 @@ The registry of record is `hermes_cli/commands.py` — every consumer ~/.hermes/config.yaml Main configuration ~/.hermes/.env API keys and secrets $HERMES_HOME/skills/ Installed skills -~/.hermes/sessions/ Legacy session artifacts (no longer written; state.db is canonical) +~/.hermes/sessions/ Gateway routing index, request dumps, *.jsonl transcripts (and optional per-session JSON snapshots when sessions.write_json_snapshots: true) ~/.hermes/state.db Canonical session store (SQLite + FTS5) ~/.hermes/logs/ Gateway and error logs ~/.hermes/auth.json OAuth tokens and credential pools diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index f3ea9f4f59c..821228075c3 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -554,28 +554,49 @@ class TestExtractReasoning: assert result == "from structured field" -class TestNoSessionJsonSnapshot: - """Regression: agent must not write session_{sid}.json snapshots. +class TestSessionJsonSnapshotOptIn: + """Regression: per-session JSON snapshot writer is opt-in via config. - state.db is the canonical message store after #29182. The legacy snapshot - writer was removed; this test pins that contract so a future refactor - can't silently reintroduce the file (and the ~500MB/950-file disk usage - that came with it). + state.db is canonical (PR #29182). ``sessions.write_json_snapshots`` + defaults to False, so the agent must NOT write ``session_{sid}.json`` + files by default — that behavior caused multi-GB sessions directories + on heavy users. Users can opt back in for external tooling that reads + the JSON files directly. """ - def test_session_log_file_attribute_not_set(self, agent): - assert not hasattr(agent, "session_log_file"), ( - "session_log_file attribute removed in #29182 — state.db is canonical" + def test_session_json_disabled_by_default(self, agent): + # Default config: writer is gated off. + assert getattr(agent, "_session_json_enabled", False) is False, ( + "sessions.write_json_snapshots must default to False" ) - def test_no_session_log_writer_method(self, agent): - assert not hasattr(agent, "_save_session_log"), ( - "_save_session_log method removed in #29182" + def test_save_session_log_noops_when_disabled(self, agent, tmp_path): + # When disabled, calling the method must not write any file even + # if logs_dir is writable and messages are non-empty. + agent._session_json_enabled = False + agent.logs_dir = tmp_path + agent._session_messages = [{"role": "user", "content": "hello"}] + agent._save_session_log() + # No session_*.json must appear under logs_dir. + assert list(tmp_path.glob("session_*.json")) == [] + + def test_save_session_log_writes_when_enabled(self, agent, tmp_path): + # Opt-in path: with the flag on and a session_id, the writer must + # produce ``session_{sid}.json`` under logs_dir. + agent._session_json_enabled = True + agent.logs_dir = tmp_path + messages = [{"role": "user", "content": "hello"}] + agent._save_session_log(messages) + expected = tmp_path / f"session_{agent.session_id}.json" + assert expected.exists(), ( + "Opt-in writer must produce session_{sid}.json under logs_dir" ) def test_logs_dir_retained_for_request_dumps(self, agent): - # logs_dir is kept because agent_runtime_helpers.dump_api_request_debug - # still writes request_dump_*.json there (debug breadcrumb path). + # logs_dir is kept unconditionally because + # agent_runtime_helpers.dump_api_request_debug still writes + # request_dump_*.json there (debug breadcrumb path), independent of + # the session JSON opt-in. assert hasattr(agent, "logs_dir")