From 351fdcc6e6d763bd5d405d90d467a6d52eabf1f5 Mon Sep 17 00:00:00 2001 From: yoniebans Date: Wed, 20 May 2026 09:28:10 +0200 Subject: [PATCH] refactor(gateway): stop writing JSONL in append_to_transcript / rewrite_transcript state.db is canonical. JSONL transcripts were a transition fallback; the fallback was removed in the previous commit. Existing *.jsonl files on disk are left untouched. --- gateway/session.py | 40 ++++++--------------------- tests/run_agent/test_860_dedup.py | 46 +++---------------------------- 2 files changed, 12 insertions(+), 74 deletions(-) diff --git a/gateway/session.py b/gateway/session.py index 52cf68753cd..4ad2600c1e8 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -1248,20 +1248,15 @@ class SessionStore: return entries - def get_transcript_path(self, session_id: str) -> Path: - """Get the path to a session's legacy transcript file.""" - return self.sessions_dir / f"{session_id}.jsonl" - def append_to_transcript(self, session_id: str, message: Dict[str, Any], skip_db: bool = False) -> None: - """Append a message to a session's transcript (SQLite + legacy JSONL). + """Append a message to a session's transcript (SQLite). Args: - skip_db: When True, only write to JSONL and skip the SQLite write. - Used when the agent already persisted messages to SQLite - via its own _flush_messages_to_session_db(), preventing - the duplicate-write bug (#860). + skip_db: When True, skip the SQLite write. Used when the agent + already persisted messages to SQLite via its own + _flush_messages_to_session_db(), preventing the + duplicate-write bug (#860). """ - # Write to SQLite (unless the agent already handled it) if self._db and not skip_db: try: self._db.append_message( @@ -1279,37 +1274,18 @@ class SessionStore: ) except Exception as e: logger.debug("Session DB operation failed: %s", e) - - # Also write legacy JSONL (keeps existing tooling working during transition) - transcript_path = self.get_transcript_path(session_id) - try: - with self._lock: - with open(transcript_path, "a", encoding="utf-8") as f: - f.write(json.dumps(message, ensure_ascii=False) + "\n") - except OSError as e: - # Disk full / read-only fs / permission errors must not crash the - # message handler — the SQLite write above is the primary store. - logger.debug("Failed to write JSONL transcript for %s: %s", session_id, e) def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None: """Replace the entire transcript for a session with new messages. - - Used by /retry, /undo, and /compress to persist modified conversation history. - Rewrites both SQLite and legacy JSONL storage. + + Used by /retry, /undo, and /compress to persist modified conversation + history. state.db is the canonical store. """ - # SQLite: replace atomically so a mid-rewrite failure doesn't leave - # the session half-empty in the DB while JSONL still has history. if self._db: try: self._db.replace_messages(session_id, messages) except Exception as e: logger.debug("Failed to rewrite transcript in DB: %s", e) - - # JSONL: overwrite the file - transcript_path = self.get_transcript_path(session_id) - with open(transcript_path, "w", encoding="utf-8") as f: - for msg in messages: - f.write(json.dumps(msg, ensure_ascii=False) + "\n") def load_transcript(self, session_id: str) -> List[Dict[str, Any]]: """Load all messages from a session's transcript. diff --git a/tests/run_agent/test_860_dedup.py b/tests/run_agent/test_860_dedup.py index 6349595e894..070936af67b 100644 --- a/tests/run_agent/test_860_dedup.py +++ b/tests/run_agent/test_860_dedup.py @@ -170,33 +170,7 @@ class TestFlushDeduplication: # --------------------------------------------------------------------------- class TestAppendToTranscriptSkipDb: - """Verify skip_db=True writes JSONL but not SQLite.""" - - @pytest.fixture() - def store(self, tmp_path): - from gateway.config import GatewayConfig - from gateway.session import SessionStore - config = GatewayConfig() - with patch("gateway.session.SessionStore._ensure_loaded"): - s = SessionStore(sessions_dir=tmp_path, config=config) - s._db = None # no SQLite for these JSONL-focused tests - s._loaded = True - return s - - def test_skip_db_writes_jsonl_only(self, store, tmp_path): - """With skip_db=True, message appears in JSONL but not SQLite.""" - session_id = "test-skip-db" - msg = {"role": "assistant", "content": "hello world"} - store.append_to_transcript(session_id, msg, skip_db=True) - - # JSONL should have the message - jsonl_path = store.get_transcript_path(session_id) - assert jsonl_path.exists() - with open(jsonl_path) as f: - lines = f.readlines() - assert len(lines) == 1 - parsed = json.loads(lines[0]) - assert parsed["content"] == "hello world" + """Verify skip_db=True skips the SQLite write.""" def test_skip_db_prevents_sqlite_write(self, tmp_path): """With skip_db=True and a real DB, message does NOT appear in SQLite.""" @@ -223,14 +197,8 @@ class TestAppendToTranscriptSkipDb: rows = db.get_messages(session_id) assert len(rows) == 0, f"Expected 0 DB rows with skip_db=True, got {len(rows)}" - # But JSONL should have it - jsonl_path = store.get_transcript_path(session_id) - with open(jsonl_path) as f: - lines = f.readlines() - assert len(lines) == 1 - - def test_default_writes_both(self, tmp_path): - """Without skip_db, message appears in both JSONL and SQLite.""" + def test_default_writes_to_sqlite(self, tmp_path): + """Without skip_db, message appears in SQLite.""" from gateway.config import GatewayConfig from gateway.session import SessionStore from hermes_state import SessionDB @@ -250,13 +218,7 @@ class TestAppendToTranscriptSkipDb: msg = {"role": "user", "content": "test message"} store.append_to_transcript(session_id, msg) - # JSONL should have the message - jsonl_path = store.get_transcript_path(session_id) - with open(jsonl_path) as f: - lines = f.readlines() - assert len(lines) == 1 - - # SQLite should also have the message + # SQLite should have the message rows = db.get_messages(session_id) assert len(rows) == 1