refactor(gateway): drop JSONL fallback in load_transcript

state.db is canonical. The 'use whichever source is longer' branch was defensive code for the pre-DB migration; it has never fired on a real DB (verified on a session corpus with 27 JSONL files / 950 sessions — zero cases where the JSONL transcript held more messages than the DB).

Test changes:
- TestLoadTranscriptCorruptLines: deleted (tested the dead JSONL code path)
- TestLoadTranscriptPreferLongerSource: deleted (tested the removed fallback)
- Both replaced with TestLoadTranscriptDBOnly (DB-only reads)
- TestSessionStoreRewriteTranscript: fixture now creates a DB session
- test_gateway_retry_replaces_last_user_turn: fixture now uses a real DB
2026-06-06 07:51:53 +00:00 · 2026-05-20 09:20:09 +02:00 · 2026-05-20 09:20:09 +02:00 · 024a8e3ee9
commit 024a8e3ee9
parent 1d27be0ff3
4 changed files with 35 additions and 229 deletions
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -1312,58 +1312,19 @@ class SessionStore:
                f.write(json.dumps(msg, ensure_ascii=False) + "\n")

    def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
-        """Load all messages from a session's transcript."""
-        db_messages = []
-        # Try SQLite first
-        if self._db:
-            try:
-                db_messages = self._db.get_messages_as_conversation(session_id)
-            except Exception as e:
-                logger.debug("Could not load messages from DB: %s", e)
+        """Load all messages from a session's transcript.

-        # Load legacy JSONL transcript (may contain more history than SQLite
-        # for sessions created before the DB layer was introduced).
-        transcript_path = self.get_transcript_path(session_id)
-        jsonl_messages = []
-        if transcript_path.exists():
-            try:
-                with open(transcript_path, "r", encoding="utf-8") as f:
-                    for line in f:
-                        line = line.strip()
-                        if line:
-                            try:
-                                jsonl_messages.append(json.loads(line))
-                            except json.JSONDecodeError:
-                                logger.warning(
-                                    "Skipping corrupt line in transcript %s: %s",
-                                    session_id, line[:120],
-                                )
-            except OSError as e:
-                # JSONL is the legacy compatibility store. If it becomes
-                # unreadable, keep gateway recovery working by falling back to
-                # SQLite rows loaded above (or [] when no DB exists).
-                logger.debug("Failed to read JSONL transcript for %s: %s", session_id, e)
-
-        # Prefer whichever source has more messages.
-        #
-        # Background: when a session pre-dates SQLite storage (or when the DB
-        # layer was added while a long-lived session was already active), the
-        # first post-migration turn writes only the *new* messages to SQLite
-        # (because _flush_messages_to_session_db skips messages already in
-        # conversation_history, assuming they're persisted).  On the *next*
-        # turn load_transcript returns those few SQLite rows and ignores the
-        # full JSONL history — the model sees a context of 1-4 messages instead
-        # of hundreds.  Using the longer source prevents this silent truncation.
-        if len(jsonl_messages) > len(db_messages):
-            if db_messages:
-                logger.debug(
-                    "Session %s: JSONL has %d messages vs SQLite %d — "
-                    "using JSONL (legacy session not yet fully migrated)",
-                    session_id, len(jsonl_messages), len(db_messages),
-                )
-            return jsonl_messages
-
-        return db_messages
+        state.db is the canonical store. The legacy JSONL fallback was removed
+        in spec 002 — pre-DB sessions on existing disks have already been
+        migrated (their DB row holds the full message history).
+        """
+        if not self._db:
+            return []
+        try:
+            return self._db.get_messages_as_conversation(session_id)
+        except Exception as e:
+            logger.debug("Could not load messages from DB: %s", e)
+            return []


 def build_session_context(