mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
refactor(gateway): drop JSONL fallback in load_transcript
state.db is canonical. The 'use whichever source is longer' branch was defensive code for the pre-DB migration; on every real DB it has not fired (verified on a session corpus with 27 jsonl files / 950 sessions — zero jsonl-bigger cases). Test changes: - TestLoadTranscriptCorruptLines: deleted (tested dead JSONL code path) - TestLoadTranscriptPreferLongerSource: deleted (tested removed fallback) - Replaced with TestLoadTranscriptDBOnly (DB-only reads) - TestSessionStoreRewriteTranscript: fixture now creates DB session - test_gateway_retry_replaces_last_user_turn: fixture uses real DB
This commit is contained in:
parent
1d27be0ff3
commit
024a8e3ee9
4 changed files with 35 additions and 229 deletions
|
|
@ -1312,58 +1312,19 @@ class SessionStore:
|
|||
f.write(json.dumps(msg, ensure_ascii=False) + "\n")
|
||||
|
||||
def load_transcript(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
"""Load all messages from a session's transcript."""
|
||||
db_messages = []
|
||||
# Try SQLite first
|
||||
if self._db:
|
||||
try:
|
||||
db_messages = self._db.get_messages_as_conversation(session_id)
|
||||
except Exception as e:
|
||||
logger.debug("Could not load messages from DB: %s", e)
|
||||
"""Load all messages from a session's transcript.
|
||||
|
||||
# Load legacy JSONL transcript (may contain more history than SQLite
|
||||
# for sessions created before the DB layer was introduced).
|
||||
transcript_path = self.get_transcript_path(session_id)
|
||||
jsonl_messages = []
|
||||
if transcript_path.exists():
|
||||
try:
|
||||
with open(transcript_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
try:
|
||||
jsonl_messages.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
"Skipping corrupt line in transcript %s: %s",
|
||||
session_id, line[:120],
|
||||
)
|
||||
except OSError as e:
|
||||
# JSONL is the legacy compatibility store. If it becomes
|
||||
# unreadable, keep gateway recovery working by falling back to
|
||||
# SQLite rows loaded above (or [] when no DB exists).
|
||||
logger.debug("Failed to read JSONL transcript for %s: %s", session_id, e)
|
||||
|
||||
# Prefer whichever source has more messages.
|
||||
#
|
||||
# Background: when a session pre-dates SQLite storage (or when the DB
|
||||
# layer was added while a long-lived session was already active), the
|
||||
# first post-migration turn writes only the *new* messages to SQLite
|
||||
# (because _flush_messages_to_session_db skips messages already in
|
||||
# conversation_history, assuming they're persisted). On the *next*
|
||||
# turn load_transcript returns those few SQLite rows and ignores the
|
||||
# full JSONL history — the model sees a context of 1-4 messages instead
|
||||
# of hundreds. Using the longer source prevents this silent truncation.
|
||||
if len(jsonl_messages) > len(db_messages):
|
||||
if db_messages:
|
||||
logger.debug(
|
||||
"Session %s: JSONL has %d messages vs SQLite %d — "
|
||||
"using JSONL (legacy session not yet fully migrated)",
|
||||
session_id, len(jsonl_messages), len(db_messages),
|
||||
)
|
||||
return jsonl_messages
|
||||
|
||||
return db_messages
|
||||
state.db is the canonical store. The legacy JSONL fallback was removed
|
||||
in spec 002 — pre-DB sessions on existing disks have already been
|
||||
migrated (their DB row holds the full message history).
|
||||
"""
|
||||
if not self._db:
|
||||
return []
|
||||
try:
|
||||
return self._db.get_messages_as_conversation(session_id)
|
||||
except Exception as e:
|
||||
logger.debug("Could not load messages from DB: %s", e)
|
||||
return []
|
||||
|
||||
|
||||
def build_session_context(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue