feat(state.db): persist platform_message_id; restore yuanbao exact-id recall

PR #29211 dropped JSONL gateway transcripts and noted that the platform's own `message_id` field (used by Yuanbao's recall guard to redact a message by exact platform id) was no longer preserved, so recall fell back to content-match. That fallback works for the common case but redacts the wrong row when two messages share text (or fails to match when content is post-processed).

Restore exact-id matching by giving state.db a column for it:

- New `platform_message_id TEXT` column on the messages table (SCHEMA_VERSION bump 11 → 12; column added via declarative reconciler on existing DBs, no version-gated migration block needed)
- Partial index `idx_messages_platform_msg_id` on (session_id, platform_message_id) to keep recall's point-lookup cheap even on large sessions
- `append_message()` and `replace_messages()` accept the new value; the gateway-facing `append_to_transcript` in `gateway/session.py` forwards either `message["platform_message_id"]` or the legacy `message["message_id"]` key (yuanbao's existing convention)
- `get_messages_as_conversation()` surfaces the column back on the message dict as `message_id` so platform code reads the same shape it used to read from JSONL
- Yuanbao `_patch_transcript`: restore branch A1 (exact id match) ahead of A2 (content match) ahead of B (system-note). Both branches log which one fired so operators can tell from gateway.log whether recall hit the canonical path or had to fall back.

Tests:

- New low-level round-trip tests in `test_hermes_state.py` for both `append_message` and `replace_messages` paths
- The PR's `test_yuanbao_recall_db_only.py` was rewritten to assert the new contract: branch A1 (id match) works against DB-only transcripts, and branch A2 (content match) still recovers rows that were observed without a platform id (e.g. agent-processed @bot messages where run.py doesn't carry msg_id through)
2026-07-14 14:12:44 +00:00 · 2026-05-20 12:55:01 -07:00 · 2026-05-20 12:55:01 -07:00 · 31a0100104
commit 31a0100104
parent 0cc1a1d2d9
5 changed files with 185 additions and 38 deletions
--- a/tests/gateway/platforms/test_yuanbao_recall_db_only.py
+++ b/tests/gateway/platforms/test_yuanbao_recall_db_only.py
@@ -1,31 +1,88 @@
-"""Yuanbao recall: branch A (content-match) works against DB-only transcripts."""
+"""Yuanbao recall: branch A1 (exact id) and A2 (content-match) against DB-only transcripts.
+
+state.db persists the platform-side ``message_id`` via the
+``platform_message_id`` column (added in the salvage of PR #29211) and
+``load_transcript`` surfaces it back on each message dict as ``message_id``
+— so the recall guard's exact-id match path stays canonical even with the
+JSONL file gone.  When a row has no platform id (e.g. agent-processed
+@bot messages whose adapter didn't carry a msg_id, or pre-column legacy
+rows), recall falls through to content-match.
+"""
 from gateway.session import SessionStore
 from gateway.config import GatewayConfig


-def test_recall_content_match_finds_target_in_db_transcript(tmp_path, monkeypatch):
-    """state.db doesn't preserve message_id, so recall uses content-match.
-
-    Pin DEFAULT_DB_PATH to tmp_path so SessionDB() can't write to the real
-    ~/.hermes/state.db. (Module-level constant snapshot, see test_load_transcript_db_only.)
-    """
+def _pin_db(monkeypatch, tmp_path):
+    """Force SessionDB() to write into tmp_path instead of the real ~/.hermes."""
    import hermes_state
    monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")

+
+def test_recall_branch_a1_exact_id_match_round_trips_through_db(tmp_path, monkeypatch):
+    """A user message persisted with ``message_id`` must round-trip through
+    state.db so recall can find and redact it by exact id (branch A1)."""
+    _pin_db(monkeypatch, tmp_path)
+
    config = GatewayConfig()
    store = SessionStore(sessions_dir=tmp_path, config=config)

-    sid = "test-yuanbao-recall"
+    sid = "test-yuanbao-recall-a1"
    store._db.create_session(session_id=sid, source="yuanbao:group:G")
-    store.append_to_transcript(sid, {"role": "user", "content": "sensitive content", "timestamp": 1.0})
-    store.append_to_transcript(sid, {"role": "assistant", "content": "ack", "timestamp": 2.0})
+    store.append_to_transcript(sid, {
+        "role": "user",
+        "content": "sensitive content",
+        "timestamp": 1.0,
+        "message_id": "platform-msg-abc",
+    })
+    store.append_to_transcript(sid, {
+        "role": "assistant",
+        "content": "ack",
+        "timestamp": 2.0,
+    })

-    # DB-only history carries no platform message_id (PR #29211 dropped that path).
    history = store.load_transcript(sid)
-    assert all("message_id" not in msg for msg in history)
+    # The user row must carry its platform id back so the recall guard can
+    # match by exact id; the assistant row had no platform id so it should
+    # not gain one spuriously.
+    user_msg = next(m for m in history if m["role"] == "user")
+    assistant_msg = next(m for m in history if m["role"] == "assistant")
+    assert user_msg.get("message_id") == "platform-msg-abc"
+    assert "message_id" not in assistant_msg

-    # Branch A: content match finds the target row that recall would redact.
-    target = next((m for m in history
-                   if m.get("role") == "user" and m.get("content") == "sensitive content"), None)
+    # Branch A1: locate the row by exact platform id — no content heuristics.
+    target = next(
+        (m for m in history if m.get("message_id") == "platform-msg-abc"),
+        None,
+    )
+    assert target is not None
+    assert target["content"] == "sensitive content"
+
+
+def test_recall_branch_a2_content_match_when_no_platform_id(tmp_path, monkeypatch):
+    """Rows that lack a platform_message_id (e.g. agent-processed @bot
+    messages) still match by content as a fallback."""
+    _pin_db(monkeypatch, tmp_path)
+
+    config = GatewayConfig()
+    store = SessionStore(sessions_dir=tmp_path, config=config)
+
+    sid = "test-yuanbao-recall-a2"
+    store._db.create_session(session_id=sid, source="yuanbao:group:G")
+    # No message_id on the dict — simulates an agent-processed message
+    # that did not carry the platform msg_id through.
+    store.append_to_transcript(sid, {
+        "role": "user",
+        "content": "sensitive content",
+        "timestamp": 1.0,
+    })
+
+    history = store.load_transcript(sid)
+    assert all("message_id" not in m for m in history)
+
+    # Branch A2: content match recovers the target.
+    target = next(
+        (m for m in history
+         if m.get("role") == "user" and m.get("content") == "sensitive content"),
+        None,
+    )
    assert target is not None
-    # Caller would then redact: target["content"] = REDACTED; store.rewrite_transcript(sid, history)
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -316,6 +316,42 @@ class TestMessageStorage:
        assert conv[0] == {"role": "user", "content": "Hello"}
        assert conv[1] == {"role": "assistant", "content": "Hi!"}

+    def test_platform_message_id_round_trips(self, db):
+        """Platform-side message ids (yuanbao msg_id, telegram update_id, …)
+        survive append → get_messages_as_conversation under the
+        ``message_id`` key so platform recall flows can match by exact id."""
+        db.create_session(session_id="s_pmi", source="yuanbao")
+        db.append_message(
+            "s_pmi",
+            role="user",
+            content="hi",
+            platform_message_id="abc-123",
+        )
+        db.append_message("s_pmi", role="assistant", content="hello")
+
+        conv = db.get_messages_as_conversation("s_pmi")
+        user_msg = next(m for m in conv if m["role"] == "user")
+        assistant_msg = next(m for m in conv if m["role"] == "assistant")
+        assert user_msg.get("message_id") == "abc-123"
+        # Assistant row had no platform id — must not gain one spuriously.
+        assert "message_id" not in assistant_msg
+
+    def test_replace_messages_preserves_platform_message_id(self, db):
+        """``rewrite_transcript`` (which goes through replace_messages) must
+        keep the platform_message_id round-trip working for /retry, /undo,
+        /compress and yuanbao's recall rewrite path."""
+        db.create_session(session_id="s_rep", source="yuanbao")
+        db.replace_messages(
+            "s_rep",
+            [
+                {"role": "user", "content": "x", "message_id": "ext-1"},
+                {"role": "assistant", "content": "y"},
+            ],
+        )
+        conv = db.get_messages_as_conversation("s_rep")
+        assert next(m for m in conv if m["role"] == "user").get("message_id") == "ext-1"
+        assert "message_id" not in next(m for m in conv if m["role"] == "assistant")
+
    def test_get_messages_as_conversation_includes_ancestor_chain(self, db):
        db.create_session("root", "tui")
        db.append_message("root", role="user", content="first prompt")
@@ -1462,9 +1498,10 @@ class TestSchemaInit:
        assert "schema_version" in tables

    def test_schema_version(self, db):
+        from hermes_state import SCHEMA_VERSION
        cursor = db._conn.execute("SELECT version FROM schema_version")
        version = cursor.fetchone()[0]
-        assert version == 11
+        assert version == SCHEMA_VERSION

    def test_title_column_exists(self, db):
        """Verify the title column was created in the sessions table."""
@@ -1760,8 +1797,9 @@ class TestSchemaInit:
        migrated_db = SessionDB(db_path=db_path)

        # Verify migration
+        from hermes_state import SCHEMA_VERSION
        cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
-        assert cursor.fetchone()[0] == 11
+        assert cursor.fetchone()[0] == SCHEMA_VERSION

        # Verify title column exists and is NULL for existing sessions
        session = migrated_db.get_session("existing")
@@ -2952,11 +2990,12 @@ class TestFTS5ToolCallMigration:
            assert len(session_db.search_messages("LEGACYARG")) == 1, \
                "v11 migration must backfill tool_calls JSON into FTS"
            # schema_version bumped
+            from hermes_state import SCHEMA_VERSION
            row = session_db._conn.execute(
                "SELECT version FROM schema_version LIMIT 1"
            ).fetchone()
            version = row["version"] if hasattr(row, "keys") else row[0]
-            assert version == 11
+            assert version == SCHEMA_VERSION
        finally:
            session_db.close()