mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
feat(state.db): persist platform_message_id; restore yuanbao exact-id recall
PR #29211 dropped JSONL gateway transcripts and noted that the platform's own `message_id` field (used by Yuanbao's recall guard to redact a message by exact platform id) was no longer preserved, so recall fell back to content matching. That fallback works for the common case but redacts the wrong row when two messages share text (or fails to match when content is post-processed).

Restore exact-id matching by giving state.db a column for it:

- New `platform_message_id TEXT` column on the messages table (SCHEMA_VERSION bump 11 → 12; column added via declarative reconciler on existing DBs, no version-gated migration block needed)
- Partial index `idx_messages_platform_msg_id` on (session_id, platform_message_id) to keep recall's point-lookup cheap even on large sessions
- `append_message()` and `replace_messages()` accept the new value: the gateway-facing `append_to_transcript` in `gateway/session.py` forwards either `message["platform_message_id"]` or the legacy `message["message_id"]` key (yuanbao's existing convention)
- `get_messages_as_conversation()` surfaces the column back on the message dict as `message_id` so platform code reads the same shape it used to read from JSONL
- Yuanbao `_patch_transcript`: restore branch A1 (exact id match) ahead of A2 (content match) ahead of B (system-note). Both branches log which one fired so operators can tell from gateway.log whether recall hit the canonical path or had to fall back.

Tests:

- New low-level round-trip tests in `test_hermes_state.py` for both `append_message` and `replace_messages` paths
- The PR's `test_yuanbao_recall_db_only.py` was rewritten to assert the new contract: branch A1 (id match) works against DB-only transcripts, and branch A2 (content match) still recovers rows that were observed without a platform id (e.g. agent-processed @bot messages where run.py doesn't carry msg_id through)
This commit is contained in:
parent
0cc1a1d2d9
commit
31a0100104
5 changed files with 185 additions and 38 deletions
|
|
@@ -316,6 +316,42 @@ class TestMessageStorage:
|
|||
assert conv[0] == {"role": "user", "content": "Hello"}
|
||||
assert conv[1] == {"role": "assistant", "content": "Hi!"}
|
||||
|
||||
def test_platform_message_id_round_trips(self, db):
|
||||
"""Platform-side message ids (yuanbao msg_id, telegram update_id, …)
|
||||
survive append → get_messages_as_conversation under the
|
||||
``message_id`` key so platform recall flows can match by exact id."""
|
||||
db.create_session(session_id="s_pmi", source="yuanbao")
|
||||
db.append_message(
|
||||
"s_pmi",
|
||||
role="user",
|
||||
content="hi",
|
||||
platform_message_id="abc-123",
|
||||
)
|
||||
db.append_message("s_pmi", role="assistant", content="hello")
|
||||
|
||||
conv = db.get_messages_as_conversation("s_pmi")
|
||||
user_msg = next(m for m in conv if m["role"] == "user")
|
||||
assistant_msg = next(m for m in conv if m["role"] == "assistant")
|
||||
assert user_msg.get("message_id") == "abc-123"
|
||||
# Assistant row had no platform id — must not gain one spuriously.
|
||||
assert "message_id" not in assistant_msg
|
||||
|
||||
def test_replace_messages_preserves_platform_message_id(self, db):
|
||||
"""``rewrite_transcript`` (which goes through replace_messages) must
|
||||
keep the platform_message_id round-trip working for /retry, /undo,
|
||||
/compress and yuanbao's recall rewrite path."""
|
||||
db.create_session(session_id="s_rep", source="yuanbao")
|
||||
db.replace_messages(
|
||||
"s_rep",
|
||||
[
|
||||
{"role": "user", "content": "x", "message_id": "ext-1"},
|
||||
{"role": "assistant", "content": "y"},
|
||||
],
|
||||
)
|
||||
conv = db.get_messages_as_conversation("s_rep")
|
||||
assert next(m for m in conv if m["role"] == "user").get("message_id") == "ext-1"
|
||||
assert "message_id" not in next(m for m in conv if m["role"] == "assistant")
|
||||
|
||||
def test_get_messages_as_conversation_includes_ancestor_chain(self, db):
|
||||
db.create_session("root", "tui")
|
||||
db.append_message("root", role="user", content="first prompt")
|
||||
|
|
@@ -1462,9 +1498,10 @@ class TestSchemaInit:
|
|||
assert "schema_version" in tables
|
||||
|
||||
def test_schema_version(self, db):
|
||||
from hermes_state import SCHEMA_VERSION
|
||||
cursor = db._conn.execute("SELECT version FROM schema_version")
|
||||
version = cursor.fetchone()[0]
|
||||
assert version == 11
|
||||
assert version == SCHEMA_VERSION
|
||||
|
||||
def test_title_column_exists(self, db):
|
||||
"""Verify the title column was created in the sessions table."""
|
||||
|
|
@@ -1760,8 +1797,9 @@ class TestSchemaInit:
|
|||
migrated_db = SessionDB(db_path=db_path)
|
||||
|
||||
# Verify migration
|
||||
from hermes_state import SCHEMA_VERSION
|
||||
cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
|
||||
assert cursor.fetchone()[0] == 11
|
||||
assert cursor.fetchone()[0] == SCHEMA_VERSION
|
||||
|
||||
# Verify title column exists and is NULL for existing sessions
|
||||
session = migrated_db.get_session("existing")
|
||||
|
|
@@ -2952,11 +2990,12 @@ class TestFTS5ToolCallMigration:
|
|||
assert len(session_db.search_messages("LEGACYARG")) == 1, \
|
||||
"v11 migration must backfill tool_calls JSON into FTS"
|
||||
# schema_version bumped
|
||||
from hermes_state import SCHEMA_VERSION
|
||||
row = session_db._conn.execute(
|
||||
"SELECT version FROM schema_version LIMIT 1"
|
||||
).fetchone()
|
||||
version = row["version"] if hasattr(row, "keys") else row[0]
|
||||
assert version == 11
|
||||
assert version == SCHEMA_VERSION
|
||||
finally:
|
||||
session_db.close()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue