mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
refactor(yuanbao): migrate recall to load_transcript()
Yuanbao's recall feature read the gateway JSONL directly so it could look up messages by platform message_id, a field that state.db does not preserve. It now uses load_transcript(), which returns messages from the DB. As a result, recall branch A1 (message_id match) never matches, and every session falls through to A2 (content match) or B (system note) — a documented degradation. Follow-up issue: add a platform_message_id column to state.db messages to restore exact-id matching.
This commit is contained in:
parent
024a8e3ee9
commit
971cfaa38c
3 changed files with 37 additions and 12 deletions
|
|
@ -1410,19 +1410,19 @@ class RecallGuardMiddleware(InboundMiddleware):
|
|||
logger.warning("[%s] Recall: failed to resolve session: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Read JSONL directly — SQLite doesn't preserve message_id field.
|
||||
transcript: list = []
|
||||
# Load transcript from canonical store (state.db).
|
||||
#
|
||||
# Branch A1 below tries to match the recalled message by its platform
|
||||
# `message_id`. state.db does NOT preserve `message_id` (only its own
|
||||
# autoincrement primary key), so A1 will not match for any message
|
||||
# persisted post-DB-canonical (i.e. all messages going forward). Recall
|
||||
# falls through to A2 (content match) or B (system redaction note), both
|
||||
# of which work DB-only.
|
||||
#
|
||||
# TODO: add a `platform_message_id` column to state.db messages to restore
|
||||
# exact-id matching. Tracked separately.
|
||||
try:
|
||||
path = store.get_transcript_path(sid)
|
||||
if path.exists():
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
try:
|
||||
transcript.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
transcript = store.load_transcript(sid)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: failed to load transcript: %s", adapter.name, exc)
|
||||
return
|
||||
|
|
|
|||
0
tests/gateway/platforms/__init__.py
Normal file
0
tests/gateway/platforms/__init__.py
Normal file
25
tests/gateway/platforms/test_yuanbao_recall_db_only.py
Normal file
25
tests/gateway/platforms/test_yuanbao_recall_db_only.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
"""Yuanbao recall: branch A2 (content-match) works without JSONL message_id."""
|
||||
from gateway.session import SessionStore
|
||||
from gateway.config import GatewayConfig
|
||||
|
||||
|
||||
def test_recall_falls_through_to_content_match_without_message_id(tmp_path):
    """Check that a DB-loaded transcript still supports a content-based lookup.

    Two messages are appended to a fresh session, the transcript is read back
    through ``load_transcript``, and the test asserts that (1) no returned
    message carries a ``message_id`` key and (2) the user message can still be
    located by matching on role and content alone.
    """
    gateway_config = GatewayConfig()
    session_store = SessionStore(sessions_dir=tmp_path, config=gateway_config)

    sid = "test-yuanbao-recall"
    session_store._db.create_session(session_id=sid, source="yuanbao:group:G")
    session_store.append_to_transcript(
        sid,
        {"role": "user", "content": "sensitive content", "timestamp": 1.0},
    )
    session_store.append_to_transcript(
        sid,
        {"role": "assistant", "content": "ack", "timestamp": 2.0},
    )

    # The post-PR state: load_transcript returns DB-only, no message_id field.
    history = session_store.load_transcript(sid)
    assert not any("message_id" in entry for entry in history), \
        "DB-only history should not carry message_id"

    # Branch A2: locate the first user message whose content matches exactly.
    target = None
    for entry in history:
        if entry.get("role") == "user" and entry.get("content") == "sensitive content":
            target = entry
            break
    assert target is not None
    # Caller would then redact: target["content"] = REDACTED; store.rewrite_transcript(sid, history)
|
||||
Loading…
Add table
Add a link
Reference in a new issue