refactor(yuanbao): migrate recall to load_transcript()

Yuanbao's recall feature was reading the gateway JSONL directly to look up
messages by platform message_id, which state.db does not preserve. It now
uses load_transcript(), which returns DB messages.

Recall branch A1 (message_id match) now falls through to A2 (content match)
or B (system note) for all sessions — a documented degradation. Follow-up
issue: add platform_message_id column to state.db messages to restore
exact-id matching.
This commit is contained in:
yoniebans 2026-05-20 09:21:17 +02:00 committed by Teknium
parent 024a8e3ee9
commit 971cfaa38c
3 changed files with 37 additions and 12 deletions

View file

View file

@ -0,0 +1,25 @@
"""Yuanbao recall: branch A2 (content-match) works without JSONL message_id."""
from gateway.session import SessionStore
from gateway.config import GatewayConfig
def test_recall_falls_through_to_content_match_without_message_id(tmp_path):
    """Content-match lookup (branch A2) succeeds when messages lack ``message_id``.

    Builds a session store rooted at ``tmp_path``, appends one user and one
    assistant message, reloads the transcript, and then checks that:

    * no reloaded message has a ``message_id`` key, and
    * the user message can still be located by role and exact content.
    """
    session_id = "test-yuanbao-recall"
    transcript_store = SessionStore(sessions_dir=tmp_path, config=GatewayConfig())
    transcript_store._db.create_session(session_id=session_id, source="yuanbao:group:G")

    # Seed the transcript in chronological order: the user turn, then the reply.
    seeded_messages = (
        {"role": "user", "content": "sensitive content", "timestamp": 1.0},
        {"role": "assistant", "content": "ack", "timestamp": 2.0},
    )
    for entry in seeded_messages:
        transcript_store.append_to_transcript(session_id, entry)

    # Precondition for the fallback path: the reloaded history exposes no
    # message_id, so an exact-id lookup (branch A1) has nothing to match on.
    loaded = transcript_store.load_transcript(session_id)
    carries_message_id = any("message_id" in entry for entry in loaded)
    assert not carries_message_id, "DB-only history should not carry message_id"

    # Branch A2: find the first user message whose content matches exactly.
    matched = None
    for entry in loaded:
        if entry.get("role") == "user" and entry.get("content") == "sensitive content":
            matched = entry
            break
    assert matched is not None
    # A caller would redact next:
    #   matched["content"] = REDACTED; transcript_store.rewrite_transcript(session_id, loaded)