# Patch summary (commentary only; the patch itself starts at the first
# "diff --git" header below, and patch tools skip this leading text).
#
# gateway/platforms/yuanbao.py -- RecallGuardMiddleware recall path:
#   * Removes the inline JSONL read (get_transcript_path, then json.loads
#     per non-empty line, silently skipping lines that fail to decode) and
#     loads the transcript with store.load_transcript(sid) instead.
#   * The surrounding try/except is unchanged: any exception still logs
#     "Recall: failed to load transcript" as a warning and returns.
#   * Per the added in-code comment, id-based matching (branch A1) is not
#     expected to hit for DB-loaded messages; recall relies on A2 or B.
#   * NOTE(review): the removed code started from `transcript = []`, so a
#     missing file yielded an empty list. Confirm load_transcript also
#     returns a list (not None) for a session with no stored messages.
#
# tests/gateway/platforms/__init__.py -- new file; this patch carries no
#   content hunk for it.
#
# tests/gateway/platforms/test_yuanbao_recall_db_only.py -- new test:
#   * Builds a SessionStore on tmp_path, creates a session through
#     store._db.create_session, appends one user and one assistant message,
#     and asserts load_transcript() yields entries without a "message_id"
#     key.
#   * Repeats the role/content lookup inline and asserts it finds the user
#     message.
#   * NOTE(review): the test never calls RecallGuardMiddleware, and the
#     redact + rewrite_transcript step exists only as a trailing comment,
#     so the middleware's own fall-through is not exercised here.
#
# NOTE(review): line breaks and indentation below were restored from a
# whitespace-collapsed copy. Hunk line counts agree with the @@ headers,
# but verify the leading whitespace of context/removed lines against the
# target file before applying.
diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index 7015e0c848c..aed6717bd36 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -1410,19 +1410,19 @@ class RecallGuardMiddleware(InboundMiddleware):
             logger.warning("[%s] Recall: failed to resolve session: %s", adapter.name, exc)
             return
 
-        # Read JSONL directly — SQLite doesn't preserve message_id field.
-        transcript: list = []
+        # Load transcript from canonical store (state.db).
+        #
+        # Branch A1 below tries to match the recalled message by its platform
+        # `message_id`. state.db does NOT preserve `message_id` (only its own
+        # autoincrement primary key), so A1 will not match for any message
+        # persisted post-DB-canonical (i.e. all messages going forward). Recall
+        # falls through to A2 (content match) or B (system redaction note), both
+        # of which work DB-only.
+        #
+        # TODO: add a `platform_message_id` column to state.db messages to restore
+        # exact-id matching. Tracked separately.
         try:
-            path = store.get_transcript_path(sid)
-            if path.exists():
-                with open(path, "r", encoding="utf-8") as f:
-                    for line in f:
-                        line = line.strip()
-                        if line:
-                            try:
-                                transcript.append(json.loads(line))
-                            except json.JSONDecodeError:
-                                pass
+            transcript = store.load_transcript(sid)
         except Exception as exc:
             logger.warning("[%s] Recall: failed to load transcript: %s", adapter.name, exc)
             return
diff --git a/tests/gateway/platforms/__init__.py b/tests/gateway/platforms/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/gateway/platforms/test_yuanbao_recall_db_only.py b/tests/gateway/platforms/test_yuanbao_recall_db_only.py
new file mode 100644
index 00000000000..6186df6787a
--- /dev/null
+++ b/tests/gateway/platforms/test_yuanbao_recall_db_only.py
@@ -0,0 +1,25 @@
+"""Yuanbao recall: branch A2 (content-match) works without JSONL message_id."""
+from gateway.session import SessionStore
+from gateway.config import GatewayConfig
+
+
+def test_recall_falls_through_to_content_match_without_message_id(tmp_path):
+    """When transcript has no message_id field, A2 content-match still works."""
+    config = GatewayConfig()
+    store = SessionStore(sessions_dir=tmp_path, config=config)
+
+    sid = "test-yuanbao-recall"
+    store._db.create_session(session_id=sid, source="yuanbao:group:G")
+    store.append_to_transcript(sid, {"role": "user", "content": "sensitive content", "timestamp": 1.0})
+    store.append_to_transcript(sid, {"role": "assistant", "content": "ack", "timestamp": 2.0})
+
+    # The post-PR state: load_transcript returns DB-only, no message_id field.
+    history = store.load_transcript(sid)
+    assert all("message_id" not in msg for msg in history), \
+        "DB-only history should not carry message_id"
+
+    # Branch A2: content match should still find the message
+    target = next((m for m in history
+                   if m.get("role") == "user" and m.get("content") == "sensitive content"), None)
+    assert target is not None
+    # Caller would then redact: target["content"] = REDACTED; store.rewrite_transcript(sid, history)