mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
refactor(yuanbao): drop dead branch A1 message_id loop + pin missing fixture
PR #29211 review findings: 1. test_retry_replacement: pin DEFAULT_DB_PATH so SessionDB() doesn't write to the real ~/.hermes/state.db. Same fix as the other DB-only fixtures. 2. yuanbao recall branch A1 (message_id exact match) was structurally dead once load_transcript() became DB-only — state.db never preserves the platform message_id. Removed the dead loop, consolidated to a single content-match branch (renamed 'A: content match'). Branch B (system note) unchanged. Updated the test name + docstring to reflect this. Note: self._lock is no longer taken in append_to_transcript (was guarding the JSONL file append). SQLite append_message handles its own concurrency via WAL mode, so this is safe; flagging for awareness.
This commit is contained in:
parent
c634c07bcc
commit
0cc1a1d2d9
3 changed files with 24 additions and 28 deletions
|
|
@ -1410,32 +1410,24 @@ class RecallGuardMiddleware(InboundMiddleware):
|
|||
logger.warning("[%s] Recall: failed to resolve session: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Load transcript from canonical store (state.db).
|
||||
#
|
||||
# Branch A1 below tries to match the recalled message by its platform
|
||||
# `message_id`. state.db does NOT preserve `message_id` (only its own
|
||||
# autoincrement primary key), so A1 will not match for any message
|
||||
# persisted post-DB-canonical (i.e. all messages going forward). Recall
|
||||
# falls through to A2 (content match) or B (system redaction note), both
|
||||
# of which work DB-only.
|
||||
#
|
||||
# TODO: add a `platform_message_id` column to state.db messages to restore
|
||||
# exact-id matching. Tracked separately.
|
||||
# Load transcript from canonical store (state.db). See Branch A below
|
||||
# for why we can no longer match by platform `message_id`.
|
||||
try:
|
||||
transcript = store.load_transcript(sid)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: failed to load transcript: %s", adapter.name, exc)
|
||||
return
|
||||
|
||||
# Branch A: redact — try message_id first, then content fallback.
|
||||
# Observed messages have message_id; agent-processed @bot messages
|
||||
# only have content (run.py doesn't write message_id to transcript).
|
||||
# Branch A: content-match redaction. state.db does NOT preserve the
|
||||
# platform `message_id` (only its own autoincrement primary key), so we
|
||||
# cannot redact by exact id. Match by content instead. Most yuanbao
|
||||
# recalls carry the recalled text via `recalled_content`, which is
|
||||
# sufficient for any non-duplicate message.
|
||||
#
|
||||
# TODO: add a `platform_message_id` column to state.db messages to
|
||||
# restore exact-id matching. Tracked separately.
|
||||
target = None
|
||||
for entry in transcript:
|
||||
if entry.get("message_id") == recalled_id:
|
||||
target = entry
|
||||
break
|
||||
if target is None and recalled_content:
|
||||
if recalled_content:
|
||||
for entry in transcript:
|
||||
if entry.get("role") == "user" and entry.get("content") == recalled_content:
|
||||
target = entry
|
||||
|
|
@ -1444,7 +1436,7 @@ class RecallGuardMiddleware(InboundMiddleware):
|
|||
target["content"] = cls._REDACTED
|
||||
try:
|
||||
store.rewrite_transcript(sid, transcript)
|
||||
logger.info("[%s] Recall: redacted msg_id=%s (branch A)", adapter.name, recalled_id)
|
||||
logger.info("[%s] Recall: redacted msg_id=%s (branch A: content match)", adapter.name, recalled_id)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Recall: rewrite_transcript failed: %s", adapter.name, exc)
|
||||
return
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
"""Yuanbao recall: branch A2 (content-match) works without JSONL message_id."""
|
||||
"""Yuanbao recall: branch A (content-match) works against DB-only transcripts."""
|
||||
from gateway.session import SessionStore
|
||||
from gateway.config import GatewayConfig
|
||||
|
||||
|
||||
def test_recall_falls_through_to_content_match_without_message_id(tmp_path, monkeypatch):
|
||||
"""When transcript has no message_id field, A2 content-match still works.
|
||||
def test_recall_content_match_finds_target_in_db_transcript(tmp_path, monkeypatch):
|
||||
"""state.db doesn't preserve message_id, so recall uses content-match.
|
||||
|
||||
Pin DEFAULT_DB_PATH to tmp_path so SessionDB() can't write to the real
|
||||
~/.hermes/state.db. (Module-level constant snapshot, see test_load_transcript_db_only.)
|
||||
|
|
@ -20,12 +20,11 @@ def test_recall_falls_through_to_content_match_without_message_id(tmp_path, monk
|
|||
store.append_to_transcript(sid, {"role": "user", "content": "sensitive content", "timestamp": 1.0})
|
||||
store.append_to_transcript(sid, {"role": "assistant", "content": "ack", "timestamp": 2.0})
|
||||
|
||||
# The post-PR state: load_transcript returns DB-only, no message_id field.
|
||||
# DB-only history carries no platform message_id (PR #29211 dropped that path).
|
||||
history = store.load_transcript(sid)
|
||||
assert all("message_id" not in msg for msg in history), \
|
||||
"DB-only history should not carry message_id"
|
||||
assert all("message_id" not in msg for msg in history)
|
||||
|
||||
# Branch A2: content match should still find the message
|
||||
# Branch A: content match finds the target row that recall would redact.
|
||||
target = next((m for m in history
|
||||
if m.get("role") == "user" and m.get("content") == "sensitive content"), None)
|
||||
assert target is not None
|
||||
|
|
|
|||
|
|
@ -11,7 +11,12 @@ from gateway.session import SessionStore
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gateway_retry_replaces_last_user_turn_in_transcript(tmp_path):
|
||||
async def test_gateway_retry_replaces_last_user_turn_in_transcript(tmp_path, monkeypatch):
|
||||
# Pin DEFAULT_DB_PATH so SessionDB() doesn't write to the real ~/.hermes/state.db.
|
||||
# (Module-level constant snapshot, see test_load_transcript_db_only.)
|
||||
import hermes_state
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")
|
||||
|
||||
config = GatewayConfig()
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue