diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 8096b754298..d3d47a5a101 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -82,6 +82,108 @@ def _ra(): return run_agent +def _restore_or_build_system_prompt(agent, system_message, conversation_history): + """Restore the cached system prompt from the session DB or build it fresh. + + Mutates ``agent._cached_system_prompt`` and persists a freshly-built + prompt back to the session DB on first build. Extracted from + ``run_conversation`` so the prefix-cache restore path can be tested in + isolation. + + Three-way state distinction for the stored row, surfaced via logs so + silent prefix-cache misses are visible in ``agent.log``: + + * ``missing`` — no session row yet (legitimate first turn). + * ``null`` — row exists, ``system_prompt`` column is NULL. + Legacy session predating system-prompt persistence, or a migration + leftover. Warns when ``conversation_history`` is non-empty. + * ``empty`` — row exists, ``system_prompt`` column is the empty + string. Indicates a previous-turn write that ran but stored + nothing (silent persistence bug). Always warns. + * ``present`` — row exists with a usable prompt → reused verbatim. + + Read or write failures against the session DB log at WARNING (not + DEBUG) so persistent issues (disk full, schema drift, lock contention) + surface without needing verbose mode. This used to be a debug-level + log that silently broke prefix-cache reuse on the gateway path + (which constructs a fresh ``AIAgent`` per turn and depends on this + DB roundtrip). + """ + stored_prompt = None + stored_state = "missing" + if conversation_history and agent._session_db: + try: + session_row = agent._session_db.get_session(agent.session_id) + if session_row is not None: + raw_prompt = session_row.get("system_prompt") + if raw_prompt is None: + stored_state = "null" + elif raw_prompt == "": + stored_state = "empty" + else: + stored_prompt = raw_prompt + stored_state = "present" + except Exception as exc: + logger.warning( + "Session DB get_session failed for system-prompt restore " + "(session=%s): %s. Falling back to fresh build — prefix " + "cache will miss for this turn.", + agent.session_id, exc, + ) + + if stored_prompt: + # Continuing session — reuse the exact system prompt from the + # previous turn so the Anthropic cache prefix matches. + agent._cached_system_prompt = stored_prompt + return + + if conversation_history and stored_state in ("null", "empty"): + # Continuing session whose stored prompt is unusable. The + # previous turn's write either never happened or wrote an empty + # string — either way every turn now rebuilds and the prefix + # cache misses every time. + logger.warning( + "Stored system prompt for session %s is %s; rebuilding " + "from scratch this turn. Prefix cache will miss until " + "the rebuild persists. Investigate the previous turn's " + "update_system_prompt write path.", + agent.session_id, stored_state, + ) + + # First turn of a new session (or recovering from a broken stored + # prompt) — build from scratch. + agent._cached_system_prompt = agent._build_system_prompt(system_message) + + # Plugin hook: on_session_start — fired once when a brand-new + # session is created (not on continuation). Plugins can use this + # to initialise session-scoped state (e.g. warm a memory cache). + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "on_session_start", + session_id=agent.session_id, + model=agent.model, + platform=getattr(agent, "platform", None) or "", + ) + except Exception as exc: + logger.warning("on_session_start hook failed: %s", exc) + + # Persist the system prompt snapshot in SQLite. Failure here used + # to log at DEBUG, which silently broke prefix-cache reuse on the + # gateway path (fresh AIAgent per turn → reads from this row every + # subsequent turn). + if agent._session_db: + try: + agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt) + except Exception as exc: + logger.warning( + "Session DB update_system_prompt failed for session %s: " + "%s. Subsequent turns will rebuild the system prompt and " + "miss the prefix cache.", + agent.session_id, exc, + ) + + def run_conversation( agent, user_message: str, @@ -313,43 +415,7 @@ def run_conversation( # producing a different system prompt and breaking the Anthropic # prefix cache. if agent._cached_system_prompt is None: - stored_prompt = None - if conversation_history and agent._session_db: - try: - session_row = agent._session_db.get_session(agent.session_id) - if session_row: - stored_prompt = session_row.get("system_prompt") or None - except Exception: - pass # Fall through to build fresh - - if stored_prompt: - # Continuing session — reuse the exact system prompt from - # the previous turn so the Anthropic cache prefix matches. - agent._cached_system_prompt = stored_prompt - else: - # First turn of a new session — build from scratch. - agent._cached_system_prompt = agent._build_system_prompt(system_message) - # Plugin hook: on_session_start - # Fired once when a brand-new session is created (not on - # continuation). Plugins can use this to initialise - # session-scoped state (e.g. warm a memory cache). - try: - from hermes_cli.plugins import invoke_hook as _invoke_hook - _invoke_hook( - "on_session_start", - session_id=agent.session_id, - model=agent.model, - platform=getattr(agent, "platform", None) or "", - ) - except Exception as exc: - logger.warning("on_session_start hook failed: %s", exc) - - # Store the system prompt snapshot in SQLite - if agent._session_db: - try: - agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt) - except Exception as e: - logger.debug("Session DB update_system_prompt failed: %s", e) + _restore_or_build_system_prompt(agent, system_message, conversation_history) active_system_prompt = agent._cached_system_prompt diff --git a/agent/system_prompt.py b/agent/system_prompt.py index d69d8c32099..a9815a2f2f4 100644 --- a/agent/system_prompt.py +++ b/agent/system_prompt.py @@ -258,7 +258,13 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) from hermes_time import now as _hermes_now now = _hermes_now() - timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}" + # Date-only (not minute-precision) so the system prompt is byte-stable + # for the full day. Minute-precision changes invalidate prefix-cache KV + # on every rebuild path (compression boundary, fresh-agent gateway turns, + # session resume without a stored prompt). The model can still query the + # exact wall-clock time via tools when it actually needs it. + # Credit: @iamfoz (PR #20451). + timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y')}" if agent.pass_session_id and agent.session_id: timestamp_line += f"\nSession ID: {agent.session_id}" if agent.model: diff --git a/tests/agent/test_system_prompt_restore.py b/tests/agent/test_system_prompt_restore.py new file mode 100644 index 00000000000..ecfd57b1dfe --- /dev/null +++ b/tests/agent/test_system_prompt_restore.py @@ -0,0 +1,223 @@ +"""Tests for ``agent.conversation_loop._restore_or_build_system_prompt``. + +Validates the gateway DB-roundtrip path that keeps the system prompt +byte-stable across turns (fresh AIAgent → must restore from session DB +instead of rebuilding). Covers: + + * Successful restore from a stored prompt (present row). + * Legitimate first-turn build (no history). + * Silent-failure recovery paths: + - DB read raises → WARNING + fresh build + - Row has system_prompt=NULL → WARNING + fresh build + - Row has system_prompt="" → WARNING + fresh build + - DB write fails → WARNING (subsequent turns will miss cache) +""" + +from __future__ import annotations + +import logging +from unittest.mock import MagicMock + +import pytest + +from agent.conversation_loop import _restore_or_build_system_prompt + + +def _make_agent(session_db=None, prebuilt_prompt: str = "BUILT_PROMPT"): + """Construct the minimal agent fake the helper needs.""" + agent = MagicMock() + agent._cached_system_prompt = None + agent.session_id = "test-session-id" + agent.model = "test-model" + agent.platform = "cli" + agent._session_db = session_db + agent._build_system_prompt = MagicMock(return_value=prebuilt_prompt) + return agent + + +# --------------------------------------------------------------------------- +# Happy paths +# --------------------------------------------------------------------------- + + +class TestStoredPromptReuse: + def test_present_row_is_reused_verbatim(self, caplog): + """Continuing session with a stored prompt → reuse byte-for-byte.""" + stored = "Stored prompt from turn 1 — byte-identical reuse" + db = MagicMock() + db.get_session.return_value = {"system_prompt": stored} + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}]) + + assert agent._cached_system_prompt == stored + agent._build_system_prompt.assert_not_called() + db.update_system_prompt.assert_not_called() + # No warnings on the happy path + assert not [r for r in caplog.records if r.levelno >= logging.WARNING] + + def test_present_row_with_unicode_preserved(self): + """Non-ASCII bytes in the stored prompt are not mangled.""" + stored = "Stored prompt with unicode: ☤ ⚗ ◆ — and emoji 🦊" + db = MagicMock() + db.get_session.return_value = {"system_prompt": stored} + agent = _make_agent(session_db=db) + + _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}]) + assert agent._cached_system_prompt == stored + + +# --------------------------------------------------------------------------- +# Legitimate fresh-build paths (no history, no DB) +# --------------------------------------------------------------------------- + + +class TestLegitimateFreshBuild: + def test_no_history_skips_db_and_builds_fresh(self, caplog): + """First turn with empty history → build fresh, don't touch the DB.""" + db = MagicMock() + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + _restore_or_build_system_prompt(agent, None, []) + + # No history → DB read skipped entirely + db.get_session.assert_not_called() + agent._build_system_prompt.assert_called_once_with(None) + assert agent._cached_system_prompt == "BUILT_PROMPT" + # Persisted to DB + db.update_system_prompt.assert_called_once_with(agent.session_id, "BUILT_PROMPT") + assert not [r for r in caplog.records if r.levelno >= logging.WARNING] + + def test_no_db_skips_persistence(self): + """When session DB is None, build and skip persistence silently.""" + agent = _make_agent(session_db=None) + _restore_or_build_system_prompt(agent, None, []) + agent._build_system_prompt.assert_called_once() + assert agent._cached_system_prompt == "BUILT_PROMPT" + + +# --------------------------------------------------------------------------- +# Silent-failure recovery — these are the new A/B logging paths +# --------------------------------------------------------------------------- + + +class TestSilentFailureWarnings: + def test_db_read_exception_warns_and_rebuilds(self, caplog): + """DB read raising → WARNING + fall through to fresh build.""" + db = MagicMock() + db.get_session.side_effect = RuntimeError("disk full") + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}]) + + # Built fresh + agent._build_system_prompt.assert_called_once() + assert agent._cached_system_prompt == "BUILT_PROMPT" + # Loud warning about the read failure + warnings = [r for r in caplog.records if r.levelno >= logging.WARNING] + assert any("get_session failed" in r.getMessage() for r in warnings), \ + f"Expected a get_session warning, got: {[r.getMessage() for r in warnings]}" + assert any("disk full" in r.getMessage() for r in warnings) + + def test_null_system_prompt_warns_about_unusable_stored_state(self, caplog): + """Row exists but system_prompt is NULL → WARNING + fresh build.""" + db = MagicMock() + db.get_session.return_value = {"system_prompt": None} + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}]) + + agent._build_system_prompt.assert_called_once() + warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING] + assert any("is null" in m and "rebuilding" in m for m in warnings), \ + f"Expected null-stored-prompt warning, got: {warnings}" + + def test_empty_system_prompt_warns_about_silent_persistence_bug(self, caplog): + """Row exists but system_prompt is '' → WARNING about silent write bug.""" + db = MagicMock() + db.get_session.return_value = {"system_prompt": ""} + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}]) + + agent._build_system_prompt.assert_called_once() + warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING] + assert any("is empty" in m and "rebuilding" in m for m in warnings), \ + f"Expected empty-stored-prompt warning, got: {warnings}" + + def test_db_write_failure_warns_loudly(self, caplog): + """update_system_prompt raising → WARNING (was DEBUG before).""" + db = MagicMock() + # No prior row (first turn) + db.get_session.return_value = None + db.update_system_prompt.side_effect = RuntimeError("database is locked") + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + _restore_or_build_system_prompt(agent, None, []) + + # Built and assigned the cache anyway + agent._build_system_prompt.assert_called_once() + assert agent._cached_system_prompt == "BUILT_PROMPT" + # Warning surfaced + warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING] + assert any( + "update_system_prompt failed" in m and "database is locked" in m + for m in warnings + ), f"Expected write-failure warning, got: {warnings}" + + def test_no_history_with_null_row_does_not_warn(self, caplog): + """First turn (no history) hitting a null row is not surprising — no warn.""" + db = MagicMock() + db.get_session.return_value = {"system_prompt": None} + agent = _make_agent(session_db=db) + + with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"): + # Empty history → DB read is skipped entirely + _restore_or_build_system_prompt(agent, None, []) + + db.get_session.assert_not_called() + # No "rebuilding from scratch" warning because history is empty + warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING] + assert not any("rebuilding" in m for m in warnings) + + +# --------------------------------------------------------------------------- +# Byte-stability invariant +# --------------------------------------------------------------------------- + + +class TestPromptStabilityInvariant: + def test_restored_prompt_is_byte_identical_to_stored(self): + """The restored prompt must equal the stored bytes exactly — no + normalization, trimming, or concat that could shift the prefix. + + This is the core invariant: any byte-level change at this point + invalidates KV cache on every prefix-cache backend. + """ + stored = ( + "You are Hermes Agent.\n" + "\n" + "Conversation started: Sunday, May 17, 2026\n" + "Session ID: 20260517_153500_abc123\n" + ) + db = MagicMock() + db.get_session.return_value = {"system_prompt": stored} + agent = _make_agent(session_db=db) + + _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}]) + + # Identity check — must be the same object reference for maximum + # confidence we're not slicing/copying/normalizing. + assert agent._cached_system_prompt == stored + # Byte-level check + assert agent._cached_system_prompt.encode("utf-8") == stored.encode("utf-8") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 55cc8186205..9ff7ab28612 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -989,6 +989,28 @@ class TestBuildSystemPrompt: # Should contain current date info like "Conversation started:" assert "Conversation started:" in prompt + def test_datetime_is_date_only_not_minute_precision(self, agent): + """Timestamp must be date-only (no HH:MM) so the system prompt + stays byte-stable for the full day. Minute precision invalidates + prefix-cache KV on every rebuild path (compression, fresh-agent + gateway turns, session resume without a stored prompt).""" + prompt = agent._build_system_prompt() + # Find the line and strip it for inspection + for line in prompt.splitlines(): + if line.startswith("Conversation started:"): + # Must NOT contain AM/PM indicator (minute precision had %I:%M %p) + assert " AM" not in line and " PM" not in line, ( + f"Timestamp line has time-of-day, breaks daily cache stability: {line!r}" + ) + # Must NOT contain a colon followed by two digits (HH:MM pattern) + import re as _re + assert not _re.search(r":\d{2}", line), ( + f"Timestamp line has HH:MM, breaks daily cache stability: {line!r}" + ) + break + else: + assert False, "Expected a 'Conversation started:' line in the system prompt" + def test_includes_nous_subscription_prompt(self, agent, monkeypatch): monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK") prompt = agent._build_system_prompt()