mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
perf(prompt-cache): date-only timestamp + loud gateway-DB roundtrip logging
The system prompt's 'Conversation started:' line carried minute precision
(%I:%M %p), making it byte-unstable across every rebuild path. Within a
CLI session the in-memory cache held, but on the gateway path (fresh
AIAgent per turn → restore from session DB), any silent failure in the
read or write path dropped the cache stem and forced a full re-prefill
on every subsequent turn. Local prefix-caching backends (llama.cpp /
vLLM) saw this as KV-cache invalidation; remote prefix-caching providers
saw it as an Anthropic-style cache miss.
Three changes:
1. Date-only timestamp ('Sunday, May 17, 2026' instead of '... 03:42 PM').
System prompt now byte-stable for the full day. The model can still
query exact time via tools when it actually needs it. Credit:
@iamfoz (PR #20451).
2. Loud logging on session DB write failures. The update_system_prompt
call used to log at DEBUG, hiding disk-full / locked-database / schema
drift behind a silent fall-through that forced fresh rebuilds on
every subsequent turn. Now WARN with the session id and exception so
persistent issues show up in agent.log without verbose mode.
3. Three-way stored-state distinction on read. The previous
'session_row.get("system_prompt") or None' collapsed three states
into one (missing row / null column / empty string). Now we tell them
apart and WARN when a continuing session lands on null/empty (which
means the previous turn's write never persisted — every subsequent
turn rebuilds and the prefix cache misses every time).
The restore block is extracted into _restore_or_build_system_prompt()
so the prefix-cache path can be unit-tested in isolation.
E2E proof: fresh AIAgent constructed for turn 2 across a minute-boundary
sleep restores byte-identical bytes from the session DB. NULL stored
prompt fires the new warning. Date-only timestamp survives the rebuild
path. All on real SessionDB, no mocks.
Tests:
- tests/agent/test_system_prompt_restore.py (10 new tests)
- tests/run_agent/test_run_agent.py::TestBuildSystemPrompt::
test_datetime_is_date_only_not_minute_precision
Closes #20451 (date-only), #18547 (prefix stabilization),
#8689 (stabilize timestamp across compression), #15866 (timestamp
caching question), #8687 (compression timestamp), #27339
(claim #3: live timestamp in cached system prompt).
Co-authored-by: Martyn Forryan <9133432+iamfoz@users.noreply.github.com>
This commit is contained in:
parent
9b91377bec
commit
4a3f13b47b
4 changed files with 355 additions and 38 deletions
|
|
@ -82,6 +82,108 @@ def _ra():
|
|||
return run_agent
|
||||
|
||||
|
||||
def _restore_or_build_system_prompt(agent, system_message, conversation_history):
|
||||
"""Restore the cached system prompt from the session DB or build it fresh.
|
||||
|
||||
Mutates ``agent._cached_system_prompt`` and persists a freshly-built
|
||||
prompt back to the session DB on first build. Extracted from
|
||||
``run_conversation`` so the prefix-cache restore path can be tested in
|
||||
isolation.
|
||||
|
||||
Three-way state distinction for the stored row, surfaced via logs so
|
||||
silent prefix-cache misses are visible in ``agent.log``:
|
||||
|
||||
* ``missing`` — no session row yet (legitimate first turn).
|
||||
* ``null`` — row exists, ``system_prompt`` column is NULL.
|
||||
Legacy session predating system-prompt persistence, or a migration
|
||||
leftover. Warns when ``conversation_history`` is non-empty.
|
||||
* ``empty`` — row exists, ``system_prompt`` column is the empty
|
||||
string. Indicates a previous-turn write that ran but stored
|
||||
nothing (silent persistence bug). Always warns.
|
||||
* ``present`` — row exists with a usable prompt → reused verbatim.
|
||||
|
||||
Read or write failures against the session DB log at WARNING (not
|
||||
DEBUG) so persistent issues (disk full, schema drift, lock contention)
|
||||
surface without needing verbose mode. This used to be a debug-level
|
||||
log that silently broke prefix-cache reuse on the gateway path
|
||||
(which constructs a fresh ``AIAgent`` per turn and depends on this
|
||||
DB roundtrip).
|
||||
"""
|
||||
stored_prompt = None
|
||||
stored_state = "missing"
|
||||
if conversation_history and agent._session_db:
|
||||
try:
|
||||
session_row = agent._session_db.get_session(agent.session_id)
|
||||
if session_row is not None:
|
||||
raw_prompt = session_row.get("system_prompt")
|
||||
if raw_prompt is None:
|
||||
stored_state = "null"
|
||||
elif raw_prompt == "":
|
||||
stored_state = "empty"
|
||||
else:
|
||||
stored_prompt = raw_prompt
|
||||
stored_state = "present"
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Session DB get_session failed for system-prompt restore "
|
||||
"(session=%s): %s. Falling back to fresh build — prefix "
|
||||
"cache will miss for this turn.",
|
||||
agent.session_id, exc,
|
||||
)
|
||||
|
||||
if stored_prompt:
|
||||
# Continuing session — reuse the exact system prompt from the
|
||||
# previous turn so the Anthropic cache prefix matches.
|
||||
agent._cached_system_prompt = stored_prompt
|
||||
return
|
||||
|
||||
if conversation_history and stored_state in ("null", "empty"):
|
||||
# Continuing session whose stored prompt is unusable. The
|
||||
# previous turn's write either never happened or wrote an empty
|
||||
# string — either way every turn now rebuilds and the prefix
|
||||
# cache misses every time.
|
||||
logger.warning(
|
||||
"Stored system prompt for session %s is %s; rebuilding "
|
||||
"from scratch this turn. Prefix cache will miss until "
|
||||
"the rebuild persists. Investigate the previous turn's "
|
||||
"update_system_prompt write path.",
|
||||
agent.session_id, stored_state,
|
||||
)
|
||||
|
||||
# First turn of a new session (or recovering from a broken stored
|
||||
# prompt) — build from scratch.
|
||||
agent._cached_system_prompt = agent._build_system_prompt(system_message)
|
||||
|
||||
# Plugin hook: on_session_start — fired once when a brand-new
|
||||
# session is created (not on continuation). Plugins can use this
|
||||
# to initialise session-scoped state (e.g. warm a memory cache).
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_start",
|
||||
session_id=agent.session_id,
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_start hook failed: %s", exc)
|
||||
|
||||
# Persist the system prompt snapshot in SQLite. Failure here used
|
||||
# to log at DEBUG, which silently broke prefix-cache reuse on the
|
||||
# gateway path (fresh AIAgent per turn → reads from this row every
|
||||
# subsequent turn).
|
||||
if agent._session_db:
|
||||
try:
|
||||
agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Session DB update_system_prompt failed for session %s: "
|
||||
"%s. Subsequent turns will rebuild the system prompt and "
|
||||
"miss the prefix cache.",
|
||||
agent.session_id, exc,
|
||||
)
|
||||
|
||||
|
||||
def run_conversation(
|
||||
agent,
|
||||
user_message: str,
|
||||
|
|
@ -313,43 +415,7 @@ def run_conversation(
|
|||
# producing a different system prompt and breaking the Anthropic
|
||||
# prefix cache.
|
||||
if agent._cached_system_prompt is None:
|
||||
stored_prompt = None
|
||||
if conversation_history and agent._session_db:
|
||||
try:
|
||||
session_row = agent._session_db.get_session(agent.session_id)
|
||||
if session_row:
|
||||
stored_prompt = session_row.get("system_prompt") or None
|
||||
except Exception:
|
||||
pass # Fall through to build fresh
|
||||
|
||||
if stored_prompt:
|
||||
# Continuing session — reuse the exact system prompt from
|
||||
# the previous turn so the Anthropic cache prefix matches.
|
||||
agent._cached_system_prompt = stored_prompt
|
||||
else:
|
||||
# First turn of a new session — build from scratch.
|
||||
agent._cached_system_prompt = agent._build_system_prompt(system_message)
|
||||
# Plugin hook: on_session_start
|
||||
# Fired once when a brand-new session is created (not on
|
||||
# continuation). Plugins can use this to initialise
|
||||
# session-scoped state (e.g. warm a memory cache).
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_start",
|
||||
session_id=agent.session_id,
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_start hook failed: %s", exc)
|
||||
|
||||
# Store the system prompt snapshot in SQLite
|
||||
if agent._session_db:
|
||||
try:
|
||||
agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB update_system_prompt failed: %s", e)
|
||||
_restore_or_build_system_prompt(agent, system_message, conversation_history)
|
||||
|
||||
active_system_prompt = agent._cached_system_prompt
|
||||
|
||||
|
|
|
|||
|
|
@ -258,7 +258,13 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
|||
|
||||
from hermes_time import now as _hermes_now
|
||||
now = _hermes_now()
|
||||
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
||||
# Date-only (not minute-precision) so the system prompt is byte-stable
|
||||
# for the full day. Minute-precision changes invalidate prefix-cache KV
|
||||
# on every rebuild path (compression boundary, fresh-agent gateway turns,
|
||||
# session resume without a stored prompt). The model can still query the
|
||||
# exact wall-clock time via tools when it actually needs it.
|
||||
# Credit: @iamfoz (PR #20451).
|
||||
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y')}"
|
||||
if agent.pass_session_id and agent.session_id:
|
||||
timestamp_line += f"\nSession ID: {agent.session_id}"
|
||||
if agent.model:
|
||||
|
|
|
|||
223
tests/agent/test_system_prompt_restore.py
Normal file
223
tests/agent/test_system_prompt_restore.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""Tests for ``agent.conversation_loop._restore_or_build_system_prompt``.
|
||||
|
||||
Validates the gateway DB-roundtrip path that keeps the system prompt
|
||||
byte-stable across turns (fresh AIAgent → must restore from session DB
|
||||
instead of rebuilding). Covers:
|
||||
|
||||
* Successful restore from a stored prompt (present row).
|
||||
* Legitimate first-turn build (no history).
|
||||
* Silent-failure recovery paths:
|
||||
- DB read raises → WARNING + fresh build
|
||||
- Row has system_prompt=NULL → WARNING + fresh build
|
||||
- Row has system_prompt="" → WARNING + fresh build
|
||||
- DB write fails → WARNING (subsequent turns will miss cache)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.conversation_loop import _restore_or_build_system_prompt
|
||||
|
||||
|
||||
def _make_agent(session_db=None, prebuilt_prompt: str = "BUILT_PROMPT"):
|
||||
"""Construct the minimal agent fake the helper needs."""
|
||||
agent = MagicMock()
|
||||
agent._cached_system_prompt = None
|
||||
agent.session_id = "test-session-id"
|
||||
agent.model = "test-model"
|
||||
agent.platform = "cli"
|
||||
agent._session_db = session_db
|
||||
agent._build_system_prompt = MagicMock(return_value=prebuilt_prompt)
|
||||
return agent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStoredPromptReuse:
|
||||
def test_present_row_is_reused_verbatim(self, caplog):
|
||||
"""Continuing session with a stored prompt → reuse byte-for-byte."""
|
||||
stored = "Stored prompt from turn 1 — byte-identical reuse"
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": stored}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
assert agent._cached_system_prompt == stored
|
||||
agent._build_system_prompt.assert_not_called()
|
||||
db.update_system_prompt.assert_not_called()
|
||||
# No warnings on the happy path
|
||||
assert not [r for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
|
||||
def test_present_row_with_unicode_preserved(self):
|
||||
"""Non-ASCII bytes in the stored prompt are not mangled."""
|
||||
stored = "Stored prompt with unicode: ☤ ⚗ ◆ — and emoji 🦊"
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": stored}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
assert agent._cached_system_prompt == stored
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Legitimate fresh-build paths (no history, no DB)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLegitimateFreshBuild:
|
||||
def test_no_history_skips_db_and_builds_fresh(self, caplog):
|
||||
"""First turn with empty history → build fresh, don't touch the DB."""
|
||||
db = MagicMock()
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [])
|
||||
|
||||
# No history → DB read skipped entirely
|
||||
db.get_session.assert_not_called()
|
||||
agent._build_system_prompt.assert_called_once_with(None)
|
||||
assert agent._cached_system_prompt == "BUILT_PROMPT"
|
||||
# Persisted to DB
|
||||
db.update_system_prompt.assert_called_once_with(agent.session_id, "BUILT_PROMPT")
|
||||
assert not [r for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
|
||||
def test_no_db_skips_persistence(self):
|
||||
"""When session DB is None, build and skip persistence silently."""
|
||||
agent = _make_agent(session_db=None)
|
||||
_restore_or_build_system_prompt(agent, None, [])
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
assert agent._cached_system_prompt == "BUILT_PROMPT"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Silent-failure recovery — these are the new A/B logging paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSilentFailureWarnings:
|
||||
def test_db_read_exception_warns_and_rebuilds(self, caplog):
|
||||
"""DB read raising → WARNING + fall through to fresh build."""
|
||||
db = MagicMock()
|
||||
db.get_session.side_effect = RuntimeError("disk full")
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
# Built fresh
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
assert agent._cached_system_prompt == "BUILT_PROMPT"
|
||||
# Loud warning about the read failure
|
||||
warnings = [r for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any("get_session failed" in r.getMessage() for r in warnings), \
|
||||
f"Expected a get_session warning, got: {[r.getMessage() for r in warnings]}"
|
||||
assert any("disk full" in r.getMessage() for r in warnings)
|
||||
|
||||
def test_null_system_prompt_warns_about_unusable_stored_state(self, caplog):
|
||||
"""Row exists but system_prompt is NULL → WARNING + fresh build."""
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": None}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any("is null" in m and "rebuilding" in m for m in warnings), \
|
||||
f"Expected null-stored-prompt warning, got: {warnings}"
|
||||
|
||||
def test_empty_system_prompt_warns_about_silent_persistence_bug(self, caplog):
|
||||
"""Row exists but system_prompt is '' → WARNING about silent write bug."""
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": ""}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any("is empty" in m and "rebuilding" in m for m in warnings), \
|
||||
f"Expected empty-stored-prompt warning, got: {warnings}"
|
||||
|
||||
def test_db_write_failure_warns_loudly(self, caplog):
|
||||
"""update_system_prompt raising → WARNING (was DEBUG before)."""
|
||||
db = MagicMock()
|
||||
# No prior row (first turn)
|
||||
db.get_session.return_value = None
|
||||
db.update_system_prompt.side_effect = RuntimeError("database is locked")
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [])
|
||||
|
||||
# Built and assigned the cache anyway
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
assert agent._cached_system_prompt == "BUILT_PROMPT"
|
||||
# Warning surfaced
|
||||
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any(
|
||||
"update_system_prompt failed" in m and "database is locked" in m
|
||||
for m in warnings
|
||||
), f"Expected write-failure warning, got: {warnings}"
|
||||
|
||||
def test_no_history_with_null_row_does_not_warn(self, caplog):
|
||||
"""First turn (no history) hitting a null row is not surprising — no warn."""
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": None}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
# Empty history → DB read is skipped entirely
|
||||
_restore_or_build_system_prompt(agent, None, [])
|
||||
|
||||
db.get_session.assert_not_called()
|
||||
# No "rebuilding from scratch" warning because history is empty
|
||||
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert not any("rebuilding" in m for m in warnings)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Byte-stability invariant
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPromptStabilityInvariant:
|
||||
def test_restored_prompt_is_byte_identical_to_stored(self):
|
||||
"""The restored prompt must equal the stored bytes exactly — no
|
||||
normalization, trimming, or concat that could shift the prefix.
|
||||
|
||||
This is the core invariant: any byte-level change at this point
|
||||
invalidates KV cache on every prefix-cache backend.
|
||||
"""
|
||||
stored = (
|
||||
"You are Hermes Agent.\n"
|
||||
"\n"
|
||||
"Conversation started: Sunday, May 17, 2026\n"
|
||||
"Session ID: 20260517_153500_abc123\n"
|
||||
)
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": stored}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
# Identity check — must be the same object reference for maximum
|
||||
# confidence we're not slicing/copying/normalizing.
|
||||
assert agent._cached_system_prompt == stored
|
||||
# Byte-level check
|
||||
assert agent._cached_system_prompt.encode("utf-8") == stored.encode("utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
|
|
@ -989,6 +989,28 @@ class TestBuildSystemPrompt:
|
|||
# Should contain current date info like "Conversation started:"
|
||||
assert "Conversation started:" in prompt
|
||||
|
||||
def test_datetime_is_date_only_not_minute_precision(self, agent):
|
||||
"""Timestamp must be date-only (no HH:MM) so the system prompt
|
||||
stays byte-stable for the full day. Minute precision invalidates
|
||||
prefix-cache KV on every rebuild path (compression, fresh-agent
|
||||
gateway turns, session resume without a stored prompt)."""
|
||||
prompt = agent._build_system_prompt()
|
||||
# Find the line and strip it for inspection
|
||||
for line in prompt.splitlines():
|
||||
if line.startswith("Conversation started:"):
|
||||
# Must NOT contain AM/PM indicator (minute precision had %I:%M %p)
|
||||
assert " AM" not in line and " PM" not in line, (
|
||||
f"Timestamp line has time-of-day, breaks daily cache stability: {line!r}"
|
||||
)
|
||||
# Must NOT contain a colon followed by two digits (HH:MM pattern)
|
||||
import re as _re
|
||||
assert not _re.search(r":\d{2}", line), (
|
||||
f"Timestamp line has HH:MM, breaks daily cache stability: {line!r}"
|
||||
)
|
||||
break
|
||||
else:
|
||||
assert False, "Expected a 'Conversation started:' line in the system prompt"
|
||||
|
||||
def test_includes_nous_subscription_prompt(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK")
|
||||
prompt = agent._build_system_prompt()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue