fix(desktop): keep live model switch metadata truthful

A live config.set model switch already moved the next API call to the new model,
but the conversation could still restore an old sessions.system_prompt snapshot
whose Model/Provider lines named the previous runtime. That made "what model are
you?" answer from stale metadata even while inference ran on the new model.

After a live switch we now refresh the stored system prompt and append a real
system-history pivot (not a fake user turn) so the transcript itself records the
new model/provider. Restore also rejects already-stale prompt snapshots when
their Model/Provider lines disagree with the runtime, so existing bad sessions
self-heal.
This commit is contained in:
Brooklyn Nicholson 2026-06-16 09:50:17 -05:00
parent cb6b4127e7
commit 7d938cc5c9
4 changed files with 185 additions and 1 deletions

View file

@ -300,11 +300,20 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
agent.session_id, exc,
)
if stored_prompt:
if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
# Continuing session — reuse the exact system prompt from the
# previous turn so the Anthropic cache prefix matches.
agent._cached_system_prompt = stored_prompt
return
if stored_prompt:
stored_state = "stale_runtime"
logger.info(
"Stored system prompt for session %s has stale runtime identity; "
"rebuilding for model=%s provider=%s.",
agent.session_id,
getattr(agent, "model", "") or "",
getattr(agent, "provider", "") or "",
)
if conversation_history and stored_state in ("null", "empty"):
# Continuing session whose stored prompt is unusable. The
@ -366,6 +375,30 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
)
def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
"""Return False when the persisted Model/Provider lines are stale."""
def line_value(label: str) -> str:
prefix = f"{label}:"
value = ""
for line in prompt.splitlines():
if line.startswith(prefix):
value = line[len(prefix):].strip()
return value
stored_model = line_value("Model")
current_model = str(getattr(agent, "model", "") or "").strip()
if stored_model and current_model and stored_model != current_model:
return False
stored_provider = line_value("Provider")
current_provider = str(getattr(agent, "provider", "") or "").strip()
if stored_provider and current_provider and stored_provider != current_provider:
return False
return True
def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
if is_partial_stub and dropped_tools:
tool_list = ", ".join(dropped_tools[:3])

View file

@ -29,6 +29,7 @@ def _make_agent(session_db=None, prebuilt_prompt: str = "BUILT_PROMPT"):
agent._cached_system_prompt = None
agent.session_id = "test-session-id"
agent.model = "test-model"
agent.provider = "openrouter"
agent.platform = "cli"
agent._session_db = session_db
agent._build_system_prompt = MagicMock(return_value=prebuilt_prompt)
@ -67,6 +68,47 @@ class TestStoredPromptReuse:
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
assert agent._cached_system_prompt == stored
def test_present_row_with_stale_runtime_identity_rebuilds(self, caplog):
    """A stored prompt naming a different model than the agent is rebuilt.

    The DB row holds a prompt whose ``Model:`` line names the previous
    model while ``agent.model`` has already moved on. Restoring that
    snapshot verbatim would leave the old ``Model:`` metadata in front of
    the new model, so the prompt must be rebuilt, written back to the DB,
    and the rebuild must be logged as a stale runtime identity.
    """
    stale_prompt = (
        "You are Hermes Agent.\n\n"
        "Conversation started: Tuesday, June 16, 2026\n"
        "Session ID: test-session-id\n"
        "Model: anthropic/claude-opus-4.8-fast\n"
        "Provider: openrouter"
    )
    fresh_prompt = (
        "You are Hermes Agent.\n\n"
        "Conversation started: Tuesday, June 16, 2026\n"
        "Session ID: test-session-id\n"
        "Model: openai/gpt-5.5\n"
        "Provider: openrouter"
    )

    session_db = MagicMock()
    session_db.get_session.return_value = {"system_prompt": stale_prompt}
    agent = _make_agent(session_db=session_db, prebuilt_prompt=fresh_prompt)
    # The agent now runs on a model the stored prompt does not mention.
    agent.model = "openai/gpt-5.5"
    history = [{"role": "user", "content": "hi"}]

    with caplog.at_level(logging.INFO, logger="agent.conversation_loop"):
        _restore_or_build_system_prompt(agent, None, history)

    # The cached prompt is the rebuilt one, not the stale snapshot.
    assert agent._cached_system_prompt.endswith(
        "Model: openai/gpt-5.5\nProvider: openrouter"
    )
    agent._build_system_prompt.assert_called_once_with(None)
    # The rebuilt prompt is persisted over the stale row.
    session_db.update_system_prompt.assert_called_once_with(
        agent.session_id, agent._cached_system_prompt
    )
    logged_messages = [record.getMessage() for record in caplog.records]
    assert any("stale runtime identity" in text for text in logged_messages)
# ---------------------------------------------------------------------------
# Legitimate fresh-build paths (no history, no DB)

View file

@ -3326,12 +3326,39 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch):
provider = "openai-codex"
base_url = ""
api_key = ""
session_id = "sid"
_cached_system_prompt = "Model: gpt-5.3-codex\nProvider: openai-codex"
def switch_model(self, **kwargs):
self.model = kwargs["new_model"]
self.provider = kwargs["new_provider"]
def _build_system_prompt(self, _system_message=None):
return f"Model: {self.model}\nProvider: {self.provider}"
class SessionDB:
def __init__(self):
self.model_config = None
self.system_prompt = None
self.messages = []
def get_session(self, _session_id):
return {"model_config": self.model_config}
def update_session_meta(self, _session_id, model_config_json, _model=None):
self.model_config = model_config_json
def update_system_prompt(self, _session_id, system_prompt):
self.system_prompt = system_prompt
def append_message(self, session_id, role, content=None, **_kwargs):
self.messages.append(
{"session_id": session_id, "role": role, "content": content}
)
agent = Agent()
db = SessionDB()
agent._session_db = db
session = _session(agent=agent)
server._sessions["sid"] = session
monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex")
@ -3373,6 +3400,21 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch):
# ...override recorded on the session...
assert session["model_override"]["model"] == "anthropic/claude-sonnet-4.6"
assert session["model_override"]["provider"] == "anthropic"
# ...the persisted prompt snapshot tracks the new runtime identity too.
# Without this, the next turn restored the old system prompt from the DB:
# API calls went to the new model, but "what model are you?" still read
# "Model: old/model" from the stored prompt.
assert db.system_prompt == (
"Model: anthropic/claude-sonnet-4.6\nProvider: anthropic"
)
assert agent._cached_system_prompt == db.system_prompt
assert session["history"][-1]["role"] == "system"
assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"]
assert db.messages[-1] == {
"session_id": "session-key",
"role": "system",
"content": session["history"][-1]["content"],
}
# ...and the shared process env was NOT touched.
assert os.environ["HERMES_TUI_PROVIDER"] == "openai-codex"
assert "HERMES_MODEL" not in os.environ

View file

@ -1673,6 +1673,69 @@ def _persist_live_session_runtime(session: dict | None) -> None:
logger.debug("failed to persist live session runtime", exc_info=True)
def _persist_live_session_system_prompt(session: dict | None) -> None:
"""Refresh the stored system prompt after a live runtime identity change."""
if not session:
return
agent = session.get("agent")
session_key = str(session.get("session_key") or "").strip()
if agent is None or not session_key or not hasattr(agent, "_build_system_prompt"):
return
db = getattr(agent, "_session_db", None) or _get_db()
if db is None or not hasattr(db, "update_system_prompt"):
return
try:
prompt = agent._build_system_prompt(None)
agent._cached_system_prompt = prompt
db.update_system_prompt(getattr(agent, "session_id", None) or session_key, prompt)
except Exception:
logger.debug("failed to persist live session system prompt", exc_info=True)
def _append_model_switch_marker(session: dict | None, *, model: str, provider: str) -> None:
"""Record a real system-history pivot after a live model switch."""
if not session:
return
session_key = str(session.get("session_key") or "").strip()
if not session_key:
return
provider_part = f" via provider {provider}" if provider else ""
marker = (
"[System: The active model for this chat has changed to "
f"{model}{provider_part}. From this point forward, use this runtime "
"metadata when answering questions about what model/provider is active.]"
)
entry = {"role": "system", "content": marker}
lock = session.get("history_lock")
if lock is not None:
with lock:
session.setdefault("history", []).append(entry)
session["history_version"] = int(session.get("history_version", 0)) + 1
else:
session.setdefault("history", []).append(entry)
session["history_version"] = int(session.get("history_version", 0)) + 1
try:
agent = session.get("agent")
db = getattr(agent, "_session_db", None) if agent is not None else None
if db is not None:
db.append_message(session_id=session_key, role="system", content=marker)
return
_ensure_session_db_row(session)
with _session_db(session) as scoped_db:
if scoped_db is not None:
scoped_db.append_message(
session_id=session_key, role="system", content=marker
)
except Exception:
logger.debug("failed to persist model switch marker", exc_info=True)
def _write_config_key(key_path: str, value):
cfg = _load_cfg()
current = cfg
@ -2092,6 +2155,10 @@ def _apply_model_switch(
)
_restart_slash_worker(sid, session)
_persist_live_session_runtime(session)
_persist_live_session_system_prompt(session)
_append_model_switch_marker(
session, model=result.new_model, provider=result.target_provider
)
_emit("session.info", sid, _session_info(agent, session))
# Record the switch as a PER-SESSION override so a later rebuild of THIS