From 7d938cc5c9c7beff22e7cb48886cba33753a5376 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 16 Jun 2026 09:50:17 -0500 Subject: [PATCH] fix(desktop): keep live model switch metadata truthful A live config.set model switch already moved the next API call to the new model, but the conversation could still restore an old sessions.system_prompt snapshot whose Model/Provider lines named the previous runtime. That made "what model are you?" answer from stale metadata even while inference ran on the new model. After a live switch we now refresh the stored system prompt and append a real system-history pivot (not a fake user turn) so the transcript itself records the new model/provider. Restore also rejects already-stale prompt snapshots when their Model/Provider lines disagree with the runtime, so existing bad sessions self-heal. --- agent/conversation_loop.py | 35 +++++++++++- tests/agent/test_system_prompt_restore.py | 42 ++++++++++++++ tests/test_tui_gateway_server.py | 42 ++++++++++++++ tui_gateway/server.py | 67 +++++++++++++++++++++++ 4 files changed, 185 insertions(+), 1 deletion(-) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 379a038a9e0..45722d2657f 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -300,11 +300,20 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history) agent.session_id, exc, ) - if stored_prompt: + if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt): # Continuing session — reuse the exact system prompt from the # previous turn so the Anthropic cache prefix matches. 
def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
    """Check a persisted system prompt against the agent's live runtime identity.

    The prompt is scanned for lines that begin with ``Model:`` and
    ``Provider:``. When a label occurs more than once, the last occurrence
    wins. A label is only compared when both the stored value and the agent's
    corresponding attribute are non-empty after stripping whitespace, so a
    prompt without these lines is always treated as matching.

    Args:
        agent: Object whose ``model`` and ``provider`` attributes (if present)
            describe the current runtime.
        prompt: The persisted system prompt text.

    Returns:
        ``False`` when a comparable ``Model``/``Provider`` value differs from
        the agent's current one, ``True`` otherwise.
    """
    prompt_lines = prompt.splitlines()

    for label, attr_name in (("Model", "model"), ("Provider", "provider")):
        marker = f"{label}:"
        # Walk backwards so the first hit is the last occurrence in the prompt.
        recorded = next(
            (
                text[len(marker):].strip()
                for text in reversed(prompt_lines)
                if text.startswith(marker)
            ),
            "",
        )
        live = str(getattr(agent, attr_name, "") or "").strip()
        if recorded and live and recorded != live:
            return False

    return True
def test_present_row_with_stale_runtime_identity_rebuilds(self, caplog):
    """A stored prompt that names a different model is rebuilt, not reused.

    The stored snapshot still carries the previous ``Model:`` line while the
    agent has already been switched to another model. Restoring the snapshot
    verbatim would leave the prompt describing the old model, so the restore
    path is expected to rebuild the prompt, persist the rebuilt text, and log
    that the stored runtime identity was stale.
    """
    preamble = (
        "You are Hermes Agent.\n\n"
        "Conversation started: Tuesday, June 16, 2026\n"
        "Session ID: test-session-id\n"
    )
    stale_prompt = preamble + "Model: anthropic/claude-opus-4.8-fast\nProvider: openrouter"
    fresh_prompt = preamble + "Model: openai/gpt-5.5\nProvider: openrouter"

    session_db = MagicMock()
    session_db.get_session.return_value = {"system_prompt": stale_prompt}
    agent = _make_agent(session_db=session_db, prebuilt_prompt=fresh_prompt)
    # Simulate the live switch: the agent already points at the new model.
    agent.model = "openai/gpt-5.5"
    history = [{"role": "user", "content": "hi"}]

    with caplog.at_level(logging.INFO, logger="agent.conversation_loop"):
        _restore_or_build_system_prompt(agent, None, history)

    # The rebuilt prompt, not the stale snapshot, is what ends up cached.
    assert agent._cached_system_prompt.endswith(
        "Model: openai/gpt-5.5\nProvider: openrouter"
    )
    agent._build_system_prompt.assert_called_once_with(None)
    session_db.update_system_prompt.assert_called_once_with(
        agent.session_id, agent._cached_system_prompt
    )
    logged = [record.getMessage() for record in caplog.records]
    assert any("stale runtime identity" in text for text in logged)
a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -3326,12 +3326,39 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch): provider = "openai-codex" base_url = "" api_key = "" + session_id = "sid" + _cached_system_prompt = "Model: gpt-5.3-codex\nProvider: openai-codex" def switch_model(self, **kwargs): self.model = kwargs["new_model"] self.provider = kwargs["new_provider"] + def _build_system_prompt(self, _system_message=None): + return f"Model: {self.model}\nProvider: {self.provider}" + + class SessionDB: + def __init__(self): + self.model_config = None + self.system_prompt = None + self.messages = [] + + def get_session(self, _session_id): + return {"model_config": self.model_config} + + def update_session_meta(self, _session_id, model_config_json, _model=None): + self.model_config = model_config_json + + def update_system_prompt(self, _session_id, system_prompt): + self.system_prompt = system_prompt + + def append_message(self, session_id, role, content=None, **_kwargs): + self.messages.append( + {"session_id": session_id, "role": role, "content": content} + ) + agent = Agent() + db = SessionDB() + agent._session_db = db session = _session(agent=agent) server._sessions["sid"] = session monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex") @@ -3373,6 +3400,21 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch): # ...override recorded on the session... assert session["model_override"]["model"] == "anthropic/claude-sonnet-4.6" assert session["model_override"]["provider"] == "anthropic" + # ...the persisted prompt snapshot tracks the new runtime identity too. + # Without this, the next turn restored the old system prompt from the DB: + # API calls went to the new model, but "what model are you?" still read + # "Model: old/model" from the stored prompt. 
+ assert db.system_prompt == ( + "Model: anthropic/claude-sonnet-4.6\nProvider: anthropic" + ) + assert agent._cached_system_prompt == db.system_prompt + assert session["history"][-1]["role"] == "system" + assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"] + assert db.messages[-1] == { + "session_id": "session-key", + "role": "system", + "content": session["history"][-1]["content"], + } # ...and the shared process env was NOT touched. assert os.environ["HERMES_TUI_PROVIDER"] == "openai-codex" assert "HERMES_MODEL" not in os.environ diff --git a/tui_gateway/server.py b/tui_gateway/server.py index d0e52635e7c..072a0c959b6 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1673,6 +1673,69 @@ def _persist_live_session_runtime(session: dict | None) -> None: logger.debug("failed to persist live session runtime", exc_info=True) +def _persist_live_session_system_prompt(session: dict | None) -> None: + """Refresh the stored system prompt after a live runtime identity change.""" + if not session: + return + agent = session.get("agent") + session_key = str(session.get("session_key") or "").strip() + if agent is None or not session_key or not hasattr(agent, "_build_system_prompt"): + return + + db = getattr(agent, "_session_db", None) or _get_db() + if db is None or not hasattr(db, "update_system_prompt"): + return + + try: + prompt = agent._build_system_prompt(None) + agent._cached_system_prompt = prompt + db.update_system_prompt(getattr(agent, "session_id", None) or session_key, prompt) + except Exception: + logger.debug("failed to persist live session system prompt", exc_info=True) + + +def _append_model_switch_marker(session: dict | None, *, model: str, provider: str) -> None: + """Record a real system-history pivot after a live model switch.""" + if not session: + return + session_key = str(session.get("session_key") or "").strip() + if not session_key: + return + + provider_part = f" via provider {provider}" if provider 
else "" + marker = ( + "[System: The active model for this chat has changed to " + f"{model}{provider_part}. From this point forward, use this runtime " + "metadata when answering questions about what model/provider is active.]" + ) + entry = {"role": "system", "content": marker} + + lock = session.get("history_lock") + if lock is not None: + with lock: + session.setdefault("history", []).append(entry) + session["history_version"] = int(session.get("history_version", 0)) + 1 + else: + session.setdefault("history", []).append(entry) + session["history_version"] = int(session.get("history_version", 0)) + 1 + + try: + agent = session.get("agent") + db = getattr(agent, "_session_db", None) if agent is not None else None + if db is not None: + db.append_message(session_id=session_key, role="system", content=marker) + return + + _ensure_session_db_row(session) + with _session_db(session) as scoped_db: + if scoped_db is not None: + scoped_db.append_message( + session_id=session_key, role="system", content=marker + ) + except Exception: + logger.debug("failed to persist model switch marker", exc_info=True) + + def _write_config_key(key_path: str, value): cfg = _load_cfg() current = cfg @@ -2092,6 +2155,10 @@ def _apply_model_switch( ) _restart_slash_worker(sid, session) _persist_live_session_runtime(session) + _persist_live_session_system_prompt(session) + _append_model_switch_marker( + session, model=result.new_model, provider=result.target_provider + ) _emit("session.info", sid, _session_info(agent, session)) # Record the switch as a PER-SESSION override so a later rebuild of THIS