fix(desktop): keep live model switch metadata truthful

A live config.set model switch already moved the next API call to the new model, but the conversation could still restore an old sessions.system_prompt snapshot whose Model/Provider lines named the previous runtime. That made "what model are you?" answer from stale metadata even while inference ran on the new model. After a live switch we now refresh the stored system prompt and append a real system-history pivot (not a fake user turn) so the transcript itself records the new model/provider. Restore also rejects already-stale prompt snapshots when their Model/Provider lines disagree with the runtime, so existing bad sessions self-heal.
2026-06-17 09:41:58 +00:00 · 2026-06-16 09:50:17 -05:00 · 2026-06-16 09:50:17 -05:00 · 7d938cc5c9
commit 7d938cc5c9
parent cb6b4127e7
4 changed files with 185 additions and 1 deletions
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -300,11 +300,20 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
                agent.session_id, exc,
            )

-    if stored_prompt:
+    if stored_prompt and _stored_prompt_matches_runtime(agent, stored_prompt):
        # Continuing session — reuse the exact system prompt from the
        # previous turn so the Anthropic cache prefix matches.
        agent._cached_system_prompt = stored_prompt
        return
+    if stored_prompt:
+        stored_state = "stale_runtime"
+        logger.info(
+            "Stored system prompt for session %s has stale runtime identity; "
+            "rebuilding for model=%s provider=%s.",
+            agent.session_id,
+            getattr(agent, "model", "") or "",
+            getattr(agent, "provider", "") or "",
+        )

    if conversation_history and stored_state in ("null", "empty"):
        # Continuing session whose stored prompt is unusable.  The
@ -366,6 +375,30 @@ def _restore_or_build_system_prompt(agent, system_message, conversation_history)
            )


+def _stored_prompt_matches_runtime(agent, prompt: str) -> bool:
+    """Return False when the persisted Model/Provider lines are stale."""
+
+    def line_value(label: str) -> str:
+        prefix = f"{label}:"
+        value = ""
+        for line in prompt.splitlines():
+            if line.startswith(prefix):
+                value = line[len(prefix):].strip()
+        return value
+
+    stored_model = line_value("Model")
+    current_model = str(getattr(agent, "model", "") or "").strip()
+    if stored_model and current_model and stored_model != current_model:
+        return False
+
+    stored_provider = line_value("Provider")
+    current_provider = str(getattr(agent, "provider", "") or "").strip()
+    if stored_provider and current_provider and stored_provider != current_provider:
+        return False
+
+    return True
+
+
 def _get_continuation_prompt(is_partial_stub: bool, dropped_tools: Optional[List[str]] = None) -> str:
    if is_partial_stub and dropped_tools:
        tool_list = ", ".join(dropped_tools[:3])
--- a/tests/agent/test_system_prompt_restore.py
+++ b/tests/agent/test_system_prompt_restore.py
@ -29,6 +29,7 @@ def _make_agent(session_db=None, prebuilt_prompt: str = "BUILT_PROMPT"):
    agent._cached_system_prompt = None
    agent.session_id = "test-session-id"
    agent.model = "test-model"
+    agent.provider = "openrouter"
    agent.platform = "cli"
    agent._session_db = session_db
    agent._build_system_prompt = MagicMock(return_value=prebuilt_prompt)
@ -67,6 +68,47 @@ class TestStoredPromptReuse:
        _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
        assert agent._cached_system_prompt == stored

+    def test_present_row_with_stale_runtime_identity_rebuilds(self, caplog):
+        """Stored prompts are cache gold unless their runtime identity is stale.
+
+        A live /model switch updates the agent and DB model_config immediately.
+        If the old system_prompt snapshot still says the previous model,
+        blindly restoring it makes the next turn call the new model while the
+        model reads old `Model:` metadata ("what model are you?" lies).
+        """
+        stored = (
+            "You are Hermes Agent.\n\n"
+            "Conversation started: Tuesday, June 16, 2026\n"
+            "Session ID: test-session-id\n"
+            "Model: anthropic/claude-opus-4.8-fast\n"
+            "Provider: openrouter"
+        )
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": stored}
+        agent = _make_agent(
+            session_db=db,
+            prebuilt_prompt=(
+                "You are Hermes Agent.\n\n"
+                "Conversation started: Tuesday, June 16, 2026\n"
+                "Session ID: test-session-id\n"
+                "Model: openai/gpt-5.5\n"
+                "Provider: openrouter"
+            ),
+        )
+        agent.model = "openai/gpt-5.5"
+
+        with caplog.at_level(logging.INFO, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+
+        assert agent._cached_system_prompt.endswith(
+            "Model: openai/gpt-5.5\nProvider: openrouter"
+        )
+        agent._build_system_prompt.assert_called_once_with(None)
+        db.update_system_prompt.assert_called_once_with(
+            agent.session_id, agent._cached_system_prompt
+        )
+        assert any("stale runtime identity" in r.getMessage() for r in caplog.records)
+

 # ---------------------------------------------------------------------------
 # Legitimate fresh-build paths (no history, no DB)
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -3326,12 +3326,39 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch):
        provider = "openai-codex"
        base_url = ""
        api_key = ""
+        session_id = "sid"
+        _cached_system_prompt = "Model: gpt-5.3-codex\nProvider: openai-codex"

        def switch_model(self, **kwargs):
            self.model = kwargs["new_model"]
            self.provider = kwargs["new_provider"]

+        def _build_system_prompt(self, _system_message=None):
+            return f"Model: {self.model}\nProvider: {self.provider}"
+
+    class SessionDB:
+        def __init__(self):
+            self.model_config = None
+            self.system_prompt = None
+            self.messages = []
+
+        def get_session(self, _session_id):
+            return {"model_config": self.model_config}
+
+        def update_session_meta(self, _session_id, model_config_json, _model=None):
+            self.model_config = model_config_json
+
+        def update_system_prompt(self, _session_id, system_prompt):
+            self.system_prompt = system_prompt
+
+        def append_message(self, session_id, role, content=None, **_kwargs):
+            self.messages.append(
+                {"session_id": session_id, "role": role, "content": content}
+            )
+
    agent = Agent()
+    db = SessionDB()
+    agent._session_db = db
    session = _session(agent=agent)
    server._sessions["sid"] = session
    monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex")
@ -3373,6 +3400,21 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch):
        # ...override recorded on the session...
        assert session["model_override"]["model"] == "anthropic/claude-sonnet-4.6"
        assert session["model_override"]["provider"] == "anthropic"
+        # ...the persisted prompt snapshot tracks the new runtime identity too.
+        # Without this, the next turn restored the old system prompt from the DB:
+        # API calls went to the new model, but "what model are you?" still read
+        # "Model: old/model" from the stored prompt.
+        assert db.system_prompt == (
+            "Model: anthropic/claude-sonnet-4.6\nProvider: anthropic"
+        )
+        assert agent._cached_system_prompt == db.system_prompt
+        assert session["history"][-1]["role"] == "system"
+        assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"]
+        assert db.messages[-1] == {
+            "session_id": "session-key",
+            "role": "system",
+            "content": session["history"][-1]["content"],
+        }
        # ...and the shared process env was NOT touched.
        assert os.environ["HERMES_TUI_PROVIDER"] == "openai-codex"
        assert "HERMES_MODEL" not in os.environ
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -1673,6 +1673,69 @@ def _persist_live_session_runtime(session: dict | None) -> None:
        logger.debug("failed to persist live session runtime", exc_info=True)


+def _persist_live_session_system_prompt(session: dict | None) -> None:
+    """Refresh the stored system prompt after a live runtime identity change."""
+    if not session:
+        return
+    agent = session.get("agent")
+    session_key = str(session.get("session_key") or "").strip()
+    if agent is None or not session_key or not hasattr(agent, "_build_system_prompt"):
+        return
+
+    db = getattr(agent, "_session_db", None) or _get_db()
+    if db is None or not hasattr(db, "update_system_prompt"):
+        return
+
+    try:
+        prompt = agent._build_system_prompt(None)
+        agent._cached_system_prompt = prompt
+        db.update_system_prompt(getattr(agent, "session_id", None) or session_key, prompt)
+    except Exception:
+        logger.debug("failed to persist live session system prompt", exc_info=True)
+
+
+def _append_model_switch_marker(session: dict | None, *, model: str, provider: str) -> None:
+    """Record a real system-history pivot after a live model switch."""
+    if not session:
+        return
+    session_key = str(session.get("session_key") or "").strip()
+    if not session_key:
+        return
+
+    provider_part = f" via provider {provider}" if provider else ""
+    marker = (
+        "[System: The active model for this chat has changed to "
+        f"{model}{provider_part}. From this point forward, use this runtime "
+        "metadata when answering questions about what model/provider is active.]"
+    )
+    entry = {"role": "system", "content": marker}
+
+    lock = session.get("history_lock")
+    if lock is not None:
+        with lock:
+            session.setdefault("history", []).append(entry)
+            session["history_version"] = int(session.get("history_version", 0)) + 1
+    else:
+        session.setdefault("history", []).append(entry)
+        session["history_version"] = int(session.get("history_version", 0)) + 1
+
+    try:
+        agent = session.get("agent")
+        db = getattr(agent, "_session_db", None) if agent is not None else None
+        if db is not None:
+            db.append_message(session_id=session_key, role="system", content=marker)
+            return
+
+        _ensure_session_db_row(session)
+        with _session_db(session) as scoped_db:
+            if scoped_db is not None:
+                scoped_db.append_message(
+                    session_id=session_key, role="system", content=marker
+                )
+    except Exception:
+        logger.debug("failed to persist model switch marker", exc_info=True)
+
+
 def _write_config_key(key_path: str, value):
    cfg = _load_cfg()
    current = cfg
@ -2092,6 +2155,10 @@ def _apply_model_switch(
        )
        _restart_slash_worker(sid, session)
        _persist_live_session_runtime(session)
+        _persist_live_session_system_prompt(session)
+        _append_model_switch_marker(
+            session, model=result.new_model, provider=result.target_provider
+        )
        _emit("session.info", sid, _session_info(agent, session))

    # Record the switch as a PER-SESSION override so a later rebuild of THIS