fix(agent): keep system-prompt model identity in sync across provider failover

The session-stable system prompt embeds Model:/Provider: identity lines, but mid-turn failover (try_activate_fallback) swaps the runtime without touching them, so a fallback model misreports itself as the primary when asked "what model are you?". rewrite_prompt_model_identity() rewrites the last occurrence of each line on _cached_system_prompt when a fallback activates (and back on restore, byte-identical so the primary's prefix cache still hits). The rewrite is never persisted to the session DB. _sync_failover_system_message() patches the in-flight api_messages[0] at all 8 failover sites so the current turn ships the corrected identity. Cache-safe: the fallback's prefix cache is cold on a model switch anyway. Co-authored-by: Hermes Agent <noreply@nousresearch.com>
2026-06-21 10:22:18 +00:00 · 2026-06-19 16:17:58 -07:00 · 2026-06-19 16:17:58 -07:00 · c884ff64ea
commit c884ff64ea
parent 11c6f4c7bc
4 changed files with 184 additions and 0 deletions
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -1050,6 +1050,11 @@ def restore_primary_runtime(agent) -> bool:
        agent._fallback_activated = False
        agent._fallback_index = 0

+        # Undo the fallback's identity rewrite so the prompt is
+        # byte-identical to the stored copy again (prefix cache match).
+        from agent.chat_completion_helpers import rewrite_prompt_model_identity
+        rewrite_prompt_model_identity(agent, rt["model"], rt["provider"])
+
        logger.info(
            "Primary runtime restored for new turn: %s (%s)",
            agent.model, agent.provider,
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@ -1042,6 +1042,35 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic



+def rewrite_prompt_model_identity(agent, model: str, provider: str) -> None:
+    """Point the cached system prompt's ``Model:``/``Provider:`` lines at
+    the active runtime after a provider switch.
+
+    The system prompt is session-stable and replayed verbatim for prefix-cache
+    warmth, but after a failover the new backend's cache is cold anyway —
+    while a stale identity line makes the agent misreport which model it is
+    when asked.  Rewrite the lines in place WITHOUT persisting to the session
+    DB: the stored row keeps the primary's labels, so when the primary is
+    restored the prompt is byte-identical to the stored copy again and its
+    prefix cache still matches.
+
+    Only the LAST occurrence of each line is touched — the identity lines
+    live in the volatile tail of the prompt, and earlier matches could be
+    user content (memory snapshots, context files).
+
+    Args:
+        agent: Object whose ``_cached_system_prompt`` attribute is read and
+            reassigned.  Nothing happens when the attribute is missing, not
+            a ``str``, or empty.
+        model: Value written after ``Model: ``; a falsy value leaves that
+            line untouched.
+        provider: Value written after ``Provider: ``; a falsy value leaves
+            that line untouched.
+    """
+    sp = getattr(agent, "_cached_system_prompt", None)
+    if not isinstance(sp, str) or not sp:
+        return
+    for label, value in (("Model", model), ("Provider", provider)):
+        if not value:
+            continue
+        # Match up to, but not including, the line terminator.  ``.*$`` would
+        # also swallow the ``\r`` of a CRLF-terminated line, and the rewrite
+        # would then silently turn that line's ending into a bare ``\n``.
+        matches = list(re.finditer(rf"(?m)^{label}: [^\r\n]*", sp))
+        if matches:
+            last = matches[-1]
+            sp = f"{sp[:last.start()]}{label}: {value}{sp[last.end():]}"
+    agent._cached_system_prompt = sp
+
+
 def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool:
    """Switch to the next fallback model/provider in the chain.

@ -1287,6 +1316,10 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                api_mode=agent.api_mode,
            )

+        # Keep the prompt's self-identity in sync with the model actually
+        # answering, so "what model are you?" doesn't report the primary.
+        rewrite_prompt_model_identity(agent, fb_model, fb_provider)
+
        agent._buffer_status(
            f"🔄 Primary model failed — switching to fallback: "
            f"{fb_model} via {fb_provider}"
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -466,6 +466,32 @@ def _content_policy_blocked_result(
    }


+def _sync_failover_system_message(agent, api_messages, active_system_prompt):
+    """Push the agent's cached system prompt into the in-flight message list.
+
+    ``try_activate_fallback`` calls ``rewrite_prompt_model_identity``, which
+    updates the ``Model:``/``Provider:`` lines on
+    ``agent._cached_system_prompt``.  The ``api_messages`` list for the
+    current call block was assembled before that rewrite, so its leading
+    system message still carries the old identity until it is overwritten
+    here.
+
+    When the cached prompt is missing, not a string, or empty, nothing is
+    touched and ``active_system_prompt`` is handed back unchanged.
+    Otherwise, if ``api_messages[0]`` has role ``"system"``, its ``content``
+    is replaced in place with the cached prompt — followed by a blank line
+    and ``agent.ephemeral_system_prompt`` when that is set, with the combined
+    text stripped — and the cached prompt is returned for the caller to use
+    as its new ``active_system_prompt``.
+    """
+    cached = getattr(agent, "_cached_system_prompt", None)
+    if not (isinstance(cached, str) and cached):
+        return active_system_prompt
+
+    # Nothing to patch unless the list opens with a system message.
+    if not api_messages:
+        return cached
+    head = api_messages[0]
+    if head.get("role") != "system":
+        return cached
+
+    extra = agent.ephemeral_system_prompt
+    head["content"] = (cached + "\n\n" + extra).strip() if extra else cached
+    return cached
+
+
 def run_conversation(
    agent,
    user_message: str,
@ -940,6 +966,8 @@ def run_conversation(
                        )
                        agent._buffer_status(f"⏳ {_nous_msg}")
                        if agent._try_activate_fallback():
+                            active_system_prompt = _sync_failover_system_message(
+                                agent, api_messages, active_system_prompt)
                            retry_count = 0
                            compression_attempts = 0
                            _retry.primary_recovery_attempted = False
@ -1265,6 +1293,8 @@ def run_conversation(
                    if agent._fallback_index < len(agent._fallback_chain):
                        agent._buffer_status("⚠️ Empty/malformed response — switching to fallback...")
                    if agent._try_activate_fallback():
+                        active_system_prompt = _sync_failover_system_message(
+                            agent, api_messages, active_system_prompt)
                        retry_count = 0
                        compression_attempts = 0
                        _retry.primary_recovery_attempted = False
@ -1336,6 +1366,8 @@ def run_conversation(
                        if agent._has_pending_fallback():
                            agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
                        if agent._try_activate_fallback():
+                            active_system_prompt = _sync_failover_system_message(
+                                agent, api_messages, active_system_prompt)
                            retry_count = 0
                            compression_attempts = 0
                            _retry.primary_recovery_attempted = False
@ -1479,6 +1511,8 @@ def run_conversation(
                            "⚠️ Model declined to respond (safety refusal) — trying fallback..."
                        )
                    if agent._try_activate_fallback():
+                        active_system_prompt = _sync_failover_system_message(
+                            agent, api_messages, active_system_prompt)
                        retry_count = 0
                        compression_attempts = 0
                        _retry.primary_recovery_attempted = False
@ -2783,6 +2817,8 @@ def run_conversation(
                        else:
                            agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
                        if agent._try_activate_fallback(reason=classified.reason):
+                            active_system_prompt = _sync_failover_system_message(
+                                agent, api_messages, active_system_prompt)
                            retry_count = 0
                            compression_attempts = 0
                            _retry.primary_recovery_attempted = False
@ -3186,6 +3222,8 @@ def run_conversation(
                        else:
                            agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
                    if agent._try_activate_fallback():
+                        active_system_prompt = _sync_failover_system_message(
+                            agent, api_messages, active_system_prompt)
                        retry_count = 0
                        compression_attempts = 0
                        _retry.primary_recovery_attempted = False
@ -3333,6 +3371,8 @@ def run_conversation(
                    if agent._has_pending_fallback():
                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
                    if agent._try_activate_fallback():
+                        active_system_prompt = _sync_failover_system_message(
+                            agent, api_messages, active_system_prompt)
                        retry_count = 0
                        compression_attempts = 0
                        _retry.primary_recovery_attempted = False
@ -4279,6 +4319,8 @@ def run_conversation(
                            "switching to fallback provider..."
                        )
                        if agent._try_activate_fallback():
+                            active_system_prompt = _sync_failover_system_message(
+                                agent, api_messages, active_system_prompt)
                            agent._empty_content_retries = 0
                            agent._buffer_status(
                                f"↻ Switched to fallback: {agent.model} "
--- a/tests/agent/test_failover_identity.py
+++ b/tests/agent/test_failover_identity.py
@ -0,0 +1,104 @@
+"""Tests for system-prompt model-identity sync across provider failover.
+
+The system prompt is session-stable and embeds ``Model:``/``Provider:``
+identity lines.  When ``try_activate_fallback`` swaps the runtime, the
+prompt must be rewritten in place (and synced into the in-flight
+``api_messages``) or the agent reports the primary model's name while a
+fallback model is answering — e.g. a local gemma fallback claiming to be
+gpt-5.4-mini after a Codex usage-limit 429.
+"""
+
+from types import SimpleNamespace
+
+from agent.chat_completion_helpers import rewrite_prompt_model_identity
+from agent.conversation_loop import _sync_failover_system_message
+
+
+# Fixture prompt: a ``Model:`` line early in the text (the decoy) and the
+# ``Model:``/``Provider:`` identity pair as the final two lines.
+_PROMPT = (
+    "You are a helpful assistant.\n"
+    "\n"
+    "Memory note at line start:\n"
+    "Model: decoy-from-memory\n"
+    "\n"
+    "Conversation started: Wednesday, June 10, 2026\n"
+    "Model: gpt-5.4-mini\n"
+    "Provider: openai-codex"
+)
+
+
+def _agent(prompt=_PROMPT, ephemeral=None):
+    """Return a bare namespace exposing the two attributes the helpers read."""
+    stub = SimpleNamespace()
+    stub._cached_system_prompt = prompt
+    stub.ephemeral_system_prompt = ephemeral
+    return stub
+
+
+class TestRewritePromptModelIdentity:
+    """Exercise ``rewrite_prompt_model_identity`` against the fixture prompt."""
+
+    def test_swaps_identity_lines_to_fallback_runtime(self):
+        stub = _agent()
+        rewrite_prompt_model_identity(stub, "gemma4:e2b-mlx", "custom")
+        rewritten = stub._cached_system_prompt
+        for fresh in ("Model: gemma4:e2b-mlx", "Provider: custom"):
+            assert fresh in rewritten
+        for stale in ("Model: gpt-5.4-mini", "Provider: openai-codex"):
+            assert stale not in rewritten
+
+    def test_only_last_occurrence_is_rewritten(self):
+        stub = _agent()
+        rewrite_prompt_model_identity(stub, "gemma4:e2b-mlx", "custom")
+        # The decoy line stands in for user content (memory snapshots,
+        # context files) that happens to look like an identity line; it
+        # has to come through the rewrite unchanged.
+        assert "Model: decoy-from-memory" in stub._cached_system_prompt
+
+    def test_round_trip_restores_byte_identical_prompt(self):
+        # Switching to the fallback and then back to the primary (what
+        # restore_primary_runtime does) must reproduce the original prompt
+        # exactly, so the primary's prefix cache still hits afterwards.
+        stub = _agent()
+        switches = (
+            ("gemma4:e2b-mlx", "custom"),
+            ("gpt-5.4-mini", "openai-codex"),
+        )
+        for model, provider in switches:
+            rewrite_prompt_model_identity(stub, model, provider)
+        assert stub._cached_system_prompt == _PROMPT
+
+    def test_noop_when_prompt_missing_or_empty(self):
+        for blank in (None, ""):
+            stub = _agent(prompt=blank)
+            rewrite_prompt_model_identity(stub, "m", "p")
+            assert stub._cached_system_prompt == blank
+
+    def test_empty_values_leave_lines_unchanged(self):
+        stub = _agent()
+        rewrite_prompt_model_identity(stub, "", "")
+        assert stub._cached_system_prompt == _PROMPT
+
+
+class TestSyncFailoverSystemMessage:
+    """Exercise ``_sync_failover_system_message`` on small message lists."""
+
+    def test_patches_in_flight_system_message(self):
+        stub = _agent()
+        rewrite_prompt_model_identity(stub, "gemma4:e2b-mlx", "custom")
+        system_msg = {"role": "system", "content": _PROMPT}
+        user_msg = {"role": "user", "content": "what model are you?"}
+        in_flight = [system_msg, user_msg]
+        returned = _sync_failover_system_message(stub, in_flight, _PROMPT)
+        assert "Model: gemma4:e2b-mlx" in in_flight[0]["content"]
+        assert returned == stub._cached_system_prompt
+
+    def test_appends_ephemeral_system_prompt(self):
+        stub = _agent(ephemeral="Stay terse.")
+        in_flight = [{"role": "system", "content": _PROMPT}]
+        _sync_failover_system_message(stub, in_flight, _PROMPT)
+        patched = in_flight[0]["content"]
+        assert patched.endswith("Stay terse.")
+
+    def test_noop_without_cached_prompt(self):
+        stub = _agent(prompt=None)
+        in_flight = [{"role": "system", "content": "original"}]
+        returned = _sync_failover_system_message(stub, in_flight, "active")
+        assert in_flight[0]["content"] == "original"
+        assert returned == "active"
+
+    def test_noop_when_first_message_is_not_system(self):
+        stub = _agent()
+        in_flight = [{"role": "user", "content": "hi"}]
+        returned = _sync_failover_system_message(stub, in_flight, "active")
+        assert in_flight == [{"role": "user", "content": "hi"}]
+        # The cached prompt is still handed back even though no message
+        # was patched, for subsequent call-block rebuilds.
+        assert returned == stub._cached_system_prompt