From 6d05e3d56f49a67ec9084bdd1a74befd8723e5f2 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 12 Apr 2026 01:58:17 -0700
Subject: [PATCH] fix(gateway): evict cached agent on /model switch + add
 diagnostic logging (#8276)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After /model switches the model (both picker and text paths), the cached
agent's config signature becomes stale — the agent was updated in-place
via switch_model() but the cache tuple's signature was never refreshed.
The next turn *should* detect the signature mismatch and create a fresh
agent, but this relies on the new model's signature differing from the
old one in _agent_config_signature().

Evicting the cached agent explicitly after storing the session override
is more defensive — the next turn is guaranteed to create a fresh agent
from the override without depending on signature mismatch detection.

Also adds debug logging at three key decision points so we can trace
exactly what happens when /model + /retry interact:
- _resolve_session_agent_runtime: which override path is taken (fast
  with api_key vs fallback), or why no override was found
- _run_agent.run_sync: final resolved model/provider before agent
  creation

Reported: /model switch to xiaomi/mimo-v2-pro followed by /retry still
used the old model (glm-5.1).
---
 gateway/run.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index d95d5eb1e1..3edd905f35 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -876,7 +876,24 @@ class GatewayRunner:
                 "api_mode": override.get("api_mode"),
             }
             if override_runtime.get("api_key"):
+                logger.debug(
+                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
+                    (resolved_session_key or "")[:30], model, override_model,
+                    override_runtime.get("provider"),
+                )
                 return override_model, override_runtime
+            # Override exists but has no api_key — fall through to env-based
+            # resolution and apply model/provider from the override on top.
+            logger.debug(
+                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
+                (resolved_session_key or "")[:30], model, override_model,
+            )
+        else:
+            logger.debug(
+                "No session model override: session=%s config_model=%s override_keys=%s",
+                (resolved_session_key or "")[:30], model,
+                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
+            )
 
         runtime_kwargs = _resolve_runtime_agent_kwargs()
         if override and resolved_session_key:
@@ -4304,6 +4321,11 @@ class GatewayRunner:
                             "api_mode": result.api_mode,
                         }
 
+                        # Evict cached agent so the next turn creates a fresh
+                        # agent from the override rather than relying on the
+                        # stale cache signature to trigger a rebuild.
+                        _self._evict_cached_agent(_session_key)
+
                         # Build confirmation text
                         plabel = result.provider_label or result.target_provider
                         lines = [f"Model switched to `{result.new_model}`"]
@@ -4417,6 +4439,10 @@ class GatewayRunner:
             "api_mode": result.api_mode,
         }
 
+        # Evict cached agent so the next turn creates a fresh agent from the
+        # override rather than relying on cache signature mismatch detection.
+        self._evict_cached_agent(session_key)
+
         # Persist to config if --global
         if persist_global:
             try:
@@ -7545,6 +7571,10 @@ class GatewayRunner:
                     session_key=session_key,
                     user_config=user_config,
                 )
+                logger.debug(
+                    "run_agent resolved: model=%s provider=%s session=%s",
+                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                )
             except Exception as exc:
                 return {
                     "final_response": f"⚠️ Provider authentication failed: {exc}",