fix(gateway): evict cached agent on /model switch + add diagnostic logging (#8276)

After /model switches the model (both picker and text paths), the cached agent's config signature becomes stale — the agent was updated in-place via switch_model() but the cache tuple's signature was never refreshed. The next turn *should* detect the signature mismatch and create a fresh agent, but this relies on the new model's signature differing from the old one in _agent_config_signature().

Evicting the cached agent explicitly after storing the session override is more defensive — the next turn is guaranteed to create a fresh agent from the override without depending on signature mismatch detection.

Also adds debug logging at three key decision points so we can trace exactly what happens when /model + /retry interact:

- _resolve_session_agent_runtime: which override path is taken (fast with api_key vs fallback), or why no override was found
- _run_agent.run_sync: final resolved model/provider before agent creation

Reported: /model switch to xiaomi/mimo-v2-pro followed by /retry still used the old model (glm-5.1).
2026-04-25 00:51:20 +00:00 · 2026-04-12 01:58:17 -07:00 · 2026-04-12 01:58:17 -07:00 · 6d05e3d56f
commit 6d05e3d56f
parent 4aa534eae5
1 changed files with 30 additions and 0 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -876,7 +876,24 @@ class GatewayRunner:
                "api_mode": override.get("api_mode"),
            }
            if override_runtime.get("api_key"):
+                logger.debug(
+                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
+                    (resolved_session_key or "")[:30], model, override_model,
+                    override_runtime.get("provider"),
+                )
                return override_model, override_runtime
+            # Override exists but has no api_key — fall through to env-based
+            # resolution and apply model/provider from the override on top.
+            logger.debug(
+                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
+                (resolved_session_key or "")[:30], model, override_model,
+            )
+        else:
+            logger.debug(
+                "No session model override: session=%s config_model=%s override_keys=%s",
+                (resolved_session_key or "")[:30], model,
+                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
+            )

        runtime_kwargs = _resolve_runtime_agent_kwargs()
        if override and resolved_session_key:
@@ -4304,6 +4321,11 @@ class GatewayRunner:
                            "api_mode": result.api_mode,
                        }

+                        # Evict cached agent so the next turn creates a fresh
+                        # agent from the override rather than relying on the
+                        # stale cache signature to trigger a rebuild.
+                        _self._evict_cached_agent(_session_key)
+
                        # Build confirmation text
                        plabel = result.provider_label or result.target_provider
                        lines = [f"Model switched to `{result.new_model}`"]
@@ -4417,6 +4439,10 @@ class GatewayRunner:
            "api_mode": result.api_mode,
        }

+        # Evict cached agent so the next turn creates a fresh agent from the
+        # override rather than relying on cache signature mismatch detection.
+        self._evict_cached_agent(session_key)
+
        # Persist to config if --global
        if persist_global:
            try:
@@ -7545,6 +7571,10 @@ class GatewayRunner:
                    session_key=session_key,
                    user_config=user_config,
                )
+                logger.debug(
+                    "run_agent resolved: model=%s provider=%s session=%s",
+                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                )
            except Exception as exc:
                return {
                    "final_response": f"⚠️ Provider authentication failed: {exc}",