From 6d05e3d56f49a67ec9084bdd1a74befd8723e5f2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 12 Apr 2026 01:58:17 -0700 Subject: [PATCH] fix(gateway): evict cached agent on /model switch + add diagnostic logging (#8276) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After /model switches the model (both picker and text paths), the cached agent's config signature becomes stale — the agent was updated in-place via switch_model() but the cache tuple's signature was never refreshed. The next turn *should* detect the signature mismatch and create a fresh agent, but this relies on the new model's signature differing from the old one in _agent_config_signature(). Evicting the cached agent explicitly after storing the session override is more defensive — the next turn is guaranteed to create a fresh agent from the override without depending on signature mismatch detection. Also adds four debug log statements at the key decision points so we can trace exactly what happens when /model + /retry interact: - _resolve_session_agent_runtime: which override path is taken (fast with api_key vs fallback), or that no override was found - _run_agent.run_sync: final resolved model/provider before agent creation Reported: /model switch to xiaomi/mimo-v2-pro followed by /retry still used the old model (glm-5.1). 
--- gateway/run.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index d95d5eb1e1..3edd905f35 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -876,7 +876,24 @@ class GatewayRunner: "api_mode": override.get("api_mode"), } if override_runtime.get("api_key"): + logger.debug( + "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s", + (resolved_session_key or "")[:30], model, override_model, + override_runtime.get("provider"), + ) return override_model, override_runtime + # Override exists but has no api_key — fall through to env-based + # resolution and apply model/provider from the override on top. + logger.debug( + "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s", + (resolved_session_key or "")[:30], model, override_model, + ) + else: + logger.debug( + "No session model override: session=%s config_model=%s override_keys=%s", + (resolved_session_key or "")[:30], model, + list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]", + ) runtime_kwargs = _resolve_runtime_agent_kwargs() if override and resolved_session_key: @@ -4304,6 +4321,11 @@ class GatewayRunner: "api_mode": result.api_mode, } + # Evict cached agent so the next turn creates a fresh + # agent from the override rather than relying on the + # stale cache signature to trigger a rebuild. + _self._evict_cached_agent(_session_key) + # Build confirmation text plabel = result.provider_label or result.target_provider lines = [f"Model switched to `{result.new_model}`"] @@ -4417,6 +4439,10 @@ class GatewayRunner: "api_mode": result.api_mode, } + # Evict cached agent so the next turn creates a fresh agent from the + # override rather than relying on cache signature mismatch detection. 
+ self._evict_cached_agent(session_key) + # Persist to config if --global if persist_global: try: @@ -7545,6 +7571,10 @@ class GatewayRunner: session_key=session_key, user_config=user_config, ) + logger.debug( + "run_agent resolved: model=%s provider=%s session=%s", + model, runtime_kwargs.get("provider"), (session_key or "")[:30], + ) except Exception as exc: return { "final_response": f"⚠️ Provider authentication failed: {exc}",