From 5a3092b601060e04dccbb515961eaed977c62d7b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 02:33:28 -0700 Subject: [PATCH] fix(desktop): scope in-session /model switch per-session, stop process-env leak (#41120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(desktop): scope in-session /model switch per-session, stop process-env leak The desktop/dashboard tui_gateway backend hosts every same-profile session in ONE process. An in-session /model switch wrote process-global env vars (HERMES_MODEL / HERMES_INFERENCE_MODEL / HERMES_TUI_PROVIDER / HERMES_INFERENCE_PROVIDER), which _resolve_startup_runtime() reads when building a fresh agent. So switching the model in one session leaked into every other live session's next agent rebuild (/new, resume) — changing the model in session B silently changed it in session A. Fix: record the switch as a per-session model_override on the session dict instead of mutating os.environ. _make_agent honors that override on rebuild (carrying the concrete base_url/api_key/api_mode the switch resolved), and falls back to global config when absent. Global persistence on the --global flag is unchanged. Also a cleaner fix for #16857 (/new after switching to a custom-provider model): the override carries the resolved credentials, so the rebuild keeps the right endpoint without relying on the leaky env vars. Reported via Twitter (@Da7_Tech): MiniMax M3 in one session + GLM 5.1 in another interfere when switching between them. * test(tui_gateway): align /model switch tests with per-session override contract The three test_config_set_model_syncs_* tests asserted the old leaky contract (switch writes HERMES_MODEL / HERMES_TUI_PROVIDER / HERMES_INFERENCE_PROVIDER to process env). That env-sync IS the cross-session contamination bug this PR removes. Updated to assert the new contract: shared process env untouched, the switch recorded as a per-session model_override carrying provider/model/base_url/ api_key/api_mode. #16857's intent (a custom-provider switch survives /new) is still covered — now via the override _make_agent honors on rebuild. --- tests/test_tui_gateway_server.py | 135 ++++++++++------- tests/tui_gateway/test_make_agent_provider.py | 137 ++++++++++++++++++ tui_gateway/server.py | 89 +++++++++--- 3 files changed, 291 insertions(+), 70 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index e7d68736415..d8b4723e3a2 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -2169,15 +2169,15 @@ def test_config_set_model_global_persists(monkeypatch): assert saved["model"]["base_url"] == "https://api.anthropic.com" -def test_config_set_model_syncs_inference_provider_env(monkeypatch): - """After an explicit provider switch, HERMES_INFERENCE_PROVIDER must - reflect the user's choice so ambient re-resolution (credential pool - refresh, aux clients) picks up the new provider instead of the original - one persisted in config or shell env. +def test_config_set_model_does_not_leak_inference_provider_env(monkeypatch): + """A /model switch must NOT mutate process-global env vars. The desktop / + dashboard tui_gateway backend hosts every same-profile session in one + process; writing HERMES_INFERENCE_PROVIDER on a switch leaked the new + provider into every other live session's next agent rebuild. The switch + must instead record a per-session override and leave shared env untouched. - Regression: a TUI user switched openrouter → anthropic and the TUI kept - trying openrouter because the env-var-backed resolvers still saw the old - provider. + (Was test_config_set_model_syncs_inference_provider_env, which asserted the + leaky env-sync contract that caused the cross-session contamination bug.) """ class _Agent: @@ -2199,7 +2199,8 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch): warning_message="", ) - server._sessions["sid"] = _session(agent=_Agent()) + session = _session(agent=_Agent()) + server._sessions["sid"] = session monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter") monkeypatch.setattr( "hermes_cli.model_switch.switch_model", lambda **_kwargs: result @@ -2207,27 +2208,36 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch): monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None) monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None) - server.handle_request( - { - "id": "1", - "method": "config.set", - "params": { - "session_id": "sid", - "key": "model", - "value": "claude-sonnet-4.6 --provider anthropic", - }, - } - ) + try: + server.handle_request( + { + "id": "1", + "method": "config.set", + "params": { + "session_id": "sid", + "key": "model", + "value": "claude-sonnet-4.6 --provider anthropic", + }, + } + ) - assert os.environ["HERMES_INFERENCE_PROVIDER"] == "anthropic" + # Shared process env is UNCHANGED (the contamination vector is gone). + assert os.environ["HERMES_INFERENCE_PROVIDER"] == "openrouter" + # The switch was recorded as a per-session override instead. + assert session["model_override"]["provider"] == "anthropic" + assert session["model_override"]["model"] == "claude-sonnet-4.6" + finally: + server._sessions.clear() -def test_config_set_model_syncs_tui_provider_unconditionally(monkeypatch): - """Regression for #16857: /model must set HERMES_TUI_PROVIDER even when - it wasn't pre-set on launch, so a later /new (which re-runs - _resolve_startup_runtime) honours the user's explicit provider choice - instead of falling through to static-catalog detection and picking a - coincidentally-matching native provider. +def test_config_set_model_records_per_session_override_not_env(monkeypatch): + """Regression for #16857 via the per-session override (not env vars): + /model must record the user's explicit provider on the session so a later + /new (which rebuilds via _make_agent honoring model_override) honours that + choice — WITHOUT writing process-global env vars that would leak into + sibling sessions. + + (Was test_config_set_model_syncs_tui_provider_unconditionally.) """ class _Agent: @@ -2249,7 +2259,8 @@ def test_config_set_model_syncs_tui_provider_unconditionally(monkeypatch): warning_message="", ) - server._sessions["sid"] = _session(agent=_Agent()) + session = _session(agent=_Agent()) + server._sessions["sid"] = session monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False) monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False) monkeypatch.setattr( @@ -2258,26 +2269,42 @@ def test_config_set_model_syncs_tui_provider_unconditionally(monkeypatch): monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None) monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None) - server.handle_request( - { - "id": "1", - "method": "config.set", - "params": { - "session_id": "sid", - "key": "model", - "value": "deepseek-v4-pro --provider custom:xuanji", - }, - } - ) + try: + server.handle_request( + { + "id": "1", + "method": "config.set", + "params": { + "session_id": "sid", + "key": "model", + "value": "deepseek-v4-pro --provider custom:xuanji", + }, + } + ) - # Both env vars must reflect the user's choice. HERMES_TUI_PROVIDER is - # the canonical explicit-this-process carrier consumed by - # _resolve_startup_runtime() on /new. - assert os.environ["HERMES_TUI_PROVIDER"] == "custom:xuanji" - assert os.environ["HERMES_INFERENCE_PROVIDER"] == "custom:xuanji" + # No process-global env mutation. + assert "HERMES_TUI_PROVIDER" not in os.environ + assert "HERMES_INFERENCE_PROVIDER" not in os.environ + # The user's explicit provider + resolved endpoint live on the session, + # carried into the next /new rebuild by _make_agent. + override = session["model_override"] + assert override["provider"] == "custom:xuanji" + assert override["model"] == "deepseek-v4-pro" + assert override["base_url"] == "https://xuanji.example/v1" + assert override["api_key"] == "sk-xuanji" + assert override["api_mode"] == "chat_completions" + finally: + server._sessions.clear() -def test_config_set_model_syncs_tui_provider_env(monkeypatch): +def test_config_set_model_switches_agent_without_touching_env(monkeypatch): + """A /model switch mutates the target session's agent in place and records + a per-session override; it does NOT write HERMES_MODEL / HERMES_TUI_PROVIDER + etc. into the shared process environment. + + (Was test_config_set_model_syncs_tui_provider_env.) + """ + class Agent: model = "gpt-5.3-codex" provider = "openai-codex" @@ -2289,8 +2316,11 @@ def test_config_set_model_syncs_tui_provider_env(monkeypatch): self.provider = kwargs["new_provider"] agent = Agent() - server._sessions["sid"] = _session(agent=agent) + session = _session(agent=agent) + server._sessions["sid"] = session monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex") + monkeypatch.delenv("HERMES_MODEL", raising=False) + monkeypatch.delenv("HERMES_INFERENCE_MODEL", raising=False) monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None) monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None) @@ -2321,9 +2351,16 @@ def test_config_set_model_syncs_tui_provider_env(monkeypatch): ) assert resp["result"]["value"] == "anthropic/claude-sonnet-4.6" - assert os.environ["HERMES_TUI_PROVIDER"] == "anthropic" - assert os.environ["HERMES_MODEL"] == "anthropic/claude-sonnet-4.6" - assert os.environ["HERMES_INFERENCE_MODEL"] == "anthropic/claude-sonnet-4.6" + # Agent switched in place... + assert agent.model == "anthropic/claude-sonnet-4.6" + assert agent.provider == "anthropic" + # ...override recorded on the session... + assert session["model_override"]["model"] == "anthropic/claude-sonnet-4.6" + assert session["model_override"]["provider"] == "anthropic" + # ...and the shared process env was NOT touched. + assert os.environ["HERMES_TUI_PROVIDER"] == "openai-codex" + assert "HERMES_MODEL" not in os.environ + assert "HERMES_INFERENCE_MODEL" not in os.environ finally: server._sessions.clear() diff --git a/tests/tui_gateway/test_make_agent_provider.py b/tests/tui_gateway/test_make_agent_provider.py index 896f68a3828..0b147ee286e 100644 --- a/tests/tui_gateway/test_make_agent_provider.py +++ b/tests/tui_gateway/test_make_agent_provider.py @@ -233,3 +233,140 @@ def test_make_agent_tolerates_null_personalities_with_active_personality(): assert mock_agent.called assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None + + +def test_make_agent_honors_per_session_model_override(): + """Regression for cross-session model contamination: a per-session + ``model_override`` (set by an in-session /model switch) must drive the + rebuilt agent's model/provider/base_url, NOT global config — and without + reading process-global env vars that a sibling session may have changed. + """ + + # resolve_runtime_provider echoes the requested provider so we can prove + # the override's provider (not the global default) was passed through. + def echo_runtime(requested=None, target_model=None): + return { + "provider": requested or "GLOBAL_DEFAULT", + "base_url": "global-url", + "api_key": "global-key", + "api_mode": "chat_completions", + "command": None, + "args": None, + "credential_pool": None, + } + + fake_cfg = { + "agent": {"system_prompt": ""}, + "model": {"default": "global/model", "provider": "globalprov"}, + } + + override = { + "model": "zai/glm-5.1", + "provider": "zai", + "base_url": "https://api.z.ai/v1", + "api_key": "sk-glm", + "api_mode": "chat_completions", + } + + with ( + # Ensure no leaked env biases _resolve_startup_runtime (it must not even + # be consulted when an override is present). + patch.dict(os.environ, {}, clear=False), + patch("tui_gateway.server._load_cfg", return_value=fake_cfg), + patch("tui_gateway.server._get_db", return_value=MagicMock()), + patch("tui_gateway.server._load_reasoning_config", return_value=None), + patch("tui_gateway.server._load_service_tier", return_value=None), + patch("tui_gateway.server._load_enabled_toolsets", return_value=None), + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=echo_runtime, + ), + patch("run_agent.AIAgent") as mock_agent, + ): + for var in ( + "HERMES_MODEL", + "HERMES_INFERENCE_MODEL", + "HERMES_TUI_PROVIDER", + "HERMES_INFERENCE_PROVIDER", + ): + os.environ.pop(var, None) + + from tui_gateway.server import _make_agent + + _make_agent( + "sid-override", "key-override", model_override=override + ) + + kwargs = mock_agent.call_args.kwargs + assert kwargs["model"] == "zai/glm-5.1" + assert kwargs["provider"] == "zai" + # Concrete credentials from the switch survive the rebuild. + assert kwargs["base_url"] == "https://api.z.ai/v1" + assert kwargs["api_key"] == "sk-glm" + + +def test_apply_model_switch_does_not_leak_process_env(): + """Core fix for cross-session contamination: an in-session /model switch + must mutate only the target session (record a per-session override + switch + that session's agent in place) and must NOT write process-global env vars, + which the single-process desktop backend shares across every live session. + """ + from tui_gateway import server + + class _FakeResult: + success = True + error_message = "" + warning_message = "" + new_model = "zai/glm-5.1" + target_provider = "zai" + base_url = "https://api.z.ai/v1" + api_key = "sk-glm" + api_mode = "chat_completions" + + class _FakeAgent: + def __init__(self): + self.model = "minimax/m3" + self.provider = "minimax" + self.base_url = "" + self.api_key = "" + + def switch_model(self, **kw): + self.model = kw["new_model"] + self.provider = kw["new_provider"] + + env_keys = ( + "HERMES_MODEL", + "HERMES_INFERENCE_MODEL", + "HERMES_TUI_PROVIDER", + "HERMES_INFERENCE_PROVIDER", + ) + + sess_b = {"agent": _FakeAgent(), "session_key": "k-B", "model_override": None} + sess_a = {"agent": _FakeAgent(), "session_key": "k-A", "model_override": None} + + with ( + patch("hermes_cli.model_switch.parse_model_flags", + return_value=("glm-5.1", None, False, False)), + patch("hermes_cli.model_switch.switch_model", return_value=_FakeResult()), + patch("tui_gateway.server._emit"), + patch("tui_gateway.server._restart_slash_worker"), + patch("tui_gateway.server._session_info", return_value={}), + patch("tui_gateway.server._persist_model_switch") as mock_persist, + ): + before = {k: os.environ.get(k) for k in env_keys} + result = server._apply_model_switch("sidB", sess_b, "glm-5.1") + after = {k: os.environ.get(k) for k in env_keys} + + assert result["value"] == "zai/glm-5.1" + # No process-global env mutation (the contamination vector). + assert before == after + # persist_global was False → config untouched. + mock_persist.assert_not_called() + # Target session recorded a per-session override. + assert sess_b["model_override"]["model"] == "zai/glm-5.1" + assert sess_b["model_override"]["provider"] == "zai" + # The switched agent mutated in place. + assert sess_b["agent"].model == "zai/glm-5.1" + # Sibling session is completely untouched. + assert sess_a["model_override"] is None + assert sess_a["agent"].model == "minimax/m3" diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 0823490bff1..d85e78b9c8a 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1521,21 +1521,27 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict: _restart_slash_worker(session) _emit("session.info", sid, _session_info(agent, session)) - os.environ["HERMES_MODEL"] = result.new_model - os.environ["HERMES_INFERENCE_MODEL"] = result.new_model - # Keep the process-level provider env vars in sync with the user's - # explicit choice so any ambient re-resolution (credential pool refresh, - # compressor rebuild, aux clients) and startup re-resolution on /new - # both pick up the new provider instead of the original one persisted - # in config or env. + # Record the switch as a PER-SESSION override so a later rebuild of THIS + # session (e.g. /new via _reset_session_agent, or resume) re-derives the + # user's chosen model/provider instead of falling back to global config. # - # HERMES_TUI_PROVIDER is the canonical "explicit-this-process" carrier - # consumed by _resolve_startup_runtime() — set it unconditionally on - # /model so /new can't fall through to static-catalog detection and - # pick a coincidentally-matching native provider (fixes #16857). - if result.target_provider: - os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider - os.environ["HERMES_TUI_PROVIDER"] = result.target_provider + # We deliberately do NOT write process-global env vars (HERMES_MODEL / + # HERMES_INFERENCE_MODEL / HERMES_TUI_PROVIDER / HERMES_INFERENCE_PROVIDER) + # here. The desktop backend hosts every same-profile session in ONE process, + # so mutating os.environ on a /model switch leaked the new model/provider + # into every OTHER live session's next agent rebuild — switching the model + # in one session silently changed it in the others (the cross-session + # contamination bug). agent.switch_model() above already mutated the right + # agent in place; the override dict makes that choice survive a rebuild + # without touching shared process state. + if isinstance(session, dict): + session["model_override"] = { + "model": result.new_model, + "provider": result.target_provider, + "base_url": result.base_url, + "api_key": result.api_key, + "api_mode": result.api_mode, + } if persist_global: _persist_model_switch(result) return {"value": result.new_model, "warning": result.warning_message or ""} @@ -2546,7 +2552,14 @@ def _reset_session_agent(sid: str, session: dict) -> dict: tokens = _set_session_context(session["session_key"]) try: new_agent = _make_agent( - sid, session["session_key"], session_id=session["session_key"] + sid, + session["session_key"], + session_id=session["session_key"], + # Preserve this session's chosen model across /new so a reset + # doesn't silently revert to global config (or to a model another + # session set). See the cross-session-contamination note in + # _apply_model_switch. + model_override=session.get("model_override"), ) finally: _clear_session_context(tokens) @@ -2567,7 +2580,13 @@ def _reset_session_agent(sid: str, session: dict) -> dict: return info -def _make_agent(sid: str, key: str, session_id: str | None = None, session_db=None): +def _make_agent( + sid: str, + key: str, + session_id: str | None = None, + session_db=None, + model_override: dict | None = None, +): from run_agent import AIAgent from hermes_cli.runtime_provider import resolve_runtime_provider @@ -2601,11 +2620,35 @@ def _make_agent(sid: str, key: str, session_id: str | None = None, session_db=No system_prompt = "\n\n".join( part for part in (system_prompt, skills_prompt) if part ).strip() - model, requested_provider = _resolve_startup_runtime() - runtime = resolve_runtime_provider( - requested=requested_provider, - target_model=model or None, - ) + # Prefer a per-session model override (set by a prior in-session /model + # switch) over global config/env resolution. This keeps a rebuilt session + # (/new, resume) on the model the user picked FOR THIS SESSION, without + # reading process-global env vars that another session may have changed. + if model_override and model_override.get("model"): + model = str(model_override.get("model") or "") + requested_provider = model_override.get("provider") or None + override_base_url = model_override.get("base_url") + override_api_key = model_override.get("api_key") + override_api_mode = model_override.get("api_mode") + runtime = resolve_runtime_provider( + requested=requested_provider, + target_model=model or None, + ) + # The switch already resolved concrete credentials/endpoint; honor them + # so a custom/named endpoint survives the rebuild even if global + # resolution would pick a different one. + if override_base_url: + runtime["base_url"] = override_base_url + if override_api_key: + runtime["api_key"] = override_api_key + if override_api_mode: + runtime["api_mode"] = override_api_mode + else: + model, requested_provider = _resolve_startup_runtime() + runtime = resolve_runtime_provider( + requested=requested_provider, + target_model=model or None, + ) return AIAgent( model=model, max_iterations=_cfg_max_turns(cfg, 90), @@ -2659,6 +2702,10 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80): "tool_progress_mode": _load_tool_progress_mode(), "edit_snapshots": {}, "tool_started_at": {}, + # Per-session model override set by an in-session /model switch. + # Honored on rebuild (/new, resume) so a switch in THIS session + # never leaks into siblings via process-global env vars. + "model_override": None, # Pin async event emissions to whichever transport created the # session (stdio for Ink, JSON-RPC WS for the dashboard sidebar). "transport": current_transport() or _stdio_transport,