# --- tests/test_tui_gateway_server.py ---


def test_config_sync_config_wins_over_env_seed(monkeypatch):
    """A config.yaml edit must beat the HERMES_INFERENCE_MODEL env seed.

    Hosted instances export HERMES_INFERENCE_MODEL once at provision time.
    The per-turn sync has to track later config.yaml changes instead of
    remaining stuck on that seed value.
    """
    switched = []

    def _fake_load_cfg():
        return {"model": {"default": "new/model"}}

    def _record_switch(sid, sess, raw, **kw):
        # Only the raw model string matters for this assertion.
        switched.append(raw)

    monkeypatch.setenv("HERMES_INFERENCE_MODEL", "seed/model")
    monkeypatch.delenv("HERMES_MODEL", raising=False)
    monkeypatch.setattr(server, "_load_cfg", _fake_load_cfg)
    monkeypatch.setattr(server, "_apply_model_switch", _record_switch)

    # The session last saw the env-seeded model with no explicit provider.
    session = _sync_test_session(config_model_seen=("seed/model", ""))

    server._sync_agent_model_with_config("sid", session)

    assert switched == ["new/model"]
    assert session["config_model_seen"] == ("new/model", "")


# --- tui_gateway/server.py ---


def _config_model_target() -> tuple[str, str]:
    """Return the (model, provider) pair chosen by config, env as fallback.

    Precedence here is deliberately the reverse of `_resolve_model()`'s
    startup order: config.yaml outranks HERMES_MODEL /
    HERMES_INFERENCE_MODEL. Those env vars act as a provision-time seed
    (hosted instances put HERMES_INFERENCE_MODEL in the container env). If
    they took priority, the per-turn sync would never move off the seed and
    model changes made via dashboard/CLI would not reach an open chat,
    which is precisely the problem this sync is meant to solve.

    The provider is an empty string when unset or set to "auto".
    """
    section = _load_cfg().get("model")

    if isinstance(section, str):
        # Shorthand form: `model: some/name` carries no provider.
        chosen, backend = section.strip(), ""
    elif isinstance(section, dict):
        chosen = str(section.get("default") or "").strip()
        backend = str(section.get("provider") or "").strip()
        if backend.lower() == "auto":
            backend = ""
    else:
        chosen, backend = "", ""

    # Only consult the env/startup resolution when config names no model.
    return (chosen or _resolve_model()), backend