diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 2ad990afb1b..01a0859fd6a 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -3681,11 +3681,11 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch): "Model: anthropic/claude-sonnet-4.6\nProvider: anthropic" ) assert agent._cached_system_prompt == db.system_prompt - assert session["history"][-1]["role"] == "system" + assert session["history"][-1]["role"] == "user" assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"] assert db.messages[-1] == { "session_id": "session-key", - "role": "system", + "role": "user", "content": session["history"][-1]["content"], } # ...and the shared process env was NOT touched. diff --git a/tests/tui_gateway/test_model_switch_marker_role.py b/tests/tui_gateway/test_model_switch_marker_role.py new file mode 100644 index 00000000000..1312eae2333 --- /dev/null +++ b/tests/tui_gateway/test_model_switch_marker_role.py @@ -0,0 +1,105 @@ +"""Tests for _append_model_switch_marker role fix (issue #48338). + +The model switch marker must NOT use role="system" because strict providers +(vLLM, Qwen) reject system messages that appear mid-conversation. Using +role="user" is safe — the system prompt is prepended to the API message list, +so a user-role marker can appear at any later position, and the gateway's +sanitize/merge pass already coalesces consecutive user messages. +""" + +from __future__ import annotations + +import threading +from types import SimpleNamespace +from unittest.mock import MagicMock + +from tui_gateway.server import _append_model_switch_marker + + +class TestAppendModelSwitchMarkerRole: + """Verify the marker uses role='user', not role='system'.""" + + def test_marker_uses_user_role(self) -> None: + """The history entry must be role='user', not role='system'.""" + session: dict = {"session_key": "test-session", "history": []} + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert len(session["history"]) == 1 + entry = session["history"][0] + assert entry["role"] == "user", ( + f"Expected role='user' but got role='{entry['role']}'. " + "Strict providers (vLLM, Qwen) reject mid-conversation system messages." + ) + + def test_marker_content_preserved(self) -> None: + """The marker content must still describe the model switch.""" + session: dict = {"session_key": "s", "history": []} + _append_model_switch_marker(session, model="qwen3.6-35b", provider="vllm") + content = session["history"][0]["content"] + assert "qwen3.6-35b" in content + assert "vllm" in content + assert "model" in content.lower() + + def test_marker_with_empty_provider(self) -> None: + """Provider part should be omitted when provider is empty.""" + session: dict = {"session_key": "s", "history": []} + _append_model_switch_marker(session, model="claude-sonnet-4", provider="") + content = session["history"][0]["content"] + assert "claude-sonnet-4" in content + assert "via provider" not in content + + def test_marker_with_lock(self) -> None: + """Marker should work correctly when session has a history_lock.""" + session: dict = { + "session_key": "s", + "history": [], + "history_lock": threading.Lock(), + } + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert len(session["history"]) == 1 + assert session["history"][0]["role"] == "user" + + def test_marker_increments_history_version(self) -> None: + """history_version should be incremented after appending.""" + session: dict = {"session_key": "s", "history": [], "history_version": 5} + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert session["history_version"] == 6 + + def test_no_marker_for_none_session(self) -> None: + """None session should be a no-op.""" + _append_model_switch_marker(None, model="gpt-4o", provider="openai") + + def test_no_marker_for_empty_session_key(self) -> None: + """Empty session_key should be a no-op.""" + session: dict = {"session_key": "", "history": []} + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert len(session["history"]) == 0 + + def test_marker_not_mid_history_system_after_turns(self) -> None: + """The marker appended after real turns must not be a system role. + + Reproduces the #48338 shape: a switch mid-conversation must not inject + a second system message after user/assistant turns, which strict + OpenAI-compatible providers reject. + """ + db = MagicMock() + session: dict = { + "session_key": "sess-1", + "history": [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ], + "history_version": 7, + "agent": SimpleNamespace(_session_db=db), + } + _append_model_switch_marker( + session, model="qwen3.6-35b", provider="vllm" + ) + marker = session["history"][-1] + assert marker["role"] == "user" + assert session["history_version"] == 8 + # The persisted row must mirror the in-memory role. + db.append_message.assert_called_once_with( + session_id="sess-1", + role="user", + content=marker["content"], + ) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 53c9f266f94..21590a51582 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2198,7 +2198,11 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s f"{model}{provider_part}. From this point forward, use this runtime " "metadata when answering questions about what model/provider is active.]" ) - entry = {"role": "system", "content": marker} + # Persist as a user message, not a system message. The gateway appends + # this marker after prior conversation turns, and strict OpenAI-compatible + # providers (vLLM, Qwen) reject system messages that are not at the + # beginning of the API message list (#48338). + entry = {"role": "user", "content": marker} lock = session.get("history_lock") if lock is not None: @@ -2213,14 +2217,14 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s agent = session.get("agent") db = getattr(agent, "_session_db", None) if agent is not None else None if db is not None: - db.append_message(session_id=session_key, role="system", content=marker) + db.append_message(session_id=session_key, role="user", content=marker) return _ensure_session_db_row(session) with _session_db(session) as scoped_db: if scoped_db is not None: scoped_db.append_message( - session_id=session_key, role="system", content=marker + session_id=session_key, role="user", content=marker ) except Exception: logger.debug("failed to persist model switch marker", exc_info=True)