From 61622bb56a7a24c0fc39e8a5a46537dfd2b2d9b6 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 28 Jun 2026 04:28:55 -0700 Subject: [PATCH] fix(tui): use role=user for model switch marker to avoid HTTP 400 on strict providers (#48338) _append_model_switch_marker() appended the post-/model-switch context marker to session history as {"role": "system"}. The cached system prompt is prepended to the API message list (conversation_loop.py), so this marker became a SECOND system message mid-array after prior user/assistant turns. Strict OpenAI-compatible providers (vLLM, Qwen) reject any system message that is not at the beginning of the array, returning HTTP 400 and killing the conversation on the next turn. Flip the marker to role="user" (history entry + both session-DB persist sites), matching the existing personality-overlay marker which already uses role="user". repair_message_sequence() then coalesces it with adjacent user turns as needed. Co-authored-by: liuhao1024 Co-authored-by: Lucas Nicolas --- tests/test_tui_gateway_server.py | 4 +- .../test_model_switch_marker_role.py | 105 ++++++++++++++++++ tui_gateway/server.py | 10 +- 3 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 tests/tui_gateway/test_model_switch_marker_role.py diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 2ad990afb1b..01a0859fd6a 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -3681,11 +3681,11 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch): "Model: anthropic/claude-sonnet-4.6\nProvider: anthropic" ) assert agent._cached_system_prompt == db.system_prompt - assert session["history"][-1]["role"] == "system" + assert session["history"][-1]["role"] == "user" assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"] assert db.messages[-1] == { "session_id": "session-key", - "role": "system", + "role": "user", "content": session["history"][-1]["content"], } # ...and the shared process env was NOT touched. diff --git a/tests/tui_gateway/test_model_switch_marker_role.py b/tests/tui_gateway/test_model_switch_marker_role.py new file mode 100644 index 00000000000..1312eae2333 --- /dev/null +++ b/tests/tui_gateway/test_model_switch_marker_role.py @@ -0,0 +1,105 @@ +"""Tests for _append_model_switch_marker role fix (issue #48338). + +The model switch marker must NOT use role="system" because strict providers +(vLLM, Qwen) reject system messages that appear mid-conversation. Using +role="user" is safe — the system prompt is prepended to the API message list, +so a user-role marker can appear at any later position, and the gateway's +sanitize/merge pass already coalesces consecutive user messages. +""" + +from __future__ import annotations + +import threading +from types import SimpleNamespace +from unittest.mock import MagicMock + +from tui_gateway.server import _append_model_switch_marker + + +class TestAppendModelSwitchMarkerRole: + """Verify the marker uses role='user', not role='system'.""" + + def test_marker_uses_user_role(self) -> None: + """The history entry must be role='user', not role='system'.""" + session: dict = {"session_key": "test-session", "history": []} + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert len(session["history"]) == 1 + entry = session["history"][0] + assert entry["role"] == "user", ( + f"Expected role='user' but got role='{entry['role']}'. " + "Strict providers (vLLM, Qwen) reject mid-conversation system messages." + ) + + def test_marker_content_preserved(self) -> None: + """The marker content must still describe the model switch.""" + session: dict = {"session_key": "s", "history": []} + _append_model_switch_marker(session, model="qwen3.6-35b", provider="vllm") + content = session["history"][0]["content"] + assert "qwen3.6-35b" in content + assert "vllm" in content + assert "model" in content.lower() + + def test_marker_with_empty_provider(self) -> None: + """Provider part should be omitted when provider is empty.""" + session: dict = {"session_key": "s", "history": []} + _append_model_switch_marker(session, model="claude-sonnet-4", provider="") + content = session["history"][0]["content"] + assert "claude-sonnet-4" in content + assert "via provider" not in content + + def test_marker_with_lock(self) -> None: + """Marker should work correctly when session has a history_lock.""" + session: dict = { + "session_key": "s", + "history": [], + "history_lock": threading.Lock(), + } + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert len(session["history"]) == 1 + assert session["history"][0]["role"] == "user" + + def test_marker_increments_history_version(self) -> None: + """history_version should be incremented after appending.""" + session: dict = {"session_key": "s", "history": [], "history_version": 5} + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert session["history_version"] == 6 + + def test_no_marker_for_none_session(self) -> None: + """None session should be a no-op.""" + _append_model_switch_marker(None, model="gpt-4o", provider="openai") + + def test_no_marker_for_empty_session_key(self) -> None: + """Empty session_key should be a no-op.""" + session: dict = {"session_key": "", "history": []} + _append_model_switch_marker(session, model="gpt-4o", provider="openai") + assert len(session["history"]) == 0 + + def test_marker_not_mid_history_system_after_turns(self) -> None: + """The marker appended after real turns must not be a system role. + + Reproduces the #48338 shape: a switch mid-conversation must not inject + a second system message after user/assistant turns, which strict + OpenAI-compatible providers reject. + """ + db = MagicMock() + session: dict = { + "session_key": "sess-1", + "history": [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ], + "history_version": 7, + "agent": SimpleNamespace(_session_db=db), + } + _append_model_switch_marker( + session, model="qwen3.6-35b", provider="vllm" + ) + marker = session["history"][-1] + assert marker["role"] == "user" + assert session["history_version"] == 8 + # The persisted row must mirror the in-memory role. + db.append_message.assert_called_once_with( + session_id="sess-1", + role="user", + content=marker["content"], + ) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 53c9f266f94..21590a51582 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2198,7 +2198,11 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s f"{model}{provider_part}. From this point forward, use this runtime " "metadata when answering questions about what model/provider is active.]" ) - entry = {"role": "system", "content": marker} + # Persist as a user message, not a system message. The gateway appends + # this marker after prior conversation turns, and strict OpenAI-compatible + # providers (vLLM, Qwen) reject system messages that are not at the + # beginning of the API message list (#48338). + entry = {"role": "user", "content": marker} lock = session.get("history_lock") if lock is not None: @@ -2213,14 +2217,14 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s agent = session.get("agent") db = getattr(agent, "_session_db", None) if agent is not None else None if db is not None: - db.append_message(session_id=session_key, role="system", content=marker) + db.append_message(session_id=session_key, role="user", content=marker) return _ensure_session_db_row(session) with _session_db(session) as scoped_db: if scoped_db is not None: scoped_db.append_message( - session_id=session_key, role="system", content=marker + session_id=session_key, role="user", content=marker ) except Exception: logger.debug("failed to persist model switch marker", exc_info=True)