fix(tui): use role=user for model switch marker to avoid HTTP 400 on strict providers (#48338)

_append_model_switch_marker() appended the post-/model-switch context marker
to session history as {"role": "system"}. The cached system prompt is
prepended to the API message list (conversation_loop.py), so this marker
became a SECOND system message mid-array after prior user/assistant turns.
Strict OpenAI-compatible providers (vLLM, Qwen) reject any system message
that is not at the beginning of the array, returning HTTP 400 and killing
the conversation on the next turn.

Flip the marker to role="user" (history entry + both session-DB persist
sites), matching the existing personality-overlay marker which already uses
role="user". repair_message_sequence() then coalesces it with adjacent user
turns as needed.

Co-authored-by: liuhao1024 <sunsky.lau@gmail.com>
Co-authored-by: Lucas Nicolas <lucas.nicolas@proton.me>
This commit is contained in:
teknium1 2026-06-28 04:28:55 -07:00 committed by Teknium
parent 376d021fee
commit 61622bb56a
3 changed files with 114 additions and 5 deletions

View file

@ -3681,11 +3681,11 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch):
"Model: anthropic/claude-sonnet-4.6\nProvider: anthropic"
)
assert agent._cached_system_prompt == db.system_prompt
assert session["history"][-1]["role"] == "system"
assert session["history"][-1]["role"] == "user"
assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"]
assert db.messages[-1] == {
"session_id": "session-key",
"role": "system",
"role": "user",
"content": session["history"][-1]["content"],
}
# ...and the shared process env was NOT touched.

View file

@ -0,0 +1,105 @@
"""Tests for _append_model_switch_marker role fix (issue #48338).
The model switch marker must NOT use role="system" because strict providers
(vLLM, Qwen) reject system messages that appear mid-conversation. Using
role="user" is safe the system prompt is prepended to the API message list,
so a user-role marker can appear at any later position, and the gateway's
sanitize/merge pass already coalesces consecutive user messages.
"""
from __future__ import annotations
import threading
from types import SimpleNamespace
from unittest.mock import MagicMock
from tui_gateway.server import _append_model_switch_marker
class TestAppendModelSwitchMarkerRole:
"""Verify the marker uses role='user', not role='system'."""
def test_marker_uses_user_role(self) -> None:
"""The history entry must be role='user', not role='system'."""
session: dict = {"session_key": "test-session", "history": []}
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
assert len(session["history"]) == 1
entry = session["history"][0]
assert entry["role"] == "user", (
f"Expected role='user' but got role='{entry['role']}'. "
"Strict providers (vLLM, Qwen) reject mid-conversation system messages."
)
def test_marker_content_preserved(self) -> None:
"""The marker content must still describe the model switch."""
session: dict = {"session_key": "s", "history": []}
_append_model_switch_marker(session, model="qwen3.6-35b", provider="vllm")
content = session["history"][0]["content"]
assert "qwen3.6-35b" in content
assert "vllm" in content
assert "model" in content.lower()
def test_marker_with_empty_provider(self) -> None:
"""Provider part should be omitted when provider is empty."""
session: dict = {"session_key": "s", "history": []}
_append_model_switch_marker(session, model="claude-sonnet-4", provider="")
content = session["history"][0]["content"]
assert "claude-sonnet-4" in content
assert "via provider" not in content
def test_marker_with_lock(self) -> None:
"""Marker should work correctly when session has a history_lock."""
session: dict = {
"session_key": "s",
"history": [],
"history_lock": threading.Lock(),
}
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
assert len(session["history"]) == 1
assert session["history"][0]["role"] == "user"
def test_marker_increments_history_version(self) -> None:
"""history_version should be incremented after appending."""
session: dict = {"session_key": "s", "history": [], "history_version": 5}
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
assert session["history_version"] == 6
def test_no_marker_for_none_session(self) -> None:
"""None session should be a no-op."""
_append_model_switch_marker(None, model="gpt-4o", provider="openai")
def test_no_marker_for_empty_session_key(self) -> None:
"""Empty session_key should be a no-op."""
session: dict = {"session_key": "", "history": []}
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
assert len(session["history"]) == 0
def test_marker_not_mid_history_system_after_turns(self) -> None:
"""The marker appended after real turns must not be a system role.
Reproduces the #48338 shape: a switch mid-conversation must not inject
a second system message after user/assistant turns, which strict
OpenAI-compatible providers reject.
"""
db = MagicMock()
session: dict = {
"session_key": "sess-1",
"history": [
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi"},
],
"history_version": 7,
"agent": SimpleNamespace(_session_db=db),
}
_append_model_switch_marker(
session, model="qwen3.6-35b", provider="vllm"
)
marker = session["history"][-1]
assert marker["role"] == "user"
assert session["history_version"] == 8
# The persisted row must mirror the in-memory role.
db.append_message.assert_called_once_with(
session_id="sess-1",
role="user",
content=marker["content"],
)

View file

@ -2198,7 +2198,11 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s
f"{model}{provider_part}. From this point forward, use this runtime "
"metadata when answering questions about what model/provider is active.]"
)
entry = {"role": "system", "content": marker}
# Persist as a user message, not a system message. The gateway appends
# this marker after prior conversation turns, and strict OpenAI-compatible
# providers (vLLM, Qwen) reject system messages that are not at the
# beginning of the API message list (#48338).
entry = {"role": "user", "content": marker}
lock = session.get("history_lock")
if lock is not None:
@ -2213,14 +2217,14 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s
agent = session.get("agent")
db = getattr(agent, "_session_db", None) if agent is not None else None
if db is not None:
db.append_message(session_id=session_key, role="system", content=marker)
db.append_message(session_id=session_key, role="user", content=marker)
return
_ensure_session_db_row(session)
with _session_db(session) as scoped_db:
if scoped_db is not None:
scoped_db.append_message(
session_id=session_key, role="system", content=marker
session_id=session_key, role="user", content=marker
)
except Exception:
logger.debug("failed to persist model switch marker", exc_info=True)