mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
fix(tui): use role=user for model switch marker to avoid HTTP 400 on strict providers (#48338)
_append_model_switch_marker() appended the post-/model-switch context marker
to session history as {"role": "system"}. The cached system prompt is
prepended to the API message list (conversation_loop.py), so this marker
became a SECOND system message mid-array after prior user/assistant turns.
Strict OpenAI-compatible providers (vLLM, Qwen) reject any system message
that is not at the beginning of the array, returning HTTP 400 and killing
the conversation on the next turn.
Flip the marker to role="user" (history entry + both session-DB persist
sites), matching the existing personality-overlay marker which already uses
role="user". repair_message_sequence() then coalesces it with adjacent user
turns as needed.
Co-authored-by: liuhao1024 <sunsky.lau@gmail.com>
Co-authored-by: Lucas Nicolas <lucas.nicolas@proton.me>
This commit is contained in:
parent
376d021fee
commit
61622bb56a
3 changed files with 114 additions and 5 deletions
|
|
@ -3681,11 +3681,11 @@ def test_config_set_model_switches_agent_without_touching_env(monkeypatch):
|
|||
"Model: anthropic/claude-sonnet-4.6\nProvider: anthropic"
|
||||
)
|
||||
assert agent._cached_system_prompt == db.system_prompt
|
||||
assert session["history"][-1]["role"] == "system"
|
||||
assert session["history"][-1]["role"] == "user"
|
||||
assert "changed to anthropic/claude-sonnet-4.6" in session["history"][-1]["content"]
|
||||
assert db.messages[-1] == {
|
||||
"session_id": "session-key",
|
||||
"role": "system",
|
||||
"role": "user",
|
||||
"content": session["history"][-1]["content"],
|
||||
}
|
||||
# ...and the shared process env was NOT touched.
|
||||
|
|
|
|||
105
tests/tui_gateway/test_model_switch_marker_role.py
Normal file
105
tests/tui_gateway/test_model_switch_marker_role.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Tests for _append_model_switch_marker role fix (issue #48338).
|
||||
|
||||
The model switch marker must NOT use role="system" because strict providers
|
||||
(vLLM, Qwen) reject system messages that appear mid-conversation. Using
|
||||
role="user" is safe — the system prompt is prepended to the API message list,
|
||||
so a user-role marker can appear at any later position, and the gateway's
|
||||
sanitize/merge pass already coalesces consecutive user messages.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from tui_gateway.server import _append_model_switch_marker
|
||||
|
||||
|
||||
class TestAppendModelSwitchMarkerRole:
|
||||
"""Verify the marker uses role='user', not role='system'."""
|
||||
|
||||
def test_marker_uses_user_role(self) -> None:
|
||||
"""The history entry must be role='user', not role='system'."""
|
||||
session: dict = {"session_key": "test-session", "history": []}
|
||||
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
|
||||
assert len(session["history"]) == 1
|
||||
entry = session["history"][0]
|
||||
assert entry["role"] == "user", (
|
||||
f"Expected role='user' but got role='{entry['role']}'. "
|
||||
"Strict providers (vLLM, Qwen) reject mid-conversation system messages."
|
||||
)
|
||||
|
||||
def test_marker_content_preserved(self) -> None:
|
||||
"""The marker content must still describe the model switch."""
|
||||
session: dict = {"session_key": "s", "history": []}
|
||||
_append_model_switch_marker(session, model="qwen3.6-35b", provider="vllm")
|
||||
content = session["history"][0]["content"]
|
||||
assert "qwen3.6-35b" in content
|
||||
assert "vllm" in content
|
||||
assert "model" in content.lower()
|
||||
|
||||
def test_marker_with_empty_provider(self) -> None:
|
||||
"""Provider part should be omitted when provider is empty."""
|
||||
session: dict = {"session_key": "s", "history": []}
|
||||
_append_model_switch_marker(session, model="claude-sonnet-4", provider="")
|
||||
content = session["history"][0]["content"]
|
||||
assert "claude-sonnet-4" in content
|
||||
assert "via provider" not in content
|
||||
|
||||
def test_marker_with_lock(self) -> None:
|
||||
"""Marker should work correctly when session has a history_lock."""
|
||||
session: dict = {
|
||||
"session_key": "s",
|
||||
"history": [],
|
||||
"history_lock": threading.Lock(),
|
||||
}
|
||||
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
|
||||
assert len(session["history"]) == 1
|
||||
assert session["history"][0]["role"] == "user"
|
||||
|
||||
def test_marker_increments_history_version(self) -> None:
|
||||
"""history_version should be incremented after appending."""
|
||||
session: dict = {"session_key": "s", "history": [], "history_version": 5}
|
||||
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
|
||||
assert session["history_version"] == 6
|
||||
|
||||
def test_no_marker_for_none_session(self) -> None:
|
||||
"""None session should be a no-op."""
|
||||
_append_model_switch_marker(None, model="gpt-4o", provider="openai")
|
||||
|
||||
def test_no_marker_for_empty_session_key(self) -> None:
|
||||
"""Empty session_key should be a no-op."""
|
||||
session: dict = {"session_key": "", "history": []}
|
||||
_append_model_switch_marker(session, model="gpt-4o", provider="openai")
|
||||
assert len(session["history"]) == 0
|
||||
|
||||
def test_marker_not_mid_history_system_after_turns(self) -> None:
|
||||
"""The marker appended after real turns must not be a system role.
|
||||
|
||||
Reproduces the #48338 shape: a switch mid-conversation must not inject
|
||||
a second system message after user/assistant turns, which strict
|
||||
OpenAI-compatible providers reject.
|
||||
"""
|
||||
db = MagicMock()
|
||||
session: dict = {
|
||||
"session_key": "sess-1",
|
||||
"history": [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi"},
|
||||
],
|
||||
"history_version": 7,
|
||||
"agent": SimpleNamespace(_session_db=db),
|
||||
}
|
||||
_append_model_switch_marker(
|
||||
session, model="qwen3.6-35b", provider="vllm"
|
||||
)
|
||||
marker = session["history"][-1]
|
||||
assert marker["role"] == "user"
|
||||
assert session["history_version"] == 8
|
||||
# The persisted row must mirror the in-memory role.
|
||||
db.append_message.assert_called_once_with(
|
||||
session_id="sess-1",
|
||||
role="user",
|
||||
content=marker["content"],
|
||||
)
|
||||
|
|
@ -2198,7 +2198,11 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s
|
|||
f"{model}{provider_part}. From this point forward, use this runtime "
|
||||
"metadata when answering questions about what model/provider is active.]"
|
||||
)
|
||||
entry = {"role": "system", "content": marker}
|
||||
# Persist as a user message, not a system message. The gateway appends
|
||||
# this marker after prior conversation turns, and strict OpenAI-compatible
|
||||
# providers (vLLM, Qwen) reject system messages that are not at the
|
||||
# beginning of the API message list (#48338).
|
||||
entry = {"role": "user", "content": marker}
|
||||
|
||||
lock = session.get("history_lock")
|
||||
if lock is not None:
|
||||
|
|
@ -2213,14 +2217,14 @@ def _append_model_switch_marker(session: dict | None, *, model: str, provider: s
|
|||
agent = session.get("agent")
|
||||
db = getattr(agent, "_session_db", None) if agent is not None else None
|
||||
if db is not None:
|
||||
db.append_message(session_id=session_key, role="system", content=marker)
|
||||
db.append_message(session_id=session_key, role="user", content=marker)
|
||||
return
|
||||
|
||||
_ensure_session_db_row(session)
|
||||
with _session_db(session) as scoped_db:
|
||||
if scoped_db is not None:
|
||||
scoped_db.append_message(
|
||||
session_id=session_key, role="system", content=marker
|
||||
session_id=session_key, role="user", content=marker
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("failed to persist model switch marker", exc_info=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue