fix(api_server): normalize array-based content parts in chat completions

Some OpenAI-compatible clients (Open WebUI, LobeChat, etc.) send
message content as an array of typed parts instead of a plain string:

    [{"type": "text", "text": "hello"}]

The agent pipeline expects strings, so these array payloads caused
silent failures or empty messages.

Add _normalize_chat_content() with defensive limits (recursion depth,
list size, output length) and apply it to both the Chat Completions
and Responses API endpoints. The Responses path had inline
normalization that only handled input_text/output_text — the shared
function also handles the standard 'text' type.

Salvaged from PR #7980 (ikelvingo) — only the content normalization;
the SSE and Weixin changes in that PR were regressions and are not
included.

Co-authored-by: ikelvingo <ikelvingo@users.noreply.github.com>
This commit is contained in:
Teknium 2026-04-12 17:16:16 -07:00 committed by Teknium
parent e8385f6f89
commit bcad679799
2 changed files with 149 additions and 13 deletions

View file

@ -54,6 +54,66 @@ DEFAULT_PORT = 8642
MAX_STORED_RESPONSES = 100
MAX_REQUEST_BYTES = 1_000_000 # 1 MB default limit for POST bodies
CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
MAX_NORMALIZED_TEXT_LENGTH = 65_536 # 64 KB cap for normalized content parts
MAX_CONTENT_LIST_SIZE = 1_000 # Max items when content is an array
def _normalize_chat_content(
content: Any, *, _max_depth: int = 10, _depth: int = 0,
) -> str:
"""Normalize OpenAI chat message content into a plain text string.
Some clients (Open WebUI, LobeChat, etc.) send content as an array of
typed parts instead of a plain string::
[{"type": "text", "text": "hello"}, {"type": "input_text", "text": "..."}]
This function flattens those into a single string so the agent pipeline
(which expects strings) doesn't choke.
Defensive limits prevent abuse: recursion depth, list size, and output
length are all bounded.
"""
if _depth > _max_depth:
return ""
if content is None:
return ""
if isinstance(content, str):
return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
if isinstance(content, list):
parts: List[str] = []
items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
for item in items:
if isinstance(item, str):
if item:
parts.append(item[:MAX_NORMALIZED_TEXT_LENGTH])
elif isinstance(item, dict):
item_type = str(item.get("type") or "").strip().lower()
if item_type in {"text", "input_text", "output_text"}:
text = item.get("text", "")
if text:
try:
parts.append(str(text)[:MAX_NORMALIZED_TEXT_LENGTH])
except Exception:
pass
# Silently skip image_url / other non-text parts
elif isinstance(item, list):
nested = _normalize_chat_content(item, _max_depth=_max_depth, _depth=_depth + 1)
if nested:
parts.append(nested)
# Check accumulated size
if sum(len(p) for p in parts) >= MAX_NORMALIZED_TEXT_LENGTH:
break
result = "\n".join(parts)
return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result
# Fallback for unexpected types (int, float, bool, etc.)
try:
result = str(content)
return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result
except Exception:
return ""
def check_api_server_requirements() -> bool:
@ -553,7 +613,7 @@ class APIServerAdapter(BasePlatformAdapter):
for msg in messages:
role = msg.get("role", "")
content = msg.get("content", "")
content = _normalize_chat_content(msg.get("content", ""))
if role == "system":
# Accumulate system messages
if system_prompt is None:
@ -926,18 +986,7 @@ class APIServerAdapter(BasePlatformAdapter):
input_messages.append({"role": "user", "content": item})
elif isinstance(item, dict):
role = item.get("role", "user")
content = item.get("content", "")
# Handle content that may be a list of content parts
if isinstance(content, list):
text_parts = []
for part in content:
if isinstance(part, dict) and part.get("type") == "input_text":
text_parts.append(part.get("text", ""))
elif isinstance(part, dict) and part.get("type") == "output_text":
text_parts.append(part.get("text", ""))
elif isinstance(part, str):
text_parts.append(part)
content = "\n".join(text_parts)
content = _normalize_chat_content(item.get("content", ""))
input_messages.append({"role": role, "content": content})
else:
return web.json_response(_openai_error("'input' must be a string or array"), status=400)

View file

@ -0,0 +1,87 @@
"""Tests for _normalize_chat_content in the API server adapter."""
from gateway.platforms.api_server import _normalize_chat_content
class TestNormalizeChatContent:
    """Array-based message content parts are flattened into plain text."""

    # --- scalar inputs ---------------------------------------------------

    def test_none_returns_empty_string(self):
        assert _normalize_chat_content(None) == ""

    def test_plain_string_returned_as_is(self):
        assert _normalize_chat_content("hello world") == "hello world"

    def test_empty_string_returned_as_is(self):
        assert _normalize_chat_content("") == ""

    def test_integer_content_converted(self):
        assert _normalize_chat_content(42) == "42"

    def test_boolean_content_converted(self):
        assert _normalize_chat_content(True) == "True"

    # --- typed content parts ---------------------------------------------

    def test_text_content_part(self):
        payload = [{"type": "text", "text": "hello"}]
        assert _normalize_chat_content(payload) == "hello"

    def test_input_text_content_part(self):
        payload = [{"type": "input_text", "text": "user input"}]
        assert _normalize_chat_content(payload) == "user input"

    def test_output_text_content_part(self):
        payload = [{"type": "output_text", "text": "assistant output"}]
        assert _normalize_chat_content(payload) == "assistant output"

    def test_multiple_text_parts_joined_with_newline(self):
        payload = [
            {"type": "text", "text": "first"},
            {"type": "text", "text": "second"},
        ]
        assert _normalize_chat_content(payload) == "first\nsecond"

    def test_mixed_string_and_dict_parts(self):
        payload = ["plain string", {"type": "text", "text": "dict part"}]
        assert _normalize_chat_content(payload) == "plain string\ndict part"

    def test_image_url_parts_silently_skipped(self):
        payload = [
            {"type": "text", "text": "check this:"},
            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
        ]
        assert _normalize_chat_content(payload) == "check this:"

    def test_empty_text_parts_filtered(self):
        payload = [
            {"type": "text", "text": ""},
            {"type": "text", "text": "actual"},
            {"type": "text", "text": ""},
        ]
        assert _normalize_chat_content(payload) == "actual"

    def test_dict_without_type_skipped(self):
        payload = [{"foo": "bar"}, {"type": "text", "text": "real"}]
        assert _normalize_chat_content(payload) == "real"

    def test_empty_list_returns_empty(self):
        assert _normalize_chat_content([]) == ""

    # --- defensive limits ------------------------------------------------

    def test_deeply_nested_list_respects_depth_limit(self):
        """Nesting beyond max_depth is cut off rather than crashing."""
        payload = [[[[[[[[[[[["deep"]]]]]]]]]]]]
        flattened = _normalize_chat_content(payload)
        assert isinstance(flattened, str)

    def test_large_list_capped(self):
        """Lists beyond MAX_CONTENT_LIST_SIZE are truncated."""
        payload = [{"type": "text", "text": f"item{i}"} for i in range(2000)]
        flattened = _normalize_chat_content(payload)
        # At most MAX_CONTENT_LIST_SIZE (1000) items survive.
        assert flattened.count("item") <= 1000

    def test_oversized_string_truncated(self):
        """Strings beyond 64KB are truncated to exactly the cap."""
        oversized = "x" * 100_000
        assert len(_normalize_chat_content(oversized)) == 65_536