fix(api_server): normalize array-based content parts in chat completions

Some OpenAI-compatible clients (Open WebUI, LobeChat, etc.) send
message content as an array of typed parts instead of a plain string:

    [{"type": "text", "text": "hello"}]

The agent pipeline expects strings, so these array payloads caused
silent failures or empty messages.

Add _normalize_chat_content() with defensive limits (recursion depth,
list size, output length) and apply it to both the Chat Completions
and Responses API endpoints. The Responses path had inline
normalization that only handled input_text/output_text — the shared
function also handles the standard 'text' type.

Salvaged from PR #7980 (ikelvingo) — only the content normalization;
the SSE and Weixin changes in that PR were regressions and are not
included.

Co-authored-by: ikelvingo <ikelvingo@users.noreply.github.com>
This commit is contained in:
Teknium 2026-04-12 17:16:16 -07:00 committed by Teknium
parent e8385f6f89
commit bcad679799
2 changed files with 149 additions and 13 deletions

View file

@ -54,6 +54,66 @@ DEFAULT_PORT = 8642
MAX_STORED_RESPONSES = 100
MAX_REQUEST_BYTES = 1_000_000 # 1 MB default limit for POST bodies
CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
MAX_NORMALIZED_TEXT_LENGTH = 65_536 # 64 KB cap for normalized content parts
MAX_CONTENT_LIST_SIZE = 1_000 # Max items when content is an array
def _normalize_chat_content(
content: Any, *, _max_depth: int = 10, _depth: int = 0,
) -> str:
"""Normalize OpenAI chat message content into a plain text string.
Some clients (Open WebUI, LobeChat, etc.) send content as an array of
typed parts instead of a plain string::
[{"type": "text", "text": "hello"}, {"type": "input_text", "text": "..."}]
This function flattens those into a single string so the agent pipeline
(which expects strings) doesn't choke.
Defensive limits prevent abuse: recursion depth, list size, and output
length are all bounded.
"""
if _depth > _max_depth:
return ""
if content is None:
return ""
if isinstance(content, str):
return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
if isinstance(content, list):
parts: List[str] = []
items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
for item in items:
if isinstance(item, str):
if item:
parts.append(item[:MAX_NORMALIZED_TEXT_LENGTH])
elif isinstance(item, dict):
item_type = str(item.get("type") or "").strip().lower()
if item_type in {"text", "input_text", "output_text"}:
text = item.get("text", "")
if text:
try:
parts.append(str(text)[:MAX_NORMALIZED_TEXT_LENGTH])
except Exception:
pass
# Silently skip image_url / other non-text parts
elif isinstance(item, list):
nested = _normalize_chat_content(item, _max_depth=_max_depth, _depth=_depth + 1)
if nested:
parts.append(nested)
# Check accumulated size
if sum(len(p) for p in parts) >= MAX_NORMALIZED_TEXT_LENGTH:
break
result = "\n".join(parts)
return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result
# Fallback for unexpected types (int, float, bool, etc.)
try:
result = str(content)
return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result
except Exception:
return ""
def check_api_server_requirements() -> bool:
@ -553,7 +613,7 @@ class APIServerAdapter(BasePlatformAdapter):
for msg in messages:
role = msg.get("role", "")
content = msg.get("content", "")
content = _normalize_chat_content(msg.get("content", ""))
if role == "system":
# Accumulate system messages
if system_prompt is None:
@ -926,18 +986,7 @@ class APIServerAdapter(BasePlatformAdapter):
input_messages.append({"role": "user", "content": item})
elif isinstance(item, dict):
role = item.get("role", "user")
content = item.get("content", "")
# Handle content that may be a list of content parts
if isinstance(content, list):
text_parts = []
for part in content:
if isinstance(part, dict) and part.get("type") == "input_text":
text_parts.append(part.get("text", ""))
elif isinstance(part, dict) and part.get("type") == "output_text":
text_parts.append(part.get("text", ""))
elif isinstance(part, str):
text_parts.append(part)
content = "\n".join(text_parts)
content = _normalize_chat_content(item.get("content", ""))
input_messages.append({"role": role, "content": content})
else:
return web.json_response(_openai_error("'input' must be a string or array"), status=400)

View file

@ -0,0 +1,87 @@
"""Tests for _normalize_chat_content in the API server adapter."""
from gateway.platforms.api_server import _normalize_chat_content
class TestNormalizeChatContent:
    """Array-based message content parts are flattened into plain text."""

    # --- scalar inputs ---------------------------------------------------

    def test_none_returns_empty_string(self):
        assert _normalize_chat_content(None) == ""

    def test_plain_string_returned_as_is(self):
        assert _normalize_chat_content("hello world") == "hello world"

    def test_empty_string_returned_as_is(self):
        assert _normalize_chat_content("") == ""

    def test_integer_content_converted(self):
        assert _normalize_chat_content(42) == "42"

    def test_boolean_content_converted(self):
        assert _normalize_chat_content(True) == "True"

    # --- typed content parts ---------------------------------------------

    def test_text_content_part(self):
        payload = [{"type": "text", "text": "hello"}]
        assert _normalize_chat_content(payload) == "hello"

    def test_input_text_content_part(self):
        payload = [{"type": "input_text", "text": "user input"}]
        assert _normalize_chat_content(payload) == "user input"

    def test_output_text_content_part(self):
        payload = [{"type": "output_text", "text": "assistant output"}]
        assert _normalize_chat_content(payload) == "assistant output"

    def test_multiple_text_parts_joined_with_newline(self):
        payload = [
            {"type": "text", "text": "first"},
            {"type": "text", "text": "second"},
        ]
        assert _normalize_chat_content(payload) == "first\nsecond"

    def test_mixed_string_and_dict_parts(self):
        payload = ["plain string", {"type": "text", "text": "dict part"}]
        assert _normalize_chat_content(payload) == "plain string\ndict part"

    def test_image_url_parts_silently_skipped(self):
        payload = [
            {"type": "text", "text": "check this:"},
            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
        ]
        assert _normalize_chat_content(payload) == "check this:"

    def test_empty_text_parts_filtered(self):
        payload = [
            {"type": "text", "text": ""},
            {"type": "text", "text": "actual"},
            {"type": "text", "text": ""},
        ]
        assert _normalize_chat_content(payload) == "actual"

    def test_dict_without_type_skipped(self):
        payload = [{"foo": "bar"}, {"type": "text", "text": "real"}]
        assert _normalize_chat_content(payload) == "real"

    def test_empty_list_returns_empty(self):
        assert _normalize_chat_content([]) == ""

    # --- defensive limits ------------------------------------------------

    def test_deeply_nested_list_respects_depth_limit(self):
        """Nesting beyond max_depth is cut off rather than crashing."""
        payload = [[[[[[[[[[[["deep"]]]]]]]]]]]]
        flattened = _normalize_chat_content(payload)
        assert isinstance(flattened, str)

    def test_large_list_capped(self):
        """Lists beyond MAX_CONTENT_LIST_SIZE are truncated."""
        payload = [{"type": "text", "text": f"item{i}"} for i in range(2000)]
        flattened = _normalize_chat_content(payload)
        # At most MAX_CONTENT_LIST_SIZE (1000) items survive.
        assert flattened.count("item") <= 1000

    def test_oversized_string_truncated(self):
        """Strings beyond 64KB are truncated to exactly the cap."""
        oversized = "x" * 100_000
        assert len(_normalize_chat_content(oversized)) == 65_536