diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 1954a2b9e..df3fbe1d3 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -54,6 +54,66 @@ DEFAULT_PORT = 8642 MAX_STORED_RESPONSES = 100 MAX_REQUEST_BYTES = 1_000_000 # 1 MB default limit for POST bodies CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0 +MAX_NORMALIZED_TEXT_LENGTH = 65_536 # 64 KB cap for normalized content parts +MAX_CONTENT_LIST_SIZE = 1_000 # Max items when content is an array + + +def _normalize_chat_content( + content: Any, *, _max_depth: int = 10, _depth: int = 0, +) -> str: + """Normalize OpenAI chat message content into a plain text string. + + Some clients (Open WebUI, LobeChat, etc.) send content as an array of + typed parts instead of a plain string:: + + [{"type": "text", "text": "hello"}, {"type": "input_text", "text": "..."}] + + This function flattens those into a single string so the agent pipeline + (which expects strings) doesn't choke. + + Defensive limits prevent abuse: recursion depth, list size, and output + length are all bounded. 
+ """ + if _depth > _max_depth: + return "" + if content is None: + return "" + if isinstance(content, str): + return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content + + if isinstance(content, list): + parts: List[str] = [] + items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content + for item in items: + if isinstance(item, str): + if item: + parts.append(item[:MAX_NORMALIZED_TEXT_LENGTH]) + elif isinstance(item, dict): + item_type = str(item.get("type") or "").strip().lower() + if item_type in {"text", "input_text", "output_text"}: + text = item.get("text", "") + if text: + try: + parts.append(str(text)[:MAX_NORMALIZED_TEXT_LENGTH]) + except Exception: + pass + # Silently skip image_url / other non-text parts + elif isinstance(item, list): + nested = _normalize_chat_content(item, _max_depth=_max_depth, _depth=_depth + 1) + if nested: + parts.append(nested) + # Check accumulated size + if sum(len(p) for p in parts) >= MAX_NORMALIZED_TEXT_LENGTH: + break + result = "\n".join(parts) + return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result + + # Fallback for unexpected types (int, float, bool, etc.) 
+ try: + result = str(content) + return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result + except Exception: + return "" def check_api_server_requirements() -> bool: @@ -553,7 +613,7 @@ class APIServerAdapter(BasePlatformAdapter): for msg in messages: role = msg.get("role", "") - content = msg.get("content", "") + content = _normalize_chat_content(msg.get("content", "")) if role == "system": # Accumulate system messages if system_prompt is None: @@ -926,18 +986,7 @@ class APIServerAdapter(BasePlatformAdapter): input_messages.append({"role": "user", "content": item}) elif isinstance(item, dict): role = item.get("role", "user") - content = item.get("content", "") - # Handle content that may be a list of content parts - if isinstance(content, list): - text_parts = [] - for part in content: - if isinstance(part, dict) and part.get("type") == "input_text": - text_parts.append(part.get("text", "")) - elif isinstance(part, dict) and part.get("type") == "output_text": - text_parts.append(part.get("text", "")) - elif isinstance(part, str): - text_parts.append(part) - content = "\n".join(text_parts) + content = _normalize_chat_content(item.get("content", "")) input_messages.append({"role": role, "content": content}) else: return web.json_response(_openai_error("'input' must be a string or array"), status=400) diff --git a/tests/gateway/test_api_server_normalize.py b/tests/gateway/test_api_server_normalize.py new file mode 100644 index 000000000..2dd2c70f7 --- /dev/null +++ b/tests/gateway/test_api_server_normalize.py @@ -0,0 +1,87 @@ +"""Tests for _normalize_chat_content in the API server adapter.""" + +from gateway.platforms.api_server import _normalize_chat_content + + +class TestNormalizeChatContent: + """Content normalization converts array-based content parts to plain text.""" + + def test_none_returns_empty_string(self): + assert _normalize_chat_content(None) == "" + + def test_plain_string_returned_as_is(self): + assert 
class TestNormalizeChatContent:
    """Exercise _normalize_chat_content's flattening of chat message content."""

    def test_none_returns_empty_string(self):
        result = _normalize_chat_content(None)
        assert result == ""

    def test_plain_string_returned_as_is(self):
        result = _normalize_chat_content("hello world")
        assert result == "hello world"

    def test_empty_string_returned_as_is(self):
        result = _normalize_chat_content("")
        assert result == ""

    def test_text_content_part(self):
        parts = [{"type": "text", "text": "hello"}]
        assert _normalize_chat_content(parts) == "hello"

    def test_input_text_content_part(self):
        parts = [{"type": "input_text", "text": "user input"}]
        assert _normalize_chat_content(parts) == "user input"

    def test_output_text_content_part(self):
        parts = [{"type": "output_text", "text": "assistant output"}]
        assert _normalize_chat_content(parts) == "assistant output"

    def test_multiple_text_parts_joined_with_newline(self):
        first = {"type": "text", "text": "first"}
        second = {"type": "text", "text": "second"}
        assert _normalize_chat_content([first, second]) == "first\nsecond"

    def test_mixed_string_and_dict_parts(self):
        parts = ["plain string", {"type": "text", "text": "dict part"}]
        assert _normalize_chat_content(parts) == "plain string\ndict part"

    def test_image_url_parts_silently_skipped(self):
        image = {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}
        parts = [{"type": "text", "text": "check this:"}, image]
        assert _normalize_chat_content(parts) == "check this:"

    def test_integer_content_converted(self):
        assert _normalize_chat_content(42) == "42"

    def test_boolean_content_converted(self):
        assert _normalize_chat_content(True) == "True"

    def test_deeply_nested_list_respects_depth_limit(self):
        """Nesting past the depth cap degrades to a string, never a crash."""
        nested = "deep"
        for _ in range(12):
            nested = [nested]
        assert isinstance(_normalize_chat_content(nested), str)

    def test_large_list_capped(self):
        """Only the first MAX_CONTENT_LIST_SIZE items survive."""
        parts = [{"type": "text", "text": f"item{i}"} for i in range(2000)]
        flattened = _normalize_chat_content(parts)
        # All 2000 items must not make it into the output
        assert flattened.count("item") <= 1000

    def test_oversized_string_truncated(self):
        """Output never exceeds the 64 KB cap."""
        huge_input = "x" * 100_000
        assert len(_normalize_chat_content(huge_input)) == 65_536

    def test_empty_text_parts_filtered(self):
        parts = [
            {"type": "text", "text": ""},
            {"type": "text", "text": "actual"},
            {"type": "text", "text": ""},
        ]
        assert _normalize_chat_content(parts) == "actual"

    def test_dict_without_type_skipped(self):
        parts = [{"foo": "bar"}, {"type": "text", "text": "real"}]
        assert _normalize_chat_content(parts) == "real"

    def test_empty_list_returns_empty(self):
        assert _normalize_chat_content([]) == ""