def test_history_to_messages_renders_multimodal_content():
    """A multimodal user turn is flattened into display text on resume.

    The user message carries a text part and an image part; the rendered
    transcript is expected to show the text followed by an ``[image]``
    placeholder, with the plain-string assistant reply passed through.
    """
    user_parts = [
        {"type": "text", "text": "look here"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
    ]
    user_turn = {"role": "user", "content": user_parts}
    assistant_turn = {"role": "assistant", "content": "saw it"}

    rendered = server._history_to_messages([user_turn, assistant_turn])

    expected = [
        {"role": "user", "text": "look here\n[image]"},
        {"role": "assistant", "text": "saw it"},
    ]
    assert rendered == expected
+def _content_display_text(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, (int, float)): + return str(content) + if isinstance(content, list): + parts = [] + for part in content: + text = _content_display_text(part).strip() + if text: + parts.append(text) + return "\n".join(parts) + if isinstance(content, dict): + kind = content.get("type") + if kind in {"text", "input_text", "output_text"}: + return str(content.get("text") or content.get("content") or "") + if kind in {"image_url", "input_image", "image"}: + return "[image]" + if kind in {"input_audio", "audio"}: + return "[audio]" + if kind: + return f"[{kind}]" + if "text" in content: + return str(content.get("text") or "") + return "[structured content]" + return str(content) + + def _history_to_messages(history: list[dict]) -> list[dict]: messages = [] tool_call_args = {} @@ -1998,6 +2028,7 @@ def _history_to_messages(history: list[dict]) -> list[dict]: role = m.get("role") if role not in ("user", "assistant", "tool", "system"): continue + content_text = _content_display_text(m.get("content")) if role == "assistant" and m.get("tool_calls"): for tc in m["tool_calls"]: fn = tc.get("function", {}) @@ -2008,7 +2039,7 @@ def _history_to_messages(history: list[dict]) -> list[dict]: except (json.JSONDecodeError, TypeError): args = {} tool_call_args[tc_id] = (fn["name"], args) - if not (m.get("content") or "").strip(): + if not content_text.strip(): continue if role == "tool": tc_id = m.get("tool_call_id", "") @@ -2019,9 +2050,9 @@ def _history_to_messages(history: list[dict]) -> list[dict]: {"role": "tool", "name": name, "context": _tool_ctx(name, args)} ) continue - if not (m.get("content") or "").strip(): + if not content_text.strip(): continue - messages.append({"role": role, "text": m.get("content") or ""}) + messages.append({"role": role, "text": content_text}) return messages