fix(tui): handle images with codex app-server

This commit is contained in:
Fewmanism 2026-05-15 04:28:44 +09:00 committed by Teknium
parent 7ce6b504a2
commit 83f6a83b24
3 changed files with 78 additions and 2 deletions

View file

@ -87,6 +87,39 @@ class TurnResult:
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
def _coerce_turn_input_text(user_input: Any) -> str:
"""Collapse Hermes/OpenAI rich content into app-server text input.
The current `turn/start` path sends text items only. TUI image attachment
can hand us OpenAI-style content parts, so keep the text/path hints and
replace opaque image payloads with a small marker instead of putting a
Python list into the `text` field.
"""
if isinstance(user_input, str):
return user_input
if isinstance(user_input, list):
parts: list[str] = []
for item in user_input:
if isinstance(item, str):
if item.strip():
parts.append(item)
continue
if not isinstance(item, dict):
if item is not None:
parts.append(str(item))
continue
item_type = item.get("type")
if item_type in {"text", "input_text"}:
text = item.get("text") or item.get("content") or ""
if text:
parts.append(str(text))
elif item_type in {"image", "image_url", "input_image"}:
parts.append("[image attached]")
text = "\n\n".join(p for p in parts if p).strip()
return text or "What do you see in this image?"
return "" if user_input is None else str(user_input)
# Substrings in codex stderr / JSON-RPC error messages that signal the
# subprocess died because its OAuth credentials are no longer valid.
# Kept conservative: we only redirect users to `codex login` when we're
@ -327,7 +360,7 @@ class CodexAppServerSession:
def run_turn(
self,
user_input: str,
user_input: Any,
*,
turn_timeout: float = 600.0,
notification_poll_timeout: float = 0.25,
@ -365,6 +398,8 @@ class CodexAppServerSession:
self._interrupt_event.clear()
projector = CodexEventProjector()
user_input_text = _coerce_turn_input_text(user_input)
# Send turn/start with the user input. Text-only for now (codex
# supports rich content but Hermes' text path is the common case).
try:
@ -372,7 +407,7 @@ class CodexAppServerSession:
"turn/start",
{
"threadId": self._thread_id,
"input": [{"type": "text", "text": user_input}],
"input": [{"type": "text", "text": user_input_text}],
},
timeout=10,
)

View file

@ -20,6 +20,7 @@ from agent.transports.codex_app_server_session import (
TurnResult,
_ServerRequestRouting,
_approval_choice_to_codex_decision,
_coerce_turn_input_text,
)
@ -128,6 +129,15 @@ class TestApprovalChoiceMapping:
assert _approval_choice_to_codex_decision(choice) == expected
class TestTurnInputCoercion:
    """Checks for flattening rich content parts into a single text string."""

    def test_list_content_keeps_text_and_marks_images(self):
        # One text part followed by one image part: the text is kept verbatim
        # and the image payload is replaced by a marker.
        content_parts = [
            {"type": "text", "text": "caption"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
        ]
        collapsed = _coerce_turn_input_text(content_parts)
        assert collapsed == "caption\n\n[image attached]"
# ---- lifecycle ----
class TestLifecycle:
@ -188,6 +198,35 @@ class TestRunTurn:
# turn_id propagated for downstream session-DB linkage
assert r.turn_id == "turn-fake-001"
def test_rich_content_turn_is_collapsed_to_text_payload(self):
    """A list of content parts must reach `turn/start` as one text string."""
    fake = FakeClient()
    fake.queue_notification(
        "turn/completed",
        threadId="t",
        turn={"id": "tu1", "status": "completed", "error": None},
    )
    session = make_session(fake)

    # Text part with a path hint plus an inline image payload.
    rich_input = [
        {
            "type": "text",
            "text": "look at this\n\n[Image attached at: /tmp/a.png]",
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,abc"},
        },
    ]
    result = session.run_turn(rich_input, turn_timeout=2.0)
    assert result.error is None

    # Inspect the first recorded turn/start request.
    method, params = next(req for req in fake.requests if req[0] == "turn/start")
    assert method == "turn/start"
    sent_text = params["input"][0]["text"]
    assert isinstance(sent_text, str)
    assert "[Image attached at: /tmp/a.png]" in sent_text
    assert "[image attached]" in sent_text
def test_tool_iteration_counter_ticks(self):
client = FakeClient()
# Two completed exec items + one final agent message

View file

@ -3350,6 +3350,8 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
_read_main_model(),
_cfg,
)
if getattr(agent, "api_mode", "") == "codex_app_server":
_mode = "text"
except Exception as _img_exc:
print(
f"[tui_gateway] image_routing decision failed, defaulting to text: {_img_exc}",