mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(tui): handle images with codex app-server
This commit is contained in:
parent
7ce6b504a2
commit
83f6a83b24
3 changed files with 78 additions and 2 deletions
|
|
@ -87,6 +87,39 @@ class TurnResult:
|
|||
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
|
||||
|
||||
|
||||
def _coerce_turn_input_text(user_input: Any) -> str:
|
||||
"""Collapse Hermes/OpenAI rich content into app-server text input.
|
||||
|
||||
The current `turn/start` path sends text items only. TUI image attachment
|
||||
can hand us OpenAI-style content parts, so keep the text/path hints and
|
||||
replace opaque image payloads with a small marker instead of putting a
|
||||
Python list into the `text` field.
|
||||
"""
|
||||
if isinstance(user_input, str):
|
||||
return user_input
|
||||
if isinstance(user_input, list):
|
||||
parts: list[str] = []
|
||||
for item in user_input:
|
||||
if isinstance(item, str):
|
||||
if item.strip():
|
||||
parts.append(item)
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
if item is not None:
|
||||
parts.append(str(item))
|
||||
continue
|
||||
item_type = item.get("type")
|
||||
if item_type in {"text", "input_text"}:
|
||||
text = item.get("text") or item.get("content") or ""
|
||||
if text:
|
||||
parts.append(str(text))
|
||||
elif item_type in {"image", "image_url", "input_image"}:
|
||||
parts.append("[image attached]")
|
||||
text = "\n\n".join(p for p in parts if p).strip()
|
||||
return text or "What do you see in this image?"
|
||||
return "" if user_input is None else str(user_input)
|
||||
|
||||
|
||||
# Substrings in codex stderr / JSON-RPC error messages that signal the
|
||||
# subprocess died because its OAuth credentials are no longer valid.
|
||||
# Kept conservative: we only redirect users to `codex login` when we're
|
||||
|
|
@ -327,7 +360,7 @@ class CodexAppServerSession:
|
|||
|
||||
def run_turn(
|
||||
self,
|
||||
user_input: str,
|
||||
user_input: Any,
|
||||
*,
|
||||
turn_timeout: float = 600.0,
|
||||
notification_poll_timeout: float = 0.25,
|
||||
|
|
@ -365,6 +398,8 @@ class CodexAppServerSession:
|
|||
self._interrupt_event.clear()
|
||||
projector = CodexEventProjector()
|
||||
|
||||
user_input_text = _coerce_turn_input_text(user_input)
|
||||
|
||||
# Send turn/start with the user input. Text-only for now (codex
|
||||
# supports rich content but Hermes' text path is the common case).
|
||||
try:
|
||||
|
|
@ -372,7 +407,7 @@ class CodexAppServerSession:
|
|||
"turn/start",
|
||||
{
|
||||
"threadId": self._thread_id,
|
||||
"input": [{"type": "text", "text": user_input}],
|
||||
"input": [{"type": "text", "text": user_input_text}],
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from agent.transports.codex_app_server_session import (
|
|||
TurnResult,
|
||||
_ServerRequestRouting,
|
||||
_approval_choice_to_codex_decision,
|
||||
_coerce_turn_input_text,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -128,6 +129,15 @@ class TestApprovalChoiceMapping:
|
|||
assert _approval_choice_to_codex_decision(choice) == expected
|
||||
|
||||
|
||||
class TestTurnInputCoercion:
    """Checks for flattening rich content parts into a plain text string."""

    def test_list_content_keeps_text_and_marks_images(self):
        """A text part is kept verbatim; an image part becomes a marker."""
        caption_part = {"type": "text", "text": "caption"}
        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,abc"},
        }

        flattened = _coerce_turn_input_text([caption_part, image_part])

        # Parts are joined with a blank line between them.
        assert flattened == "caption\n\n[image attached]"
|
||||
|
||||
|
||||
# ---- lifecycle ----
|
||||
|
||||
class TestLifecycle:
|
||||
|
|
@ -188,6 +198,35 @@ class TestRunTurn:
|
|||
# turn_id propagated for downstream session-DB linkage
|
||||
assert r.turn_id == "turn-fake-001"
|
||||
|
||||
def test_rich_content_turn_is_collapsed_to_text_payload(self):
    """A list of content parts reaches `turn/start` as one text string."""
    fake_client = FakeClient()
    # Queue the completion up front so the turn finishes immediately.
    fake_client.queue_notification(
        "turn/completed",
        threadId="t",
        turn={"id": "tu1", "status": "completed", "error": None},
    )
    session = make_session(fake_client)

    rich_input = [
        {
            "type": "text",
            "text": "look at this\n\n[Image attached at: /tmp/a.png]",
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,abc"},
        },
    ]
    result = session.run_turn(rich_input, turn_timeout=2.0)
    assert result.error is None

    # Inspect the first recorded turn/start request.
    method, params = next(
        req for req in fake_client.requests if req[0] == "turn/start"
    )
    assert method == "turn/start"

    sent_text = params["input"][0]["text"]
    assert isinstance(sent_text, str)
    # The path hint from the text part survives ...
    assert "[Image attached at: /tmp/a.png]" in sent_text
    # ... and the image part is reduced to its marker.
    assert "[image attached]" in sent_text
|
||||
|
||||
def test_tool_iteration_counter_ticks(self):
|
||||
client = FakeClient()
|
||||
# Two completed exec items + one final agent message
|
||||
|
|
|
|||
|
|
@ -3350,6 +3350,8 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
|||
_read_main_model(),
|
||||
_cfg,
|
||||
)
|
||||
if getattr(agent, "api_mode", "") == "codex_app_server":
|
||||
_mode = "text"
|
||||
except Exception as _img_exc:
|
||||
print(
|
||||
f"[tui_gateway] image_routing decision failed, defaulting to text: {_img_exc}",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue