diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index 4d3e5590b..3b007a762 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
 
 logger = logging.getLogger(__name__)
 
+# Matches Codex/Harmony tool-call serialization that occasionally leaks into
+# assistant-message content when the model fails to emit a structured
+# ``function_call`` item. Accepts the common forms:
+#
+#   to=functions.exec_command
+#   assistant to=functions.exec_command
+#   <|channel|>commentary to=functions.exec_command
+#
+# ``to=functions.`` is the stable marker — the optional ``assistant`` or
+# Harmony channel prefix varies by degeneration mode and is not matched
+# itself. IGNORECASE makes the ``to=functions.`` marker case-insensitive.
+_TOOL_CALL_LEAK_PATTERN = re.compile(
+    r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
+    re.IGNORECASE,
+)
+
+
 # ---------------------------------------------------------------------------
 # Multimodal content helpers
 # ---------------------------------------------------------------------------
@@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
         if isinstance(out_text, str):
             final_text = out_text.strip()
 
+    # ── Tool-call leak recovery ──────────────────────────────────
+    # gpt-5.x on the Codex Responses API sometimes degenerates and emits
+    # what should be a structured `function_call` item as plain assistant
+    # text using the Harmony/Codex serialization (``to=functions.foo
+    # {json}`` or ``assistant to=functions.foo {json}``). The model
+    # intended to call a tool, but the intent never made it into
+    # ``response.output`` as a ``function_call`` item, so ``tool_calls``
+    # is empty here. If we pass this through, the parent sees a
+    # confident-looking summary with no audit trail (empty ``tool_trace``)
+    # and no tools actually ran — the Taiwan-embassy-email incident.
+    #
+    # Detection: leaked tokens always contain ``to=functions.`` and
+    # the assistant message has no real tool calls. Treat it as incomplete
+    # so the existing Codex-incomplete continuation path (3 retries,
+    # handled in run_agent.py) gets a chance to re-elicit a proper
+    # ``function_call`` item. The existing loop already handles message
+    # append, dedup, and retry budget.
+    leaked_tool_call_text = False
+    if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
+        leaked_tool_call_text = True
+        logger.warning(
+            "Codex response contains leaked tool-call text in assistant content "
+            "(no structured function_call items). Treating as incomplete so the "
+            "continuation path can re-elicit a proper tool call. Leaked snippet: %r",
+            final_text[:300],
+        )
+        # Clear the text so downstream code doesn't surface the garbage as
+        # a summary. The encrypted reasoning items (if any) are preserved
+        # so the model keeps its chain-of-thought on the retry.
+        final_text = ""
+
     assistant_message = SimpleNamespace(
         content=final_text,
         tool_calls=tool_calls,
@@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
 
     if tool_calls:
         finish_reason = "tool_calls"
+    elif leaked_tool_call_text:
+        finish_reason = "incomplete"
     elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
         finish_reason = "incomplete"
     elif reasoning_items_raw and not final_text:
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index d6567f0ec..913a041fb 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -943,6 +943,113 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo
     assert "inspect the repository" in (assistant_message.content or "")
 
 
+def test_normalize_codex_response_detects_leaked_tool_call_text(monkeypatch):
+    """Harmony-style `to=functions.foo` leaked into assistant content with no
+    structured function_call items must be treated as incomplete so the
+    continuation path can re-elicit a proper tool call. This is the
+    Taiwan-embassy-email (Discord bug report) failure mode: child agent
+    produces a confident-looking summary, tool_trace is empty because no
+    tools actually ran, parent can't audit the claim.
+    """
+    agent = _build_agent(monkeypatch)
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    leaked_content = (
+        "I'll check the official page directly.\n"
+        "to=functions.exec_command {\"cmd\": \"curl https://example.test\"}\n"
+        "assistant to=functions.exec_command {\"stdout\": \"mailto:foo@example.test\"}\n"
+        "Extracted: foo@example.test"
+    )
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text=leaked_content)],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"
+    # Content is scrubbed so the parent never surfaces the leaked text as a
+    # summary. tool_calls stays empty because no structured function_call
+    # item existed.
+    assert (assistant_message.content or "") == ""
+    assert assistant_message.tool_calls == []
+
+
+def test_normalize_codex_response_ignores_tool_call_text_when_real_tool_call_present(monkeypatch):
+    """If the model emitted BOTH a structured function_call AND some text that
+    happens to contain `to=functions.*` (unlikely but possible), trust the
+    structured call — don't wipe content that came alongside a real tool use.
+    """
+    agent = _build_agent(monkeypatch)
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(
+                    type="output_text",
+                    text="Running the command via to=functions.exec_command now.",
+                )],
+            ),
+            SimpleNamespace(
+                type="function_call",
+                id="fc_1",
+                call_id="call_1",
+                name="terminal",
+                arguments="{}",
+            ),
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "tool_calls"
+    assert assistant_message.tool_calls  # real call preserved
+    assert "Running the command" in (assistant_message.content or "")
+
+
+def test_normalize_codex_response_no_leak_passes_through(monkeypatch):
+    """Sanity: normal assistant content that doesn't contain the leak pattern
+    is returned verbatim with finish_reason=stop."""
+    agent = _build_agent(monkeypatch)
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(
+                    type="output_text",
+                    text="Here is the answer with no leak.",
+                )],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "stop"
+    assert assistant_message.content == "Here is the answer with no leak."
+    assert assistant_message.tool_calls == []
+
+
 def test_interim_commentary_is_not_marked_already_streamed_without_callbacks(monkeypatch):
     agent = _build_agent(monkeypatch)
     observed = {}