From fe83c4001bb77cdda5c0922805455e2ec9c9ffd5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 14 May 2026 14:55:23 -0700 Subject: [PATCH] fix(codex-app-server): attach redacted stderr tail to generic failures (#25929) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When codex app-server fails outside the OAuth-classified path (non-auth turn/start errors, plain TimeoutErrors, generic turn-ended status, subprocess silently exits, hard deadline timeout), the user got a bare 'Internal error' / 'turn/start failed: ...' with no context. Diagnosing config/provider/auth-bridge issues forced a re-run with verbose codex flags. Add a _format_error_with_stderr helper that appends the last few stderr lines via agent.redact.redact_sensitive_text(force=True), and use it at every catch-all error site: - ensure_started() failures (codex init / thread/start) now return a TurnResult.error with should_retire=True instead of bubbling - non-OAuth turn/start CodexAppServerError / TimeoutError - subprocess-died branch (previously dumped raw stderr_blob[-300:] with no redaction — a leak risk) - turn ended with non-completed status - hard turn-timeout deadline OAuth-classified failures and the post-tool quiet watchdog already produce clean hints and stay unchanged. The redactor catches sk-*, gh*_*, Authorization: Bearer, query-string tokens, JWTs, private keys, etc., so provider error payloads can't leak into chat output or trajectories. Inspired by openclaw#80718, adapted for our app-server transport. --- agent/transports/codex_app_server_session.py | 94 ++++++++++++++++--- .../test_codex_app_server_session.py | 80 ++++++++++++++++ 2 files changed, 163 insertions(+), 11 deletions(-) diff --git a/agent/transports/codex_app_server_session.py b/agent/transports/codex_app_server_session.py index 8775b54edb4..f0cd0a196c4 100644 --- a/agent/transports/codex_app_server_session.py +++ b/agent/transports/codex_app_server_session.py @@ -31,6 +31,7 @@ import time from dataclasses import dataclass, field from typing import Any, Callable, Optional +from agent.redact import redact_sensitive_text from agent.transports.codex_app_server import ( CodexAppServerClient, CodexAppServerError, @@ -40,6 +41,13 @@ from agent.transports.codex_event_projector import CodexEventProjector logger = logging.getLogger(__name__) +# How many tailing stderr lines from the codex subprocess to attach to a +# user-facing error when we don't have a more specific classification (OAuth, +# wedge watchdog, etc.). Small enough to keep error messages legible, large +# enough to surface a config/provider/auth diagnostic. +_STDERR_TAIL_LINES = 12 + + # Permission profile mapping mirrors the docstring in PR proposal: # Hermes' tools.terminal.security_mode → Codex's permissions profile id. # Defaults if config is missing → workspace-write (matches Codex's own default). @@ -276,6 +284,45 @@ class CodexAppServerSession: and unwind. Called by AIAgent's _interrupt_requested path.""" self._interrupt_event.set() + # ---------- diagnostics ---------- + + def _format_error_with_stderr( + self, + prefix: str, + exc: Any = "", + *, + tail_lines: int = _STDERR_TAIL_LINES, + ) -> str: + """Build a user-facing error string for codex failures. + + Appends the last few lines of codex's stderr buffer when available, + passed through agent.redact with force=True so secrets in provider + error responses (auth headers, query-string tokens, sk-* keys) never + leak into chat output or trajectories. The codex CLI's own error + text ('Internal error', 'turn/start failed: ...') is otherwise + opaque and forces users to re-run with verbose flags to diagnose + config / provider / auth-bridge problems. + + Use this for the generic / catch-all branches. Specific + classifications (OAuth via _classify_oauth_failure, post-tool wedge + watchdog) already produce a clean hint and should be used instead. + """ + exc_str = str(exc) if exc != "" and exc is not None else "" + base = f"{prefix}: {exc_str}" if exc_str else prefix + if self._client is None: + return base + try: + tail = self._client.stderr_tail(tail_lines) + except Exception: # pragma: no cover - diagnostic best-effort + return base + if not tail: + return base + joined = "\n".join(line.rstrip() for line in tail if line) + if not joined.strip(): + return base + redacted = redact_sensitive_text(joined, force=True) + return f"{base}\ncodex stderr (last {len(tail)} lines):\n{redacted}" + # ---------- per-turn ---------- def run_turn( @@ -296,12 +343,27 @@ class CodexAppServerSession: Mirrors openclaw beta.8's post-tool completion watchdog (#81697) so a wedged codex doesn't burn the full turn deadline. """ - self.ensure_started() + # Pre-create the result so startup failures (codex subprocess can't + # spawn, initialize handshake rejects, thread/start blows up) surface + # the same way per-turn failures do — with a TurnResult.error string + # the caller can render — instead of bubbling raw codex exceptions + # up to AIAgent.run_conversation. + result = TurnResult() + try: + self.ensure_started() + except (CodexAppServerError, TimeoutError) as exc: + result.error = self._format_error_with_stderr( + "codex app-server startup failed", exc + ) + # Subprocess almost certainly unhealthy — retire so the next + # turn re-spawns cleanly. + result.should_retire = True + return result assert self._client is not None and self._thread_id is not None + result.thread_id = self._thread_id self._interrupt_event.clear() projector = CodexEventProjector() - result = TurnResult(thread_id=self._thread_id) # Send turn/start with the user input. Text-only for now (codex # supports rich content but Hermes' text path is the common case). @@ -327,13 +389,17 @@ class CodexAppServerSession: # via `codex login` between turns). result.should_retire = True else: - result.error = f"turn/start failed: {exc}" + result.error = self._format_error_with_stderr( + "turn/start failed", exc + ) return result except TimeoutError as exc: # turn/start hanging is a strong signal the subprocess is wedged. stderr_blob = "\n".join(self._client.stderr_tail(40)) hint = _classify_oauth_failure(stderr_blob) - result.error = hint or f"turn/start timed out: {exc}" + result.error = hint or self._format_error_with_stderr( + "turn/start timed out", exc + ) result.should_retire = True return result @@ -359,10 +425,13 @@ class CodexAppServerSession: if not self._client.is_alive(): stderr_blob = "\n".join(self._client.stderr_tail(60)) hint = _classify_oauth_failure(stderr_blob) - result.error = hint or ( - f"codex app-server subprocess exited unexpectedly: " - f"{stderr_blob[-300:] if stderr_blob else ''}" - ) + if hint is not None: + result.error = hint + else: + result.error = self._format_error_with_stderr( + "codex app-server subprocess exited unexpectedly", + tail_lines=20, + ) result.should_retire = True break @@ -489,8 +558,8 @@ class CodexAppServerSession: result.error = hint result.should_retire = True else: - result.error = ( - f"turn ended status={turn_status}: {err_msg}" + result.error = self._format_error_with_stderr( + f"turn ended status={turn_status}", err_msg ) if not turn_complete and not result.interrupted: @@ -500,7 +569,10 @@ class CodexAppServerSession: # turn shouldn't inherit. self._issue_interrupt(result.turn_id) result.interrupted = True - result.error = result.error or f"turn timed out after {turn_timeout}s" + if not result.error: + result.error = self._format_error_with_stderr( + f"turn timed out after {turn_timeout}s" + ) result.should_retire = True return result diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py index e74d5a20c18..f51996dd067 100644 --- a/tests/agent/transports/test_codex_app_server_session.py +++ b/tests/agent/transports/test_codex_app_server_session.py @@ -231,6 +231,86 @@ class TestRunTurn: assert "bad input" in r.error assert r.final_text == "" + def test_turn_start_failure_attaches_redacted_stderr_tail(self): + """When codex stderr has content (non-OAuth), the tail gets attached + to the user-facing error so config/provider problems are debuggable + instead of just 'Internal error'. Secrets in stderr are redacted + via agent.redact(force=True).""" + client = FakeClient() + client.set_stderr_tail([ + "ERROR: provider auth failed", + "Authorization: Bearer sk-live-deadbeefdeadbeef", + "url=https://api.example.com/v1?token=querysecret12345", + ]) + from agent.transports.codex_app_server import CodexAppServerError + + def boom(method, params): + if method == "turn/start": + raise CodexAppServerError(code=-32603, message="Internal error") + return {"thread": {"id": "t"}, "activePermissionProfile": {"id": "x"}} + + client._request_handler = boom + s = make_session(client) + r = s.run_turn("hi", turn_timeout=2.0) + assert r.error is not None + assert "turn/start failed" in r.error + assert "Internal error" in r.error + # Stderr tail attached + assert "codex stderr" in r.error + assert "provider auth failed" in r.error + # Secrets redacted + assert "sk-live-deadbeefdeadbeef" not in r.error + assert "querysecret12345" not in r.error + # Non-OAuth → should NOT retire (subprocess JSON-RPC is still healthy). + assert r.should_retire is False + + def test_turn_start_timeout_attaches_redacted_stderr_tail(self): + """A non-OAuth TimeoutError on turn/start surfaces with codex stderr + context attached and marks the session for retirement.""" + client = FakeClient() + client.set_stderr_tail([ + "WARN: provider request stalled", + "Authorization: Bearer sk-stalled-secret-abc123", + ]) + + def stall(method, params): + if method == "turn/start": + raise TimeoutError("codex method 'turn/start' timed out after 10s") + return {"thread": {"id": "t"}, "activePermissionProfile": {"id": "x"}} + + client._request_handler = stall + s = make_session(client) + r = s.run_turn("hi", turn_timeout=2.0) + assert r.error is not None + assert "turn/start timed out" in r.error + assert "provider request stalled" in r.error + assert "sk-stalled-secret-abc123" not in r.error + assert r.should_retire is True + + def test_startup_failure_returns_error_with_stderr(self): + """Codex thread/start failures during ensure_started() used to bubble + up as uncaught exceptions. Now they return a TurnResult.error so + AIAgent surfaces a clean diagnostic instead of crashing the turn.""" + client = FakeClient() + client.set_stderr_tail([ + "FATAL: model_provider 'azure_foundry' not configured", + ]) + from agent.transports.codex_app_server import CodexAppServerError + + def boom(method, params): + if method == "thread/start": + raise CodexAppServerError(code=-32603, message="Internal error") + return {} + + client._request_handler = boom + s = make_session(client) + r = s.run_turn("hi", turn_timeout=2.0) + assert r.error is not None + assert "startup failed" in r.error + assert "model_provider 'azure_foundry' not configured" in r.error + assert r.should_retire is True + assert r.final_text == "" + def test_interrupt_during_turn_issues_turn_interrupt(self): client = FakeClient() # Don't queue turn/completed — the loop has to interrupt out