diff --git a/agent/transports/codex_app_server_session.py b/agent/transports/codex_app_server_session.py index 619cfeabfc1..8775b54edb4 100644 --- a/agent/transports/codex_app_server_session.py +++ b/agent/transports/codex_app_server_session.py @@ -63,6 +63,73 @@ class TurnResult: error: Optional[str] = None # Set if turn ended in a non-recoverable error turn_id: Optional[str] = None thread_id: Optional[str] = None + # Hint to the caller that the underlying codex subprocess is likely + # wedged (turn-level timeout fired, post-tool watchdog tripped, or + # token-refresh failure killed the child). The caller should retire + # the session so the next turn respawns codex from scratch instead + # of riding a CPU-spinning or auth-broken process. Mirrors openclaw + # beta.8's "retire timed-out app-server clients" fix. + should_retire: bool = False + + +# Markers we accept as terminal even when codex never emits turn/completed. +# Some codex versions stream `` as raw text in agentMessage +# items when an interrupt or upstream error tears the turn down before the +# normal completion path fires. Mirrors openclaw beta.8 fix. +_TURN_ABORTED_MARKERS = ("", "") + + +# Substrings in codex stderr / JSON-RPC error messages that signal the +# subprocess died because its OAuth credentials are no longer valid. +# Kept conservative: we only redirect users to `codex login` when we're +# reasonably sure that's the actual failure, otherwise we surface the +# original error verbatim. Mirrors openclaw beta.8's auth-refresh +# classification. +_OAUTH_REFRESH_FAILURE_HINTS = ( + "invalid_grant", + "invalid grant", + "refresh token", + "refresh_token", + "token refresh", + "token_refresh", + "token has expired", + "expired_token", + "expired token", + "not authenticated", + "unauthenticated", + "unauthorized", + "401 unauthorized", + "re-authenticate", + "reauthenticate", + "please log in", + "please login", + "auth profile", + "no auth profile", + "oauth", +) + + +def _classify_oauth_failure(*parts: str) -> Optional[str]: + """Return a user-friendly re-auth hint if any of the provided strings + look like a codex OAuth/token-refresh failure; otherwise None. + + Used for both `turn/start` JSON-RPC errors and post-mortem stderr + inspection when the subprocess exits unexpectedly. Conservative on + purpose — we only redirect users to `codex login` when the signal + is strong, so unrelated runtime failures still surface verbatim. + """ + haystack = " ".join(p for p in parts if p).lower() + if not haystack: + return None + for needle in _OAUTH_REFRESH_FAILURE_HINTS: + if needle in haystack: + return ( + "Codex authentication failed — your ChatGPT/Codex login " + "looks expired or invalid. Run `codex login` to refresh, " + "then retry. (Fall back to default runtime with " + "`/codex-runtime auto` if the issue persists.)" + ) + return None @dataclass @@ -156,7 +223,26 @@ class CodexAppServerSession: # ~/.codex/config.toml the same way they would for any codex usage. params: dict[str, Any] = {"cwd": self._cwd} result = self._client.request("thread/start", params, timeout=15) - self._thread_id = result["thread"]["id"] + # Cross-fill thread.id/sessionId — different codex versions have + # serialized this under either key. Mirrors openclaw beta.8's + # tolerance fix so future codex drops/renames don't KeyError us + # at handshake time. + thread_obj = result.get("thread") or {} + thread_id = ( + thread_obj.get("id") + or thread_obj.get("sessionId") + or result.get("sessionId") + or result.get("threadId") + ) + if not thread_id: + raise CodexAppServerError( + code=-32603, + message=( + "codex thread/start returned no thread id " + f"(payload keys: {sorted(result.keys())})" + ), + ) + self._thread_id = thread_id logger.info( "codex app-server thread started: id=%s profile=%s cwd=%s", self._thread_id[:8], @@ -198,10 +284,18 @@ class CodexAppServerSession: *, turn_timeout: float = 600.0, notification_poll_timeout: float = 0.25, + post_tool_quiet_timeout: float = 90.0, ) -> TurnResult: """Send a user message and block until turn/completed, while forwarding server-initiated approval requests and projecting items - into Hermes' messages shape.""" + into Hermes' messages shape. + + post_tool_quiet_timeout: if codex emits a tool completion and then + goes quiet for this many seconds without emitting another item or + `turn/completed`, fast-fail and mark the session for retirement. + Mirrors openclaw beta.8's post-tool completion watchdog (#81697) + so a wedged codex doesn't burn the full turn deadline. + """ self.ensure_started() assert self._client is not None and self._thread_id is not None @@ -221,12 +315,36 @@ class CodexAppServerSession: timeout=10, ) except CodexAppServerError as exc: - result.error = f"turn/start failed: {exc}" + # Classify auth/refresh failures so the user gets a clear + # `codex login` pointer instead of a raw RPC error string. + stderr_blob = "\n".join(self._client.stderr_tail(40)) + hint = _classify_oauth_failure(exc.message, stderr_blob) + if hint is not None: + result.error = hint + # Subprocess is fine on a JSON-RPC level here, but the + # token store is broken — retire so the next turn does a + # clean handshake (and the user has a chance to re-auth + # via `codex login` between turns). + result.should_retire = True + else: + result.error = f"turn/start failed: {exc}" + return result + except TimeoutError as exc: + # turn/start hanging is a strong signal the subprocess is wedged. + stderr_blob = "\n".join(self._client.stderr_tail(40)) + hint = _classify_oauth_failure(stderr_blob) + result.error = hint or f"turn/start timed out: {exc}" + result.should_retire = True return result result.turn_id = (ts.get("turn") or {}).get("id") deadline = time.time() + turn_timeout turn_complete = False + # Post-tool watchdog state. last_tool_completion_at is set whenever + # a tool-shaped item completes; if no further notification arrives + # within post_tool_quiet_timeout and the turn hasn't completed, we + # fast-fail and retire the session. + last_tool_completion_at: Optional[float] = None while time.time() < deadline and not turn_complete: if self._interrupt_event.is_set(): @@ -234,6 +352,38 @@ class CodexAppServerSession: result.interrupted = True break + # Detect a dead subprocess between iterations. If codex exited + # (e.g. crashed, segfaulted, or its auth refresh thread killed + # the process), we won't get any more notifications — bail out + # rather than waiting for the full turn deadline. + if not self._client.is_alive(): + stderr_blob = "\n".join(self._client.stderr_tail(60)) + hint = _classify_oauth_failure(stderr_blob) + result.error = hint or ( + f"codex app-server subprocess exited unexpectedly: " + f"{stderr_blob[-300:] if stderr_blob else ''}" + ) + result.should_retire = True + break + + # Post-tool watchdog: if a tool completion was the most recent + # signal and codex has been silent past the quiet timeout, give + # up on this turn instead of waiting for the outer deadline. + if ( + last_tool_completion_at is not None + and (time.time() - last_tool_completion_at) + > post_tool_quiet_timeout + ): + self._issue_interrupt(result.turn_id) + result.interrupted = True + result.error = ( + f"codex went silent for " + f"{post_tool_quiet_timeout:.0f}s after a tool result; " + f"retiring app-server session." + ) + result.should_retire = True + break + # Drain any server-initiated requests (approvals) before # reading notifications, so the codex side isn't blocked. sreq = self._client.take_server_request(timeout=0) @@ -252,9 +402,20 @@ class CodexAppServerSession: result.projected_messages.extend(proj.messages) if proj.is_tool_iteration: result.tool_iterations += 1 + last_tool_completion_at = time.time() if proj.final_text is not None: result.final_text = proj.final_text + if _has_turn_aborted_marker(proj.final_text): + turn_complete = True + result.interrupted = True + result.error = ( + result.error + or "codex reported turn_aborted" + ) self._handle_server_request(sreq) + # Activity counts as live signal — reset the post-tool + # quiet timer so an approval round-trip doesn't trip it. + last_tool_completion_at = None continue note = self._client.take_notification( @@ -282,10 +443,29 @@ class CodexAppServerSession: result.projected_messages.extend(projection.messages) if projection.is_tool_iteration: result.tool_iterations += 1 + # Arm/refresh the post-tool quiet watchdog whenever a + # tool-shaped item completes. + last_tool_completion_at = time.time() + else: + # Any non-tool projected activity (assistant message, + # status update, etc.) means codex is still producing + # output — clear the quiet timer so we don't fast-fail. + if projection.messages or projection.final_text is not None: + last_tool_completion_at = None if projection.final_text is not None: # Codex can emit multiple agentMessage items in one turn # (e.g. partial then final). Take the last one as canonical. result.final_text = projection.final_text + # Some codex builds tear a turn down by emitting a + # `` marker in the agent message text and + # never sending turn/completed. Treat the marker itself + # as terminal so we don't burn the full deadline. + if _has_turn_aborted_marker(projection.final_text): + turn_complete = True + result.interrupted = True + result.error = ( + result.error or "codex reported turn_aborted" + ) if method == "turn/completed": turn_complete = True @@ -297,16 +477,31 @@ class CodexAppServerSession: (note.get("params") or {}).get("turn") or {} ).get("error") if err_obj: - result.error = ( - f"turn ended status={turn_status}: " - f"{err_obj.get('message') or err_obj}" + err_msg = err_obj.get("message") or str(err_obj) + # If the turn failed for an auth/refresh reason, + # rewrite the error into a re-auth hint AND mark + # the session for retirement. + stderr_blob = "\n".join( + self._client.stderr_tail(40) ) + hint = _classify_oauth_failure(err_msg, stderr_blob) + if hint is not None: + result.error = hint + result.should_retire = True + else: + result.error = ( + f"turn ended status={turn_status}: {err_msg}" + ) if not turn_complete and not result.interrupted: - # Hit the deadline. Issue interrupt to stop wasted compute. + # Hit the deadline. Issue interrupt to stop wasted compute, and + # tell the caller to retire the session — a turn that never + # finished is a strong sign codex is wedged in a way the next + # turn shouldn't inherit. self._issue_interrupt(result.turn_id) result.interrupted = True result.error = result.error or f"turn timed out after {turn_timeout}s" + result.should_retire = True return result @@ -515,6 +710,24 @@ def _approval_choice_to_codex_decision(choice: str) -> str: return "decline" +def _has_turn_aborted_marker(text: str) -> bool: + """Return True if `text` contains any of the raw markers codex uses + to signal a turn was aborted without emitting `turn/completed`. + + Codex emits `` (and sometimes ``) as raw + text inside agentMessage items when an interrupt or upstream error + tears the turn down before the normal completion path fires. Mirrors + openclaw beta.8's terminal-marker fix so we don't burn the full turn + deadline waiting for a turn/completed that never comes. + """ + if not text: + return False + for marker in _TURN_ABORTED_MARKERS: + if marker in text: + return True + return False + + def _get_hermes_version() -> str: """Best-effort Hermes version string for codex's userAgent line.""" try: diff --git a/run_agent.py b/run_agent.py index d995c607de6..b60f6c43ce6 100644 --- a/run_agent.py +++ b/run_agent.py @@ -15721,6 +15721,13 @@ class AIAgent: turn = self._codex_session.run_turn(user_input=user_message) except Exception as exc: logger.exception("codex app-server turn failed") + # Crash → unconditionally drop the session so the next turn + # respawns from scratch instead of reusing a dead client. + try: + self._codex_session.close() + except Exception: + pass + self._codex_session = None return { "final_response": ( f"Codex app-server turn failed: {exc}. " @@ -15733,6 +15740,22 @@ class AIAgent: "error": str(exc), } + # If the turn signalled the underlying client is wedged (deadline + # blown, post-tool watchdog tripped, OAuth refresh died, subprocess + # exited), retire the session so the next turn respawns codex + # rather than riding the broken process. Mirrors openclaw beta.8's + # "retire timed-out app-server clients" fix. + if getattr(turn, "should_retire", False): + logger.warning( + "codex app-server session retired (turn error: %s)", + turn.error, + ) + try: + self._codex_session.close() + except Exception: + pass + self._codex_session = None + # Splice projected messages into the conversation. The projector emits # standard {role, content, tool_calls, tool_call_id} entries, which # is exactly what curator.py / sessions DB expect. diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py index de0b2f60cb8..e74d5a20c18 100644 --- a/tests/agent/transports/test_codex_app_server_session.py +++ b/tests/agent/transports/test_codex_app_server_session.py @@ -84,6 +84,14 @@ class FakeClient: def close(self): self._closed = True + def is_alive(self) -> bool: + # Fake is "alive" until close() is called; tests that want a dead + # subprocess can patch this attribute or call close() directly. + return not self._closed + + def stderr_tail(self, n: int = 20): + return list(getattr(self, "_stderr_tail", []))[-n:] + # Test helpers def queue_notification(self, method: str, **params): self._notifications.append({"method": method, "params": params}) @@ -91,6 +99,10 @@ class FakeClient: def queue_server_request(self, method: str, request_id: Any = "srv-1", **params): self._server_requests.append({"id": request_id, "method": method, "params": params}) + def set_stderr_tail(self, lines): + """Test helper: seed stderr_tail() output for OAuth-refresh classifier tests.""" + self._stderr_tail = list(lines) + def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession: return CodexAppServerSession( @@ -500,3 +512,385 @@ class TestApprovalPromptEnrichment: s.run_turn("hi", turn_timeout=1.0) # Falls back to the reason assert "apply some changes" in captured["command"] + + +# ---- openclaw beta.8 parity: retire/wedge/oauth/abort marker ---- + +class TestSessionRetirement: + """Mirrors openclaw beta.8's resilience fixes: + - retire timed-out app-server clients (should_retire on deadline) + - post-tool completion watchdog (don't burn the full deadline after a + tool result if codex goes silent) + - raw marker as terminal (don't wait for turn/completed + that never comes) + - OAuth refresh failure classification (suggest `codex login` instead + of raw RPC error strings) + - dead subprocess detection between iterations + """ + + def test_deadline_marks_session_for_retirement(self): + client = FakeClient() + s = make_session(client) + r = s.run_turn( + "never finishes", + turn_timeout=0.05, + notification_poll_timeout=0.01, + ) + assert r.interrupted is True + assert r.error and "timed out" in r.error + assert r.should_retire is True, ( + "Deadline exhaustion must signal retirement so the next turn " + "respawns codex instead of riding a wedged subprocess." + ) + + def test_completed_turn_does_not_retire(self): + client = FakeClient() + client.queue_notification( + "item/completed", + item={"type": "agentMessage", "id": "m1", "text": "hi"}, + threadId="t", turnId="tu1", + ) + client.queue_notification( + "turn/completed", threadId="t", + turn={"id": "tu1", "status": "completed", "error": None}, + ) + s = make_session(client) + r = s.run_turn("hi", turn_timeout=1.0) + assert r.should_retire is False + + def test_post_tool_quiet_watchdog_trips_and_retires(self): + client = FakeClient() + # One tool completion, then total silence — no further events, + # no turn/completed. With a tiny post_tool_quiet_timeout the + # watchdog must fire before the larger turn deadline. + client.queue_notification( + "item/completed", + item={ + "type": "commandExecution", "id": "ex1", + "command": "echo hi", "cwd": "/tmp", + "status": "completed", "aggregatedOutput": "hi", + "exitCode": 0, "commandActions": [], + }, + threadId="t", turnId="tu1", + ) + s = make_session(client) + r = s.run_turn( + "tool then silence", + turn_timeout=5.0, # would be miserable to wait + notification_poll_timeout=0.02, + post_tool_quiet_timeout=0.15, + ) + assert r.interrupted is True + assert r.should_retire is True + assert r.error and "silent" in r.error + # Confirm we issued turn/interrupt to free codex compute + assert any(method == "turn/interrupt" for (method, _) in client.requests) + + def test_post_tool_watchdog_resets_on_further_activity(self): + """A tool completion followed by an agent message should NOT trip + the watchdog — further activity = codex still alive.""" + client = FakeClient() + client.queue_notification( + "item/completed", + item={ + "type": "commandExecution", "id": "ex1", + "command": "echo hi", "cwd": "/tmp", + "status": "completed", "aggregatedOutput": "hi", + "exitCode": 0, "commandActions": [], + }, + threadId="t", turnId="tu1", + ) + # Non-tool activity immediately after — resets watchdog. + client.queue_notification( + "item/completed", + item={"type": "agentMessage", "id": "m1", "text": "tool finished"}, + threadId="t", turnId="tu1", + ) + client.queue_notification( + "turn/completed", threadId="t", + turn={"id": "tu1", "status": "completed", "error": None}, + ) + s = make_session(client) + r = s.run_turn( + "tool then talk", turn_timeout=2.0, + notification_poll_timeout=0.01, + post_tool_quiet_timeout=0.05, + ) + # Tool ran, then text reset the watchdog, then turn/completed. + # Should NOT be a retirement case. + assert r.tool_iterations == 1 + assert r.final_text == "tool finished" + assert r.should_retire is False + assert r.interrupted is False + + def test_turn_aborted_marker_in_text_is_terminal(self): + """If codex emits `` in agent text and never sends + turn/completed, we still exit promptly instead of burning the + deadline.""" + client = FakeClient() + client.queue_notification( + "item/completed", + item={ + "type": "agentMessage", "id": "m1", + "text": "partial output... ", + }, + threadId="t", turnId="tu1", + ) + # Deliberately NO turn/completed notification queued. + s = make_session(client) + r = s.run_turn( + "abort mid-turn", turn_timeout=2.0, + notification_poll_timeout=0.01, + ) + assert r.interrupted is True + assert r.error and "turn_aborted" in r.error + # Should have exited fast — not waited for the full 2s deadline. + # (Can't measure wall clock reliably in CI; presence of the marker + # error string instead of a "timed out" message is the proxy.) + assert "timed out" not in r.error + + def test_turn_aborted_self_closing_marker_also_terminal(self): + client = FakeClient() + client.queue_notification( + "item/completed", + item={"type": "agentMessage", "id": "m1", + "text": ""}, + threadId="t", turnId="tu1", + ) + s = make_session(client) + r = s.run_turn("x", turn_timeout=2.0, + notification_poll_timeout=0.01) + assert r.interrupted is True + assert r.error and "turn_aborted" in r.error + + def test_oauth_refresh_failure_on_turn_start_suggests_login(self): + from agent.transports.codex_app_server import CodexAppServerError + + client = FakeClient() + + def boom(method, params): + if method == "turn/start": + raise CodexAppServerError( + code=-32603, + message="auth refresh failed: invalid_grant", + ) + return {"thread": {"id": "t"}, + "activePermissionProfile": {"id": "x"}} + + client._request_handler = boom + s = make_session(client) + r = s.run_turn("hi", turn_timeout=1.0) + assert r.error is not None + assert "codex login" in r.error + assert r.should_retire is True + + def test_oauth_failure_from_stderr_on_turn_start_failure(self): + """If the RPC error itself is opaque but stderr shows an auth + problem, we still classify it as a refresh failure.""" + from agent.transports.codex_app_server import CodexAppServerError + + client = FakeClient() + client.set_stderr_tail([ + "[2026-05-14T10:00:00Z WARN codex_core::auth] token refresh failed", + "[2026-05-14T10:00:00Z ERROR codex_core] please log in again", + ]) + + def boom(method, params): + if method == "turn/start": + raise CodexAppServerError(code=-32603, message="rpc broke") + return {"thread": {"id": "t"}, + "activePermissionProfile": {"id": "x"}} + + client._request_handler = boom + s = make_session(client) + r = s.run_turn("hi", turn_timeout=1.0) + assert r.error is not None + assert "codex login" in r.error + assert r.should_retire is True + + def test_oauth_failure_in_turn_completed_error(self): + """A failed turn/completed whose error mentions auth/refresh + triggers the re-auth hint + retirement.""" + client = FakeClient() + client.queue_notification( + "turn/completed", threadId="t", + turn={ + "id": "tu1", "status": "failed", + "error": {"message": "401 Unauthorized: please reauthenticate"}, + }, + ) + s = make_session(client) + r = s.run_turn("x", turn_timeout=1.0, + notification_poll_timeout=0.01) + assert r.error is not None + assert "codex login" in r.error + assert r.should_retire is True + + def test_generic_turn_failure_does_not_trigger_oauth_hint(self): + """A boring model error must NOT rewrite the message into a fake + re-auth hint. Conservative classifier.""" + client = FakeClient() + client.queue_notification( + "turn/completed", threadId="t", + turn={ + "id": "tu1", "status": "failed", + "error": {"message": "rate limit exceeded"}, + }, + ) + s = make_session(client) + r = s.run_turn("x", turn_timeout=1.0, + notification_poll_timeout=0.01) + assert r.error is not None + assert "codex login" not in r.error + assert "rate limit exceeded" in r.error + # Generic model failures don't retire — the session itself is fine + assert r.should_retire is False + + def test_dead_subprocess_detected_between_iterations(self): + """If codex dies (segfault, OOM, killed by its auth refresh + thread), the inter-iteration is_alive check breaks the loop + instead of waiting on a queue that will never fill.""" + client = FakeClient() + s = make_session(client) + s.ensure_started() + # Simulate subprocess death by setting _closed (FakeClient's + # is_alive returns False when closed). + client._closed = True + client.set_stderr_tail([ + "thread 'tokio-runtime-worker' panicked at 'oauth: invalid_grant'", + ]) + r = s.run_turn("x", turn_timeout=2.0, + notification_poll_timeout=0.01) + assert r.should_retire is True + # Stderr-derived auth hint takes precedence over generic message + assert r.error and "codex login" in r.error + + +# ---- thread/start cross-fill ---- + +class TestThreadStartCrossFill: + """Mirrors openclaw beta.8's tolerance for thread.id/sessionId aliasing.""" + + def test_thread_id_under_thread_key(self): + client = FakeClient() + s = make_session(client) + tid = s.ensure_started() + assert tid == "thread-fake-001" + + def test_thread_session_id_alias_under_thread_key(self): + client = FakeClient() + client._request_handler = lambda method, params: ( + {"thread": {"sessionId": "alias-1"}, + "activePermissionProfile": {"id": "x"}} + if method == "thread/start" else + {"turn": {"id": "tu1"}} if method == "turn/start" else {} + ) + s = make_session(client) + tid = s.ensure_started() + assert tid == "alias-1" + + def test_top_level_session_id_fallback(self): + client = FakeClient() + client._request_handler = lambda method, params: ( + {"sessionId": "top-1"} if method == "thread/start" else + {"turn": {"id": "tu1"}} if method == "turn/start" else {} + ) + s = make_session(client) + tid = s.ensure_started() + assert tid == "top-1" + + def test_missing_thread_id_raises(self): + from agent.transports.codex_app_server import CodexAppServerError + + client = FakeClient() + client._request_handler = lambda method, params: ( + {"thread": {}, "activePermissionProfile": {"id": "x"}} + if method == "thread/start" else + {"turn": {"id": "tu1"}} + ) + s = make_session(client) + with pytest.raises(CodexAppServerError, match="no thread id"): + s.ensure_started() + + +class TestHasTurnAbortedMarker: + """Unit coverage for the marker matcher itself.""" + + def test_empty_string(self): + from agent.transports.codex_app_server_session import ( + _has_turn_aborted_marker, + ) + assert _has_turn_aborted_marker("") is False + assert _has_turn_aborted_marker(None) is False # type: ignore[arg-type] + + def test_plain_text_no_marker(self): + from agent.transports.codex_app_server_session import ( + _has_turn_aborted_marker, + ) + assert _has_turn_aborted_marker("normal response with no markers") is False + + def test_open_marker(self): + from agent.transports.codex_app_server_session import ( + _has_turn_aborted_marker, + ) + assert _has_turn_aborted_marker("blah blah") is True + + def test_self_closing_marker(self): + from agent.transports.codex_app_server_session import ( + _has_turn_aborted_marker, + ) + assert _has_turn_aborted_marker("") is True + + +class TestClassifyOAuthFailure: + """Unit coverage for the OAuth classifier; conservative on purpose.""" + + def test_invalid_grant_classified(self): + from agent.transports.codex_app_server_session import ( + _classify_oauth_failure, + ) + hint = _classify_oauth_failure("error: invalid_grant returned by server") + assert hint is not None + assert "codex login" in hint + + def test_token_refresh_classified(self): + from agent.transports.codex_app_server_session import ( + _classify_oauth_failure, + ) + hint = _classify_oauth_failure("token refresh failed: network error") + assert hint is not None + assert "codex login" in hint + + def test_401_classified(self): + from agent.transports.codex_app_server_session import ( + _classify_oauth_failure, + ) + hint = _classify_oauth_failure("HTTP 401 Unauthorized") + assert hint is not None + + def test_generic_error_not_classified(self): + from agent.transports.codex_app_server_session import ( + _classify_oauth_failure, + ) + assert _classify_oauth_failure("connection reset") is None + assert _classify_oauth_failure("model returned bad json") is None + assert _classify_oauth_failure("rate limit exceeded") is None + + def test_empty_inputs(self): + from agent.transports.codex_app_server_session import ( + _classify_oauth_failure, + ) + assert _classify_oauth_failure() is None + assert _classify_oauth_failure("") is None + assert _classify_oauth_failure("", None) is None # type: ignore[arg-type] + + def test_multi_string_search(self): + """Hint can come from any of the provided strings.""" + from agent.transports.codex_app_server_session import ( + _classify_oauth_failure, + ) + hint = _classify_oauth_failure( + "rpc returned -32603", + "[stderr] token has expired, run codex login", + ) + assert hint is not None diff --git a/tests/run_agent/test_codex_app_server_integration.py b/tests/run_agent/test_codex_app_server_integration.py index 6fc60695d2a..46e47bae13e 100644 --- a/tests/run_agent/test_codex_app_server_integration.py +++ b/tests/run_agent/test_codex_app_server_integration.py @@ -342,3 +342,77 @@ class TestErrorHandling: assert result["completed"] is False assert result["partial"] is True assert result["error"] == "user interrupted" + + +class TestSessionRetirementOnRunAgent: + """run_agent.py side: when run_turn returns should_retire=True, the + AIAgent must close + null _codex_session so the next turn respawns.""" + + def test_should_retire_drops_session(self, monkeypatch): + closes = {"count": 0} + + def fake_run_turn(self, user_input, **kwargs): + return TurnResult( + final_text="", + projected_messages=[], + tool_iterations=0, + interrupted=True, + error="turn timed out after 600.0s", + turn_id="tu1", + thread_id="th1", + should_retire=True, + ) + + def fake_close(self): + closes["count"] += 1 + + monkeypatch.setattr(CodexAppServerSession, "ensure_started", + lambda self: "th1") + monkeypatch.setattr(CodexAppServerSession, "run_turn", fake_run_turn) + monkeypatch.setattr(CodexAppServerSession, "close", fake_close) + + agent = _make_codex_agent() + with patch.object(agent, "_spawn_background_review", return_value=None): + result = agent.run_conversation("hi") + + # The session was closed and cleared + assert closes["count"] == 1 + assert getattr(agent, "_codex_session", "MISSING") is None + # Partial result was still returned (caller still sees the error) + assert result["partial"] is True + assert result["error"] == "turn timed out after 600.0s" + + def test_normal_turn_keeps_session(self, fake_session): + """fake_session fixture returns should_retire=False (default). + The session must stay attached for the next turn to reuse.""" + agent = _make_codex_agent() + with patch.object(agent, "_spawn_background_review", return_value=None): + agent.run_conversation("hi") + # Session was lazily created and still attached. + assert getattr(agent, "_codex_session", None) is not None + + def test_exception_path_also_drops_session(self, monkeypatch): + """Even if run_turn raises (not just sets should_retire), we must + drop the session — a thrown exception is the strongest possible + signal the process is dead.""" + closes = {"count": 0} + + def boom_run_turn(self, user_input, **kwargs): + raise RuntimeError("codex segfaulted") + + def fake_close(self): + closes["count"] += 1 + + monkeypatch.setattr(CodexAppServerSession, "ensure_started", + lambda self: "th1") + monkeypatch.setattr(CodexAppServerSession, "run_turn", boom_run_turn) + monkeypatch.setattr(CodexAppServerSession, "close", fake_close) + + agent = _make_codex_agent() + with patch.object(agent, "_spawn_background_review", return_value=None): + result = agent.run_conversation("hi") + + assert closes["count"] == 1 + assert agent._codex_session is None + assert result["completed"] is False + assert "codex segfaulted" in result["error"]