mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(codex-runtime): retire wedged sessions + post-tool watchdog + OAuth refresh classify (#25769)
Mirrors openclaw beta.8's app-server resilience fixes so a stuck codex subprocess can't burn the full turn deadline and so users get a `codex login` pointer instead of raw RPC errors when their token expires. - TurnResult.should_retire signals the caller to drop+respawn codex. - Deadline-hit path and dead-subprocess detection set should_retire so the next turn doesn't ride a CPU-spinning or auth-broken process. - Post-tool watchdog (post_tool_quiet_timeout=90s): if a tool item completes and codex goes silent past the threshold without further output or turn/completed, fast-fail instead of waiting the full 600s. Resets on any non-tool activity so normal think-after-tool flows are not affected. - <turn_aborted> and <turn_aborted/> in agent text are treated as terminal — some codex builds tear down a turn that way without emitting turn/completed. - _classify_oauth_failure() inspects RPC error message + stderr tail for invalid_grant / token refresh / 401 / etc. and rewrites user-facing errors to 'run codex login'. Conservative: generic failures still surface verbatim. Fires at turn/start failure, turn/completed failure, and dead-subprocess paths. - thread/start cross-fill: tolerate thread.id, thread.sessionId, top-level sessionId/threadId so future codex schema drift doesn't KeyError us at handshake. - run_agent.py: when run_turn returns should_retire=True OR raises, close + null self._codex_session so the next turn respawns. Tests: +30 cases across session + integration suites. tests/agent/transports/test_codex_app_server_session.py 50/50 pass tests/run_agent/test_codex_app_server_integration.py 27/27 pass Broader codex scope (transports + cli runtime/migration) 376/376 pass
This commit is contained in:
parent
63991bbd97
commit
12f755c9eb
4 changed files with 711 additions and 7 deletions
|
|
@ -63,6 +63,73 @@ class TurnResult:
|
||||||
error: Optional[str] = None # Set if turn ended in a non-recoverable error
|
error: Optional[str] = None # Set if turn ended in a non-recoverable error
|
||||||
turn_id: Optional[str] = None
|
turn_id: Optional[str] = None
|
||||||
thread_id: Optional[str] = None
|
thread_id: Optional[str] = None
|
||||||
|
# Hint to the caller that the underlying codex subprocess is likely
|
||||||
|
# wedged (turn-level timeout fired, post-tool watchdog tripped, or
|
||||||
|
# token-refresh failure killed the child). The caller should retire
|
||||||
|
# the session so the next turn respawns codex from scratch instead
|
||||||
|
# of riding a CPU-spinning or auth-broken process. Mirrors openclaw
|
||||||
|
# beta.8's "retire timed-out app-server clients" fix.
|
||||||
|
should_retire: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
# Markers we accept as terminal even when codex never emits turn/completed.
|
||||||
|
# Some codex versions stream `<turn_aborted>` as raw text in agentMessage
|
||||||
|
# items when an interrupt or upstream error tears the turn down before the
|
||||||
|
# normal completion path fires. Mirrors openclaw beta.8 fix.
|
||||||
|
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
|
||||||
|
|
||||||
|
|
||||||
|
# Substrings in codex stderr / JSON-RPC error messages that signal the
|
||||||
|
# subprocess died because its OAuth credentials are no longer valid.
|
||||||
|
# Kept conservative: we only redirect users to `codex login` when we're
|
||||||
|
# reasonably sure that's the actual failure, otherwise we surface the
|
||||||
|
# original error verbatim. Mirrors openclaw beta.8's auth-refresh
|
||||||
|
# classification.
|
||||||
|
_OAUTH_REFRESH_FAILURE_HINTS = (
|
||||||
|
"invalid_grant",
|
||||||
|
"invalid grant",
|
||||||
|
"refresh token",
|
||||||
|
"refresh_token",
|
||||||
|
"token refresh",
|
||||||
|
"token_refresh",
|
||||||
|
"token has expired",
|
||||||
|
"expired_token",
|
||||||
|
"expired token",
|
||||||
|
"not authenticated",
|
||||||
|
"unauthenticated",
|
||||||
|
"unauthorized",
|
||||||
|
"401 unauthorized",
|
||||||
|
"re-authenticate",
|
||||||
|
"reauthenticate",
|
||||||
|
"please log in",
|
||||||
|
"please login",
|
||||||
|
"auth profile",
|
||||||
|
"no auth profile",
|
||||||
|
"oauth",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_oauth_failure(*parts: str) -> Optional[str]:
|
||||||
|
"""Return a user-friendly re-auth hint if any of the provided strings
|
||||||
|
look like a codex OAuth/token-refresh failure; otherwise None.
|
||||||
|
|
||||||
|
Used for both `turn/start` JSON-RPC errors and post-mortem stderr
|
||||||
|
inspection when the subprocess exits unexpectedly. Conservative on
|
||||||
|
purpose — we only redirect users to `codex login` when the signal
|
||||||
|
is strong, so unrelated runtime failures still surface verbatim.
|
||||||
|
"""
|
||||||
|
haystack = " ".join(p for p in parts if p).lower()
|
||||||
|
if not haystack:
|
||||||
|
return None
|
||||||
|
for needle in _OAUTH_REFRESH_FAILURE_HINTS:
|
||||||
|
if needle in haystack:
|
||||||
|
return (
|
||||||
|
"Codex authentication failed — your ChatGPT/Codex login "
|
||||||
|
"looks expired or invalid. Run `codex login` to refresh, "
|
||||||
|
"then retry. (Fall back to default runtime with "
|
||||||
|
"`/codex-runtime auto` if the issue persists.)"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -156,7 +223,26 @@ class CodexAppServerSession:
|
||||||
# ~/.codex/config.toml the same way they would for any codex usage.
|
# ~/.codex/config.toml the same way they would for any codex usage.
|
||||||
params: dict[str, Any] = {"cwd": self._cwd}
|
params: dict[str, Any] = {"cwd": self._cwd}
|
||||||
result = self._client.request("thread/start", params, timeout=15)
|
result = self._client.request("thread/start", params, timeout=15)
|
||||||
self._thread_id = result["thread"]["id"]
|
# Cross-fill thread.id/sessionId — different codex versions have
|
||||||
|
# serialized this under either key. Mirrors openclaw beta.8's
|
||||||
|
# tolerance fix so future codex drops/renames don't KeyError us
|
||||||
|
# at handshake time.
|
||||||
|
thread_obj = result.get("thread") or {}
|
||||||
|
thread_id = (
|
||||||
|
thread_obj.get("id")
|
||||||
|
or thread_obj.get("sessionId")
|
||||||
|
or result.get("sessionId")
|
||||||
|
or result.get("threadId")
|
||||||
|
)
|
||||||
|
if not thread_id:
|
||||||
|
raise CodexAppServerError(
|
||||||
|
code=-32603,
|
||||||
|
message=(
|
||||||
|
"codex thread/start returned no thread id "
|
||||||
|
f"(payload keys: {sorted(result.keys())})"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
self._thread_id = thread_id
|
||||||
logger.info(
|
logger.info(
|
||||||
"codex app-server thread started: id=%s profile=%s cwd=%s",
|
"codex app-server thread started: id=%s profile=%s cwd=%s",
|
||||||
self._thread_id[:8],
|
self._thread_id[:8],
|
||||||
|
|
@ -198,10 +284,18 @@ class CodexAppServerSession:
|
||||||
*,
|
*,
|
||||||
turn_timeout: float = 600.0,
|
turn_timeout: float = 600.0,
|
||||||
notification_poll_timeout: float = 0.25,
|
notification_poll_timeout: float = 0.25,
|
||||||
|
post_tool_quiet_timeout: float = 90.0,
|
||||||
) -> TurnResult:
|
) -> TurnResult:
|
||||||
"""Send a user message and block until turn/completed, while
|
"""Send a user message and block until turn/completed, while
|
||||||
forwarding server-initiated approval requests and projecting items
|
forwarding server-initiated approval requests and projecting items
|
||||||
into Hermes' messages shape."""
|
into Hermes' messages shape.
|
||||||
|
|
||||||
|
post_tool_quiet_timeout: if codex emits a tool completion and then
|
||||||
|
goes quiet for this many seconds without emitting another item or
|
||||||
|
`turn/completed`, fast-fail and mark the session for retirement.
|
||||||
|
Mirrors openclaw beta.8's post-tool completion watchdog (#81697)
|
||||||
|
so a wedged codex doesn't burn the full turn deadline.
|
||||||
|
"""
|
||||||
self.ensure_started()
|
self.ensure_started()
|
||||||
assert self._client is not None and self._thread_id is not None
|
assert self._client is not None and self._thread_id is not None
|
||||||
|
|
||||||
|
|
@ -221,12 +315,36 @@ class CodexAppServerSession:
|
||||||
timeout=10,
|
timeout=10,
|
||||||
)
|
)
|
||||||
except CodexAppServerError as exc:
|
except CodexAppServerError as exc:
|
||||||
result.error = f"turn/start failed: {exc}"
|
# Classify auth/refresh failures so the user gets a clear
|
||||||
|
# `codex login` pointer instead of a raw RPC error string.
|
||||||
|
stderr_blob = "\n".join(self._client.stderr_tail(40))
|
||||||
|
hint = _classify_oauth_failure(exc.message, stderr_blob)
|
||||||
|
if hint is not None:
|
||||||
|
result.error = hint
|
||||||
|
# Subprocess is fine on a JSON-RPC level here, but the
|
||||||
|
# token store is broken — retire so the next turn does a
|
||||||
|
# clean handshake (and the user has a chance to re-auth
|
||||||
|
# via `codex login` between turns).
|
||||||
|
result.should_retire = True
|
||||||
|
else:
|
||||||
|
result.error = f"turn/start failed: {exc}"
|
||||||
|
return result
|
||||||
|
except TimeoutError as exc:
|
||||||
|
# turn/start hanging is a strong signal the subprocess is wedged.
|
||||||
|
stderr_blob = "\n".join(self._client.stderr_tail(40))
|
||||||
|
hint = _classify_oauth_failure(stderr_blob)
|
||||||
|
result.error = hint or f"turn/start timed out: {exc}"
|
||||||
|
result.should_retire = True
|
||||||
return result
|
return result
|
||||||
|
|
||||||
result.turn_id = (ts.get("turn") or {}).get("id")
|
result.turn_id = (ts.get("turn") or {}).get("id")
|
||||||
deadline = time.time() + turn_timeout
|
deadline = time.time() + turn_timeout
|
||||||
turn_complete = False
|
turn_complete = False
|
||||||
|
# Post-tool watchdog state. last_tool_completion_at is set whenever
|
||||||
|
# a tool-shaped item completes; if no further notification arrives
|
||||||
|
# within post_tool_quiet_timeout and the turn hasn't completed, we
|
||||||
|
# fast-fail and retire the session.
|
||||||
|
last_tool_completion_at: Optional[float] = None
|
||||||
|
|
||||||
while time.time() < deadline and not turn_complete:
|
while time.time() < deadline and not turn_complete:
|
||||||
if self._interrupt_event.is_set():
|
if self._interrupt_event.is_set():
|
||||||
|
|
@ -234,6 +352,38 @@ class CodexAppServerSession:
|
||||||
result.interrupted = True
|
result.interrupted = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Detect a dead subprocess between iterations. If codex exited
|
||||||
|
# (e.g. crashed, segfaulted, or its auth refresh thread killed
|
||||||
|
# the process), we won't get any more notifications — bail out
|
||||||
|
# rather than waiting for the full turn deadline.
|
||||||
|
if not self._client.is_alive():
|
||||||
|
stderr_blob = "\n".join(self._client.stderr_tail(60))
|
||||||
|
hint = _classify_oauth_failure(stderr_blob)
|
||||||
|
result.error = hint or (
|
||||||
|
f"codex app-server subprocess exited unexpectedly: "
|
||||||
|
f"{stderr_blob[-300:] if stderr_blob else '<no stderr>'}"
|
||||||
|
)
|
||||||
|
result.should_retire = True
|
||||||
|
break
|
||||||
|
|
||||||
|
# Post-tool watchdog: if a tool completion was the most recent
|
||||||
|
# signal and codex has been silent past the quiet timeout, give
|
||||||
|
# up on this turn instead of waiting for the outer deadline.
|
||||||
|
if (
|
||||||
|
last_tool_completion_at is not None
|
||||||
|
and (time.time() - last_tool_completion_at)
|
||||||
|
> post_tool_quiet_timeout
|
||||||
|
):
|
||||||
|
self._issue_interrupt(result.turn_id)
|
||||||
|
result.interrupted = True
|
||||||
|
result.error = (
|
||||||
|
f"codex went silent for "
|
||||||
|
f"{post_tool_quiet_timeout:.0f}s after a tool result; "
|
||||||
|
f"retiring app-server session."
|
||||||
|
)
|
||||||
|
result.should_retire = True
|
||||||
|
break
|
||||||
|
|
||||||
# Drain any server-initiated requests (approvals) before
|
# Drain any server-initiated requests (approvals) before
|
||||||
# reading notifications, so the codex side isn't blocked.
|
# reading notifications, so the codex side isn't blocked.
|
||||||
sreq = self._client.take_server_request(timeout=0)
|
sreq = self._client.take_server_request(timeout=0)
|
||||||
|
|
@ -252,9 +402,20 @@ class CodexAppServerSession:
|
||||||
result.projected_messages.extend(proj.messages)
|
result.projected_messages.extend(proj.messages)
|
||||||
if proj.is_tool_iteration:
|
if proj.is_tool_iteration:
|
||||||
result.tool_iterations += 1
|
result.tool_iterations += 1
|
||||||
|
last_tool_completion_at = time.time()
|
||||||
if proj.final_text is not None:
|
if proj.final_text is not None:
|
||||||
result.final_text = proj.final_text
|
result.final_text = proj.final_text
|
||||||
|
if _has_turn_aborted_marker(proj.final_text):
|
||||||
|
turn_complete = True
|
||||||
|
result.interrupted = True
|
||||||
|
result.error = (
|
||||||
|
result.error
|
||||||
|
or "codex reported turn_aborted"
|
||||||
|
)
|
||||||
self._handle_server_request(sreq)
|
self._handle_server_request(sreq)
|
||||||
|
# Activity counts as live signal — reset the post-tool
|
||||||
|
# quiet timer so an approval round-trip doesn't trip it.
|
||||||
|
last_tool_completion_at = None
|
||||||
continue
|
continue
|
||||||
|
|
||||||
note = self._client.take_notification(
|
note = self._client.take_notification(
|
||||||
|
|
@ -282,10 +443,29 @@ class CodexAppServerSession:
|
||||||
result.projected_messages.extend(projection.messages)
|
result.projected_messages.extend(projection.messages)
|
||||||
if projection.is_tool_iteration:
|
if projection.is_tool_iteration:
|
||||||
result.tool_iterations += 1
|
result.tool_iterations += 1
|
||||||
|
# Arm/refresh the post-tool quiet watchdog whenever a
|
||||||
|
# tool-shaped item completes.
|
||||||
|
last_tool_completion_at = time.time()
|
||||||
|
else:
|
||||||
|
# Any non-tool projected activity (assistant message,
|
||||||
|
# status update, etc.) means codex is still producing
|
||||||
|
# output — clear the quiet timer so we don't fast-fail.
|
||||||
|
if projection.messages or projection.final_text is not None:
|
||||||
|
last_tool_completion_at = None
|
||||||
if projection.final_text is not None:
|
if projection.final_text is not None:
|
||||||
# Codex can emit multiple agentMessage items in one turn
|
# Codex can emit multiple agentMessage items in one turn
|
||||||
# (e.g. partial then final). Take the last one as canonical.
|
# (e.g. partial then final). Take the last one as canonical.
|
||||||
result.final_text = projection.final_text
|
result.final_text = projection.final_text
|
||||||
|
# Some codex builds tear a turn down by emitting a
|
||||||
|
# `<turn_aborted>` marker in the agent message text and
|
||||||
|
# never sending turn/completed. Treat the marker itself
|
||||||
|
# as terminal so we don't burn the full deadline.
|
||||||
|
if _has_turn_aborted_marker(projection.final_text):
|
||||||
|
turn_complete = True
|
||||||
|
result.interrupted = True
|
||||||
|
result.error = (
|
||||||
|
result.error or "codex reported turn_aborted"
|
||||||
|
)
|
||||||
|
|
||||||
if method == "turn/completed":
|
if method == "turn/completed":
|
||||||
turn_complete = True
|
turn_complete = True
|
||||||
|
|
@ -297,16 +477,31 @@ class CodexAppServerSession:
|
||||||
(note.get("params") or {}).get("turn") or {}
|
(note.get("params") or {}).get("turn") or {}
|
||||||
).get("error")
|
).get("error")
|
||||||
if err_obj:
|
if err_obj:
|
||||||
result.error = (
|
err_msg = err_obj.get("message") or str(err_obj)
|
||||||
f"turn ended status={turn_status}: "
|
# If the turn failed for an auth/refresh reason,
|
||||||
f"{err_obj.get('message') or err_obj}"
|
# rewrite the error into a re-auth hint AND mark
|
||||||
|
# the session for retirement.
|
||||||
|
stderr_blob = "\n".join(
|
||||||
|
self._client.stderr_tail(40)
|
||||||
)
|
)
|
||||||
|
hint = _classify_oauth_failure(err_msg, stderr_blob)
|
||||||
|
if hint is not None:
|
||||||
|
result.error = hint
|
||||||
|
result.should_retire = True
|
||||||
|
else:
|
||||||
|
result.error = (
|
||||||
|
f"turn ended status={turn_status}: {err_msg}"
|
||||||
|
)
|
||||||
|
|
||||||
if not turn_complete and not result.interrupted:
|
if not turn_complete and not result.interrupted:
|
||||||
# Hit the deadline. Issue interrupt to stop wasted compute.
|
# Hit the deadline. Issue interrupt to stop wasted compute, and
|
||||||
|
# tell the caller to retire the session — a turn that never
|
||||||
|
# finished is a strong sign codex is wedged in a way the next
|
||||||
|
# turn shouldn't inherit.
|
||||||
self._issue_interrupt(result.turn_id)
|
self._issue_interrupt(result.turn_id)
|
||||||
result.interrupted = True
|
result.interrupted = True
|
||||||
result.error = result.error or f"turn timed out after {turn_timeout}s"
|
result.error = result.error or f"turn timed out after {turn_timeout}s"
|
||||||
|
result.should_retire = True
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -515,6 +710,24 @@ def _approval_choice_to_codex_decision(choice: str) -> str:
|
||||||
return "decline"
|
return "decline"
|
||||||
|
|
||||||
|
|
||||||
|
def _has_turn_aborted_marker(text: str) -> bool:
|
||||||
|
"""Return True if `text` contains any of the raw markers codex uses
|
||||||
|
to signal a turn was aborted without emitting `turn/completed`.
|
||||||
|
|
||||||
|
Codex emits `<turn_aborted>` (and sometimes `<turn_aborted/>`) as raw
|
||||||
|
text inside agentMessage items when an interrupt or upstream error
|
||||||
|
tears the turn down before the normal completion path fires. Mirrors
|
||||||
|
openclaw beta.8's terminal-marker fix so we don't burn the full turn
|
||||||
|
deadline waiting for a turn/completed that never comes.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return False
|
||||||
|
for marker in _TURN_ABORTED_MARKERS:
|
||||||
|
if marker in text:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _get_hermes_version() -> str:
|
def _get_hermes_version() -> str:
|
||||||
"""Best-effort Hermes version string for codex's userAgent line."""
|
"""Best-effort Hermes version string for codex's userAgent line."""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
23
run_agent.py
23
run_agent.py
|
|
@ -15721,6 +15721,13 @@ class AIAgent:
|
||||||
turn = self._codex_session.run_turn(user_input=user_message)
|
turn = self._codex_session.run_turn(user_input=user_message)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.exception("codex app-server turn failed")
|
logger.exception("codex app-server turn failed")
|
||||||
|
# Crash → unconditionally drop the session so the next turn
|
||||||
|
# respawns from scratch instead of reusing a dead client.
|
||||||
|
try:
|
||||||
|
self._codex_session.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._codex_session = None
|
||||||
return {
|
return {
|
||||||
"final_response": (
|
"final_response": (
|
||||||
f"Codex app-server turn failed: {exc}. "
|
f"Codex app-server turn failed: {exc}. "
|
||||||
|
|
@ -15733,6 +15740,22 @@ class AIAgent:
|
||||||
"error": str(exc),
|
"error": str(exc),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# If the turn signalled the underlying client is wedged (deadline
|
||||||
|
# blown, post-tool watchdog tripped, OAuth refresh died, subprocess
|
||||||
|
# exited), retire the session so the next turn respawns codex
|
||||||
|
# rather than riding the broken process. Mirrors openclaw beta.8's
|
||||||
|
# "retire timed-out app-server clients" fix.
|
||||||
|
if getattr(turn, "should_retire", False):
|
||||||
|
logger.warning(
|
||||||
|
"codex app-server session retired (turn error: %s)",
|
||||||
|
turn.error,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
self._codex_session.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._codex_session = None
|
||||||
|
|
||||||
# Splice projected messages into the conversation. The projector emits
|
# Splice projected messages into the conversation. The projector emits
|
||||||
# standard {role, content, tool_calls, tool_call_id} entries, which
|
# standard {role, content, tool_calls, tool_call_id} entries, which
|
||||||
# is exactly what curator.py / sessions DB expect.
|
# is exactly what curator.py / sessions DB expect.
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,14 @@ class FakeClient:
|
||||||
def close(self):
|
def close(self):
|
||||||
self._closed = True
|
self._closed = True
|
||||||
|
|
||||||
|
def is_alive(self) -> bool:
|
||||||
|
# Fake is "alive" until close() is called; tests that want a dead
|
||||||
|
# subprocess can patch this attribute or call close() directly.
|
||||||
|
return not self._closed
|
||||||
|
|
||||||
|
def stderr_tail(self, n: int = 20):
|
||||||
|
return list(getattr(self, "_stderr_tail", []))[-n:]
|
||||||
|
|
||||||
# Test helpers
|
# Test helpers
|
||||||
def queue_notification(self, method: str, **params):
|
def queue_notification(self, method: str, **params):
|
||||||
self._notifications.append({"method": method, "params": params})
|
self._notifications.append({"method": method, "params": params})
|
||||||
|
|
@ -91,6 +99,10 @@ class FakeClient:
|
||||||
def queue_server_request(self, method: str, request_id: Any = "srv-1", **params):
|
def queue_server_request(self, method: str, request_id: Any = "srv-1", **params):
|
||||||
self._server_requests.append({"id": request_id, "method": method, "params": params})
|
self._server_requests.append({"id": request_id, "method": method, "params": params})
|
||||||
|
|
||||||
|
def set_stderr_tail(self, lines):
|
||||||
|
"""Test helper: seed stderr_tail() output for OAuth-refresh classifier tests."""
|
||||||
|
self._stderr_tail = list(lines)
|
||||||
|
|
||||||
|
|
||||||
def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession:
|
def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession:
|
||||||
return CodexAppServerSession(
|
return CodexAppServerSession(
|
||||||
|
|
@ -500,3 +512,385 @@ class TestApprovalPromptEnrichment:
|
||||||
s.run_turn("hi", turn_timeout=1.0)
|
s.run_turn("hi", turn_timeout=1.0)
|
||||||
# Falls back to the reason
|
# Falls back to the reason
|
||||||
assert "apply some changes" in captured["command"]
|
assert "apply some changes" in captured["command"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---- openclaw beta.8 parity: retire/wedge/oauth/abort marker ----
|
||||||
|
|
||||||
|
class TestSessionRetirement:
|
||||||
|
"""Mirrors openclaw beta.8's resilience fixes:
|
||||||
|
- retire timed-out app-server clients (should_retire on deadline)
|
||||||
|
- post-tool completion watchdog (don't burn the full deadline after a
|
||||||
|
tool result if codex goes silent)
|
||||||
|
- <turn_aborted> raw marker as terminal (don't wait for turn/completed
|
||||||
|
that never comes)
|
||||||
|
- OAuth refresh failure classification (suggest `codex login` instead
|
||||||
|
of raw RPC error strings)
|
||||||
|
- dead subprocess detection between iterations
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_deadline_marks_session_for_retirement(self):
|
||||||
|
client = FakeClient()
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn(
|
||||||
|
"never finishes",
|
||||||
|
turn_timeout=0.05,
|
||||||
|
notification_poll_timeout=0.01,
|
||||||
|
)
|
||||||
|
assert r.interrupted is True
|
||||||
|
assert r.error and "timed out" in r.error
|
||||||
|
assert r.should_retire is True, (
|
||||||
|
"Deadline exhaustion must signal retirement so the next turn "
|
||||||
|
"respawns codex instead of riding a wedged subprocess."
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_completed_turn_does_not_retire(self):
|
||||||
|
client = FakeClient()
|
||||||
|
client.queue_notification(
|
||||||
|
"item/completed",
|
||||||
|
item={"type": "agentMessage", "id": "m1", "text": "hi"},
|
||||||
|
threadId="t", turnId="tu1",
|
||||||
|
)
|
||||||
|
client.queue_notification(
|
||||||
|
"turn/completed", threadId="t",
|
||||||
|
turn={"id": "tu1", "status": "completed", "error": None},
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn("hi", turn_timeout=1.0)
|
||||||
|
assert r.should_retire is False
|
||||||
|
|
||||||
|
def test_post_tool_quiet_watchdog_trips_and_retires(self):
|
||||||
|
client = FakeClient()
|
||||||
|
# One tool completion, then total silence — no further events,
|
||||||
|
# no turn/completed. With a tiny post_tool_quiet_timeout the
|
||||||
|
# watchdog must fire before the larger turn deadline.
|
||||||
|
client.queue_notification(
|
||||||
|
"item/completed",
|
||||||
|
item={
|
||||||
|
"type": "commandExecution", "id": "ex1",
|
||||||
|
"command": "echo hi", "cwd": "/tmp",
|
||||||
|
"status": "completed", "aggregatedOutput": "hi",
|
||||||
|
"exitCode": 0, "commandActions": [],
|
||||||
|
},
|
||||||
|
threadId="t", turnId="tu1",
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn(
|
||||||
|
"tool then silence",
|
||||||
|
turn_timeout=5.0, # would be miserable to wait
|
||||||
|
notification_poll_timeout=0.02,
|
||||||
|
post_tool_quiet_timeout=0.15,
|
||||||
|
)
|
||||||
|
assert r.interrupted is True
|
||||||
|
assert r.should_retire is True
|
||||||
|
assert r.error and "silent" in r.error
|
||||||
|
# Confirm we issued turn/interrupt to free codex compute
|
||||||
|
assert any(method == "turn/interrupt" for (method, _) in client.requests)
|
||||||
|
|
||||||
|
def test_post_tool_watchdog_resets_on_further_activity(self):
|
||||||
|
"""A tool completion followed by an agent message should NOT trip
|
||||||
|
the watchdog — further activity = codex still alive."""
|
||||||
|
client = FakeClient()
|
||||||
|
client.queue_notification(
|
||||||
|
"item/completed",
|
||||||
|
item={
|
||||||
|
"type": "commandExecution", "id": "ex1",
|
||||||
|
"command": "echo hi", "cwd": "/tmp",
|
||||||
|
"status": "completed", "aggregatedOutput": "hi",
|
||||||
|
"exitCode": 0, "commandActions": [],
|
||||||
|
},
|
||||||
|
threadId="t", turnId="tu1",
|
||||||
|
)
|
||||||
|
# Non-tool activity immediately after — resets watchdog.
|
||||||
|
client.queue_notification(
|
||||||
|
"item/completed",
|
||||||
|
item={"type": "agentMessage", "id": "m1", "text": "tool finished"},
|
||||||
|
threadId="t", turnId="tu1",
|
||||||
|
)
|
||||||
|
client.queue_notification(
|
||||||
|
"turn/completed", threadId="t",
|
||||||
|
turn={"id": "tu1", "status": "completed", "error": None},
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn(
|
||||||
|
"tool then talk", turn_timeout=2.0,
|
||||||
|
notification_poll_timeout=0.01,
|
||||||
|
post_tool_quiet_timeout=0.05,
|
||||||
|
)
|
||||||
|
# Tool ran, then text reset the watchdog, then turn/completed.
|
||||||
|
# Should NOT be a retirement case.
|
||||||
|
assert r.tool_iterations == 1
|
||||||
|
assert r.final_text == "tool finished"
|
||||||
|
assert r.should_retire is False
|
||||||
|
assert r.interrupted is False
|
||||||
|
|
||||||
|
def test_turn_aborted_marker_in_text_is_terminal(self):
|
||||||
|
"""If codex emits `<turn_aborted>` in agent text and never sends
|
||||||
|
turn/completed, we still exit promptly instead of burning the
|
||||||
|
deadline."""
|
||||||
|
client = FakeClient()
|
||||||
|
client.queue_notification(
|
||||||
|
"item/completed",
|
||||||
|
item={
|
||||||
|
"type": "agentMessage", "id": "m1",
|
||||||
|
"text": "partial output... <turn_aborted>",
|
||||||
|
},
|
||||||
|
threadId="t", turnId="tu1",
|
||||||
|
)
|
||||||
|
# Deliberately NO turn/completed notification queued.
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn(
|
||||||
|
"abort mid-turn", turn_timeout=2.0,
|
||||||
|
notification_poll_timeout=0.01,
|
||||||
|
)
|
||||||
|
assert r.interrupted is True
|
||||||
|
assert r.error and "turn_aborted" in r.error
|
||||||
|
# Should have exited fast — not waited for the full 2s deadline.
|
||||||
|
# (Can't measure wall clock reliably in CI; presence of the marker
|
||||||
|
# error string instead of a "timed out" message is the proxy.)
|
||||||
|
assert "timed out" not in r.error
|
||||||
|
|
||||||
|
def test_turn_aborted_self_closing_marker_also_terminal(self):
|
||||||
|
client = FakeClient()
|
||||||
|
client.queue_notification(
|
||||||
|
"item/completed",
|
||||||
|
item={"type": "agentMessage", "id": "m1",
|
||||||
|
"text": "<turn_aborted/>"},
|
||||||
|
threadId="t", turnId="tu1",
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn("x", turn_timeout=2.0,
|
||||||
|
notification_poll_timeout=0.01)
|
||||||
|
assert r.interrupted is True
|
||||||
|
assert r.error and "turn_aborted" in r.error
|
||||||
|
|
||||||
|
def test_oauth_refresh_failure_on_turn_start_suggests_login(self):
|
||||||
|
from agent.transports.codex_app_server import CodexAppServerError
|
||||||
|
|
||||||
|
client = FakeClient()
|
||||||
|
|
||||||
|
def boom(method, params):
|
||||||
|
if method == "turn/start":
|
||||||
|
raise CodexAppServerError(
|
||||||
|
code=-32603,
|
||||||
|
message="auth refresh failed: invalid_grant",
|
||||||
|
)
|
||||||
|
return {"thread": {"id": "t"},
|
||||||
|
"activePermissionProfile": {"id": "x"}}
|
||||||
|
|
||||||
|
client._request_handler = boom
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn("hi", turn_timeout=1.0)
|
||||||
|
assert r.error is not None
|
||||||
|
assert "codex login" in r.error
|
||||||
|
assert r.should_retire is True
|
||||||
|
|
||||||
|
def test_oauth_failure_from_stderr_on_turn_start_failure(self):
|
||||||
|
"""If the RPC error itself is opaque but stderr shows an auth
|
||||||
|
problem, we still classify it as a refresh failure."""
|
||||||
|
from agent.transports.codex_app_server import CodexAppServerError
|
||||||
|
|
||||||
|
client = FakeClient()
|
||||||
|
client.set_stderr_tail([
|
||||||
|
"[2026-05-14T10:00:00Z WARN codex_core::auth] token refresh failed",
|
||||||
|
"[2026-05-14T10:00:00Z ERROR codex_core] please log in again",
|
||||||
|
])
|
||||||
|
|
||||||
|
def boom(method, params):
|
||||||
|
if method == "turn/start":
|
||||||
|
raise CodexAppServerError(code=-32603, message="rpc broke")
|
||||||
|
return {"thread": {"id": "t"},
|
||||||
|
"activePermissionProfile": {"id": "x"}}
|
||||||
|
|
||||||
|
client._request_handler = boom
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn("hi", turn_timeout=1.0)
|
||||||
|
assert r.error is not None
|
||||||
|
assert "codex login" in r.error
|
||||||
|
assert r.should_retire is True
|
||||||
|
|
||||||
|
def test_oauth_failure_in_turn_completed_error(self):
|
||||||
|
"""A failed turn/completed whose error mentions auth/refresh
|
||||||
|
triggers the re-auth hint + retirement."""
|
||||||
|
client = FakeClient()
|
||||||
|
client.queue_notification(
|
||||||
|
"turn/completed", threadId="t",
|
||||||
|
turn={
|
||||||
|
"id": "tu1", "status": "failed",
|
||||||
|
"error": {"message": "401 Unauthorized: please reauthenticate"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn("x", turn_timeout=1.0,
|
||||||
|
notification_poll_timeout=0.01)
|
||||||
|
assert r.error is not None
|
||||||
|
assert "codex login" in r.error
|
||||||
|
assert r.should_retire is True
|
||||||
|
|
||||||
|
def test_generic_turn_failure_does_not_trigger_oauth_hint(self):
|
||||||
|
"""A boring model error must NOT rewrite the message into a fake
|
||||||
|
re-auth hint. Conservative classifier."""
|
||||||
|
client = FakeClient()
|
||||||
|
client.queue_notification(
|
||||||
|
"turn/completed", threadId="t",
|
||||||
|
turn={
|
||||||
|
"id": "tu1", "status": "failed",
|
||||||
|
"error": {"message": "rate limit exceeded"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
r = s.run_turn("x", turn_timeout=1.0,
|
||||||
|
notification_poll_timeout=0.01)
|
||||||
|
assert r.error is not None
|
||||||
|
assert "codex login" not in r.error
|
||||||
|
assert "rate limit exceeded" in r.error
|
||||||
|
# Generic model failures don't retire — the session itself is fine
|
||||||
|
assert r.should_retire is False
|
||||||
|
|
||||||
|
def test_dead_subprocess_detected_between_iterations(self):
|
||||||
|
"""If codex dies (segfault, OOM, killed by its auth refresh
|
||||||
|
thread), the inter-iteration is_alive check breaks the loop
|
||||||
|
instead of waiting on a queue that will never fill."""
|
||||||
|
client = FakeClient()
|
||||||
|
s = make_session(client)
|
||||||
|
s.ensure_started()
|
||||||
|
# Simulate subprocess death by setting _closed (FakeClient's
|
||||||
|
# is_alive returns False when closed).
|
||||||
|
client._closed = True
|
||||||
|
client.set_stderr_tail([
|
||||||
|
"thread 'tokio-runtime-worker' panicked at 'oauth: invalid_grant'",
|
||||||
|
])
|
||||||
|
r = s.run_turn("x", turn_timeout=2.0,
|
||||||
|
notification_poll_timeout=0.01)
|
||||||
|
assert r.should_retire is True
|
||||||
|
# Stderr-derived auth hint takes precedence over generic message
|
||||||
|
assert r.error and "codex login" in r.error
|
||||||
|
|
||||||
|
|
||||||
|
# ---- thread/start cross-fill ----
|
||||||
|
|
||||||
|
class TestThreadStartCrossFill:
|
||||||
|
"""Mirrors openclaw beta.8's tolerance for thread.id/sessionId aliasing."""
|
||||||
|
|
||||||
|
def test_thread_id_under_thread_key(self):
|
||||||
|
client = FakeClient()
|
||||||
|
s = make_session(client)
|
||||||
|
tid = s.ensure_started()
|
||||||
|
assert tid == "thread-fake-001"
|
||||||
|
|
||||||
|
def test_thread_session_id_alias_under_thread_key(self):
|
||||||
|
client = FakeClient()
|
||||||
|
client._request_handler = lambda method, params: (
|
||||||
|
{"thread": {"sessionId": "alias-1"},
|
||||||
|
"activePermissionProfile": {"id": "x"}}
|
||||||
|
if method == "thread/start" else
|
||||||
|
{"turn": {"id": "tu1"}} if method == "turn/start" else {}
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
tid = s.ensure_started()
|
||||||
|
assert tid == "alias-1"
|
||||||
|
|
||||||
|
def test_top_level_session_id_fallback(self):
|
||||||
|
client = FakeClient()
|
||||||
|
client._request_handler = lambda method, params: (
|
||||||
|
{"sessionId": "top-1"} if method == "thread/start" else
|
||||||
|
{"turn": {"id": "tu1"}} if method == "turn/start" else {}
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
tid = s.ensure_started()
|
||||||
|
assert tid == "top-1"
|
||||||
|
|
||||||
|
def test_missing_thread_id_raises(self):
|
||||||
|
from agent.transports.codex_app_server import CodexAppServerError
|
||||||
|
|
||||||
|
client = FakeClient()
|
||||||
|
client._request_handler = lambda method, params: (
|
||||||
|
{"thread": {}, "activePermissionProfile": {"id": "x"}}
|
||||||
|
if method == "thread/start" else
|
||||||
|
{"turn": {"id": "tu1"}}
|
||||||
|
)
|
||||||
|
s = make_session(client)
|
||||||
|
with pytest.raises(CodexAppServerError, match="no thread id"):
|
||||||
|
s.ensure_started()
|
||||||
|
|
||||||
|
|
||||||
|
class TestHasTurnAbortedMarker:
|
||||||
|
"""Unit coverage for the marker matcher itself."""
|
||||||
|
|
||||||
|
def test_empty_string(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_has_turn_aborted_marker,
|
||||||
|
)
|
||||||
|
assert _has_turn_aborted_marker("") is False
|
||||||
|
assert _has_turn_aborted_marker(None) is False # type: ignore[arg-type]
|
||||||
|
|
||||||
|
def test_plain_text_no_marker(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_has_turn_aborted_marker,
|
||||||
|
)
|
||||||
|
assert _has_turn_aborted_marker("normal response with no markers") is False
|
||||||
|
|
||||||
|
def test_open_marker(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_has_turn_aborted_marker,
|
||||||
|
)
|
||||||
|
assert _has_turn_aborted_marker("blah <turn_aborted> blah") is True
|
||||||
|
|
||||||
|
def test_self_closing_marker(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_has_turn_aborted_marker,
|
||||||
|
)
|
||||||
|
assert _has_turn_aborted_marker("<turn_aborted/>") is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestClassifyOAuthFailure:
|
||||||
|
"""Unit coverage for the OAuth classifier; conservative on purpose."""
|
||||||
|
|
||||||
|
def test_invalid_grant_classified(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_classify_oauth_failure,
|
||||||
|
)
|
||||||
|
hint = _classify_oauth_failure("error: invalid_grant returned by server")
|
||||||
|
assert hint is not None
|
||||||
|
assert "codex login" in hint
|
||||||
|
|
||||||
|
def test_token_refresh_classified(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_classify_oauth_failure,
|
||||||
|
)
|
||||||
|
hint = _classify_oauth_failure("token refresh failed: network error")
|
||||||
|
assert hint is not None
|
||||||
|
assert "codex login" in hint
|
||||||
|
|
||||||
|
def test_401_classified(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_classify_oauth_failure,
|
||||||
|
)
|
||||||
|
hint = _classify_oauth_failure("HTTP 401 Unauthorized")
|
||||||
|
assert hint is not None
|
||||||
|
|
||||||
|
def test_generic_error_not_classified(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_classify_oauth_failure,
|
||||||
|
)
|
||||||
|
assert _classify_oauth_failure("connection reset") is None
|
||||||
|
assert _classify_oauth_failure("model returned bad json") is None
|
||||||
|
assert _classify_oauth_failure("rate limit exceeded") is None
|
||||||
|
|
||||||
|
def test_empty_inputs(self):
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_classify_oauth_failure,
|
||||||
|
)
|
||||||
|
assert _classify_oauth_failure() is None
|
||||||
|
assert _classify_oauth_failure("") is None
|
||||||
|
assert _classify_oauth_failure("", None) is None # type: ignore[arg-type]
|
||||||
|
|
||||||
|
def test_multi_string_search(self):
|
||||||
|
"""Hint can come from any of the provided strings."""
|
||||||
|
from agent.transports.codex_app_server_session import (
|
||||||
|
_classify_oauth_failure,
|
||||||
|
)
|
||||||
|
hint = _classify_oauth_failure(
|
||||||
|
"rpc returned -32603",
|
||||||
|
"[stderr] token has expired, run codex login",
|
||||||
|
)
|
||||||
|
assert hint is not None
|
||||||
|
|
|
||||||
|
|
@ -342,3 +342,77 @@ class TestErrorHandling:
|
||||||
assert result["completed"] is False
|
assert result["completed"] is False
|
||||||
assert result["partial"] is True
|
assert result["partial"] is True
|
||||||
assert result["error"] == "user interrupted"
|
assert result["error"] == "user interrupted"
|
||||||
|
|
||||||
|
|
||||||
|
class TestSessionRetirementOnRunAgent:
|
||||||
|
"""run_agent.py side: when run_turn returns should_retire=True, the
|
||||||
|
AIAgent must close + null _codex_session so the next turn respawns."""
|
||||||
|
|
||||||
|
def test_should_retire_drops_session(self, monkeypatch):
|
||||||
|
closes = {"count": 0}
|
||||||
|
|
||||||
|
def fake_run_turn(self, user_input, **kwargs):
|
||||||
|
return TurnResult(
|
||||||
|
final_text="",
|
||||||
|
projected_messages=[],
|
||||||
|
tool_iterations=0,
|
||||||
|
interrupted=True,
|
||||||
|
error="turn timed out after 600.0s",
|
||||||
|
turn_id="tu1",
|
||||||
|
thread_id="th1",
|
||||||
|
should_retire=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
def fake_close(self):
|
||||||
|
closes["count"] += 1
|
||||||
|
|
||||||
|
monkeypatch.setattr(CodexAppServerSession, "ensure_started",
|
||||||
|
lambda self: "th1")
|
||||||
|
monkeypatch.setattr(CodexAppServerSession, "run_turn", fake_run_turn)
|
||||||
|
monkeypatch.setattr(CodexAppServerSession, "close", fake_close)
|
||||||
|
|
||||||
|
agent = _make_codex_agent()
|
||||||
|
with patch.object(agent, "_spawn_background_review", return_value=None):
|
||||||
|
result = agent.run_conversation("hi")
|
||||||
|
|
||||||
|
# The session was closed and cleared
|
||||||
|
assert closes["count"] == 1
|
||||||
|
assert getattr(agent, "_codex_session", "MISSING") is None
|
||||||
|
# Partial result was still returned (caller still sees the error)
|
||||||
|
assert result["partial"] is True
|
||||||
|
assert result["error"] == "turn timed out after 600.0s"
|
||||||
|
|
||||||
|
def test_normal_turn_keeps_session(self, fake_session):
|
||||||
|
"""fake_session fixture returns should_retire=False (default).
|
||||||
|
The session must stay attached for the next turn to reuse."""
|
||||||
|
agent = _make_codex_agent()
|
||||||
|
with patch.object(agent, "_spawn_background_review", return_value=None):
|
||||||
|
agent.run_conversation("hi")
|
||||||
|
# Session was lazily created and still attached.
|
||||||
|
assert getattr(agent, "_codex_session", None) is not None
|
||||||
|
|
||||||
|
def test_exception_path_also_drops_session(self, monkeypatch):
|
||||||
|
"""Even if run_turn raises (not just sets should_retire), we must
|
||||||
|
drop the session — a thrown exception is the strongest possible
|
||||||
|
signal the process is dead."""
|
||||||
|
closes = {"count": 0}
|
||||||
|
|
||||||
|
def boom_run_turn(self, user_input, **kwargs):
|
||||||
|
raise RuntimeError("codex segfaulted")
|
||||||
|
|
||||||
|
def fake_close(self):
|
||||||
|
closes["count"] += 1
|
||||||
|
|
||||||
|
monkeypatch.setattr(CodexAppServerSession, "ensure_started",
|
||||||
|
lambda self: "th1")
|
||||||
|
monkeypatch.setattr(CodexAppServerSession, "run_turn", boom_run_turn)
|
||||||
|
monkeypatch.setattr(CodexAppServerSession, "close", fake_close)
|
||||||
|
|
||||||
|
agent = _make_codex_agent()
|
||||||
|
with patch.object(agent, "_spawn_background_review", return_value=None):
|
||||||
|
result = agent.run_conversation("hi")
|
||||||
|
|
||||||
|
assert closes["count"] == 1
|
||||||
|
assert agent._codex_session is None
|
||||||
|
assert result["completed"] is False
|
||||||
|
assert "codex segfaulted" in result["error"]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue