fix(transports): use monotonic deadlines in codex app-server turn loop

This commit is contained in:
QuenVix 2026-05-16 08:00:48 +03:00 committed by Teknium
parent 37286a5bcd
commit d5a0815c3d
2 changed files with 53 additions and 5 deletions

View file

@ -404,7 +404,7 @@ class CodexAppServerSession:
return result
result.turn_id = (ts.get("turn") or {}).get("id")
deadline = time.time() + turn_timeout
deadline = time.monotonic() + turn_timeout
turn_complete = False
# Post-tool watchdog state. last_tool_completion_at is set whenever
# a tool-shaped item completes; if no further notification arrives
@ -412,7 +412,7 @@ class CodexAppServerSession:
# fast-fail and retire the session.
last_tool_completion_at: Optional[float] = None
while time.time() < deadline and not turn_complete:
while time.monotonic() < deadline and not turn_complete:
if self._interrupt_event.is_set():
self._issue_interrupt(result.turn_id)
result.interrupted = True
@ -440,7 +440,7 @@ class CodexAppServerSession:
# up on this turn instead of waiting for the outer deadline.
if (
last_tool_completion_at is not None
and (time.time() - last_tool_completion_at)
and (time.monotonic() - last_tool_completion_at)
> post_tool_quiet_timeout
):
self._issue_interrupt(result.turn_id)
@ -471,7 +471,7 @@ class CodexAppServerSession:
result.projected_messages.extend(proj.messages)
if proj.is_tool_iteration:
result.tool_iterations += 1
last_tool_completion_at = time.time()
last_tool_completion_at = time.monotonic()
if proj.final_text is not None:
result.final_text = proj.final_text
if _has_turn_aborted_marker(proj.final_text):
@ -514,7 +514,7 @@ class CodexAppServerSession:
result.tool_iterations += 1
# Arm/refresh the post-tool quiet watchdog whenever a
# tool-shaped item completes.
last_tool_completion_at = time.time()
last_tool_completion_at = time.monotonic()
else:
# Any non-tool projected activity (assistant message,
# status update, etc.) means codex is still producing

View file

@ -9,10 +9,12 @@ from __future__ import annotations
import threading
import time
from unittest.mock import patch
from typing import Any, Optional
import pytest
import agent.transports.codex_app_server_session as session_mod
from agent.transports.codex_app_server_session import (
CodexAppServerSession,
TurnResult,
@ -344,6 +346,23 @@ class TestRunTurn:
assert r.interrupted is True
assert r.error and "timed out" in r.error
def test_deadline_uses_monotonic_clock(self):
    """The turn timeout must be driven by ``time.monotonic`` readings.

    The patched clock hands out 1000.0, 999.0, 999.0 and 1001.0, one per
    call, so it steps backwards before ending a full second past its
    first reading. With a 0.1s ``turn_timeout`` the turn is expected to
    finish as an interrupted time-out.
    """
    fake_client = FakeClient()
    session = make_session(fake_client)
    # An iterable side_effect returns one reading per call, in order.
    clock_readings = [1000.0, 999.0, 999.0, 1001.0]
    with patch.object(
        session_mod.time, "monotonic", side_effect=clock_readings
    ):
        outcome = session.run_turn(
            "never finishes",
            turn_timeout=0.1,
            notification_poll_timeout=0.0,
        )
    assert outcome.interrupted is True
    assert outcome.error and "timed out" in outcome.error
def test_failed_turn_records_error_from_turn_completed(self):
client = FakeClient()
client.queue_notification(
@ -666,6 +685,35 @@ class TestSessionRetirement:
# Confirm we issued turn/interrupt to free codex compute
assert any(method == "turn/interrupt" for (method, _) in client.requests)
def test_post_tool_watchdog_uses_monotonic_clock(self):
    """The post-tool quiet watchdog must be driven by ``time.monotonic``.

    One completed ``commandExecution`` item is queued and nothing follows
    it. The patched clock hands out 1000.0, 999.0, 999.0, 999.0 and
    1000.2, one per call. With a 0.15s ``post_tool_quiet_timeout`` and a
    5.0s ``turn_timeout`` the turn is expected to be interrupted, flagged
    for retirement, and to report a "silent" error.
    """
    completed_command = {
        "type": "commandExecution", "id": "ex1",
        "command": "echo hi", "cwd": "/tmp",
        "status": "completed", "aggregatedOutput": "hi",
        "exitCode": 0, "commandActions": [],
    }
    fake_client = FakeClient()
    fake_client.queue_notification(
        "item/completed",
        item=completed_command,
        threadId="t",
        turnId="tu1",
    )
    session = make_session(fake_client)
    # An iterable side_effect returns one reading per call, in order.
    clock_readings = [1000.0, 999.0, 999.0, 999.0, 1000.2]
    with patch.object(
        session_mod.time, "monotonic", side_effect=clock_readings
    ):
        outcome = session.run_turn(
            "tool then silence",
            turn_timeout=5.0,
            notification_poll_timeout=0.0,
            post_tool_quiet_timeout=0.15,
        )
    assert outcome.interrupted is True
    assert outcome.should_retire is True
    assert outcome.error and "silent" in outcome.error
def test_post_tool_watchdog_resets_on_further_activity(self):
"""A tool completion followed by an agent message should NOT trip
the watchdog — further activity = codex still alive."""