"""Regression tests for the Codex time-to-first-byte (TTFB) watchdog. The chatgpt.com/backend-api/codex endpoint has an intermittent failure mode where it accepts the connection but never emits a single stream event. The watchdog in ``interruptible_api_call`` kills such a connection at a short TTFB cutoff (instead of waiting out the much longer wall-clock stale timeout) so the retry loop can reconnect promptly. Once any stream event arrives, the TTFB watchdog is satisfied and a separate idle watchdog handles streams that stop emitting SSE events. The "bytes flowing" signal is ``agent._codex_stream_last_event_ts``, set on *any* event by ``codex_runtime.run_codex_stream`` — so reasoning-only or tool-call-only turns (which emit no output-text deltas) are not mistaken for a stall. """ from __future__ import annotations import sys import time import types from types import SimpleNamespace import pytest # Stub optional heavy imports so run_agent imports cleanly in isolation. sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) sys.modules.setdefault("fal_client", types.SimpleNamespace()) def _make_codex_agent(tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) (tmp_path / ".env").write_text("", encoding="utf-8") (tmp_path / "config.yaml").write_text("{}\n", encoding="utf-8") from run_agent import AIAgent agent = AIAgent( model="gpt-5.5", provider="openai-codex", api_key="sk-dummy", base_url="https://chatgpt.com/backend-api/codex", quiet_mode=True, skip_context_files=True, skip_memory=True, platform="cli", ) # The watchdog is gated on the codex_responses api_mode; assert/force it so # the test is robust to detection-logic changes elsewhere. agent.api_mode = "codex_responses" monkeypatch.setattr(agent, "_emit_status", lambda *a, **k: None) # Keep the wall-clock stale timeout high so any early kill is unambiguously # the TTFB path, not the stale-call path. monkeypatch.setattr( agent, "_compute_non_stream_stale_timeout", lambda *a, **k: 60.0 ) return agent def test_ttfb_kills_when_no_stream_event(tmp_path, monkeypatch): """Backend accepts the connection but emits no event -> killed at the TTFB cutoff, well before the 60s wall-clock stale timeout, with a retryable TimeoutError and a ``codex_ttfb_kill`` close reason.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "1") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason), ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason), ) stop = {"flag": False} def fake_hang(api_kwargs, client=None, on_first_delta=None): # Never set _codex_stream_last_event_ts: simulate zero events arriving. deadline = time.time() + 30 while time.time() < deadline and not stop["flag"] and not agent._interrupt_requested: time.sleep(0.02) raise RuntimeError("connection closed") monkeypatch.setattr(agent, "_run_codex_stream", fake_hang) t0 = time.time() try: with pytest.raises(TimeoutError) as excinfo: h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": "hi"}) elapsed = time.time() - t0 assert "TTFB" in str(excinfo.value) assert "codex_ttfb_kill" in closes # ~1s cutoff + 2s join grace; must be far under the 60s stale timeout. assert elapsed < 15, f"TTFB watchdog took {elapsed:.1f}s" finally: stop["flag"] = True def test_ttfb_includes_silent_hang_hint_for_gpt_5_5(tmp_path, monkeypatch): """The no-first-byte watchdog should surface the same actionable hint as the stale-call timeout path when the model matches the silent-hang heuristic.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "1") closes: list = [] statuses: list[str] = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr(agent, "_buffer_status", lambda msg: statuses.append(msg)) monkeypatch.setattr(agent, "_emit_status", lambda msg: statuses.append(msg)) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason), ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason), ) stop = {"flag": False} def fake_hang(api_kwargs, client=None, on_first_delta=None): deadline = time.time() + 30 while time.time() < deadline and not stop["flag"] and not agent._interrupt_requested: time.sleep(0.02) raise RuntimeError("connection closed") monkeypatch.setattr(agent, "_run_codex_stream", fake_hang) try: with pytest.raises(TimeoutError) as excinfo: h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": "hi"}) message = str(excinfo.value) assert "gpt-5.4" in message assert "gpt-5.3-codex" in message assert "gpt-5.4-codex" in message assert "codex_ttfb_kill" in closes assert statuses, "expected a user-facing watchdog status" assert any("gpt-5.4" in s and "gpt-5.3-codex" in s for s in statuses) finally: stop["flag"] = True def test_ttfb_high_env_is_capped_for_openai_codex(tmp_path, monkeypatch): """A stale local env value like 90s must not make openai-codex wait 90s before reconnecting when the backend emits no SSE frames.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "90") monkeypatch.setenv("HERMES_CODEX_TTFB_MAX_SECONDS", "1") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason), ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason), ) stop = {"flag": False} def fake_hang(api_kwargs, client=None, on_first_delta=None): deadline = time.time() + 30 while time.time() < deadline and not stop["flag"] and not agent._interrupt_requested: time.sleep(0.02) raise RuntimeError("connection closed") monkeypatch.setattr(agent, "_run_codex_stream", fake_hang) t0 = time.time() try: with pytest.raises(TimeoutError) as excinfo: h.interruptible_api_call(agent, {"model": "gpt-5.4", "input": "hi"}) elapsed = time.time() - t0 assert "TTFB threshold: 1s" in str(excinfo.value) assert "codex_ttfb_kill" in closes assert elapsed < 15, f"TTFB watchdog ignored cap and took {elapsed:.1f}s" finally: stop["flag"] = True def test_ttfb_does_not_kill_when_events_flow(tmp_path, monkeypatch): """Once a stream event has arrived, a generation that runs past the TTFB cutoff is NOT killed by the watchdog — it completes normally.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "1") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason), ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason), ) sentinel = SimpleNamespace(ok=True) def fake_stream(api_kwargs, client=None, on_first_delta=None): # Bytes flowing: mark stream activity right away, then keep generating # past the 1s TTFB cutoff before returning a real response. agent._codex_stream_last_event_ts = time.time() if on_first_delta: on_first_delta() time.sleep(2.0) return sentinel monkeypatch.setattr(agent, "_run_codex_stream", fake_stream) resp = h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": "hi"}) assert resp is sentinel assert "codex_ttfb_kill" not in closes def test_event_idle_kills_after_first_event_then_silence(tmp_path, monkeypatch): """If Codex emits an opening SSE event and then goes silent, kill it via the stream-idle watchdog instead of waiting for the long non-stream stale timeout.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "10") monkeypatch.setenv("HERMES_CODEX_EVENT_STALE_TIMEOUT_SECONDS", "1") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason), ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason), ) stop = {"flag": False} def fake_stream(api_kwargs, client=None, on_first_delta=None): agent._codex_stream_last_event_ts = time.time() deadline = time.time() + 30 while time.time() < deadline and not stop["flag"] and not agent._interrupt_requested: time.sleep(0.02) raise RuntimeError("connection closed") monkeypatch.setattr(agent, "_run_codex_stream", fake_stream) try: with pytest.raises(TimeoutError) as excinfo: h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": "hi"}) assert "after first byte" in str(excinfo.value) assert "codex_stream_idle_kill" in closes assert "codex_ttfb_kill" not in closes finally: stop["flag"] = True def test_ttfb_disabled_via_env_zero(tmp_path, monkeypatch): """Setting HERMES_CODEX_TTFB_TIMEOUT_SECONDS=0 disables the TTFB watchdog; a no-event stall then falls through to the (here, 60s) stale timeout, so a short hang is NOT killed by TTFB.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "0") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason), ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason), ) sentinel = SimpleNamespace(ok=True) def fake_stream(api_kwargs, client=None, on_first_delta=None): # No event marker, but only briefly — well under the 60s stale timeout. time.sleep(2.0) return sentinel monkeypatch.setattr(agent, "_run_codex_stream", fake_stream) resp = h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": "hi"}) assert resp is sentinel assert "codex_ttfb_kill" not in closes def test_large_codex_request_waits_instead_of_ttfb_reconnect(tmp_path, monkeypatch): """Large Codex inputs can legitimately take longer than the small-request first-byte cutoff before the first SSE frame. Preserve the full input and wait instead of killing/retrying at TTFB.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "1") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason) ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason) ) sentinel = SimpleNamespace(ok=True) def fake_stream(api_kwargs, client=None, on_first_delta=None): # No event marker for 2s: this would trip the 1s TTFB watchdog on a # small request, but should be allowed for a large request. time.sleep(2.0) return sentinel monkeypatch.setattr(agent, "_run_codex_stream", fake_stream) large_input = "x" * 120_000 # ~30k estimated tokens, above large-request gate. resp = h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": large_input}) assert resp is sentinel assert "codex_ttfb_kill" not in closes def test_large_codex_request_strict_ttfb_env_still_reconnects(tmp_path, monkeypatch): """Operators can force the old early-reconnect behavior for large inputs with HERMES_CODEX_TTFB_STRICT=1.""" from agent import chat_completion_helpers as h agent = _make_codex_agent(tmp_path, monkeypatch) monkeypatch.setenv("HERMES_CODEX_TTFB_TIMEOUT_SECONDS", "1") monkeypatch.setenv("HERMES_CODEX_TTFB_STRICT", "1") closes: list = [] dummy_client = SimpleNamespace() monkeypatch.setattr(agent, "_create_request_openai_client", lambda **k: dummy_client) monkeypatch.setattr( agent, "_abort_request_openai_client", lambda c, reason=None: closes.append(reason) ) monkeypatch.setattr( agent, "_close_request_openai_client", lambda c, reason=None: closes.append(reason) ) stop = {"flag": False} def fake_hang(api_kwargs, client=None, on_first_delta=None): deadline = time.time() + 30 while time.time() < deadline and not stop["flag"] and not agent._interrupt_requested: time.sleep(0.02) raise RuntimeError("connection closed") monkeypatch.setattr(agent, "_run_codex_stream", fake_hang) large_input = "x" * 120_000 try: with pytest.raises(TimeoutError) as excinfo: h.interruptible_api_call(agent, {"model": "gpt-5.5", "input": large_input}) assert "TTFB threshold: 1s" in str(excinfo.value) assert "codex_ttfb_kill" in closes finally: stop["flag"] = True