From 2d422720b53207208efacfdb2e32dc92048b6edd Mon Sep 17 00:00:00 2001 From: Kasun Athaudahetti Date: Mon, 25 May 2026 01:36:22 -0700 Subject: [PATCH] fix(codex): size and propagate timeouts for Responses-API requests; lower stale defaults Codex / Responses-API requests had three latent timeout bugs that combined into the long silent hangs reported on #21444: 1. The non-stream stale-call detector estimated context tokens from ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry their conversational load in ``input`` (with ``instructions`` and ``tools``), so every Codex turn logged ``context=~0 tokens`` and the detector never applied its >50k / >100k tier bumps. 2. ``providers..request_timeout_seconds`` was silently dropped on the main Codex path. The chat_completions path and the auxiliary Codex adapter both forwarded it; the main path skipped it through three places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``, ``_preflight_codex_api_kwargs``). 3. The streaming stale detector had the same payload-shape bug for ``codex_responses`` requests, which route through the non-streaming detector (it's the path that emits the user-facing "No response from provider for 300s (non-streaming, ...)" warning that reporters keep pasting). This commit: - Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``, used by both the non-stream and stream detectors. Handles ``messages`` (Chat Completions), ``input + instructions + tools`` (Responses API), bare lists, and an unknown-dict fallback. - Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs`` and ``_preflight_codex_api_kwargs`` (with guards against zero/negative/inf/bool values), and wires ``_resolved_api_call_timeout()`` into the Codex branch of ``build_api_kwargs``. - Lowers the implicit non-stream stale defaults so fallback providers kick in faster when upstream stalls: * base 300s -> 90s * >50k 450s -> 150s * >100k 600s -> 240s These only apply when the user has *not* set ``providers..stale_timeout_seconds`` or ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins. - Adds regression tests for the estimator shapes, the new defaults, the context-tier scaling, transport timeout pass-through, and preflight timeout pass-through / rejection of invalid values. Closes #21444 Supersedes #21652 #24126 #31855 Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com> --- agent/chat_completion_helpers.py | 64 +++++- agent/codex_responses_adapter.py | 9 +- agent/transports/codex.py | 15 ++ run_agent.py | 25 ++- scripts/release.py | 2 + tests/agent/test_non_stream_stale_timeout.py | 192 ++++++++++++++++++ .../agent/transports/test_codex_transport.py | 61 ++++++ tests/hermes_cli/test_timeouts.py | 2 +- .../run_agent/test_openai_client_lifecycle.py | 2 +- .../test_run_agent_codex_responses.py | 28 ++- 10 files changed, 383 insertions(+), 17 deletions(-) create mode 100644 tests/agent/test_non_stream_stale_timeout.py diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index b52bd6a1fb1..5cdb590dd5d 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -75,6 +75,59 @@ def _ra(): return run_agent +def estimate_request_context_tokens(api_payload: Any) -> int: + """Estimate context/load tokens from an API payload, dict or messages list. + + The stale-call detectors historically assumed a Chat Completions request: + they pulled ``api_kwargs["messages"]`` and ran a cheap char/4 estimate. + Codex / Responses API requests carry the conversational payload in + ``input`` (with additional load in ``instructions`` and ``tools``), so the + legacy estimator reported ~0 tokens for every Codex turn and the + context-tier scaling never fired. + + This helper handles both shapes: + - bare list -> treat as Chat Completions ``messages`` + - dict with ``messages`` -> Chat Completions (+ ``tools`` if present) + - dict with ``input`` -> Responses API (+ ``instructions``/``tools``) + - any other dict -> fall back to summing string values + """ + + def _chars(value: Any) -> int: + if value is None: + return 0 + if isinstance(value, str): + return len(value) + return len(str(value)) + + def _message_chars(messages: Any) -> int: + if not isinstance(messages, list): + return _chars(messages) + return sum(_chars(item) for item in messages) + + if isinstance(api_payload, list): + return _message_chars(api_payload) // 4 + + if isinstance(api_payload, dict): + messages = api_payload.get("messages") + if isinstance(messages, list): + total_chars = _message_chars(messages) + if "tools" in api_payload: + total_chars += _chars(api_payload.get("tools")) + return total_chars // 4 + + if "input" in api_payload: + total_chars = ( + _chars(api_payload.get("input")) + + _chars(api_payload.get("instructions")) + + _chars(api_payload.get("tools")) + ) + return total_chars // 4 + + return sum(_chars(value) for value in api_payload.values()) // 4 + + return _chars(api_payload) // 4 + + def interruptible_api_call(agent, api_kwargs: dict): """ @@ -200,9 +253,7 @@ def interruptible_api_call(agent, api_kwargs: dict): # httpx timeout (default 1800s) with zero feedback. The stale # detector kills the connection early so the main retry loop can # apply richer recovery (credential rotation, provider fallback). - _stale_timeout = agent._compute_non_stream_stale_timeout( - api_kwargs.get("messages", []) - ) + _stale_timeout = agent._compute_non_stream_stale_timeout(api_kwargs) _call_start = time.time() agent._touch_activity("waiting for non-streaming API response") @@ -226,7 +277,7 @@ def interruptible_api_call(agent, api_kwargs: dict): # arrives within the configured timeout. _elapsed = time.time() - _call_start if _elapsed > _stale_timeout: - _est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4 + _est_ctx = estimate_request_context_tokens(api_kwargs) logger.warning( "Non-streaming API call stale for %.0fs (threshold %.0fs). " "model=%s context=~%s tokens. Killing connection.", @@ -362,6 +413,7 @@ def build_api_kwargs(agent, api_messages: list) -> dict: reasoning_config=agent.reasoning_config, session_id=getattr(agent, "session_id", None), max_tokens=agent.max_tokens, + timeout=agent._resolved_api_call_timeout(), request_overrides=agent.request_overrides, is_github_responses=is_github_responses, is_codex_backend=is_codex_backend, @@ -2019,7 +2071,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= # when the context is large. Without this, the stale detector kills # healthy connections during the model's thinking phase, producing # spurious RemoteProtocolError ("peer closed connection"). - _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4 + _est_tokens = estimate_request_context_tokens(api_kwargs) if _est_tokens > 100_000: _stream_stale_timeout = max(_stream_stale_timeout_base, 300.0) elif _est_tokens > 50_000: @@ -2055,7 +2107,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= # inner retry loop can start a fresh connection. _stale_elapsed = time.time() - last_chunk_time["t"] if _stale_elapsed > _stream_stale_timeout: - _est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4 + _est_ctx = estimate_request_context_tokens(api_kwargs) logger.warning( "Stream stale for %.0fs (threshold %.0fs) — no chunks received. " "model=%s context=~%s tokens. Killing connection.", diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index adea34d094c..07ae5cc9506 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -745,7 +745,7 @@ def _preflight_codex_api_kwargs( "model", "instructions", "input", "tools", "store", "reasoning", "include", "max_output_tokens", "temperature", "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", - "extra_headers", "extra_body", + "extra_headers", "extra_body", "timeout", } normalized: Dict[str, Any] = { "model": model, @@ -771,6 +771,13 @@ def _preflight_codex_api_kwargs( max_output_tokens = api_kwargs.get("max_output_tokens") if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: normalized["max_output_tokens"] = int(max_output_tokens) + timeout = api_kwargs.get("timeout") + if ( + isinstance(timeout, (int, float)) + and not isinstance(timeout, bool) + and 0 < float(timeout) < float("inf") + ): + normalized["timeout"] = float(timeout) temperature = api_kwargs.get("temperature") if isinstance(temperature, (int, float)): normalized["temperature"] = float(temperature) diff --git a/agent/transports/codex.py b/agent/transports/codex.py index 27264f2f38f..970692c0394 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -50,6 +50,7 @@ class ResponsesApiTransport(ProviderTransport): reasoning_config: dict | None — {effort, enabled} session_id: str | None — used for prompt_cache_key + xAI conv header max_tokens: int | None — max_output_tokens + timeout: float | None — per-request timeout forwarded to the SDK request_overrides: dict | None — extra kwargs merged in provider: str | None — provider name for backend-specific logic base_url: str | None — endpoint URL @@ -143,6 +144,20 @@ class ResponsesApiTransport(ProviderTransport): if request_overrides: kwargs.update(request_overrides) + # Forward per-request timeout to the SDK so OpenAI/Anthropic clients + # honor it. Without this, ``providers..request_timeout_seconds`` + # is silently dropped on the main agent Codex path while the + # chat_completions path and auxiliary Codex adapter both forward it. + timeout = kwargs.get("timeout", params.get("timeout")) + if ( + isinstance(timeout, (int, float)) + and not isinstance(timeout, bool) + and 0 < float(timeout) < float("inf") + ): + kwargs["timeout"] = float(timeout) + else: + kwargs.pop("timeout", None) + if is_codex_backend: prompt_cache_key = kwargs.get("prompt_cache_key") cache_scope_id = str(prompt_cache_key or session_id or "").strip() diff --git a/run_agent.py b/run_agent.py index 4a2f3cb3ba4..6c44c2d9a48 100644 --- a/run_agent.py +++ b/run_agent.py @@ -885,7 +885,11 @@ class AIAgent: 1. ``providers..models..stale_timeout_seconds`` 2. ``providers..stale_timeout_seconds`` 3. ``HERMES_API_CALL_STALE_TIMEOUT`` env var - 4. 300.0s default + 4. 90.0s default (time-to-first-byte for non-streaming / Codex + internal-streaming requests; lowered from 300s in May 2026 so + fallback providers kick in faster when upstream providers + stall). The detector still scales up for large contexts in + ``_compute_non_stream_stale_timeout``. Returns ``(timeout_seconds, uses_implicit_default)`` so the caller can preserve legacy behaviors that only apply when the user has *not* @@ -900,20 +904,27 @@ class AIAgent: if env_timeout is not None: return float(env_timeout), False - return 300.0, True + return 90.0, True - def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float: - """Compute the effective non-stream stale timeout for this request.""" + def _compute_non_stream_stale_timeout(self, api_payload: Any) -> float: + """Compute the effective non-stream stale timeout for this request. + + Accepts either the full ``api_kwargs`` dict (Chat Completions or + Responses API) or a legacy ``messages`` list. Context-size scaling + applies the same way to both shapes via + :func:`agent.chat_completion_helpers.estimate_request_context_tokens`. + """ stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base() base_url = getattr(self, "_base_url", None) or self.base_url or "" if uses_implicit_default and base_url and is_local_endpoint(base_url): return float("inf") - est_tokens = sum(len(str(v)) for v in messages) // 4 + from agent.chat_completion_helpers import estimate_request_context_tokens + est_tokens = estimate_request_context_tokens(api_payload) if est_tokens > 100_000: - return max(stale_base, 600.0) + return max(stale_base, 240.0) if est_tokens > 50_000: - return max(stale_base, 450.0) + return max(stale_base, 150.0) return stale_base def _is_openrouter_url(self) -> bool: diff --git a/scripts/release.py b/scripts/release.py index f2da64997ea..82c0f2f98e3 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -1240,6 +1240,8 @@ AUTHOR_MAP = { "165905879+davidcampbelldc@users.noreply.github.com": "davidcampbelldc", "hoangv.pham0803@gmail.com": "hehehe0803", # PR #26212 salvage (codex kanban writable root) "26063003+hehehe0803@users.noreply.github.com": "hehehe0803", + "kasunvinod@users.noreply.github.com": "kasunvinod", # PR #24126 salvage (codex timeout propagation) + "15059870+kasunvinod@users.noreply.github.com": "kasunvinod", "38348871+vaddisrinivas@users.noreply.github.com": "vaddisrinivas", # PR #26394 salvage (Docker messaging extra) # batch salvage (May 2026 LHF run, group 7) "198679067+02356abc@users.noreply.github.com": "02356abc", # PR #28286 salvage (wecom CLOSING) diff --git a/tests/agent/test_non_stream_stale_timeout.py b/tests/agent/test_non_stream_stale_timeout.py new file mode 100644 index 00000000000..702856275f6 --- /dev/null +++ b/tests/agent/test_non_stream_stale_timeout.py @@ -0,0 +1,192 @@ +"""Tests for the non-stream stale-call detector context estimator. + +Covers: +- ``estimate_request_context_tokens`` for Chat Completions, Responses API, + bare lists, and mixed-shape dicts. +- ``AIAgent._compute_non_stream_stale_timeout`` with both legacy ``messages`` + list and full ``api_kwargs`` dicts. +- The May 2026 default-base change (300s -> 90s) and the lowered + context-tier ceilings (450/600 -> 150/240). +""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + + +def _write_config(tmp_path: Path, body: str) -> None: + hermes_home = tmp_path + (hermes_home / "config.yaml").write_text(body or "{}\n", encoding="utf-8") + + +def _make_agent(tmp_path: Path, **overrides): + from run_agent import AIAgent + kwargs = dict( + model="gpt-5.5", + provider="openai-codex", + api_key="sk-dummy", + base_url="https://chatgpt.com/backend-api/codex", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + platform="cli", + ) + kwargs.update(overrides) + return AIAgent(**kwargs) + + +# ── estimator ────────────────────────────────────────────────────────────── + + +def test_estimator_chat_completions_messages(): + from agent.chat_completion_helpers import estimate_request_context_tokens + payload = { + "model": "gpt-5.4", + "messages": [ + {"role": "user", "content": "x" * 400}, + {"role": "assistant", "content": "y" * 400}, + ], + } + # 800+ chars from messages -> ~200 tokens (char/4 estimate) + assert estimate_request_context_tokens(payload) >= 200 + + +def test_estimator_responses_api_input(): + from agent.chat_completion_helpers import estimate_request_context_tokens + payload = { + "model": "gpt-5.5", + "instructions": "i" * 1000, + "input": "x" * 4000, + "tools": [{"name": "t", "description": "d" * 200}], + } + # input(4000) + instructions(1000) + tools (~stringified) -> well over 1000 tokens + tokens = estimate_request_context_tokens(payload) + assert tokens >= 1200, f"Responses API estimator returned {tokens}" + + +def test_estimator_responses_api_long_session_triggers_tier(): + """A real long Codex session (large ``input``) should clear the 50k boundary.""" + from agent.chat_completion_helpers import estimate_request_context_tokens + payload = { + "model": "gpt-5.5", + "input": "x" * 240_000, # ~60k tokens (240k chars / 4) + "instructions": "s" * 4000, + } + assert estimate_request_context_tokens(payload) > 50_000 + + +def test_estimator_bare_list_back_compat(): + from agent.chat_completion_helpers import estimate_request_context_tokens + messages = [ + {"role": "user", "content": "x" * 800}, + ] + assert estimate_request_context_tokens(messages) >= 200 + + +def test_estimator_empty_inputs(): + from agent.chat_completion_helpers import estimate_request_context_tokens + assert estimate_request_context_tokens({}) == 0 + assert estimate_request_context_tokens([]) == 0 + assert estimate_request_context_tokens(None) == 0 + + +def test_estimator_unknown_dict_fallback(): + from agent.chat_completion_helpers import estimate_request_context_tokens + payload = {"random_field": "z" * 400} + assert estimate_request_context_tokens(payload) > 50 + + +# ── default base + tier scaling ──────────────────────────────────────────── + + +def test_default_base_is_90s(monkeypatch, tmp_path): + """Default base stale timeout dropped from 300s to 90s (May 2026).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / ".env").write_text("", encoding="utf-8") + monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) + _write_config(tmp_path, "") + + agent = _make_agent(tmp_path) + base, implicit = agent._resolved_api_call_stale_timeout_base() + assert base == 90.0 + assert implicit is True + + +def test_short_codex_request_uses_base_only(monkeypatch, tmp_path): + """Codex payload below 50k tokens -> default 90s base.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / ".env").write_text("", encoding="utf-8") + monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) + _write_config(tmp_path, "") + + agent = _make_agent(tmp_path) + payload = {"model": "gpt-5.5", "input": "hi", "instructions": ""} + assert agent._compute_non_stream_stale_timeout(payload) == 90.0 + + +def test_long_codex_request_bumps_to_50k_tier(monkeypatch, tmp_path): + """Codex payload > 50k tokens -> at least 150s.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / ".env").write_text("", encoding="utf-8") + monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) + _write_config(tmp_path, "") + + agent = _make_agent(tmp_path) + payload = {"model": "gpt-5.5", "input": "x" * 240_000, "instructions": ""} + timeout = agent._compute_non_stream_stale_timeout(payload) + assert timeout >= 150.0 + assert timeout < 240.0 + + +def test_very_long_codex_request_bumps_to_100k_tier(monkeypatch, tmp_path): + """Codex payload > 100k tokens -> at least 240s.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / ".env").write_text("", encoding="utf-8") + monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) + _write_config(tmp_path, "") + + agent = _make_agent(tmp_path) + payload = {"model": "gpt-5.5", "input": "x" * 500_000, "instructions": ""} + assert agent._compute_non_stream_stale_timeout(payload) >= 240.0 + + +def test_chat_completions_long_messages_bumps_tier(monkeypatch, tmp_path): + """Chat Completions estimator still works for the legacy messages path.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / ".env").write_text("", encoding="utf-8") + monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) + _write_config(tmp_path, "") + + agent = _make_agent( + tmp_path, + provider="openai", + base_url="https://api.openai.com/v1", + model="gpt-5.4", + ) + payload = { + "model": "gpt-5.4", + "messages": [{"role": "user", "content": "x" * 240_000}], + } + assert agent._compute_non_stream_stale_timeout(payload) >= 150.0 + + +def test_explicit_user_config_overrides_default(monkeypatch, tmp_path): + """If the user explicitly sets a stale_timeout, the new defaults don't apply.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + (tmp_path / ".env").write_text("", encoding="utf-8") + _write_config(tmp_path, """\ +providers: + openai-codex: + stale_timeout_seconds: 1800 +""") + monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) + + import importlib + from hermes_cli import timeouts as to_mod + importlib.reload(to_mod) + + agent = _make_agent(tmp_path) + assert agent._compute_non_stream_stale_timeout({"input": "hi"}) == 1800.0 diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index a0470fa8de8..96a80827204 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -452,3 +452,64 @@ class TestCodexNormalizeResponse: tc = nr.tool_calls[0] assert tc.name == "terminal" assert '"command"' in tc.arguments + + + +class TestCodexTransportTimeout: + """Forward per-request timeout from build_kwargs to the SDK kwargs.""" + + def test_positive_timeout_preserved(self, transport): + kw = transport.build_kwargs( + model="gpt-5.5", + messages=[{"role": "user", "content": "hi"}], + tools=[], + timeout=600.0, + ) + assert kw.get("timeout") == 600.0 + + def test_zero_timeout_dropped(self, transport): + kw = transport.build_kwargs( + model="gpt-5.5", + messages=[{"role": "user", "content": "hi"}], + tools=[], + timeout=0, + ) + assert "timeout" not in kw + + def test_none_timeout_omitted(self, transport): + kw = transport.build_kwargs( + model="gpt-5.5", + messages=[{"role": "user", "content": "hi"}], + tools=[], + timeout=None, + ) + assert "timeout" not in kw + + def test_inf_timeout_dropped(self, transport): + kw = transport.build_kwargs( + model="gpt-5.5", + messages=[{"role": "user", "content": "hi"}], + tools=[], + timeout=float("inf"), + ) + assert "timeout" not in kw + + def test_bool_timeout_dropped(self, transport): + """``True`` is technically int but must not survive — caller bug guard.""" + kw = transport.build_kwargs( + model="gpt-5.5", + messages=[{"role": "user", "content": "hi"}], + tools=[], + timeout=True, + ) + assert "timeout" not in kw + + def test_request_overrides_can_supply_timeout(self, transport): + """request_overrides["timeout"] is honored when no explicit kwarg passed.""" + kw = transport.build_kwargs( + model="gpt-5.5", + messages=[{"role": "user", "content": "hi"}], + tools=[], + request_overrides={"timeout": 450.0}, + ) + assert kw.get("timeout") == 450.0 diff --git a/tests/hermes_cli/test_timeouts.py b/tests/hermes_cli/test_timeouts.py index 0f641a5c1b8..93c8cafc0a9 100644 --- a/tests/hermes_cli/test_timeouts.py +++ b/tests/hermes_cli/test_timeouts.py @@ -265,7 +265,7 @@ def test_resolved_api_call_stale_timeout_priority(monkeypatch, tmp_path): assert agent2._resolved_api_call_stale_timeout_base() == (999.0, False) monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False) - assert agent2._resolved_api_call_stale_timeout_base() == (300.0, True) + assert agent2._resolved_api_call_stale_timeout_base() == (90.0, True) def test_default_non_stream_stale_timeout_auto_disables_for_local_endpoints(monkeypatch, tmp_path): diff --git a/tests/run_agent/test_openai_client_lifecycle.py b/tests/run_agent/test_openai_client_lifecycle.py index 35a8ec7a084..e38c1f726e4 100644 --- a/tests/run_agent/test_openai_client_lifecycle.py +++ b/tests/run_agent/test_openai_client_lifecycle.py @@ -105,7 +105,7 @@ def test_stale_non_stream_close_is_single_owner(monkeypatch): monkeypatch.setattr(run_agent, "OpenAI", factory) agent = _build_agent() - agent._compute_non_stream_stale_timeout = lambda _messages: 0.01 + agent._compute_non_stream_stale_timeout = lambda api_payload: 0.01 with pytest.raises(APIConnectionError): agent._interruptible_api_call({"model": agent.model, "messages": []}) diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 42948e1c41e..bc575cc676f 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -306,7 +306,10 @@ def test_build_api_kwargs_codex(monkeypatch): assert kwargs["parallel_tool_calls"] is True assert isinstance(kwargs["prompt_cache_key"], str) assert len(kwargs["prompt_cache_key"]) > 0 - assert "timeout" not in kwargs + # ``timeout`` is now wired from ``_resolved_api_call_timeout`` (default 1800s) + # so per-provider ``request_timeout_seconds`` actually reaches the SDK. + assert isinstance(kwargs.get("timeout"), float) + assert kwargs["timeout"] > 0 assert "max_tokens" not in kwargs assert "extra_body" not in kwargs @@ -1053,6 +1056,29 @@ def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch): assert result["service_tier"] == "priority" +def test_preflight_codex_api_kwargs_preserves_positive_timeout(monkeypatch): + """Positive numeric timeouts survive preflight so the SDK honors them.""" + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["timeout"] = 600.0 + + from agent.codex_responses_adapter import _preflight_codex_api_kwargs + result = _preflight_codex_api_kwargs(kwargs) + assert result["timeout"] == 600.0 + + +def test_preflight_codex_api_kwargs_drops_invalid_timeout(monkeypatch): + """Zero, negative, inf, and booleans are all dropped — not passed to SDK.""" + agent = _build_agent(monkeypatch) + from agent.codex_responses_adapter import _preflight_codex_api_kwargs + + for bad in (0, -1, float("inf"), True, False, "300", None): + kwargs = _codex_request_kwargs() + kwargs["timeout"] = bad + result = _preflight_codex_api_kwargs(kwargs) + assert "timeout" not in result, f"timeout={bad!r} should be dropped" + + def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")]