mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(codex): size and propagate timeouts for Responses-API requests; lower stale defaults
Codex / Responses-API requests had three latent timeout bugs that combined into the long silent hangs reported on #21444: 1. The non-stream stale-call detector estimated context tokens from ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry their conversational load in ``input`` (with ``instructions`` and ``tools``), so every Codex turn logged ``context=~0 tokens`` and the detector never applied its >50k / >100k tier bumps. 2. ``providers.<id>.request_timeout_seconds`` was silently dropped on the main Codex path. The chat_completions path and the auxiliary Codex adapter both forwarded it; the main path skipped it through three places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``, ``_preflight_codex_api_kwargs``). 3. The streaming stale detector had the same payload-shape bug for ``codex_responses`` requests, which route through the non-streaming detector (it's the path that emits the user-facing "No response from provider for 300s (non-streaming, ...)" warning that reporters keep pasting). This commit: - Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``, used by both the non-stream and stream detectors. Handles ``messages`` (Chat Completions), ``input + instructions + tools`` (Responses API), bare lists, and an unknown-dict fallback. - Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs`` and ``_preflight_codex_api_kwargs`` (with guards against zero/negative/inf/bool values), and wires ``_resolved_api_call_timeout()`` into the Codex branch of ``build_api_kwargs``. - Lowers the implicit non-stream stale defaults so fallback providers kick in faster when upstream stalls: * base 300s -> 90s * >50k 450s -> 150s * >100k 600s -> 240s These only apply when the user has *not* set ``providers.<id>.stale_timeout_seconds`` or ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins. - Adds regression tests for the estimator shapes, the new defaults, the context-tier scaling, transport timeout pass-through, and preflight timeout pass-through / rejection of invalid values. Closes #21444 Supersedes #21652 #24126 #31855 Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com>
This commit is contained in:
parent
76135b329d
commit
2d422720b5
10 changed files with 383 additions and 17 deletions
|
|
@ -75,6 +75,59 @@ def _ra():
|
|||
return run_agent
|
||||
|
||||
|
||||
def estimate_request_context_tokens(api_payload: Any) -> int:
|
||||
"""Estimate context/load tokens from an API payload, dict or messages list.
|
||||
|
||||
The stale-call detectors historically assumed a Chat Completions request:
|
||||
they pulled ``api_kwargs["messages"]`` and ran a cheap char/4 estimate.
|
||||
Codex / Responses API requests carry the conversational payload in
|
||||
``input`` (with additional load in ``instructions`` and ``tools``), so the
|
||||
legacy estimator reported ~0 tokens for every Codex turn and the
|
||||
context-tier scaling never fired.
|
||||
|
||||
This helper handles both shapes:
|
||||
- bare list -> treat as Chat Completions ``messages``
|
||||
- dict with ``messages`` -> Chat Completions (+ ``tools`` if present)
|
||||
- dict with ``input`` -> Responses API (+ ``instructions``/``tools``)
|
||||
- any other dict -> fall back to summing string values
|
||||
"""
|
||||
|
||||
def _chars(value: Any) -> int:
|
||||
if value is None:
|
||||
return 0
|
||||
if isinstance(value, str):
|
||||
return len(value)
|
||||
return len(str(value))
|
||||
|
||||
def _message_chars(messages: Any) -> int:
|
||||
if not isinstance(messages, list):
|
||||
return _chars(messages)
|
||||
return sum(_chars(item) for item in messages)
|
||||
|
||||
if isinstance(api_payload, list):
|
||||
return _message_chars(api_payload) // 4
|
||||
|
||||
if isinstance(api_payload, dict):
|
||||
messages = api_payload.get("messages")
|
||||
if isinstance(messages, list):
|
||||
total_chars = _message_chars(messages)
|
||||
if "tools" in api_payload:
|
||||
total_chars += _chars(api_payload.get("tools"))
|
||||
return total_chars // 4
|
||||
|
||||
if "input" in api_payload:
|
||||
total_chars = (
|
||||
_chars(api_payload.get("input"))
|
||||
+ _chars(api_payload.get("instructions"))
|
||||
+ _chars(api_payload.get("tools"))
|
||||
)
|
||||
return total_chars // 4
|
||||
|
||||
return sum(_chars(value) for value in api_payload.values()) // 4
|
||||
|
||||
return _chars(api_payload) // 4
|
||||
|
||||
|
||||
|
||||
def interruptible_api_call(agent, api_kwargs: dict):
|
||||
"""
|
||||
|
|
@ -200,9 +253,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
|||
# httpx timeout (default 1800s) with zero feedback. The stale
|
||||
# detector kills the connection early so the main retry loop can
|
||||
# apply richer recovery (credential rotation, provider fallback).
|
||||
_stale_timeout = agent._compute_non_stream_stale_timeout(
|
||||
api_kwargs.get("messages", [])
|
||||
)
|
||||
_stale_timeout = agent._compute_non_stream_stale_timeout(api_kwargs)
|
||||
|
||||
_call_start = time.time()
|
||||
agent._touch_activity("waiting for non-streaming API response")
|
||||
|
|
@ -226,7 +277,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
|
|||
# arrives within the configured timeout.
|
||||
_elapsed = time.time() - _call_start
|
||||
if _elapsed > _stale_timeout:
|
||||
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
_est_ctx = estimate_request_context_tokens(api_kwargs)
|
||||
logger.warning(
|
||||
"Non-streaming API call stale for %.0fs (threshold %.0fs). "
|
||||
"model=%s context=~%s tokens. Killing connection.",
|
||||
|
|
@ -362,6 +413,7 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
|
|||
reasoning_config=agent.reasoning_config,
|
||||
session_id=getattr(agent, "session_id", None),
|
||||
max_tokens=agent.max_tokens,
|
||||
timeout=agent._resolved_api_call_timeout(),
|
||||
request_overrides=agent.request_overrides,
|
||||
is_github_responses=is_github_responses,
|
||||
is_codex_backend=is_codex_backend,
|
||||
|
|
@ -2019,7 +2071,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
|||
# when the context is large. Without this, the stale detector kills
|
||||
# healthy connections during the model's thinking phase, producing
|
||||
# spurious RemoteProtocolError ("peer closed connection").
|
||||
_est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
_est_tokens = estimate_request_context_tokens(api_kwargs)
|
||||
if _est_tokens > 100_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
|
||||
elif _est_tokens > 50_000:
|
||||
|
|
@ -2055,7 +2107,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
|||
# inner retry loop can start a fresh connection.
|
||||
_stale_elapsed = time.time() - last_chunk_time["t"]
|
||||
if _stale_elapsed > _stream_stale_timeout:
|
||||
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
_est_ctx = estimate_request_context_tokens(api_kwargs)
|
||||
logger.warning(
|
||||
"Stream stale for %.0fs (threshold %.0fs) — no chunks received. "
|
||||
"model=%s context=~%s tokens. Killing connection.",
|
||||
|
|
|
|||
|
|
@ -745,7 +745,7 @@ def _preflight_codex_api_kwargs(
|
|||
"model", "instructions", "input", "tools", "store",
|
||||
"reasoning", "include", "max_output_tokens", "temperature",
|
||||
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
|
||||
"extra_headers", "extra_body",
|
||||
"extra_headers", "extra_body", "timeout",
|
||||
}
|
||||
normalized: Dict[str, Any] = {
|
||||
"model": model,
|
||||
|
|
@ -771,6 +771,13 @@ def _preflight_codex_api_kwargs(
|
|||
max_output_tokens = api_kwargs.get("max_output_tokens")
|
||||
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
|
||||
normalized["max_output_tokens"] = int(max_output_tokens)
|
||||
timeout = api_kwargs.get("timeout")
|
||||
if (
|
||||
isinstance(timeout, (int, float))
|
||||
and not isinstance(timeout, bool)
|
||||
and 0 < float(timeout) < float("inf")
|
||||
):
|
||||
normalized["timeout"] = float(timeout)
|
||||
temperature = api_kwargs.get("temperature")
|
||||
if isinstance(temperature, (int, float)):
|
||||
normalized["temperature"] = float(temperature)
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
reasoning_config: dict | None — {effort, enabled}
|
||||
session_id: str | None — used for prompt_cache_key + xAI conv header
|
||||
max_tokens: int | None — max_output_tokens
|
||||
timeout: float | None — per-request timeout forwarded to the SDK
|
||||
request_overrides: dict | None — extra kwargs merged in
|
||||
provider: str | None — provider name for backend-specific logic
|
||||
base_url: str | None — endpoint URL
|
||||
|
|
@ -143,6 +144,20 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
# Forward per-request timeout to the SDK so OpenAI/Anthropic clients
|
||||
# honor it. Without this, ``providers.<id>.request_timeout_seconds``
|
||||
# is silently dropped on the main agent Codex path while the
|
||||
# chat_completions path and auxiliary Codex adapter both forward it.
|
||||
timeout = kwargs.get("timeout", params.get("timeout"))
|
||||
if (
|
||||
isinstance(timeout, (int, float))
|
||||
and not isinstance(timeout, bool)
|
||||
and 0 < float(timeout) < float("inf")
|
||||
):
|
||||
kwargs["timeout"] = float(timeout)
|
||||
else:
|
||||
kwargs.pop("timeout", None)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
|
|
|
|||
25
run_agent.py
25
run_agent.py
|
|
@ -885,7 +885,11 @@ class AIAgent:
|
|||
1. ``providers.<id>.models.<model>.stale_timeout_seconds``
|
||||
2. ``providers.<id>.stale_timeout_seconds``
|
||||
3. ``HERMES_API_CALL_STALE_TIMEOUT`` env var
|
||||
4. 300.0s default
|
||||
4. 90.0s default (time-to-first-byte for non-streaming / Codex
|
||||
internal-streaming requests; lowered from 300s in May 2026 so
|
||||
fallback providers kick in faster when upstream providers
|
||||
stall). The detector still scales up for large contexts in
|
||||
``_compute_non_stream_stale_timeout``.
|
||||
|
||||
Returns ``(timeout_seconds, uses_implicit_default)`` so the caller can
|
||||
preserve legacy behaviors that only apply when the user has *not*
|
||||
|
|
@ -900,20 +904,27 @@ class AIAgent:
|
|||
if env_timeout is not None:
|
||||
return float(env_timeout), False
|
||||
|
||||
return 300.0, True
|
||||
return 90.0, True
|
||||
|
||||
def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float:
|
||||
"""Compute the effective non-stream stale timeout for this request."""
|
||||
def _compute_non_stream_stale_timeout(self, api_payload: Any) -> float:
|
||||
"""Compute the effective non-stream stale timeout for this request.
|
||||
|
||||
Accepts either the full ``api_kwargs`` dict (Chat Completions or
|
||||
Responses API) or a legacy ``messages`` list. Context-size scaling
|
||||
applies the same way to both shapes via
|
||||
:func:`agent.chat_completion_helpers.estimate_request_context_tokens`.
|
||||
"""
|
||||
stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base()
|
||||
base_url = getattr(self, "_base_url", None) or self.base_url or ""
|
||||
if uses_implicit_default and base_url and is_local_endpoint(base_url):
|
||||
return float("inf")
|
||||
|
||||
est_tokens = sum(len(str(v)) for v in messages) // 4
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
est_tokens = estimate_request_context_tokens(api_payload)
|
||||
if est_tokens > 100_000:
|
||||
return max(stale_base, 600.0)
|
||||
return max(stale_base, 240.0)
|
||||
if est_tokens > 50_000:
|
||||
return max(stale_base, 450.0)
|
||||
return max(stale_base, 150.0)
|
||||
return stale_base
|
||||
|
||||
def _is_openrouter_url(self) -> bool:
|
||||
|
|
|
|||
|
|
@ -1240,6 +1240,8 @@ AUTHOR_MAP = {
|
|||
"165905879+davidcampbelldc@users.noreply.github.com": "davidcampbelldc",
|
||||
"hoangv.pham0803@gmail.com": "hehehe0803", # PR #26212 salvage (codex kanban writable root)
|
||||
"26063003+hehehe0803@users.noreply.github.com": "hehehe0803",
|
||||
"kasunvinod@users.noreply.github.com": "kasunvinod", # PR #24126 salvage (codex timeout propagation)
|
||||
"15059870+kasunvinod@users.noreply.github.com": "kasunvinod",
|
||||
"38348871+vaddisrinivas@users.noreply.github.com": "vaddisrinivas", # PR #26394 salvage (Docker messaging extra)
|
||||
# batch salvage (May 2026 LHF run, group 7)
|
||||
"198679067+02356abc@users.noreply.github.com": "02356abc", # PR #28286 salvage (wecom CLOSING)
|
||||
|
|
|
|||
192
tests/agent/test_non_stream_stale_timeout.py
Normal file
192
tests/agent/test_non_stream_stale_timeout.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
"""Tests for the non-stream stale-call detector context estimator.
|
||||
|
||||
Covers:
|
||||
- ``estimate_request_context_tokens`` for Chat Completions, Responses API,
|
||||
bare lists, and mixed-shape dicts.
|
||||
- ``AIAgent._compute_non_stream_stale_timeout`` with both legacy ``messages``
|
||||
list and full ``api_kwargs`` dicts.
|
||||
- The May 2026 default-base change (300s -> 90s) and the lowered
|
||||
context-tier ceilings (450/600 -> 150/240).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _write_config(tmp_path: Path, body: str) -> None:
|
||||
hermes_home = tmp_path
|
||||
(hermes_home / "config.yaml").write_text(body or "{}\n", encoding="utf-8")
|
||||
|
||||
|
||||
def _make_agent(tmp_path: Path, **overrides):
|
||||
from run_agent import AIAgent
|
||||
kwargs = dict(
|
||||
model="gpt-5.5",
|
||||
provider="openai-codex",
|
||||
api_key="sk-dummy",
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
platform="cli",
|
||||
)
|
||||
kwargs.update(overrides)
|
||||
return AIAgent(**kwargs)
|
||||
|
||||
|
||||
# ── estimator ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_estimator_chat_completions_messages():
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
payload = {
|
||||
"model": "gpt-5.4",
|
||||
"messages": [
|
||||
{"role": "user", "content": "x" * 400},
|
||||
{"role": "assistant", "content": "y" * 400},
|
||||
],
|
||||
}
|
||||
# 800+ chars from messages -> ~200 tokens (char/4 estimate)
|
||||
assert estimate_request_context_tokens(payload) >= 200
|
||||
|
||||
|
||||
def test_estimator_responses_api_input():
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
payload = {
|
||||
"model": "gpt-5.5",
|
||||
"instructions": "i" * 1000,
|
||||
"input": "x" * 4000,
|
||||
"tools": [{"name": "t", "description": "d" * 200}],
|
||||
}
|
||||
# input(4000) + instructions(1000) + tools (~stringified) -> well over 1000 tokens
|
||||
tokens = estimate_request_context_tokens(payload)
|
||||
assert tokens >= 1200, f"Responses API estimator returned {tokens}"
|
||||
|
||||
|
||||
def test_estimator_responses_api_long_session_triggers_tier():
|
||||
"""A real long Codex session (large ``input``) should clear the 50k boundary."""
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
payload = {
|
||||
"model": "gpt-5.5",
|
||||
"input": "x" * 240_000, # ~60k tokens (240k chars / 4)
|
||||
"instructions": "s" * 4000,
|
||||
}
|
||||
assert estimate_request_context_tokens(payload) > 50_000
|
||||
|
||||
|
||||
def test_estimator_bare_list_back_compat():
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
messages = [
|
||||
{"role": "user", "content": "x" * 800},
|
||||
]
|
||||
assert estimate_request_context_tokens(messages) >= 200
|
||||
|
||||
|
||||
def test_estimator_empty_inputs():
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
assert estimate_request_context_tokens({}) == 0
|
||||
assert estimate_request_context_tokens([]) == 0
|
||||
assert estimate_request_context_tokens(None) == 0
|
||||
|
||||
|
||||
def test_estimator_unknown_dict_fallback():
|
||||
from agent.chat_completion_helpers import estimate_request_context_tokens
|
||||
payload = {"random_field": "z" * 400}
|
||||
assert estimate_request_context_tokens(payload) > 50
|
||||
|
||||
|
||||
# ── default base + tier scaling ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_default_base_is_90s(monkeypatch, tmp_path):
|
||||
"""Default base stale timeout dropped from 300s to 90s (May 2026)."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
(tmp_path / ".env").write_text("", encoding="utf-8")
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
_write_config(tmp_path, "")
|
||||
|
||||
agent = _make_agent(tmp_path)
|
||||
base, implicit = agent._resolved_api_call_stale_timeout_base()
|
||||
assert base == 90.0
|
||||
assert implicit is True
|
||||
|
||||
|
||||
def test_short_codex_request_uses_base_only(monkeypatch, tmp_path):
|
||||
"""Codex payload below 50k tokens -> default 90s base."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
(tmp_path / ".env").write_text("", encoding="utf-8")
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
_write_config(tmp_path, "")
|
||||
|
||||
agent = _make_agent(tmp_path)
|
||||
payload = {"model": "gpt-5.5", "input": "hi", "instructions": ""}
|
||||
assert agent._compute_non_stream_stale_timeout(payload) == 90.0
|
||||
|
||||
|
||||
def test_long_codex_request_bumps_to_50k_tier(monkeypatch, tmp_path):
|
||||
"""Codex payload > 50k tokens -> at least 150s."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
(tmp_path / ".env").write_text("", encoding="utf-8")
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
_write_config(tmp_path, "")
|
||||
|
||||
agent = _make_agent(tmp_path)
|
||||
payload = {"model": "gpt-5.5", "input": "x" * 240_000, "instructions": ""}
|
||||
timeout = agent._compute_non_stream_stale_timeout(payload)
|
||||
assert timeout >= 150.0
|
||||
assert timeout < 240.0
|
||||
|
||||
|
||||
def test_very_long_codex_request_bumps_to_100k_tier(monkeypatch, tmp_path):
|
||||
"""Codex payload > 100k tokens -> at least 240s."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
(tmp_path / ".env").write_text("", encoding="utf-8")
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
_write_config(tmp_path, "")
|
||||
|
||||
agent = _make_agent(tmp_path)
|
||||
payload = {"model": "gpt-5.5", "input": "x" * 500_000, "instructions": ""}
|
||||
assert agent._compute_non_stream_stale_timeout(payload) >= 240.0
|
||||
|
||||
|
||||
def test_chat_completions_long_messages_bumps_tier(monkeypatch, tmp_path):
|
||||
"""Chat Completions estimator still works for the legacy messages path."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
(tmp_path / ".env").write_text("", encoding="utf-8")
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
_write_config(tmp_path, "")
|
||||
|
||||
agent = _make_agent(
|
||||
tmp_path,
|
||||
provider="openai",
|
||||
base_url="https://api.openai.com/v1",
|
||||
model="gpt-5.4",
|
||||
)
|
||||
payload = {
|
||||
"model": "gpt-5.4",
|
||||
"messages": [{"role": "user", "content": "x" * 240_000}],
|
||||
}
|
||||
assert agent._compute_non_stream_stale_timeout(payload) >= 150.0
|
||||
|
||||
|
||||
def test_explicit_user_config_overrides_default(monkeypatch, tmp_path):
|
||||
"""If the user explicitly sets a stale_timeout, the new defaults don't apply."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
(tmp_path / ".env").write_text("", encoding="utf-8")
|
||||
_write_config(tmp_path, """\
|
||||
providers:
|
||||
openai-codex:
|
||||
stale_timeout_seconds: 1800
|
||||
""")
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
|
||||
import importlib
|
||||
from hermes_cli import timeouts as to_mod
|
||||
importlib.reload(to_mod)
|
||||
|
||||
agent = _make_agent(tmp_path)
|
||||
assert agent._compute_non_stream_stale_timeout({"input": "hi"}) == 1800.0
|
||||
|
|
@ -452,3 +452,64 @@ class TestCodexNormalizeResponse:
|
|||
tc = nr.tool_calls[0]
|
||||
assert tc.name == "terminal"
|
||||
assert '"command"' in tc.arguments
|
||||
|
||||
|
||||
|
||||
class TestCodexTransportTimeout:
|
||||
"""Forward per-request timeout from build_kwargs to the SDK kwargs."""
|
||||
|
||||
def test_positive_timeout_preserved(self, transport):
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-5.5",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=[],
|
||||
timeout=600.0,
|
||||
)
|
||||
assert kw.get("timeout") == 600.0
|
||||
|
||||
def test_zero_timeout_dropped(self, transport):
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-5.5",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=[],
|
||||
timeout=0,
|
||||
)
|
||||
assert "timeout" not in kw
|
||||
|
||||
def test_none_timeout_omitted(self, transport):
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-5.5",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=[],
|
||||
timeout=None,
|
||||
)
|
||||
assert "timeout" not in kw
|
||||
|
||||
def test_inf_timeout_dropped(self, transport):
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-5.5",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=[],
|
||||
timeout=float("inf"),
|
||||
)
|
||||
assert "timeout" not in kw
|
||||
|
||||
def test_bool_timeout_dropped(self, transport):
|
||||
"""``True`` is technically int but must not survive — caller bug guard."""
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-5.5",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=[],
|
||||
timeout=True,
|
||||
)
|
||||
assert "timeout" not in kw
|
||||
|
||||
def test_request_overrides_can_supply_timeout(self, transport):
|
||||
"""request_overrides["timeout"] is honored when no explicit kwarg passed."""
|
||||
kw = transport.build_kwargs(
|
||||
model="gpt-5.5",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=[],
|
||||
request_overrides={"timeout": 450.0},
|
||||
)
|
||||
assert kw.get("timeout") == 450.0
|
||||
|
|
|
|||
|
|
@ -265,7 +265,7 @@ def test_resolved_api_call_stale_timeout_priority(monkeypatch, tmp_path):
|
|||
assert agent2._resolved_api_call_stale_timeout_base() == (999.0, False)
|
||||
|
||||
monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
|
||||
assert agent2._resolved_api_call_stale_timeout_base() == (300.0, True)
|
||||
assert agent2._resolved_api_call_stale_timeout_base() == (90.0, True)
|
||||
|
||||
|
||||
def test_default_non_stream_stale_timeout_auto_disables_for_local_endpoints(monkeypatch, tmp_path):
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ def test_stale_non_stream_close_is_single_owner(monkeypatch):
|
|||
monkeypatch.setattr(run_agent, "OpenAI", factory)
|
||||
|
||||
agent = _build_agent()
|
||||
agent._compute_non_stream_stale_timeout = lambda _messages: 0.01
|
||||
agent._compute_non_stream_stale_timeout = lambda api_payload: 0.01
|
||||
|
||||
with pytest.raises(APIConnectionError):
|
||||
agent._interruptible_api_call({"model": agent.model, "messages": []})
|
||||
|
|
|
|||
|
|
@ -306,7 +306,10 @@ def test_build_api_kwargs_codex(monkeypatch):
|
|||
assert kwargs["parallel_tool_calls"] is True
|
||||
assert isinstance(kwargs["prompt_cache_key"], str)
|
||||
assert len(kwargs["prompt_cache_key"]) > 0
|
||||
assert "timeout" not in kwargs
|
||||
# ``timeout`` is now wired from ``_resolved_api_call_timeout`` (default 1800s)
|
||||
# so per-provider ``request_timeout_seconds`` actually reaches the SDK.
|
||||
assert isinstance(kwargs.get("timeout"), float)
|
||||
assert kwargs["timeout"] > 0
|
||||
assert "max_tokens" not in kwargs
|
||||
assert "extra_body" not in kwargs
|
||||
|
||||
|
|
@ -1053,6 +1056,29 @@ def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
|
|||
assert result["service_tier"] == "priority"
|
||||
|
||||
|
||||
def test_preflight_codex_api_kwargs_preserves_positive_timeout(monkeypatch):
|
||||
"""Positive numeric timeouts survive preflight so the SDK honors them."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
kwargs = _codex_request_kwargs()
|
||||
kwargs["timeout"] = 600.0
|
||||
|
||||
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
|
||||
result = _preflight_codex_api_kwargs(kwargs)
|
||||
assert result["timeout"] == 600.0
|
||||
|
||||
|
||||
def test_preflight_codex_api_kwargs_drops_invalid_timeout(monkeypatch):
|
||||
"""Zero, negative, inf, and booleans are all dropped — not passed to SDK."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
|
||||
|
||||
for bad in (0, -1, float("inf"), True, False, "300", None):
|
||||
kwargs = _codex_request_kwargs()
|
||||
kwargs["timeout"] = bad
|
||||
result = _preflight_codex_api_kwargs(kwargs)
|
||||
assert "timeout" not in result, f"timeout={bad!r} should be dropped"
|
||||
|
||||
|
||||
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [_codex_tool_call_response(), _codex_message_response("done")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue