mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-05 07:41:39 +00:00
fix(codex): size and propagate timeouts for Responses-API requests; lower stale defaults
Codex / Responses-API requests had three latent timeout bugs that combined into the long silent hangs reported on #21444: 1. The non-stream stale-call detector estimated context tokens from ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry their conversational load in ``input`` (with ``instructions`` and ``tools``), so every Codex turn logged ``context=~0 tokens`` and the detector never applied its >50k / >100k tier bumps. 2. ``providers.<id>.request_timeout_seconds`` was silently dropped on the main Codex path. The chat_completions path and the auxiliary Codex adapter both forwarded it; the main path skipped it through three places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``, ``_preflight_codex_api_kwargs``). 3. The streaming stale detector had the same payload-shape bug for ``codex_responses`` requests, which route through the non-streaming detector (it's the path that emits the user-facing "No response from provider for 300s (non-streaming, ...)" warning that reporters keep pasting). This commit: - Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``, used by both the non-stream and stream detectors. Handles ``messages`` (Chat Completions), ``input + instructions + tools`` (Responses API), bare lists, and an unknown-dict fallback. - Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs`` and ``_preflight_codex_api_kwargs`` (with guards against zero/negative/inf/bool values), and wires ``_resolved_api_call_timeout()`` into the Codex branch of ``build_api_kwargs``. - Lowers the implicit non-stream stale defaults so fallback providers kick in faster when upstream stalls: * base 300s -> 90s * >50k 450s -> 150s * >100k 600s -> 240s These only apply when the user has *not* set ``providers.<id>.stale_timeout_seconds`` or ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins. - Adds regression tests for the estimator shapes, the new defaults, the context-tier scaling, transport timeout pass-through, and preflight timeout pass-through / rejection of invalid values. Closes #21444 Supersedes #21652 #24126 #31855 Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com>
This commit is contained in:
parent
76135b329d
commit
2d422720b5
10 changed files with 383 additions and 17 deletions
|
|
@ -105,7 +105,7 @@ def test_stale_non_stream_close_is_single_owner(monkeypatch):
|
|||
monkeypatch.setattr(run_agent, "OpenAI", factory)
|
||||
|
||||
agent = _build_agent()
|
||||
agent._compute_non_stream_stale_timeout = lambda _messages: 0.01
|
||||
agent._compute_non_stream_stale_timeout = lambda api_payload: 0.01
|
||||
|
||||
with pytest.raises(APIConnectionError):
|
||||
agent._interruptible_api_call({"model": agent.model, "messages": []})
|
||||
|
|
|
|||
|
|
@ -306,7 +306,10 @@ def test_build_api_kwargs_codex(monkeypatch):
|
|||
assert kwargs["parallel_tool_calls"] is True
|
||||
assert isinstance(kwargs["prompt_cache_key"], str)
|
||||
assert len(kwargs["prompt_cache_key"]) > 0
|
||||
assert "timeout" not in kwargs
|
||||
# ``timeout`` is now wired from ``_resolved_api_call_timeout`` (default 1800s)
|
||||
# so per-provider ``request_timeout_seconds`` actually reaches the SDK.
|
||||
assert isinstance(kwargs.get("timeout"), float)
|
||||
assert kwargs["timeout"] > 0
|
||||
assert "max_tokens" not in kwargs
|
||||
assert "extra_body" not in kwargs
|
||||
|
||||
|
|
@ -1053,6 +1056,29 @@ def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
|
|||
assert result["service_tier"] == "priority"
|
||||
|
||||
|
||||
def test_preflight_codex_api_kwargs_preserves_positive_timeout(monkeypatch):
|
||||
"""Positive numeric timeouts survive preflight so the SDK honors them."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
kwargs = _codex_request_kwargs()
|
||||
kwargs["timeout"] = 600.0
|
||||
|
||||
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
|
||||
result = _preflight_codex_api_kwargs(kwargs)
|
||||
assert result["timeout"] == 600.0
|
||||
|
||||
|
||||
def test_preflight_codex_api_kwargs_drops_invalid_timeout(monkeypatch):
|
||||
"""Zero, negative, inf, and booleans are all dropped — not passed to SDK."""
|
||||
agent = _build_agent(monkeypatch)
|
||||
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
|
||||
|
||||
for bad in (0, -1, float("inf"), True, False, "300", None):
|
||||
kwargs = _codex_request_kwargs()
|
||||
kwargs["timeout"] = bad
|
||||
result = _preflight_codex_api_kwargs(kwargs)
|
||||
assert "timeout" not in result, f"timeout={bad!r} should be dropped"
|
||||
|
||||
|
||||
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [_codex_tool_call_response(), _codex_message_response("done")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue