mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(codex): size and propagate timeouts for Responses-API requests; lower stale defaults
Codex / Responses-API requests had three latent timeout bugs that combined into the long silent hangs reported on #21444:

1. The non-stream stale-call detector estimated context tokens from ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry their conversational load in ``input`` (with ``instructions`` and ``tools``), so every Codex turn logged ``context=~0 tokens`` and the detector never applied its >50k / >100k tier bumps.
2. ``providers.<id>.request_timeout_seconds`` was silently dropped on the main Codex path. The chat_completions path and the auxiliary Codex adapter both forwarded it; the main path skipped it through three places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``, ``_preflight_codex_api_kwargs``).
3. The streaming stale detector had the same payload-shape bug for ``codex_responses`` requests, which route through the non-streaming detector (it's the path that emits the user-facing "No response from provider for 300s (non-streaming, ...)" warning that reporters keep pasting).

This commit:

- Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``, used by both the non-stream and stream detectors. Handles ``messages`` (Chat Completions), ``input + instructions + tools`` (Responses API), bare lists, and an unknown-dict fallback.
- Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs`` and ``_preflight_codex_api_kwargs`` (with guards against zero/negative/inf/bool values), and wires ``_resolved_api_call_timeout()`` into the Codex branch of ``build_api_kwargs``.
- Lowers the implicit non-stream stale defaults so fallback providers kick in faster when upstream stalls:
  * base 300s -> 90s
  * >50k 450s -> 150s
  * >100k 600s -> 240s

  These only apply when the user has *not* set ``providers.<id>.stale_timeout_seconds`` or ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins.
- Adds regression tests for the estimator shapes, the new defaults, the context-tier scaling, transport timeout pass-through, and preflight timeout pass-through / rejection of invalid values.

Closes #21444
Supersedes #21652 #24126 #31855

Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com>
This commit is contained in:
parent
76135b329d
commit
2d422720b5
10 changed files with 383 additions and 17 deletions
192
tests/agent/test_non_stream_stale_timeout.py
Normal file
192
tests/agent/test_non_stream_stale_timeout.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
"""Tests for the non-stream stale-call detector context estimator.
|
||||
|
||||
Covers:
|
||||
- ``estimate_request_context_tokens`` for Chat Completions, Responses API,
|
||||
bare lists, and mixed-shape dicts.
|
||||
- ``AIAgent._compute_non_stream_stale_timeout`` with both legacy ``messages``
|
||||
list and full ``api_kwargs`` dicts.
|
||||
- The May 2026 default-base change (300s -> 90s) and the lowered
|
||||
context-tier ceilings (450/600 -> 150/240).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _write_config(tmp_path: Path, body: str) -> None:
    """Write ``config.yaml`` into *tmp_path*, the directory tests use as HERMES_HOME.

    An empty *body* falls back to an empty YAML mapping (``{}``) followed by
    a newline.
    """
    config_file = tmp_path / "config.yaml"
    contents = body if body else "{}\n"
    config_file.write_text(contents, encoding="utf-8")
|
||||
|
||||
|
||||
def _make_agent(tmp_path: Path, **overrides):
    """Construct an ``AIAgent`` with Codex-flavoured defaults for these tests.

    Keyword *overrides* replace or extend the default constructor arguments.
    *tmp_path* is accepted for call-site symmetry but is not read here.
    """
    from run_agent import AIAgent

    defaults = {
        "model": "gpt-5.5",
        "provider": "openai-codex",
        "api_key": "sk-dummy",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "platform": "cli",
    }
    return AIAgent(**{**defaults, **overrides})
|
||||
|
||||
|
||||
# ── estimator ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_estimator_chat_completions_messages():
    """A Chat Completions payload with 800 chars of content estimates >= 200 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    user_turn = {"role": "user", "content": "x" * 400}
    assistant_turn = {"role": "assistant", "content": "y" * 400}
    request = {"model": "gpt-5.4", "messages": [user_turn, assistant_turn]}

    # 800+ chars from messages -> ~200 tokens (char/4 estimate)
    estimated = estimate_request_context_tokens(request)
    assert estimated >= 200
|
||||
|
||||
|
||||
def test_estimator_responses_api_input():
    """A Responses-API payload (``input``/``instructions``/``tools``) estimates >= 1200 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    tool_spec = {"name": "t", "description": "d" * 200}
    request = {
        "model": "gpt-5.5",
        "instructions": "i" * 1000,
        "input": "x" * 4000,
        "tools": [tool_spec],
    }

    # input(4000) + instructions(1000) + tools (~stringified) -> well over 1000 tokens
    tokens = estimate_request_context_tokens(request)
    assert tokens >= 1200, f"Responses API estimator returned {tokens}"
|
||||
|
||||
|
||||
def test_estimator_responses_api_long_session_triggers_tier():
    """A long Codex session (large ``input``) should clear the 50k boundary."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    long_input = "x" * 240_000  # ~60k tokens (240k chars / 4)
    request = {
        "model": "gpt-5.5",
        "input": long_input,
        "instructions": "s" * 4000,
    }

    estimated = estimate_request_context_tokens(request)
    assert estimated > 50_000
|
||||
|
||||
|
||||
def test_estimator_bare_list_back_compat():
    """A bare message list (no wrapping dict) with 800 chars estimates >= 200 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    only_turn = {"role": "user", "content": "x" * 800}
    estimated = estimate_request_context_tokens([only_turn])
    assert estimated >= 200
|
||||
|
||||
|
||||
def test_estimator_empty_inputs():
    """An empty dict, an empty list and ``None`` all estimate to zero tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    for empty_payload in ({}, [], None):
        assert estimate_request_context_tokens(empty_payload) == 0
|
||||
|
||||
|
||||
def test_estimator_unknown_dict_fallback():
    """A dict carrying only ``random_field`` (400 chars) must still estimate above 50 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    odd_payload = {"random_field": "z" * 400}
    estimated = estimate_request_context_tokens(odd_payload)
    assert estimated > 50
|
||||
|
||||
|
||||
# ── default base + tier scaling ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_default_base_is_90s(monkeypatch, tmp_path):
    """Default base stale timeout dropped from 300s to 90s (May 2026)."""
    # Isolated HERMES_HOME: empty .env, empty config, no env-var override.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    agent = _make_agent(tmp_path)
    base_seconds, is_implicit = agent._resolved_api_call_stale_timeout_base()

    assert base_seconds == 90.0
    assert is_implicit is True
|
||||
|
||||
|
||||
def test_short_codex_request_uses_base_only(monkeypatch, tmp_path):
    """Codex payload below 50k tokens -> default 90s base."""
    # Isolated HERMES_HOME: empty .env, empty config, no env-var override.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    agent = _make_agent(tmp_path)
    tiny_request = {"model": "gpt-5.5", "input": "hi", "instructions": ""}

    stale_after = agent._compute_non_stream_stale_timeout(tiny_request)
    assert stale_after == 90.0
|
||||
|
||||
|
||||
def test_long_codex_request_bumps_to_50k_tier(monkeypatch, tmp_path):
    """Codex payload > 50k tokens -> at least 150s."""
    # Isolated HERMES_HOME: empty .env, empty config, no env-var override.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    agent = _make_agent(tmp_path)
    long_request = {"model": "gpt-5.5", "input": "x" * 240_000, "instructions": ""}

    stale_after = agent._compute_non_stream_stale_timeout(long_request)
    # Must reach the >50k tier but stay under the >100k tier value.
    assert stale_after >= 150.0
    assert stale_after < 240.0
|
||||
|
||||
|
||||
def test_very_long_codex_request_bumps_to_100k_tier(monkeypatch, tmp_path):
    """Codex payload > 100k tokens -> at least 240s."""
    # Isolated HERMES_HOME: empty .env, empty config, no env-var override.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    agent = _make_agent(tmp_path)
    huge_request = {"model": "gpt-5.5", "input": "x" * 500_000, "instructions": ""}

    stale_after = agent._compute_non_stream_stale_timeout(huge_request)
    assert stale_after >= 240.0
|
||||
|
||||
|
||||
def test_chat_completions_long_messages_bumps_tier(monkeypatch, tmp_path):
    """Chat Completions estimator still works for the legacy messages path."""
    # Isolated HERMES_HOME: empty .env, empty config, no env-var override.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    # Swap the helper's Codex defaults for a plain OpenAI configuration.
    openai_overrides = {
        "provider": "openai",
        "base_url": "https://api.openai.com/v1",
        "model": "gpt-5.4",
    }
    agent = _make_agent(tmp_path, **openai_overrides)

    long_turn = {"role": "user", "content": "x" * 240_000}
    request = {"model": "gpt-5.4", "messages": [long_turn]}

    stale_after = agent._compute_non_stream_stale_timeout(request)
    assert stale_after >= 150.0
|
||||
|
||||
|
||||
def test_explicit_user_config_overrides_default(monkeypatch, tmp_path):
    """If the user explicitly sets a stale_timeout, the new defaults don't apply."""
    import importlib

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    _write_config(tmp_path, """\
providers:
  openai-codex:
    stale_timeout_seconds: 1800
""")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)

    # Reload the timeouts module after the config file is in place and
    # before the agent is built.
    from hermes_cli import timeouts as to_mod

    importlib.reload(to_mod)

    agent = _make_agent(tmp_path)
    stale_after = agent._compute_non_stream_stale_timeout({"input": "hi"})
    assert stale_after == 1800.0
|
||||
|
|
@ -452,3 +452,64 @@ class TestCodexNormalizeResponse:
|
|||
tc = nr.tool_calls[0]
|
||||
assert tc.name == "terminal"
|
||||
assert '"command"' in tc.arguments
|
||||
|
||||
|
||||
|
||||
class TestCodexTransportTimeout:
    """Forward per-request timeout from build_kwargs to the SDK kwargs."""

    @staticmethod
    def _build(transport, **extra):
        """Call ``build_kwargs`` with the minimal request every case shares, plus *extra*."""
        return transport.build_kwargs(
            model="gpt-5.5",
            messages=[{"role": "user", "content": "hi"}],
            tools=[],
            **extra,
        )

    def test_positive_timeout_preserved(self, transport):
        built = self._build(transport, timeout=600.0)
        assert built.get("timeout") == 600.0

    def test_zero_timeout_dropped(self, transport):
        built = self._build(transport, timeout=0)
        assert "timeout" not in built

    def test_none_timeout_omitted(self, transport):
        built = self._build(transport, timeout=None)
        assert "timeout" not in built

    def test_inf_timeout_dropped(self, transport):
        built = self._build(transport, timeout=float("inf"))
        assert "timeout" not in built

    def test_bool_timeout_dropped(self, transport):
        """``True`` is technically int but must not survive — caller bug guard."""
        built = self._build(transport, timeout=True)
        assert "timeout" not in built

    def test_request_overrides_can_supply_timeout(self, transport):
        """request_overrides["timeout"] is honored when no explicit kwarg passed."""
        built = self._build(transport, request_overrides={"timeout": 450.0})
        assert built.get("timeout") == 450.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue