hermes-agent/tests/hermes_cli/test_timeouts.py
Kasun Athaudahetti 2d422720b5 fix(codex): size and propagate timeouts for Responses-API requests; lower stale defaults
Codex / Responses-API requests had three latent timeout bugs that combined
into the long silent hangs reported on #21444:

1. The non-stream stale-call detector estimated context tokens from
   ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry
   their conversational load in ``input`` (with ``instructions`` and
   ``tools``), so every Codex turn logged ``context=~0 tokens`` and the
   detector never applied its >50k / >100k tier bumps.

2. ``providers.<id>.request_timeout_seconds`` was silently dropped on the
   main Codex path. The chat_completions path and the auxiliary Codex
   adapter both forwarded it; the main path dropped it in three
   places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``,
   ``_preflight_codex_api_kwargs``).

3. The streaming stale detector had the same payload-shape bug for
   ``codex_responses`` requests, which route through the non-streaming
   detector (it's the path that emits the user-facing
   "No response from provider for 300s (non-streaming, ...)" warning that
   reporters keep pasting).

This commit:

- Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``,
  used by both the non-stream and stream detectors. Handles ``messages``
  (Chat Completions), ``input + instructions + tools`` (Responses API),
  bare lists, and an unknown-dict fallback.
- Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs``
  and ``_preflight_codex_api_kwargs`` (with guards against
  zero/negative/inf/bool values), and wires
  ``_resolved_api_call_timeout()`` into the Codex branch of
  ``build_api_kwargs``.
- Lowers the implicit non-stream stale defaults so fallback providers
  kick in faster when upstream stalls:
    * base   300s -> 90s
    * >50k   450s -> 150s
    * >100k  600s -> 240s
  These only apply when the user has *not* set
  ``providers.<id>.stale_timeout_seconds`` or
  ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins.
- Adds regression tests for the estimator shapes, the new defaults, the
  context-tier scaling, transport timeout pass-through, and preflight
  timeout pass-through / rejection of invalid values.

Closes #21444
Supersedes #21652 #24126 #31855

Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com>
2026-05-25 01:47:55 -07:00

308 lines
9.3 KiB
Python

from __future__ import annotations
import textwrap
from hermes_cli.timeouts import (
get_provider_request_timeout,
get_provider_stale_timeout,
)
def _write_config(tmp_path, body: str) -> None:
    """Write *body*, dedented, to ``config.yaml`` inside *tmp_path*.

    ``textwrap.dedent`` removes the leading whitespace common to every line,
    so callers can indent their YAML literal to match the surrounding test
    code without changing the YAML's own nesting.

    Args:
        tmp_path: Directory (a ``pathlib.Path``-like object) that receives
            the ``config.yaml`` file.
        body: Raw config text; may carry a uniform leading indent.
    """
    config_file = tmp_path / "config.yaml"
    config_file.write_text(textwrap.dedent(body), encoding="utf-8")
def test_model_timeout_override_wins(monkeypatch, tmp_path):
    """A per-model ``timeout_seconds`` beats the provider-level request timeout."""
    # NOTE(review): presumably the config loader reads config.yaml from
    # HERMES_HOME -- confirm against hermes_cli.timeouts.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # The provider sets 30s, the model entry nested under it sets 120s.
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: 30
            models:
              claude-opus-4.6:
                timeout_seconds: 120
        """,
    )

    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") == 120.0
def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    """The provider-level request timeout applies when the model has no entry."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # Only a provider-wide value is configured; no ``models`` section exists.
    _write_config(
        tmp_path,
        """\
        providers:
          ollama-local:
            request_timeout_seconds: 300
        """,
    )

    assert get_provider_request_timeout("ollama-local", "qwen3:32b") == 300.0
def test_model_stale_timeout_override_wins(monkeypatch, tmp_path):
    """A per-model ``stale_timeout_seconds`` beats the provider-level value."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # The provider sets 600s, the model entry nested under it sets 1800s.
    _write_config(
        tmp_path,
        """\
        providers:
          openai-codex:
            stale_timeout_seconds: 600
            models:
              gpt-5.4:
                stale_timeout_seconds: 1800
        """,
    )

    assert get_provider_stale_timeout("openai-codex", "gpt-5.4") == 1800.0
def test_provider_stale_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    """The provider-level stale timeout applies when the model has no entry."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # Only a provider-wide value is configured; no ``models`` section exists.
    _write_config(
        tmp_path,
        """\
        providers:
          openai-codex:
            stale_timeout_seconds: 900
        """,
    )

    assert get_provider_stale_timeout("openai-codex", "gpt-5.4") == 900.0
def test_missing_timeout_returns_none(monkeypatch, tmp_path):
    """Lookups return ``None`` when no timeout key is configured at all."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # The model entry exists but carries only an unrelated key.
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            models:
              claude-opus-4.6:
                context_length: 200000
        """,
    )

    # Known provider and model, but no timeout keys anywhere.
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    # Provider absent from the config entirely.
    assert get_provider_request_timeout("missing-provider", "claude-opus-4.6") is None
def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
    """Non-numeric and negative request timeouts are treated as unset."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # Every configured value is invalid: a string at provider level, a
    # negative number at model level, and a negative number on a second
    # provider.
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: "fast"
            models:
              claude-opus-4.6:
                timeout_seconds: -5
          ollama-local:
            request_timeout_seconds: -1
        """,
    )

    # Invalid model value and invalid provider value: nothing usable.
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    # Model without an entry: only the invalid provider value remains.
    assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
    # Provider-only lookup with a negative value.
    assert get_provider_request_timeout("ollama-local") is None
def test_invalid_stale_timeout_values_return_none(monkeypatch, tmp_path):
    """Non-numeric and negative stale timeouts are treated as unset."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # A string at provider level and a negative number at model level.
    _write_config(
        tmp_path,
        """\
        providers:
          openai-codex:
            stale_timeout_seconds: "slow"
            models:
              gpt-5.4:
                stale_timeout_seconds: -1
        """,
    )

    # Invalid model value and invalid provider value: nothing usable.
    assert get_provider_stale_timeout("openai-codex", "gpt-5.4") is None
    # Model without an entry: only the invalid provider value remains.
    assert get_provider_stale_timeout("openai-codex", "gpt-5.5") is None
def test_anthropic_adapter_honors_timeout_kwarg():
    """build_anthropic_client(timeout=X) overrides the 900s default read timeout."""
    import pytest

    # Only the skip side effect is needed, so the returned module is not bound.
    pytest.importorskip("anthropic")  # skip if optional SDK missing

    from agent.anthropic_adapter import build_anthropic_client

    c_default = build_anthropic_client("sk-ant-dummy", None)
    c_custom = build_anthropic_client("sk-ant-dummy", None, timeout=45.0)
    c_invalid = build_anthropic_client("sk-ant-dummy", None, timeout=-1)

    # Default stays at 900s; custom overrides; invalid falls back to default
    assert c_default.timeout.read == 900.0
    assert c_custom.timeout.read == 45.0
    assert c_invalid.timeout.read == 900.0

    # Connect timeout always stays at 10s regardless
    assert c_default.timeout.connect == 10.0
    assert c_custom.timeout.connect == 10.0
def test_resolved_api_call_timeout_priority(monkeypatch, tmp_path):
    """AIAgent._resolved_api_call_timeout() honors config > env > default priority."""
    # Isolate HERMES_HOME
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")

    # Case A: config wins over env var
    _write_config(tmp_path, """\
        providers:
          openrouter:
            request_timeout_seconds: 77
            models:
              openai/gpt-4o-mini:
                timeout_seconds: 42
        """)
    monkeypatch.setenv("HERMES_API_TIMEOUT", "999")

    # Imported inside the test so it happens after the environment is patched.
    from run_agent import AIAgent

    agent = AIAgent(
        model="openai/gpt-4o-mini",
        provider="openrouter",
        api_key="sk-dummy",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    # Per-model override wins
    assert agent._resolved_api_call_timeout() == 42.0

    # Provider-level (different model, no per-model override)
    agent.model = "some/other-model"
    assert agent._resolved_api_call_timeout() == 77.0

    # Case B: no config → env wins
    _write_config(tmp_path, "")
    # Clear the cached config load
    import importlib

    from hermes_cli import config as cfg_mod
    importlib.reload(cfg_mod)
    from hermes_cli import timeouts as to_mod
    importlib.reload(to_mod)
    import run_agent as ra_mod
    importlib.reload(ra_mod)

    # Build the second agent from the reloaded module, not the earlier import.
    agent2 = ra_mod.AIAgent(
        model="some/model",
        provider="openrouter",
        api_key="sk-dummy",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    assert agent2._resolved_api_call_timeout() == 999.0

    # Case C: no config, no env → 1800.0 default
    monkeypatch.delenv("HERMES_API_TIMEOUT", raising=False)
    assert agent2._resolved_api_call_timeout() == 1800.0
def test_resolved_api_call_stale_timeout_priority(monkeypatch, tmp_path):
    """AIAgent stale timeout honors config > env > default priority."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")

    # Case A: config (model-level, then provider-level) wins over the env var.
    _write_config(tmp_path, """\
        providers:
          openai-codex:
            stale_timeout_seconds: 600
            models:
              gpt-5.4:
                stale_timeout_seconds: 1800
        """)
    monkeypatch.setenv("HERMES_API_CALL_STALE_TIMEOUT", "999")

    # Imported inside the test so it happens after the environment is patched.
    from run_agent import AIAgent

    agent = AIAgent(
        model="gpt-5.4",
        provider="openai-codex",
        api_key="sk-dummy",
        base_url="https://chatgpt.com/backend-api/codex",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    # NOTE(review): the second tuple element looks like an "implicit default"
    # flag -- it is True only in the no-config/no-env case below. Confirm
    # against AIAgent._resolved_api_call_stale_timeout_base.
    assert agent._resolved_api_call_stale_timeout_base() == (1800.0, False)

    # Different model with no per-model entry: provider-level value applies.
    agent.model = "gpt-5.5"
    assert agent._resolved_api_call_stale_timeout_base() == (600.0, False)

    # Case B: empty config, so the env var is used. The modules are reloaded
    # before the second agent is built from the reloaded ``run_agent``.
    _write_config(tmp_path, "")
    import importlib

    from hermes_cli import config as cfg_mod
    importlib.reload(cfg_mod)
    from hermes_cli import timeouts as to_mod
    importlib.reload(to_mod)
    import run_agent as ra_mod
    importlib.reload(ra_mod)

    agent2 = ra_mod.AIAgent(
        model="gpt-5.4",
        provider="openai-codex",
        api_key="sk-dummy",
        base_url="https://chatgpt.com/backend-api/codex",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    assert agent2._resolved_api_call_stale_timeout_base() == (999.0, False)

    # Case C: no config, no env: the 90s base default.
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    assert agent2._resolved_api_call_stale_timeout_base() == (90.0, True)
def test_default_non_stream_stale_timeout_auto_disables_for_local_endpoints(monkeypatch, tmp_path):
    """With no explicit stale timeout, a 127.0.0.1 endpoint gets an infinite one."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    # Make sure no explicit override leaks in from the outer environment.
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)

    from run_agent import AIAgent

    agent_options = {
        "model": "qwen3:32b",
        "provider": "ollama-local",
        "api_key": "sk-dummy",
        "base_url": "http://127.0.0.1:11434/v1",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "platform": "cli",
    }
    local_agent = AIAgent(**agent_options)

    computed = local_agent._compute_non_stream_stale_timeout([])
    assert computed == float("inf")
def test_explicit_non_stream_stale_timeout_is_honored_for_local_endpoints(monkeypatch, tmp_path):
    """An explicit env stale timeout is used even for a 127.0.0.1 endpoint."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    # Explicit override: expected to be returned unchanged as a float.
    monkeypatch.setenv("HERMES_API_CALL_STALE_TIMEOUT", "300")

    from run_agent import AIAgent

    agent_options = {
        "model": "qwen3:32b",
        "provider": "ollama-local",
        "api_key": "sk-dummy",
        "base_url": "http://127.0.0.1:11434/v1",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "platform": "cli",
    }
    local_agent = AIAgent(**agent_options)

    computed = local_agent._compute_non_stream_stale_timeout([])
    assert computed == 300.0