mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
Codex / Responses-API requests had three latent timeout bugs that combined into the long silent hangs reported on #21444: 1. The non-stream stale-call detector estimated context tokens from ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry their conversational load in ``input`` (with ``instructions`` and ``tools``), so every Codex turn logged ``context=~0 tokens`` and the detector never applied its >50k / >100k tier bumps. 2. ``providers.<id>.request_timeout_seconds`` was silently dropped on the main Codex path. The chat_completions path and the auxiliary Codex adapter both forwarded it; the main path skipped it through three places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``, ``_preflight_codex_api_kwargs``). 3. The streaming stale detector had the same payload-shape bug for ``codex_responses`` requests, which route through the non-streaming detector (it's the path that emits the user-facing "No response from provider for 300s (non-streaming, ...)" warning that reporters keep pasting). This commit: - Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``, used by both the non-stream and stream detectors. Handles ``messages`` (Chat Completions), ``input + instructions + tools`` (Responses API), bare lists, and an unknown-dict fallback. - Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs`` and ``_preflight_codex_api_kwargs`` (with guards against zero/negative/inf/bool values), and wires ``_resolved_api_call_timeout()`` into the Codex branch of ``build_api_kwargs``. - Lowers the implicit non-stream stale defaults so fallback providers kick in faster when upstream stalls: * base 300s -> 90s * >50k 450s -> 150s * >100k 600s -> 240s These only apply when the user has *not* set ``providers.<id>.stale_timeout_seconds`` or ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins. 
- Adds regression tests for the estimator shapes, the new defaults, the context-tier scaling, transport timeout pass-through, and preflight timeout pass-through / rejection of invalid values. Closes #21444 Supersedes #21652 #24126 #31855 Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com>
308 lines
9.3 KiB
Python
308 lines
9.3 KiB
Python
from __future__ import annotations
|
|
|
|
import textwrap
|
|
|
|
from hermes_cli.timeouts import (
|
|
get_provider_request_timeout,
|
|
get_provider_stale_timeout,
|
|
)
|
|
|
|
|
|
def _write_config(tmp_path, body: str) -> None:
    """Write *body* to ``config.yaml`` inside *tmp_path*.

    The text is passed through ``textwrap.dedent`` first, so callers can
    indent the YAML literal to match the surrounding test code. The file is
    written as UTF-8 and replaces any previous ``config.yaml``.
    """
    (tmp_path / "config.yaml").write_text(textwrap.dedent(body), encoding="utf-8")
|
|
|
|
|
|
def test_model_timeout_override_wins(monkeypatch, tmp_path):
    """A per-model ``timeout_seconds`` beats the provider-level request timeout."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: 30
            models:
              claude-opus-4.6:
                timeout_seconds: 120
        """,
    )

    # The model-level value (120) is expected, not the provider-level 30.
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") == 120.0
|
|
|
|
|
|
def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    """The provider-level request timeout applies when the model has no override."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          ollama-local:
            request_timeout_seconds: 300
        """,
    )

    # "qwen3:32b" has no entry in the config, so the provider value is used.
    assert get_provider_request_timeout("ollama-local", "qwen3:32b") == 300.0
|
|
|
|
|
|
def test_model_stale_timeout_override_wins(monkeypatch, tmp_path):
    """A per-model ``stale_timeout_seconds`` beats the provider-level value."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          openai-codex:
            stale_timeout_seconds: 600
            models:
              gpt-5.4:
                stale_timeout_seconds: 1800
        """,
    )

    # The model-level value (1800) is expected, not the provider-level 600.
    assert get_provider_stale_timeout("openai-codex", "gpt-5.4") == 1800.0
|
|
|
|
|
|
def test_provider_stale_timeout_used_when_no_model_override(monkeypatch, tmp_path):
    """The provider-level stale timeout applies when the model has no override."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          openai-codex:
            stale_timeout_seconds: 900
        """,
    )

    # "gpt-5.4" has no entry in the config, so the provider value is used.
    assert get_provider_stale_timeout("openai-codex", "gpt-5.4") == 900.0
|
|
|
|
|
|
def test_missing_timeout_returns_none(monkeypatch, tmp_path):
    """``None`` is returned when no request timeout is configured anywhere."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # The config names the provider and model but sets no timeout key.
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            models:
              claude-opus-4.6:
                context_length: 200000
        """,
    )

    # Known provider without a timeout, and a provider absent from the config.
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    assert get_provider_request_timeout("missing-provider", "claude-opus-4.6") is None
|
|
|
|
|
|
def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
    """Non-numeric and negative request timeouts are treated as unset."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          anthropic:
            request_timeout_seconds: "fast"
            models:
              claude-opus-4.6:
                timeout_seconds: -5
          ollama-local:
            request_timeout_seconds: -1
        """,
    )

    # Invalid model override (-5) plus invalid provider value ("fast").
    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
    # No model override; only the invalid provider value ("fast") is available.
    assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
    # Negative provider value, called without a model argument.
    assert get_provider_request_timeout("ollama-local") is None
|
|
|
|
|
|
def test_invalid_stale_timeout_values_return_none(monkeypatch, tmp_path):
    """Non-numeric and negative stale timeouts are treated as unset."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(
        tmp_path,
        """\
        providers:
          openai-codex:
            stale_timeout_seconds: "slow"
            models:
              gpt-5.4:
                stale_timeout_seconds: -1
        """,
    )

    # Invalid model override (-1) plus invalid provider value ("slow").
    assert get_provider_stale_timeout("openai-codex", "gpt-5.4") is None
    # No model override; only the invalid provider value ("slow") is available.
    assert get_provider_stale_timeout("openai-codex", "gpt-5.5") is None
|
|
|
|
|
|
def test_anthropic_adapter_honors_timeout_kwarg():
    """build_anthropic_client(timeout=X) overrides the 900s default read timeout."""
    import pytest

    pytest.importorskip("anthropic")  # skip if optional SDK missing
    from agent.anthropic_adapter import build_anthropic_client

    c_default = build_anthropic_client("sk-ant-dummy", None)
    c_custom = build_anthropic_client("sk-ant-dummy", None, timeout=45.0)
    c_invalid = build_anthropic_client("sk-ant-dummy", None, timeout=-1)

    # Default stays at 900s; custom overrides; invalid falls back to default
    assert c_default.timeout.read == 900.0
    assert c_custom.timeout.read == 45.0
    assert c_invalid.timeout.read == 900.0
    # Connect timeout always stays at 10s regardless
    assert c_default.timeout.connect == 10.0
    assert c_custom.timeout.connect == 10.0
|
|
|
|
|
|
def test_resolved_api_call_timeout_priority(monkeypatch, tmp_path):
    """AIAgent._resolved_api_call_timeout() honors config > env > default priority."""
    # Isolate HERMES_HOME
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")

    # Case A: config wins over env var
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(tmp_path, """\
        providers:
          openrouter:
            request_timeout_seconds: 77
            models:
              openai/gpt-4o-mini:
                timeout_seconds: 42
        """)
    monkeypatch.setenv("HERMES_API_TIMEOUT", "999")

    # Imported only after the environment and config are in place.
    from run_agent import AIAgent
    agent = AIAgent(
        model="openai/gpt-4o-mini",
        provider="openrouter",
        api_key="sk-dummy",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    # Per-model override wins
    assert agent._resolved_api_call_timeout() == 42.0

    # Provider-level (different model, no per-model override)
    agent.model = "some/other-model"
    assert agent._resolved_api_call_timeout() == 77.0

    # Case B: no config → env wins
    _write_config(tmp_path, "")
    # Clear the cached config load
    import importlib
    from hermes_cli import config as cfg_mod
    importlib.reload(cfg_mod)
    from hermes_cli import timeouts as to_mod
    importlib.reload(to_mod)
    import run_agent as ra_mod
    importlib.reload(ra_mod)

    agent2 = ra_mod.AIAgent(
        model="some/model",
        provider="openrouter",
        api_key="sk-dummy",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    assert agent2._resolved_api_call_timeout() == 999.0

    # Case C: no config, no env → 1800.0 default
    monkeypatch.delenv("HERMES_API_TIMEOUT", raising=False)
    assert agent2._resolved_api_call_timeout() == 1800.0
|
|
|
|
|
|
def test_resolved_api_call_stale_timeout_priority(monkeypatch, tmp_path):
    """AIAgent stale timeout honors config > env > default priority.

    ``_resolved_api_call_stale_timeout_base()`` is compared against
    ``(seconds, flag)`` tuples; in these assertions the flag is ``True`` only
    for the built-in default (presumably marking an implicit value — verify
    against the implementation).
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")

    # Config and env var are both set; config is expected to win.
    # NOTE(review): YAML nesting reconstructed from a flattened source — confirm.
    _write_config(tmp_path, """\
        providers:
          openai-codex:
            stale_timeout_seconds: 600
            models:
              gpt-5.4:
                stale_timeout_seconds: 1800
        """)
    monkeypatch.setenv("HERMES_API_CALL_STALE_TIMEOUT", "999")

    # Imported only after the environment and config are in place.
    from run_agent import AIAgent
    agent = AIAgent(
        model="gpt-5.4",
        provider="openai-codex",
        api_key="sk-dummy",
        base_url="https://chatgpt.com/backend-api/codex",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    # Per-model override wins.
    assert agent._resolved_api_call_stale_timeout_base() == (1800.0, False)

    # Different model without an override: provider-level value applies.
    agent.model = "gpt-5.5"
    assert agent._resolved_api_call_stale_timeout_base() == (600.0, False)

    # Empty config: reload the modules so the env var becomes the source.
    _write_config(tmp_path, "")
    import importlib
    from hermes_cli import config as cfg_mod
    importlib.reload(cfg_mod)
    from hermes_cli import timeouts as to_mod
    importlib.reload(to_mod)
    import run_agent as ra_mod
    importlib.reload(ra_mod)

    agent2 = ra_mod.AIAgent(
        model="gpt-5.4",
        provider="openai-codex",
        api_key="sk-dummy",
        base_url="https://chatgpt.com/backend-api/codex",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )
    assert agent2._resolved_api_call_stale_timeout_base() == (999.0, False)

    # No config and no env var: the 90s default, with the flag set to True.
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    assert agent2._resolved_api_call_stale_timeout_base() == (90.0, True)
|
|
|
|
|
|
def test_default_non_stream_stale_timeout_auto_disables_for_local_endpoints(monkeypatch, tmp_path):
    """With no explicit stale timeout, a loopback endpoint gets an infinite one.

    No ``config.yaml`` is written and ``HERMES_API_CALL_STALE_TIMEOUT`` is
    removed, so only the built-in default is in play; the agent targets
    ``http://127.0.0.1:11434/v1``.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)

    # Imported only after the environment is isolated.
    from run_agent import AIAgent
    agent = AIAgent(
        model="qwen3:32b",
        provider="ollama-local",
        api_key="sk-dummy",
        base_url="http://127.0.0.1:11434/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )

    assert agent._compute_non_stream_stale_timeout([]) == float("inf")
|
|
|
|
|
|
def test_explicit_non_stream_stale_timeout_is_honored_for_local_endpoints(monkeypatch, tmp_path):
    """An explicit env stale timeout is used even for a loopback endpoint.

    ``HERMES_API_CALL_STALE_TIMEOUT`` is set to ``"300"`` and the agent targets
    ``http://127.0.0.1:11434/v1``; the computed timeout must be 300 seconds.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")
    monkeypatch.setenv("HERMES_API_CALL_STALE_TIMEOUT", "300")

    # Imported only after the environment is isolated.
    from run_agent import AIAgent
    agent = AIAgent(
        model="qwen3:32b",
        provider="ollama-local",
        api_key="sk-dummy",
        base_url="http://127.0.0.1:11434/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        platform="cli",
    )

    assert agent._compute_non_stream_stale_timeout([]) == 300.0
|