mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Codex / Responses-API requests had three latent timeout bugs that combined into the long silent hangs reported on #21444:

1. The non-stream stale-call detector estimated context tokens from ``api_kwargs["messages"]`` only. Codex / Responses-API payloads carry their conversational load in ``input`` (with ``instructions`` and ``tools``), so every Codex turn logged ``context=~0 tokens`` and the detector never applied its >50k / >100k tier bumps.
2. ``providers.<id>.request_timeout_seconds`` was silently dropped on the main Codex path. The chat_completions path and the auxiliary Codex adapter both forwarded it; the main path skipped it through three places (``build_api_kwargs``, ``ResponsesApiTransport.build_kwargs``, ``_preflight_codex_api_kwargs``).
3. The streaming stale detector had the same payload-shape bug for ``codex_responses`` requests, which route through the non-streaming detector (it's the path that emits the user-facing "No response from provider for 300s (non-streaming, ...)" warning that reporters keep pasting).

This commit:

- Adds ``estimate_request_context_tokens`` in ``chat_completion_helpers``, used by both the non-stream and stream detectors. Handles ``messages`` (Chat Completions), ``input + instructions + tools`` (Responses API), bare lists, and an unknown-dict fallback.
- Forwards ``timeout`` through ``ResponsesApiTransport.build_kwargs`` and ``_preflight_codex_api_kwargs`` (with guards against zero/negative/inf/bool values), and wires ``_resolved_api_call_timeout()`` into the Codex branch of ``build_api_kwargs``.
- Lowers the implicit non-stream stale defaults so fallback providers kick in faster when upstream stalls:
  * base 300s -> 90s
  * >50k 450s -> 150s
  * >100k 600s -> 240s
  These only apply when the user has *not* set ``providers.<id>.stale_timeout_seconds`` or ``HERMES_API_CALL_STALE_TIMEOUT``. Explicit config still wins.
- Adds regression tests for the estimator shapes, the new defaults, the context-tier scaling, transport timeout pass-through, and preflight timeout pass-through / rejection of invalid values.

Closes #21444
Supersedes #21652 #24126 #31855

Co-authored-by: Hoang V. Pham <26063003+hehehe0803@users.noreply.github.com>
192 lines
6.9 KiB
Python
192 lines
6.9 KiB
Python
"""Tests for the non-stream stale-call detector context estimator.

Covers:
- ``estimate_request_context_tokens`` for Chat Completions, Responses API,
  bare lists, and mixed-shape dicts.
- ``AIAgent._compute_non_stream_stale_timeout`` with both legacy ``messages``
  list and full ``api_kwargs`` dicts.
- The May 2026 default-base change (300s -> 90s) and the lowered
  context-tier ceilings (450/600 -> 150/240).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
def _write_config(tmp_path: Path, body: str) -> None:
    """Write ``body`` to ``config.yaml`` directly inside ``tmp_path``.

    A falsy ``body`` (for example the empty string) is written as ``{}``
    followed by a newline instead.
    """
    config_file = tmp_path / "config.yaml"
    contents = body if body else "{}\n"
    config_file.write_text(contents, encoding="utf-8")
|
|
|
|
|
|
def _make_agent(tmp_path: Path, **overrides):
    """Construct an ``AIAgent`` from fixed test defaults plus ``overrides``.

    ``tmp_path`` is accepted for call-site symmetry but is not used when
    building the agent. Any keyword in ``overrides`` replaces the default of
    the same name; unknown keywords are passed straight to ``AIAgent``.
    """
    from run_agent import AIAgent

    defaults = {
        "model": "gpt-5.5",
        "provider": "openai-codex",
        "api_key": "sk-dummy",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
        "platform": "cli",
    }
    return AIAgent(**{**defaults, **overrides})
|
|
|
|
|
|
# ── estimator ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
def test_estimator_chat_completions_messages():
    """A Chat Completions ``messages`` payload must estimate to >= 200 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    user_turn = {"role": "user", "content": "x" * 400}
    assistant_turn = {"role": "assistant", "content": "y" * 400}
    request = {"model": "gpt-5.4", "messages": [user_turn, assistant_turn]}

    # 800+ chars across the two messages -> ~200 tokens (char/4 estimate)
    estimated = estimate_request_context_tokens(request)
    assert estimated >= 200
|
|
|
|
|
|
def test_estimator_responses_api_input():
    """A Responses-API shaped payload must estimate to at least 1200 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    request = dict(
        model="gpt-5.5",
        instructions="i" * 1000,
        input="x" * 4000,
        tools=[{"name": "t", "description": "d" * 200}],
    )

    # input (4000 chars) + instructions (1000 chars) + stringified tools
    # should land well over 1000 tokens.
    tokens = estimate_request_context_tokens(request)
    assert tokens >= 1200, f"Responses API estimator returned {tokens}"
|
|
|
|
|
|
def test_estimator_responses_api_long_session_triggers_tier():
    """A long Codex session (large ``input``) must estimate above 50k tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    long_input = "x" * 240_000  # ~60k tokens (240k chars / 4)
    request = {
        "model": "gpt-5.5",
        "input": long_input,
        "instructions": "s" * 4000,
    }

    estimated = estimate_request_context_tokens(request)
    assert estimated > 50_000
|
|
|
|
|
|
def test_estimator_bare_list_back_compat():
    """A bare list of messages (no wrapping dict) is still accepted."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    single_turn = {"role": "user", "content": "x" * 800}
    estimated = estimate_request_context_tokens([single_turn])
    assert estimated >= 200
|
|
|
|
|
|
def test_estimator_empty_inputs():
    """An empty dict, an empty list and ``None`` each estimate to zero tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    for empty_payload in ({}, [], None):
        assert estimate_request_context_tokens(empty_payload) == 0
|
|
|
|
|
|
def test_estimator_unknown_dict_fallback():
    """A dict with none of the known payload keys still yields > 50 tokens."""
    from agent.chat_completion_helpers import estimate_request_context_tokens

    unrecognised = {"random_field": "z" * 400}
    estimated = estimate_request_context_tokens(unrecognised)
    assert estimated > 50
|
|
|
|
|
|
# ── default base + tier scaling ────────────────────────────────────────────
|
|
|
|
|
|
def test_default_base_is_90s(monkeypatch, tmp_path):
    """With no env override and an empty config, the implicit base is 90s.

    (The default base stale timeout was lowered from 300s to 90s in May 2026.)
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    codex_agent = _make_agent(tmp_path)
    timeout_base, is_implicit = codex_agent._resolved_api_call_stale_timeout_base()

    assert timeout_base == 90.0
    assert is_implicit is True
|
|
|
|
|
|
def test_short_codex_request_uses_base_only(monkeypatch, tmp_path):
    """A Codex payload below 50k tokens gets exactly the default 90s base."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    codex_agent = _make_agent(tmp_path)
    small_request = {"model": "gpt-5.5", "input": "hi", "instructions": ""}

    stale_timeout = codex_agent._compute_non_stream_stale_timeout(small_request)
    assert stale_timeout == 90.0
|
|
|
|
|
|
def test_long_codex_request_bumps_to_50k_tier(monkeypatch, tmp_path):
    """A Codex payload above 50k tokens gets at least 150s but less than 240s."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    codex_agent = _make_agent(tmp_path)
    long_request = {
        "model": "gpt-5.5",
        "input": "x" * 240_000,
        "instructions": "",
    }

    stale_timeout = codex_agent._compute_non_stream_stale_timeout(long_request)
    # Lower bound is the >50k tier; upper bound keeps it out of the >100k tier.
    assert 150.0 <= stale_timeout
    assert stale_timeout < 240.0
|
|
|
|
|
|
def test_very_long_codex_request_bumps_to_100k_tier(monkeypatch, tmp_path):
    """A Codex payload above 100k tokens gets a stale timeout of at least 240s."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    codex_agent = _make_agent(tmp_path)
    huge_request = {
        "model": "gpt-5.5",
        "input": "x" * 500_000,
        "instructions": "",
    }

    stale_timeout = codex_agent._compute_non_stream_stale_timeout(huge_request)
    assert stale_timeout >= 240.0
|
|
|
|
|
|
def test_chat_completions_long_messages_bumps_tier(monkeypatch, tmp_path):
    """The legacy ``messages`` payload shape still reaches the >50k tier."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    env_file = tmp_path / ".env"
    env_file.write_text("", encoding="utf-8")
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)
    _write_config(tmp_path, "")

    # Override the Codex defaults so the agent targets a Chat Completions
    # style provider instead.
    openai_overrides = {
        "provider": "openai",
        "base_url": "https://api.openai.com/v1",
        "model": "gpt-5.4",
    }
    chat_agent = _make_agent(tmp_path, **openai_overrides)

    long_turn = {"role": "user", "content": "x" * 240_000}
    request = {"model": "gpt-5.4", "messages": [long_turn]}

    stale_timeout = chat_agent._compute_non_stream_stale_timeout(request)
    assert stale_timeout >= 150.0
|
|
|
|
|
|
def test_explicit_user_config_overrides_default(monkeypatch, tmp_path):
    """If the user explicitly sets a stale_timeout, the new defaults don't apply.

    The config places ``stale_timeout_seconds: 1800`` under
    ``providers.openai-codex`` and the test expects the computed non-stream
    stale timeout to be exactly 1800.0 for a tiny payload.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / ".env").write_text("", encoding="utf-8")

    # YAML nesting is indentation-sensitive, so each level is spelled out
    # explicitly: a flattened block would leave ``providers`` empty and turn
    # ``stale_timeout_seconds`` into an unrelated top-level key.
    config_yaml = (
        "providers:\n"
        "  openai-codex:\n"
        "    stale_timeout_seconds: 1800\n"
    )
    _write_config(tmp_path, config_yaml)
    monkeypatch.delenv("HERMES_API_CALL_STALE_TIMEOUT", raising=False)

    import importlib

    from hermes_cli import timeouts as to_mod

    # NOTE(review): the reload presumably clears module-level state so the
    # freshly written config is read — confirm against hermes_cli.timeouts.
    importlib.reload(to_mod)

    agent = _make_agent(tmp_path)
    assert agent._compute_non_stream_stale_timeout({"input": "hi"}) == 1800.0
|