mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
[verified] fix: account for responses payloads in stale timeout sizing
This commit is contained in:
parent
79bfad0adc
commit
1bf574fb7b
2 changed files with 122 additions and 19 deletions
67
run_agent.py
67
run_agent.py
|
|
@ -2576,19 +2576,20 @@ class AIAgent:
|
|||
|
||||
return 300.0, True
|
||||
|
||||
def _compute_non_stream_stale_timeout(self, api_kwargs: dict[str, Any]) -> float:
    """Compute the effective non-stream stale timeout for this request.

    Local endpoints running with the implicit default timeout are never
    treated as stale; otherwise the resolved base timeout is scaled up
    when the estimated request payload is large.
    """
    base_timeout, is_implicit_default = self._resolved_api_call_stale_timeout_base()
    endpoint = getattr(self, "_base_url", None) or self.base_url or ""
    # A local inference server may legitimately take arbitrarily long when
    # the operator has not configured an explicit stale timeout.
    if is_implicit_default and endpoint and is_local_endpoint(endpoint):
        return float("inf")

    return self._scale_stale_timeout_for_context(
        base_timeout,
        self._estimate_request_context_tokens(api_kwargs),
        medium_timeout=450.0,
        large_timeout=600.0,
    )
|
||||
|
||||
def _is_openrouter_url(self) -> bool:
|
||||
"""Return True when the base URL targets OpenRouter."""
|
||||
|
|
@ -5666,6 +5667,36 @@ class AIAgent:
|
|||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _rough_payload_chars(value: Any) -> int:
|
||||
if value is None:
|
||||
return 0
|
||||
try:
|
||||
return len(json.dumps(value, ensure_ascii=False, separators=(",", ":")))
|
||||
except Exception:
|
||||
return len(str(value))
|
||||
|
||||
def _estimate_request_context_tokens(self, api_kwargs: dict) -> int:
|
||||
"""Roughly estimate request size across chat-completions and Responses payloads."""
|
||||
total_chars = 0
|
||||
for key in ("system", "messages", "input", "instructions", "tools"):
|
||||
total_chars += self._rough_payload_chars(api_kwargs.get(key))
|
||||
return (total_chars + 3) // 4 if total_chars > 0 else 0
|
||||
|
||||
@staticmethod
|
||||
def _scale_stale_timeout_for_context(
|
||||
base_timeout: float,
|
||||
est_tokens: int,
|
||||
*,
|
||||
medium_timeout: float,
|
||||
large_timeout: float,
|
||||
) -> float:
|
||||
if est_tokens > 100_000:
|
||||
return max(base_timeout, large_timeout)
|
||||
if est_tokens > 50_000:
|
||||
return max(base_timeout, medium_timeout)
|
||||
return base_timeout
|
||||
|
||||
def _interruptible_api_call(self, api_kwargs: dict):
|
||||
"""
|
||||
Run the API call in a background thread so the main conversation loop
|
||||
|
|
@ -5733,9 +5764,7 @@ class AIAgent:
|
|||
# httpx timeout (default 1800s) with zero feedback. The stale
|
||||
# detector kills the connection early so the main retry loop can
|
||||
# apply richer recovery (credential rotation, provider fallback).
|
||||
_stale_timeout = self._compute_non_stream_stale_timeout(
|
||||
api_kwargs.get("messages", [])
|
||||
)
|
||||
_stale_timeout = self._compute_non_stream_stale_timeout(api_kwargs)
|
||||
|
||||
_call_start = time.time()
|
||||
self._touch_activity("waiting for non-streaming API response")
|
||||
|
|
@ -5759,7 +5788,7 @@ class AIAgent:
|
|||
# arrives within the configured timeout.
|
||||
_elapsed = time.time() - _call_start
|
||||
if _elapsed > _stale_timeout:
|
||||
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
_est_ctx = self._estimate_request_context_tokens(api_kwargs)
|
||||
logger.warning(
|
||||
"Non-streaming API call stale for %.0fs (threshold %.0fs). "
|
||||
"model=%s context=~%s tokens. Killing connection.",
|
||||
|
|
@ -6593,13 +6622,13 @@ class AIAgent:
|
|||
# when the context is large. Without this, the stale detector kills
|
||||
# healthy connections during the model's thinking phase, producing
|
||||
# spurious RemoteProtocolError ("peer closed connection").
|
||||
_est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
if _est_tokens > 100_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
|
||||
elif _est_tokens > 50_000:
|
||||
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
|
||||
else:
|
||||
_stream_stale_timeout = _stream_stale_timeout_base
|
||||
_est_tokens = self._estimate_request_context_tokens(api_kwargs)
|
||||
_stream_stale_timeout = self._scale_stale_timeout_for_context(
|
||||
_stream_stale_timeout_base,
|
||||
_est_tokens,
|
||||
medium_timeout=240.0,
|
||||
large_timeout=300.0,
|
||||
)
|
||||
|
||||
t = threading.Thread(target=_call, daemon=True)
|
||||
t.start()
|
||||
|
|
@ -6629,7 +6658,7 @@ class AIAgent:
|
|||
# inner retry loop can start a fresh connection.
|
||||
_stale_elapsed = time.time() - last_chunk_time["t"]
|
||||
if _stale_elapsed > _stream_stale_timeout:
|
||||
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
|
||||
_est_ctx = self._estimate_request_context_tokens(api_kwargs)
|
||||
logger.warning(
|
||||
"Stream stale for %.0fs (threshold %.0fs) — no chunks received. "
|
||||
"model=%s context=~%s tokens. Killing connection.",
|
||||
|
|
|
|||
|
|
@ -438,6 +438,80 @@ class TestBuildApiKwargsCodex:
|
|||
assert "function" not in tools[0]
|
||||
|
||||
|
||||
class TestEstimateRequestContextTokens:
    """The token estimator must account for every payload shape the agent sends."""

    def test_chat_completions_counts_messages_and_tools(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        kwargs = {
            "model": "anthropic/claude-sonnet-4-20250514",
            "messages": [
                {"role": "system", "content": "a" * 4000},
                {"role": "user", "content": "b" * 4000},
            ],
            "tools": _tool_defs("web_search", "terminal"),
        }

        estimate = agent._estimate_request_context_tokens(kwargs)

        # ~8000 chars of message content alone should exceed 2000 tokens.
        assert estimate > 2000

    def test_codex_responses_counts_input_and_instructions(self, monkeypatch):
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        kwargs = {
            "model": "gpt-5.4",
            "instructions": "system:" + ("y" * 10000),
            "input": [{"role": "user", "content": "x" * 410000}],
            "tools": [
                {
                    "type": "function",
                    "name": "web_search",
                    "description": "search",
                    "parameters": {"type": "object", "properties": {}},
                }
            ],
        }

        # Responses payloads carry no "messages" key at all — the estimator
        # must read "input"/"instructions" instead.
        assert "messages" not in kwargs
        estimate = agent._estimate_request_context_tokens(kwargs)

        assert estimate > 100000
        assert agent._scale_stale_timeout_for_context(
            300.0,
            estimate,
            medium_timeout=450.0,
            large_timeout=600.0,
        ) == 600.0
        assert agent._scale_stale_timeout_for_context(
            180.0,
            estimate,
            medium_timeout=240.0,
            large_timeout=300.0,
        ) == 300.0

    def test_anthropic_counts_top_level_system(self, monkeypatch):
        agent = _make_agent(monkeypatch, "anthropic", api_mode="anthropic_messages")
        kwargs = {
            "model": "claude-sonnet-4.6",
            "system": "policy:" + ("s" * 240000),
            "messages": [{"role": "user", "content": "hi"}],
            "tools": _tool_defs("web_search"),
        }

        estimate = agent._estimate_request_context_tokens(kwargs)

        # 240k chars of top-level system prompt lands in the 50k-100k band,
        # so the medium timeout floor applies.
        assert estimate > 60000
        assert agent._scale_stale_timeout_for_context(
            300.0,
            estimate,
            medium_timeout=450.0,
            large_timeout=600.0,
        ) == 450.0
|
||||
|
||||
|
||||
# ── Message conversion tests ────────────────────────────────────────────────
|
||||
|
||||
class TestChatMessagesToResponsesInput:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue