mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(agent): honor provider timeout config in streaming API calls
Closes #25249 (and supersedes PR #25260) in spirit.

Two bugs in the streaming chat-completions path caused provider timeout configuration to be silently ignored:

1. Hardcoded connect/pool timeout. The httpx.Timeout for streaming calls used hardcoded connect=30.0 and pool=30.0 regardless of the user's providers.<id>.request_timeout_seconds config. If the custom provider (e.g. Ollama) was unreachable, the call always waited exactly 30s before failing, ignoring any configured timeout. Fix: use min(_base_timeout, 60.0) for connect and pool when a provider timeout is configured, falling back to 30.0 otherwise. The 60s cap addresses review feedback (the TCP handshake shouldn't wait for the full inference timeout — connect/pool cover the connection layer, not model latency).

2. Streaming stale-stream detector ignored provider config. The stale detector read only HERMES_STREAM_STALE_TIMEOUT (env default 180s). The providers.<id>.stale_timeout_seconds key (correctly used in the non-streaming path) was never consulted. Fix: check get_provider_stale_timeout(provider, model) first, then fall back to the env var. This aligns the streaming path with the non-streaming path's priority chain (config > env > default).

Salvage shape diverged from PR #25260: the function moved to agent/chat_completion_helpers.py and the contributor's two commits (initial fix + 60s-cap review follow-up) are squashed into one final commit applied at the new location. The original diagnosis, fix shape, AND the 60s-cap review response come from @zccyman in PR #25260; credited via Co-authored-by.

Co-authored-by: zccyman <16263913+zccyman@users.noreply.github.com>
This commit is contained in:
parent
2f28b60a47
commit
55d6a1636b
1 changed file with 12 additions and 4 deletions
|
|
@ -33,7 +33,7 @@ from types import SimpleNamespace
|
|||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from hermes_cli.timeouts import get_provider_request_timeout
|
||||
from hermes_cli.timeouts import get_provider_request_timeout, get_provider_stale_timeout
|
||||
from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.model_metadata import is_local_endpoint
|
||||
from agent.message_sanitization import (
|
||||
|
|
@ -1272,15 +1272,18 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
|||
"Local provider detected (%s) — stream read timeout raised to %.0fs",
|
||||
agent.base_url, _stream_read_timeout,
|
||||
)
|
||||
# Cap connect/pool at 60s even when provider timeout is higher.
|
||||
# connect/pool cover TCP handshake, not model inference.
|
||||
_conn_cap = min(_base_timeout, 60.0) if _provider_timeout_cfg is not None else 30.0
|
||||
stream_kwargs = {
|
||||
**api_kwargs,
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True},
|
||||
"timeout": _httpx.Timeout(
|
||||
connect=30.0,
|
||||
connect=_conn_cap,
|
||||
read=_stream_read_timeout,
|
||||
write=_base_timeout,
|
||||
pool=30.0,
|
||||
pool=_conn_cap,
|
||||
),
|
||||
}
|
||||
request_client_holder["client"] = agent._create_request_openai_client(
|
||||
|
|
@ -1868,7 +1871,12 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
|
|||
if request_client is not None:
|
||||
agent._close_request_openai_client(request_client, reason="stream_request_complete")
|
||||
|
||||
_stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0))
|
||||
# Provider-configured stale timeout takes priority over env default.
|
||||
_cfg_stale = get_provider_stale_timeout(agent.provider, agent.model)
|
||||
if _cfg_stale is not None:
|
||||
_stream_stale_timeout_base = _cfg_stale
|
||||
else:
|
||||
_stream_stale_timeout_base = float(os.getenv("HERMES_STREAM_STALE_TIMEOUT", 180.0))
|
||||
# Local providers (Ollama, oMLX, llama-cpp) can take 300+ seconds
|
||||
# for prefill on large contexts. Disable the stale detector unless
|
||||
# the user explicitly set HERMES_STREAM_STALE_TIMEOUT.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue