feat(providers): extend request_timeout_seconds to all client paths

Follow-up on top of mvanhorn's cherry-picked commit. Original PR only
wired request_timeout_seconds into the explicit-creds OpenAI branch at
run_agent.py init; router-based implicit auth, native Anthropic, and the
fallback chain were still hardcoded to SDK defaults.

- agent/anthropic_adapter.py: build_anthropic_client() accepts an optional
  timeout kwarg (default 900s preserved when unset/invalid).
- run_agent.py: resolve per-provider/per-model timeout once at init; apply
  to Anthropic native init + post-refresh rebuild + stale/interrupt
  rebuilds + switch_model + _restore_primary_runtime + the OpenAI
  implicit-auth path + _try_activate_fallback (with immediate client
  rebuild so the first fallback request carries the configured timeout).
- tests: cover anthropic adapter kwarg honoring; widen mock signatures
  to accept the new timeout kwarg.
- docs/example: clarify that the knob now applies to every transport,
  the fallback chain, and rebuilds after credential rotation.
This commit is contained in:
Teknium 2026-04-19 05:41:29 -07:00 committed by Teknium
parent 3143d32330
commit f1fe29d1c3
9 changed files with 81 additions and 13 deletions

View file

@@ -292,9 +292,15 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
return _COMMON_BETAS
def build_anthropic_client(api_key: str, base_url: str = None):
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
If *timeout* is provided it overrides the default 900s read timeout. The
connect timeout stays at 10s. Callers pass this from the per-provider /
per-model ``request_timeout_seconds`` config so Anthropic-native and
Anthropic-compatible providers respect the same knob as OpenAI-wire
providers.
Returns an anthropic.Anthropic instance.
"""
if _anthropic_sdk is None:
@@ -305,8 +311,9 @@ def build_anthropic_client(api_key: str, base_url: str = None):
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
kwargs = {
"timeout": Timeout(timeout=900.0, connect=10.0),
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
}
if normalized_base_url:
kwargs["base_url"] = normalized_base_url

View file

@@ -67,6 +67,10 @@ model:
# Named provider overrides (optional)
# Use this for per-provider request timeouts and per-model exceptions.
# Applies to the primary turn client on every api_mode (OpenAI-wire, native
# Anthropic, and Anthropic-compatible providers), the fallback chain, and
# client rebuilds during credential rotation. Leaving these unset keeps the
# SDK defaults (OpenAI ≈ 600s, native Anthropic 900s).
#
# providers:
# ollama-local:

View file

@@ -964,6 +964,12 @@ class AIAgent:
self._anthropic_client = None
self._is_anthropic_oauth = False
# Resolve per-provider / per-model request timeout once up front so
# every client construction path below (Anthropic native, OpenAI-wire,
# router-based implicit auth) can apply it consistently. Bedrock
# Claude uses its own timeout path and is not covered here.
_provider_timeout = get_provider_request_timeout(self.provider, self.model)
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
# Bedrock + Claude → use AnthropicBedrock SDK for full feature parity
@@ -995,7 +1001,7 @@ class AIAgent:
self._anthropic_base_url = base_url
from agent.anthropic_adapter import _is_oauth_token as _is_oat
self._is_anthropic_oauth = _is_oat(effective_key)
self._anthropic_client = build_anthropic_client(effective_key, base_url)
self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
# No OpenAI client needed for Anthropic mode
self.client = None
self._client_kwargs = {}
@@ -1035,7 +1041,6 @@ class AIAgent:
# Explicit credentials from CLI/gateway — construct directly.
# The runtime provider resolver already handled auth for us.
client_kwargs = {"api_key": api_key, "base_url": base_url}
_provider_timeout = get_provider_request_timeout(self.provider, self.model)
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
if self.provider == "copilot-acp":
@@ -1068,6 +1073,8 @@ class AIAgent:
"api_key": _routed_client.api_key,
"base_url": str(_routed_client.base_url),
}
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
# Preserve any default_headers the router set
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
@@ -1796,6 +1803,7 @@ class AIAgent:
self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None)
self._anthropic_client = build_anthropic_client(
effective_key, self._anthropic_base_url,
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self.client = None
@@ -1807,6 +1815,9 @@ class AIAgent:
"api_key": effective_key,
"base_url": effective_base,
}
_sm_timeout = get_provider_request_timeout(self.provider, self.model)
if _sm_timeout is not None:
self._client_kwargs["timeout"] = _sm_timeout
self.client = self._create_openai_client(
dict(self._client_kwargs),
reason="switch_model",
@@ -5233,7 +5244,11 @@ class AIAgent:
pass
try:
self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None))
self._anthropic_client = build_anthropic_client(
new_token,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
except Exception as exc:
logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc)
return False
@@ -5275,7 +5290,10 @@ class AIAgent:
self._anthropic_api_key = runtime_key
self._anthropic_base_url = runtime_base
self._anthropic_client = build_anthropic_client(runtime_key, runtime_base)
self._anthropic_client = build_anthropic_client(
runtime_key, runtime_base,
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = _is_oauth_token(runtime_key)
self.api_key = runtime_key
self.base_url = runtime_base
@@ -5487,6 +5505,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
else:
rc = request_client_holder.get("client")
@@ -5518,6 +5537,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
else:
request_client = request_client_holder.get("client")
@@ -6246,6 +6266,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
else:
request_client = request_client_holder.get("client")
@@ -6402,6 +6423,11 @@ class AIAgent:
self.api_mode = fb_api_mode
self._fallback_activated = True
# Honor per-provider / per-model request_timeout_seconds for the
# fallback target (same knob the primary client uses). None = use
# SDK default.
_fb_timeout = get_provider_request_timeout(fb_provider, fb_model)
if fb_api_mode == "anthropic_messages":
# Build native Anthropic client instead of using OpenAI client
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
@@ -6409,7 +6435,9 @@ class AIAgent:
self.api_key = effective_key
self._anthropic_api_key = effective_key
self._anthropic_base_url = fb_base_url
self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
self._anthropic_client = build_anthropic_client(
effective_key, self._anthropic_base_url, timeout=_fb_timeout,
)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self.client = None
self._client_kwargs = {}
@@ -6433,6 +6461,12 @@ class AIAgent:
"base_url": fb_base_url,
**({"default_headers": dict(fb_headers)} if fb_headers else {}),
}
if _fb_timeout is not None:
self._client_kwargs["timeout"] = _fb_timeout
# Rebuild the shared OpenAI client so the configured
# timeout takes effect on the very next fallback request,
# not only after a later credential-rotation rebuild.
self._replace_primary_openai_client(reason="fallback_timeout_apply")
# Re-evaluate prompt caching for the new provider/model
is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
@@ -6506,6 +6540,7 @@ class AIAgent:
self._anthropic_base_url = rt["anthropic_base_url"]
self._anthropic_client = build_anthropic_client(
rt["anthropic_api_key"], rt["anthropic_base_url"],
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = rt["is_anthropic_oauth"]
self.client = None
@@ -6602,6 +6637,7 @@ class AIAgent:
self._anthropic_base_url = rt["anthropic_base_url"]
self._anthropic_client = build_anthropic_client(
rt["anthropic_api_key"], rt["anthropic_base_url"],
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = rt["is_anthropic_oauth"]
self.client = None

View file

@@ -214,7 +214,7 @@ class TestAgentSwitchModelDefenseInDepth:
# client factory.
captured = {}
def _fake_build_anthropic_client(api_key, base_url):
def _fake_build_anthropic_client(api_key, base_url, **kwargs):
captured["api_key"] = api_key
captured["base_url"] = base_url
return object() # placeholder client — no real calls expected
@@ -226,7 +226,7 @@ class TestAgentSwitchModelDefenseInDepth:
class _Sentinel(Exception):
pass
def _raise_after_capture(api_key, base_url):
def _raise_after_capture(api_key, base_url, **kwargs):
captured["api_key"] = api_key
captured["base_url"] = base_url
raise _Sentinel("strip verified")

View file

@@ -76,3 +76,22 @@ def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
assert get_provider_request_timeout("ollama-local") is None
def test_anthropic_adapter_honors_timeout_kwarg():
"""build_anthropic_client(timeout=X) overrides the 900s default read timeout."""
pytest = __import__("pytest")
anthropic = pytest.importorskip("anthropic") # skip if optional SDK missing
from agent.anthropic_adapter import build_anthropic_client
c_default = build_anthropic_client("sk-ant-dummy", None)
c_custom = build_anthropic_client("sk-ant-dummy", None, timeout=45.0)
c_invalid = build_anthropic_client("sk-ant-dummy", None, timeout=-1)
# Default stays at 900s; custom overrides; invalid falls back to default
assert c_default.timeout.read == 900.0
assert c_custom.timeout.read == 45.0
assert c_invalid.timeout.read == 900.0
# Connect timeout always stays at 10s regardless
assert c_default.timeout.connect == 10.0
assert c_custom.timeout.connect == 10.0

View file

@@ -152,7 +152,7 @@ class _FakeAnthropicClient:
pass
def _fake_build_anthropic_client(key, base_url=None):
def _fake_build_anthropic_client(key, base_url=None, **kwargs):
return _FakeAnthropicClient()

View file

@@ -40,7 +40,7 @@ class _FakeOpenAIClient:
def _make_agent(monkeypatch, api_mode, provider, response_fn):
_patch_bootstrap(monkeypatch)
if api_mode == "anthropic_messages":
monkeypatch.setattr("agent.anthropic_adapter.build_anthropic_client", lambda k, b=None: _FakeAnthropicClient())
monkeypatch.setattr("agent.anthropic_adapter.build_anthropic_client", lambda k, b=None, **kwargs: _FakeAnthropicClient())
if provider == "openai-codex":
monkeypatch.setattr(
"agent.auxiliary_client.resolve_provider_client",

View file

@@ -3615,7 +3615,9 @@ class TestAnthropicCredentialRefresh:
assert agent._try_refresh_anthropic_client_credentials() is True
old_client.close.assert_called_once()
rebuild.assert_called_once_with("sk-ant-oat01-fresh-token", "https://api.anthropic.com")
rebuild.assert_called_once_with(
"sk-ant-oat01-fresh-token", "https://api.anthropic.com", timeout=None,
)
assert agent._anthropic_client is new_client
assert agent._anthropic_api_key == "sk-ant-oat01-fresh-token"

View file

@@ -75,7 +75,7 @@ For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-ho
### Provider Request Timeouts
You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).
You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. Applies to the primary turn client on every transport (OpenAI-wire, native Anthropic, Anthropic-compatible), the fallback chain, and rebuilds after credential rotation. Leaving these unset keeps SDK defaults (OpenAI ≈ 600s, native Anthropic 900s). See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).
## Terminal Backend Configuration