feat(providers): extend request_timeout_seconds to all client paths

Follow-up on top of mvanhorn's cherry-picked commit. Original PR only
wired request_timeout_seconds into the explicit-creds OpenAI branch at
run_agent.py init; router-based implicit auth, native Anthropic, and the
fallback chain were still hardcoded to SDK defaults.

- agent/anthropic_adapter.py: build_anthropic_client() accepts an optional
  timeout kwarg (default 900s preserved when unset/invalid).
- run_agent.py: resolve per-provider/per-model timeout once at init; apply
  to Anthropic native init + post-refresh rebuild + stale/interrupt
  rebuilds + switch_model + _restore_primary_runtime + the OpenAI
  implicit-auth path + _try_activate_fallback (with immediate client
  rebuild so the first fallback request carries the configured timeout).
- tests: cover anthropic adapter kwarg honoring; widen mock signatures
  to accept the new timeout kwarg.
- docs/example: clarify that the knob now applies to every transport,
  the fallback chain, and rebuilds after credential rotation.
This commit is contained in:
Teknium 2026-04-19 05:41:29 -07:00 committed by Teknium
parent 3143d32330
commit f1fe29d1c3
9 changed files with 81 additions and 13 deletions

View file

@@ -292,9 +292,15 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
return _COMMON_BETAS
def build_anthropic_client(api_key: str, base_url: str = None):
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
If *timeout* is provided it overrides the default 900s read timeout. The
connect timeout stays at 10s. Callers pass this from the per-provider /
per-model ``request_timeout_seconds`` config so Anthropic-native and
Anthropic-compatible providers respect the same knob as OpenAI-wire
providers.
Returns an anthropic.Anthropic instance.
"""
if _anthropic_sdk is None:
@@ -305,8 +311,9 @@ def build_anthropic_client(api_key: str, base_url: str = None):
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
kwargs = {
"timeout": Timeout(timeout=900.0, connect=10.0),
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
}
if normalized_base_url:
kwargs["base_url"] = normalized_base_url

View file

@@ -67,6 +67,10 @@ model:
# Named provider overrides (optional)
# Use this for per-provider request timeouts and per-model exceptions.
# Applies to the primary turn client on every api_mode (OpenAI-wire, native
# Anthropic, and Anthropic-compatible providers), the fallback chain, and
# client rebuilds during credential rotation. Leaving these unset keeps the
# SDK defaults (OpenAI ≈ 600s, native Anthropic 900s).
#
# providers:
# ollama-local:

View file

@@ -964,6 +964,12 @@ class AIAgent:
self._anthropic_client = None
self._is_anthropic_oauth = False
# Resolve per-provider / per-model request timeout once up front so
# every client construction path below (Anthropic native, OpenAI-wire,
# router-based implicit auth) can apply it consistently. Bedrock
# Claude uses its own timeout path and is not covered here.
_provider_timeout = get_provider_request_timeout(self.provider, self.model)
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
# Bedrock + Claude → use AnthropicBedrock SDK for full feature parity
@@ -995,7 +1001,7 @@ class AIAgent:
self._anthropic_base_url = base_url
from agent.anthropic_adapter import _is_oauth_token as _is_oat
self._is_anthropic_oauth = _is_oat(effective_key)
self._anthropic_client = build_anthropic_client(effective_key, base_url)
self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
# No OpenAI client needed for Anthropic mode
self.client = None
self._client_kwargs = {}
@@ -1035,7 +1041,6 @@ class AIAgent:
# Explicit credentials from CLI/gateway — construct directly.
# The runtime provider resolver already handled auth for us.
client_kwargs = {"api_key": api_key, "base_url": base_url}
_provider_timeout = get_provider_request_timeout(self.provider, self.model)
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
if self.provider == "copilot-acp":
@@ -1068,6 +1073,8 @@ class AIAgent:
"api_key": _routed_client.api_key,
"base_url": str(_routed_client.base_url),
}
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
# Preserve any default_headers the router set
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
@@ -1796,6 +1803,7 @@ class AIAgent:
self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None)
self._anthropic_client = build_anthropic_client(
effective_key, self._anthropic_base_url,
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self.client = None
@@ -1807,6 +1815,9 @@ class AIAgent:
"api_key": effective_key,
"base_url": effective_base,
}
_sm_timeout = get_provider_request_timeout(self.provider, self.model)
if _sm_timeout is not None:
self._client_kwargs["timeout"] = _sm_timeout
self.client = self._create_openai_client(
dict(self._client_kwargs),
reason="switch_model",
@@ -5233,7 +5244,11 @@ class AIAgent:
pass
try:
self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None))
self._anthropic_client = build_anthropic_client(
new_token,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
except Exception as exc:
logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc)
return False
@@ -5275,7 +5290,10 @@ class AIAgent:
self._anthropic_api_key = runtime_key
self._anthropic_base_url = runtime_base
self._anthropic_client = build_anthropic_client(runtime_key, runtime_base)
self._anthropic_client = build_anthropic_client(
runtime_key, runtime_base,
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = _is_oauth_token(runtime_key)
self.api_key = runtime_key
self.base_url = runtime_base
@@ -5487,6 +5505,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
else:
rc = request_client_holder.get("client")
@@ -5518,6 +5537,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
else:
request_client = request_client_holder.get("client")
@@ -6246,6 +6266,7 @@ class AIAgent:
self._anthropic_client = build_anthropic_client(
self._anthropic_api_key,
getattr(self, "_anthropic_base_url", None),
timeout=get_provider_request_timeout(self.provider, self.model),
)
else:
request_client = request_client_holder.get("client")
@@ -6402,6 +6423,11 @@ class AIAgent:
self.api_mode = fb_api_mode
self._fallback_activated = True
# Honor per-provider / per-model request_timeout_seconds for the
# fallback target (same knob the primary client uses). None = use
# SDK default.
_fb_timeout = get_provider_request_timeout(fb_provider, fb_model)
if fb_api_mode == "anthropic_messages":
# Build native Anthropic client instead of using OpenAI client
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
@@ -6409,7 +6435,9 @@ class AIAgent:
self.api_key = effective_key
self._anthropic_api_key = effective_key
self._anthropic_base_url = fb_base_url
self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
self._anthropic_client = build_anthropic_client(
effective_key, self._anthropic_base_url, timeout=_fb_timeout,
)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self.client = None
self._client_kwargs = {}
@@ -6433,6 +6461,12 @@ class AIAgent:
"base_url": fb_base_url,
**({"default_headers": dict(fb_headers)} if fb_headers else {}),
}
if _fb_timeout is not None:
self._client_kwargs["timeout"] = _fb_timeout
# Rebuild the shared OpenAI client so the configured
# timeout takes effect on the very next fallback request,
# not only after a later credential-rotation rebuild.
self._replace_primary_openai_client(reason="fallback_timeout_apply")
# Re-evaluate prompt caching for the new provider/model
is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
@@ -6506,6 +6540,7 @@ class AIAgent:
self._anthropic_base_url = rt["anthropic_base_url"]
self._anthropic_client = build_anthropic_client(
rt["anthropic_api_key"], rt["anthropic_base_url"],
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = rt["is_anthropic_oauth"]
self.client = None
@@ -6602,6 +6637,7 @@ class AIAgent:
self._anthropic_base_url = rt["anthropic_base_url"]
self._anthropic_client = build_anthropic_client(
rt["anthropic_api_key"], rt["anthropic_base_url"],
timeout=get_provider_request_timeout(self.provider, self.model),
)
self._is_anthropic_oauth = rt["is_anthropic_oauth"]
self.client = None

View file

@@ -214,7 +214,7 @@ class TestAgentSwitchModelDefenseInDepth:
# client factory.
captured = {}
def _fake_build_anthropic_client(api_key, base_url):
def _fake_build_anthropic_client(api_key, base_url, **kwargs):
captured["api_key"] = api_key
captured["base_url"] = base_url
return object() # placeholder client — no real calls expected
@@ -226,7 +226,7 @@ class TestAgentSwitchModelDefenseInDepth:
class _Sentinel(Exception):
pass
def _raise_after_capture(api_key, base_url):
def _raise_after_capture(api_key, base_url, **kwargs):
captured["api_key"] = api_key
captured["base_url"] = base_url
raise _Sentinel("strip verified")

View file

@@ -76,3 +76,22 @@ def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
assert get_provider_request_timeout("ollama-local") is None
def test_anthropic_adapter_honors_timeout_kwarg():
"""build_anthropic_client(timeout=X) overrides the 900s default read timeout."""
pytest = __import__("pytest")
anthropic = pytest.importorskip("anthropic") # skip if optional SDK missing
from agent.anthropic_adapter import build_anthropic_client
c_default = build_anthropic_client("sk-ant-dummy", None)
c_custom = build_anthropic_client("sk-ant-dummy", None, timeout=45.0)
c_invalid = build_anthropic_client("sk-ant-dummy", None, timeout=-1)
# Default stays at 900s; custom overrides; invalid falls back to default
assert c_default.timeout.read == 900.0
assert c_custom.timeout.read == 45.0
assert c_invalid.timeout.read == 900.0
# Connect timeout always stays at 10s regardless
assert c_default.timeout.connect == 10.0
assert c_custom.timeout.connect == 10.0

View file

@@ -152,7 +152,7 @@ class _FakeAnthropicClient:
pass
def _fake_build_anthropic_client(key, base_url=None):
def _fake_build_anthropic_client(key, base_url=None, **kwargs):
return _FakeAnthropicClient()

View file

@@ -40,7 +40,7 @@ class _FakeOpenAIClient:
def _make_agent(monkeypatch, api_mode, provider, response_fn):
_patch_bootstrap(monkeypatch)
if api_mode == "anthropic_messages":
monkeypatch.setattr("agent.anthropic_adapter.build_anthropic_client", lambda k, b=None: _FakeAnthropicClient())
monkeypatch.setattr("agent.anthropic_adapter.build_anthropic_client", lambda k, b=None, **kwargs: _FakeAnthropicClient())
if provider == "openai-codex":
monkeypatch.setattr(
"agent.auxiliary_client.resolve_provider_client",

View file

@@ -3615,7 +3615,9 @@ class TestAnthropicCredentialRefresh:
assert agent._try_refresh_anthropic_client_credentials() is True
old_client.close.assert_called_once()
rebuild.assert_called_once_with("sk-ant-oat01-fresh-token", "https://api.anthropic.com")
rebuild.assert_called_once_with(
"sk-ant-oat01-fresh-token", "https://api.anthropic.com", timeout=None,
)
assert agent._anthropic_client is new_client
assert agent._anthropic_api_key == "sk-ant-oat01-fresh-token"

View file

@@ -75,7 +75,7 @@ For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-ho
### Provider Request Timeouts
You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).
You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. Applies to the primary turn client on every transport (OpenAI-wire, native Anthropic, Anthropic-compatible), the fallback chain, and rebuilds after credential rotation. Leaving these unset keeps SDK defaults (OpenAI ≈ 600s, native Anthropic 900s). See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).
## Terminal Backend Configuration