From 3143d3233077c3ff798216c8df15bcd914a7bda9 Mon Sep 17 00:00:00 2001
From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
Date: Sat, 18 Apr 2026 21:38:31 -0700
Subject: [PATCH] feat(providers): add per-provider and per-model
 request_timeout_seconds config

Adds optional providers.<id>.request_timeout_seconds and
providers.<id>.models.<model>.timeout_seconds config, resolved via a new
hermes_cli/timeouts.py helper and applied where client_kwargs is built
in run_agent.py. Zero default behavior change: when both keys are unset,
the openai SDK default takes over.

Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py
for auxiliary tasks - the primary turn path just never got the equivalent
knob.

Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for
exactly this config - specifically calls out Ollama cold-start hanging
the client.
---
 cli-config.yaml.example                  | 14 ++++-
 hermes_cli/timeouts.py                   | 42 +++++++++++++
 run_agent.py                             |  4 ++
 tests/hermes_cli/test_timeouts.py        | 78 ++++++++++++++++++++++++
 website/docs/user-guide/configuration.md |  4 ++
 5 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 hermes_cli/timeouts.py
 create mode 100644 tests/hermes_cli/test_timeouts.py
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 20b54b788..23851d88e 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -63,7 +63,19 @@ model:
   #   Leave unset to use the model's native output ceiling (recommended).
   #   Set only if you want to deliberately limit individual response length.
   #
-  # max_tokens: 8192
+  # max_tokens: 8192
+
+# Named provider overrides (optional)
+# Use this for per-provider request timeouts and per-model exceptions.
+#
+# providers:
+#   ollama-local:
+#     request_timeout_seconds: 300   # Longer timeout for local cold-starts
+#   anthropic:
+#     request_timeout_seconds: 30    # Fast-fail cloud requests
+#     models:
+#       claude-opus-4.6:
+#         timeout_seconds: 600       # Longer timeout for extended-thinking Opus calls
 
 # =============================================================================
 # OpenRouter Provider Routing (only applies when using OpenRouter)
diff --git a/hermes_cli/timeouts.py b/hermes_cli/timeouts.py
new file mode 100644
index 000000000..9ba2ac6c8
--- /dev/null
+++ b/hermes_cli/timeouts.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+
+def _coerce_timeout(raw: object) -> float | None:
+    """Return *raw* as a positive, finite float of seconds, else None."""
+    try:
+        # bool is an int subclass; None here makes float() raise TypeError.
+        timeout = float(None if isinstance(raw, bool) else raw)
+    except (TypeError, ValueError, OverflowError):  # overflow: huge int
+        return None
+    return timeout if 0 < timeout < float("inf") else None  # False for NaN
+
+
+def get_provider_request_timeout(
+    provider_id: str, model: str | None = None
+) -> float | None:
+    """Look up the request timeout (seconds) configured for a provider.
+
+    A usable per-model ``timeout_seconds`` wins over the provider-wide
+    ``request_timeout_seconds``; None means nothing usable is configured.
+    """
+    if not provider_id:
+        return None
+    try:
+        from hermes_cli.config import load_config
+    except ImportError:
+        return None
+
+    cfg = load_config()
+    all_providers = cfg.get("providers", {}) if isinstance(cfg, dict) else {}
+    if not isinstance(all_providers, dict):
+        return None
+    entry = all_providers.get(provider_id, {})
+    if not isinstance(entry, dict):
+        return None
+    per_model = entry.get("models", {}) if model else None
+    override = per_model.get(model, {}) if isinstance(per_model, dict) else None
+    if isinstance(override, dict):
+        model_timeout = _coerce_timeout(override.get("timeout_seconds"))
+        if model_timeout is not None:
+            return model_timeout
+    return _coerce_timeout(entry.get("request_timeout_seconds"))
diff --git a/run_agent.py b/run_agent.py
index 4ad047262..5a9dca869 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -48,6 +48,7 @@ from hermes_constants import get_hermes_home
 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 from hermes_cli.env_loader import load_hermes_dotenv
+from hermes_cli.timeouts import get_provider_request_timeout
 
 _hermes_home = get_hermes_home()
 _project_env = Path(__file__).parent / '.env'
@@ -1034,6 +1035,9 @@ class AIAgent:
                 # Explicit credentials from CLI/gateway — construct directly.
                 # The runtime provider resolver already handled auth for us.
                 client_kwargs = {"api_key": api_key, "base_url": base_url}
+                _provider_timeout = get_provider_request_timeout(self.provider, self.model)
+                if _provider_timeout is not None:
+                    client_kwargs["timeout"] = _provider_timeout
                 if self.provider == "copilot-acp":
                     client_kwargs["command"] = self.acp_command
                     client_kwargs["args"] = self.acp_args
diff --git a/tests/hermes_cli/test_timeouts.py b/tests/hermes_cli/test_timeouts.py
new file mode 100644
index 000000000..bf996b295
--- /dev/null
+++ b/tests/hermes_cli/test_timeouts.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import textwrap
+
+from hermes_cli.timeouts import get_provider_request_timeout
+
+
+def _write_config(tmp_path, body: str) -> None:  # dedent *body* into config.yaml
+    tmp_path.joinpath("config.yaml").write_text(textwrap.dedent(body), encoding="utf-8")
+
+
+def test_model_timeout_override_wins(monkeypatch, tmp_path):
+    """A per-model timeout_seconds beats the provider-wide timeout."""
+    yaml_body = """\
+        providers:
+          anthropic:
+            request_timeout_seconds: 30
+            models:
+              claude-opus-4.6:
+                timeout_seconds: 120
+        """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _write_config(tmp_path, yaml_body)
+
+    resolved = get_provider_request_timeout("anthropic", "claude-opus-4.6")
+    assert resolved == 120.0
+
+
+def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path):
+    """The provider-wide timeout applies to a model with no entry of its own."""
+    yaml_body = """\
+        providers:
+          ollama-local:
+            request_timeout_seconds: 300
+        """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _write_config(tmp_path, yaml_body)
+
+    resolved = get_provider_request_timeout("ollama-local", "qwen3:32b")
+    assert resolved == 300.0
+
+
+def test_missing_timeout_returns_none(monkeypatch, tmp_path):
+    """No timeout keys, or an unknown provider id, resolves to None."""
+    yaml_body = """\
+        providers:
+          anthropic:
+            models:
+              claude-opus-4.6:
+                context_length: 200000
+        """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _write_config(tmp_path, yaml_body)
+
+    for provider_id in ("anthropic", "missing-provider"):
+        found = get_provider_request_timeout(provider_id, "claude-opus-4.6")
+        assert found is None
+
+
+def test_invalid_timeout_values_return_none(monkeypatch, tmp_path):
+    """Non-numeric and non-positive values are ignored at both levels."""
+    yaml_body = """\
+        providers:
+          anthropic:
+            request_timeout_seconds: "fast"
+            models:
+              claude-opus-4.6:
+                timeout_seconds: -5
+          ollama-local:
+            request_timeout_seconds: -1
+        """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _write_config(tmp_path, yaml_body)
+
+    # "fast" is not a number; -5 and -1 are not positive.
+    assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None
+    assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None
+    assert get_provider_request_timeout("ollama-local") is None
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index f91a25c38..c8b092f20 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -73,6 +73,10 @@ Multiple references in a single value work: `url: "${HOST}:${PORT}"`. If a refer
 
 For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-hosted LLMs, fallback models, etc.), see [AI Providers](/docs/integrations/providers).
 
+### Provider Request Timeouts
+
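+Set `providers.<id>.request_timeout_seconds` for a provider-wide request timeout, and `providers.<id>.models.<model>.timeout_seconds` to override it for a single model. Both values are in seconds; the model-specific value takes precedence, and values that are not positive numbers are ignored. When neither is set, the SDK's default timeout applies. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example).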
+
 ## Terminal Backend Configuration
 
 Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container.