From 3143d3233077c3ff798216c8df15bcd914a7bda9 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:38:31 -0700 Subject: [PATCH] feat(providers): add per-provider and per-model request_timeout_seconds config Adds optional providers.<id>.request_timeout_seconds and providers.<id>.models.<model>.timeout_seconds config, resolved via a new hermes_cli/timeouts.py helper and applied where client_kwargs is built in run_agent.py. Zero default behavior change: when both keys are unset, the openai SDK default takes over. Mirrors the existing _get_task_timeout pattern in agent/auxiliary_client.py for auxiliary tasks - the primary turn path just never got the equivalent knob. Cross-project demand: openclaw/openclaw#43946 (17 reactions) asks for exactly this config - specifically calls out Ollama cold-start hanging the client. --- cli-config.yaml.example | 14 ++++- hermes_cli/timeouts.py | 42 +++++++++++++ run_agent.py | 4 ++ tests/hermes_cli/test_timeouts.py | 78 ++++++++++++++++++++++++ website/docs/user-guide/configuration.md | 4 ++ 5 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 hermes_cli/timeouts.py create mode 100644 tests/hermes_cli/test_timeouts.py diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 20b54b788..23851d88e 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -63,7 +63,19 @@ model: # Leave unset to use the model's native output ceiling (recommended). # Set only if you want to deliberately limit individual response length. # - # max_tokens: 8192 +# max_tokens: 8192 + +# Named provider overrides (optional) +# Use this for per-provider request timeouts and per-model exceptions. 
# Provider request-timeout resolution (hermes_cli/timeouts.py).
#
# Timeouts are read from the optional ``providers`` section of the config:
#
#   providers:
#     ollama-local:
#       request_timeout_seconds: 300   # provider-wide timeout
#     anthropic:
#       request_timeout_seconds: 30
#       models:
#         claude-opus-4.6:
#           timeout_seconds: 600       # per-model override
from __future__ import annotations

import math


def _coerce_timeout(raw: object) -> float | None:
    """Convert a raw config value into a timeout in seconds.

    Args:
        raw: The value found under a timeout key (any type; may be ``None``).

    Returns:
        The value as a ``float`` when it is a finite, strictly positive
        number (numeric strings such as ``"300"`` are accepted); otherwise
        ``None``, meaning "no usable timeout configured here".
    """
    # bool is a subclass of int, so float(True) == 1.0. A stray `true` in the
    # config must not silently become a one-second timeout.
    if isinstance(raw, bool):
        return None
    try:
        timeout = float(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: integers too large to be represented as a float.
        return None
    # NaN compares False against everything, so `timeout <= 0` alone would
    # let it through; infinity is rejected as well because it is not a
    # usable timeout value.
    if not math.isfinite(timeout) or timeout <= 0:
        return None
    return timeout


def get_provider_request_timeout(
    provider_id: str,
    model: str | None = None,
    *,
    config: dict | None = None,
) -> float | None:
    """Return a configured provider request timeout in seconds, if any.

    Resolution order:
      1. ``providers.<provider_id>.models.<model>.timeout_seconds``
      2. ``providers.<provider_id>.request_timeout_seconds``

    An invalid per-model value falls through to the provider-wide value.

    Args:
        provider_id: Key of the provider under ``providers``. An empty value
            yields ``None``.
        model: Optional model name used to look up a per-model override.
        config: Optional already-loaded config mapping. When omitted, the
            config is obtained from ``hermes_cli.config.load_config()``.

    Returns:
        The timeout in seconds, or ``None`` when nothing valid is configured
        (or when ``hermes_cli.config`` cannot be imported).
    """
    if not provider_id:
        return None

    if config is None:
        # Imported inside the function; a missing config module is treated
        # as "nothing configured" rather than an error.
        try:
            from hermes_cli.config import load_config
        except ImportError:
            return None
        config = load_config()

    # Every level is type-checked because the config is user-authored and
    # any key may hold a scalar or null instead of a mapping.
    providers = config.get("providers", {}) if isinstance(config, dict) else {}
    provider_config = (
        providers.get(provider_id, {}) if isinstance(providers, dict) else {}
    )
    if not isinstance(provider_config, dict):
        return None

    if model:
        models = provider_config.get("models", {})
        model_config = models.get(model, {}) if isinstance(models, dict) else {}
        if isinstance(model_config, dict):
            timeout = _coerce_timeout(model_config.get("timeout_seconds"))
            if timeout is not None:
                return timeout

    return _coerce_timeout(provider_config.get("request_timeout_seconds"))
# User-managed env files should override stale shell exports on restart. from hermes_cli.env_loader import load_hermes_dotenv +from hermes_cli.timeouts import get_provider_request_timeout _hermes_home = get_hermes_home() _project_env = Path(__file__).parent / '.env' @@ -1034,6 +1035,9 @@ class AIAgent: # Explicit credentials from CLI/gateway — construct directly. # The runtime provider resolver already handled auth for us. client_kwargs = {"api_key": api_key, "base_url": base_url} + _provider_timeout = get_provider_request_timeout(self.provider, self.model) + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout if self.provider == "copilot-acp": client_kwargs["command"] = self.acp_command client_kwargs["args"] = self.acp_args diff --git a/tests/hermes_cli/test_timeouts.py b/tests/hermes_cli/test_timeouts.py new file mode 100644 index 000000000..bf996b295 --- /dev/null +++ b/tests/hermes_cli/test_timeouts.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import textwrap + +from hermes_cli.timeouts import get_provider_request_timeout + + +def _write_config(tmp_path, body: str) -> None: + (tmp_path / "config.yaml").write_text(textwrap.dedent(body), encoding="utf-8") + + +def test_model_timeout_override_wins(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _write_config( + tmp_path, + """\ + providers: + anthropic: + request_timeout_seconds: 30 + models: + claude-opus-4.6: + timeout_seconds: 120 + """, + ) + + assert get_provider_request_timeout("anthropic", "claude-opus-4.6") == 120.0 + + +def test_provider_timeout_used_when_no_model_override(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _write_config( + tmp_path, + """\ + providers: + ollama-local: + request_timeout_seconds: 300 + """, + ) + + assert get_provider_request_timeout("ollama-local", "qwen3:32b") == 300.0 + + +def test_missing_timeout_returns_none(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", 
str(tmp_path)) + _write_config( + tmp_path, + """\ + providers: + anthropic: + models: + claude-opus-4.6: + context_length: 200000 + """, + ) + + assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None + assert get_provider_request_timeout("missing-provider", "claude-opus-4.6") is None + + +def test_invalid_timeout_values_return_none(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _write_config( + tmp_path, + """\ + providers: + anthropic: + request_timeout_seconds: "fast" + models: + claude-opus-4.6: + timeout_seconds: -5 + ollama-local: + request_timeout_seconds: -1 + """, + ) + + assert get_provider_request_timeout("anthropic", "claude-opus-4.6") is None + assert get_provider_request_timeout("anthropic", "claude-sonnet-4.5") is None + assert get_provider_request_timeout("ollama-local") is None diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index f91a25c38..c8b092f20 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -73,6 +73,10 @@ Multiple references in a single value work: `url: "${HOST}:${PORT}"`. If a refer For AI provider setup (OpenRouter, Anthropic, Copilot, custom endpoints, self-hosted LLMs, fallback models, etc.), see [AI Providers](/docs/integrations/providers). +### Provider Request Timeouts + +You can set `providers.<id>.request_timeout_seconds` for a provider-wide timeout, plus `providers.<id>.models.<model>.timeout_seconds` for a model-specific override. See the commented example in [`cli-config.yaml.example`](https://github.com/NousResearch/hermes-agent/blob/main/cli-config.yaml.example). + ## Terminal Backend Configuration Hermes supports six terminal backends. Each determines where the agent's shell commands actually execute — your local machine, a Docker container, a remote server via SSH, a Modal cloud sandbox, a Daytona workspace, or a Singularity/Apptainer container.