feat(providers): enforce request_timeout_seconds on OpenAI-wire primary calls

Live test with timeout_seconds: 0.5 on claude-sonnet-4.6 proved the initial wiring was insufficient: run_agent.py was overriding the client-level timeout on every call via hardcoded per-request kwargs. Root cause: run_agent.py had two sites that pass an explicit timeout= kwarg into chat.completions.create() — api_kwargs['timeout'] at line 7075 (HERMES_API_TIMEOUT=1800s default) and the streaming path's _httpx.Timeout(..., read=HERMES_STREAM_READ_TIMEOUT=120s, ...) at line 5760. Both override the per-provider config value the client was constructed with, so a 0.5s config timeout would silently not enforce. This commit: - Adds AIAgent._resolved_api_call_timeout() — config > HERMES_API_TIMEOUT env > 1800s default. - Uses it for the non-streaming api_kwargs['timeout'] field. - Uses it for the streaming path's httpx.Timeout(connect, read, write, pool) so both connect and read respect the configured value when set. Local-provider auto-bump (Ollama/vLLM cold-start) only applies when no explicit config value is set. - New test: test_resolved_api_call_timeout_priority covers all three precedence cases (config, env, default). Live verified: 0.5s config on claude-sonnet-4.6 now triggers APITimeoutError at ~3s per retry, exhausts 3 retries in ~15s total (was: 29-47s success with timeout ignored). Positive case (60s config + gpt-4o-mini) still succeeds at 1.3s.
2026-05-01 01:51:44 +00:00 · 2026-04-19 11:10:47 -07:00 · 2026-04-19 11:10:47 -07:00 · c11ab6f64d
commit c11ab6f64d
parent f1fe29d1c3
4 changed files with 115 additions and 16 deletions
--- a/tests/hermes_cli/test_timeouts.py
+++ b/tests/hermes_cli/test_timeouts.py
@ -95,3 +95,66 @@ def test_anthropic_adapter_honors_timeout_kwarg():
    # Connect timeout always stays at 10s regardless
    assert c_default.timeout.connect == 10.0
    assert c_custom.timeout.connect == 10.0
+
+
+def test_resolved_api_call_timeout_priority(monkeypatch, tmp_path):
+    """AIAgent._resolved_api_call_timeout() honors config > env > default priority."""
+    # Isolate HERMES_HOME
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / ".env").write_text("", encoding="utf-8")
+
+    # Case A: config wins over env var
+    _write_config(tmp_path, """\
+        providers:
+          openrouter:
+            request_timeout_seconds: 77
+            models:
+              openai/gpt-4o-mini:
+                timeout_seconds: 42
+        """)
+    monkeypatch.setenv("HERMES_API_TIMEOUT", "999")
+
+    from run_agent import AIAgent
+    agent = AIAgent(
+        model="openai/gpt-4o-mini",
+        provider="openrouter",
+        api_key="sk-dummy",
+        base_url="https://openrouter.ai/api/v1",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+        platform="cli",
+    )
+    # Per-model override wins
+    assert agent._resolved_api_call_timeout() == 42.0
+
+    # Provider-level (different model, no per-model override)
+    agent.model = "some/other-model"
+    assert agent._resolved_api_call_timeout() == 77.0
+
+    # Case B: no config → env wins
+    _write_config(tmp_path, "")
+    # Clear the cached config load
+    import importlib
+    from hermes_cli import config as cfg_mod
+    importlib.reload(cfg_mod)
+    from hermes_cli import timeouts as to_mod
+    importlib.reload(to_mod)
+    import run_agent as ra_mod
+    importlib.reload(ra_mod)
+
+    agent2 = ra_mod.AIAgent(
+        model="some/model",
+        provider="openrouter",
+        api_key="sk-dummy",
+        base_url="https://openrouter.ai/api/v1",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+        platform="cli",
+    )
+    assert agent2._resolved_api_call_timeout() == 999.0
+
+    # Case C: no config, no env → 1800.0 default
+    monkeypatch.delenv("HERMES_API_TIMEOUT", raising=False)
+    assert agent2._resolved_api_call_timeout() == 1800.0