fix: increase API timeout default from 900s to 1800s for slow-thinking models (#3431)

Models like GLM-5/5.1 can think for 15+ minutes. The previous 900s (15 min) default for HERMES_API_TIMEOUT killed legitimate requests that were still in progress. The default is raised to 1800s (30 min) in both places that read the env var:

- _build_api_kwargs() timeout (non-streaming total timeout)
- _call_chat_completions() write timeout (streaming connection)

The streaming per-chunk read timeout (60s) and stale stream detector (180-300s) are unchanged — those are appropriate for inter-chunk timing.
2026-07-18 14:52:04 +00:00 · 2026-03-27 13:02:23 -07:00 · 2026-03-27 13:02:23 -07:00 · fb46a90098
commit fb46a90098
parent fd8c465e42
2 changed files with 3 additions and 3 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -3772,7 +3772,7 @@ class AIAgent:
        def _call_chat_completions():
            """Stream a chat completions response."""
            import httpx as _httpx
-            _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
+            _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
            _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0))
            stream_kwargs = {
                **api_kwargs,
@@ -4497,7 +4497,7 @@ class AIAgent:
            "model": self.model,
            "messages": sanitized_messages,
            "tools": self.tools if self.tools else None,
-            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)),
+            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)),
        }

        if self.max_tokens is not None:
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -637,7 +637,7 @@ class TestBuildApiKwargs:
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["model"] == agent.model
        assert kwargs["messages"] is messages
-        assert kwargs["timeout"] == 900.0
+        assert kwargs["timeout"] == 1800.0

    def test_provider_preferences_injected(self, agent):
        agent.providers_allowed = ["Anthropic"]