[verified] fix: account for responses payloads in stale timeout sizing

This commit is contained in:
Remotework 2026-04-20 15:56:20 -04:00
parent 79bfad0adc
commit 1bf574fb7b
2 changed files with 122 additions and 19 deletions

View file

@ -2576,19 +2576,20 @@ class AIAgent:
return 300.0, True
def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float:
def _compute_non_stream_stale_timeout(self, api_kwargs: dict[str, Any]) -> float:
"""Compute the effective non-stream stale timeout for this request."""
stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base()
base_url = getattr(self, "_base_url", None) or self.base_url or ""
if uses_implicit_default and base_url and is_local_endpoint(base_url):
return float("inf")
est_tokens = sum(len(str(v)) for v in messages) // 4
if est_tokens > 100_000:
return max(stale_base, 600.0)
if est_tokens > 50_000:
return max(stale_base, 450.0)
return stale_base
est_tokens = self._estimate_request_context_tokens(api_kwargs)
return self._scale_stale_timeout_for_context(
stale_base,
est_tokens,
medium_timeout=450.0,
large_timeout=600.0,
)
def _is_openrouter_url(self) -> bool:
"""Return True when the base URL targets OpenRouter."""
@ -5666,6 +5667,36 @@ class AIAgent:
timeout=get_provider_request_timeout(self.provider, self.model),
)
@staticmethod
def _rough_payload_chars(value: Any) -> int:
if value is None:
return 0
try:
return len(json.dumps(value, ensure_ascii=False, separators=(",", ":")))
except Exception:
return len(str(value))
def _estimate_request_context_tokens(self, api_kwargs: dict) -> int:
"""Roughly estimate request size across chat-completions and Responses payloads."""
total_chars = 0
for key in ("system", "messages", "input", "instructions", "tools"):
total_chars += self._rough_payload_chars(api_kwargs.get(key))
return (total_chars + 3) // 4 if total_chars > 0 else 0
@staticmethod
def _scale_stale_timeout_for_context(
base_timeout: float,
est_tokens: int,
*,
medium_timeout: float,
large_timeout: float,
) -> float:
if est_tokens > 100_000:
return max(base_timeout, large_timeout)
if est_tokens > 50_000:
return max(base_timeout, medium_timeout)
return base_timeout
def _interruptible_api_call(self, api_kwargs: dict):
"""
Run the API call in a background thread so the main conversation loop
@ -5733,9 +5764,7 @@ class AIAgent:
# httpx timeout (default 1800s) with zero feedback. The stale
# detector kills the connection early so the main retry loop can
# apply richer recovery (credential rotation, provider fallback).
_stale_timeout = self._compute_non_stream_stale_timeout(
api_kwargs.get("messages", [])
)
_stale_timeout = self._compute_non_stream_stale_timeout(api_kwargs)
_call_start = time.time()
self._touch_activity("waiting for non-streaming API response")
@ -5759,7 +5788,7 @@ class AIAgent:
# arrives within the configured timeout.
_elapsed = time.time() - _call_start
if _elapsed > _stale_timeout:
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
_est_ctx = self._estimate_request_context_tokens(api_kwargs)
logger.warning(
"Non-streaming API call stale for %.0fs (threshold %.0fs). "
"model=%s context=~%s tokens. Killing connection.",
@ -6593,13 +6622,13 @@ class AIAgent:
# when the context is large. Without this, the stale detector kills
# healthy connections during the model's thinking phase, producing
# spurious RemoteProtocolError ("peer closed connection").
_est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
if _est_tokens > 100_000:
_stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
elif _est_tokens > 50_000:
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
else:
_stream_stale_timeout = _stream_stale_timeout_base
_est_tokens = self._estimate_request_context_tokens(api_kwargs)
_stream_stale_timeout = self._scale_stale_timeout_for_context(
_stream_stale_timeout_base,
_est_tokens,
medium_timeout=240.0,
large_timeout=300.0,
)
t = threading.Thread(target=_call, daemon=True)
t.start()
@ -6629,7 +6658,7 @@ class AIAgent:
# inner retry loop can start a fresh connection.
_stale_elapsed = time.time() - last_chunk_time["t"]
if _stale_elapsed > _stream_stale_timeout:
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
_est_ctx = self._estimate_request_context_tokens(api_kwargs)
logger.warning(
"Stream stale for %.0fs (threshold %.0fs) — no chunks received. "
"model=%s context=~%s tokens. Killing connection.",

View file

@ -438,6 +438,80 @@ class TestBuildApiKwargsCodex:
assert "function" not in tools[0]
class TestEstimateRequestContextTokens:
    """Sizing estimates must cover chat-completions, Responses, and Anthropic payload shapes."""

    def test_chat_completions_counts_messages_and_tools(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        payload = {
            "model": "anthropic/claude-sonnet-4-20250514",
            "messages": [
                {"role": "system", "content": "a" * 4000},
                {"role": "user", "content": "b" * 4000},
            ],
            "tools": _tool_defs("web_search", "terminal"),
        }
        # ~8k chars of messages alone should clear 2k estimated tokens.
        assert agent._estimate_request_context_tokens(payload) > 2000

    def test_codex_responses_counts_input_and_instructions(self, monkeypatch):
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        payload = {
            "model": "gpt-5.4",
            "instructions": "system:" + ("y" * 10000),
            "input": [{"role": "user", "content": "x" * 410000}],
            "tools": [
                {
                    "type": "function",
                    "name": "web_search",
                    "description": "search",
                    "parameters": {"type": "object", "properties": {}},
                }
            ],
        }
        # Responses payloads carry no "messages" key — sizing must not rely on it.
        assert "messages" not in payload
        estimate = agent._estimate_request_context_tokens(payload)
        assert estimate > 100000
        non_stream_timeout = agent._scale_stale_timeout_for_context(
            300.0,
            estimate,
            medium_timeout=450.0,
            large_timeout=600.0,
        )
        assert non_stream_timeout == 600.0
        stream_timeout = agent._scale_stale_timeout_for_context(
            180.0,
            estimate,
            medium_timeout=240.0,
            large_timeout=300.0,
        )
        assert stream_timeout == 300.0

    def test_anthropic_counts_top_level_system(self, monkeypatch):
        agent = _make_agent(monkeypatch, "anthropic", api_mode="anthropic_messages")
        payload = {
            "model": "claude-sonnet-4.6",
            "system": "policy:" + ("s" * 240000),
            "messages": [{"role": "user", "content": "hi"}],
            "tools": _tool_defs("web_search"),
        }
        estimate = agent._estimate_request_context_tokens(payload)
        # The top-level "system" string dominates: 240k chars ≈ 60k tokens.
        assert estimate > 60000
        assert agent._scale_stale_timeout_for_context(
            300.0,
            estimate,
            medium_timeout=450.0,
            large_timeout=600.0,
        ) == 450.0
# ── Message conversion tests ────────────────────────────────────────────────
class TestChatMessagesToResponsesInput: