[verified] fix: account for responses payloads in stale timeout sizing

This commit is contained in:
Remotework 2026-04-20 15:56:20 -04:00
parent 79bfad0adc
commit 1bf574fb7b
2 changed files with 122 additions and 19 deletions

View file

@ -2576,19 +2576,20 @@ class AIAgent:
return 300.0, True
def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float:
def _compute_non_stream_stale_timeout(self, api_kwargs: dict[str, Any]) -> float:
"""Compute the effective non-stream stale timeout for this request."""
stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base()
base_url = getattr(self, "_base_url", None) or self.base_url or ""
if uses_implicit_default and base_url and is_local_endpoint(base_url):
return float("inf")
est_tokens = sum(len(str(v)) for v in messages) // 4
if est_tokens > 100_000:
return max(stale_base, 600.0)
if est_tokens > 50_000:
return max(stale_base, 450.0)
return stale_base
est_tokens = self._estimate_request_context_tokens(api_kwargs)
return self._scale_stale_timeout_for_context(
stale_base,
est_tokens,
medium_timeout=450.0,
large_timeout=600.0,
)
def _is_openrouter_url(self) -> bool:
"""Return True when the base URL targets OpenRouter."""
@ -5666,6 +5667,36 @@ class AIAgent:
timeout=get_provider_request_timeout(self.provider, self.model),
)
@staticmethod
def _rough_payload_chars(value: Any) -> int:
if value is None:
return 0
try:
return len(json.dumps(value, ensure_ascii=False, separators=(",", ":")))
except Exception:
return len(str(value))
def _estimate_request_context_tokens(self, api_kwargs: dict) -> int:
"""Roughly estimate request size across chat-completions and Responses payloads."""
total_chars = 0
for key in ("system", "messages", "input", "instructions", "tools"):
total_chars += self._rough_payload_chars(api_kwargs.get(key))
return (total_chars + 3) // 4 if total_chars > 0 else 0
@staticmethod
def _scale_stale_timeout_for_context(
base_timeout: float,
est_tokens: int,
*,
medium_timeout: float,
large_timeout: float,
) -> float:
if est_tokens > 100_000:
return max(base_timeout, large_timeout)
if est_tokens > 50_000:
return max(base_timeout, medium_timeout)
return base_timeout
def _interruptible_api_call(self, api_kwargs: dict):
"""
Run the API call in a background thread so the main conversation loop
@ -5733,9 +5764,7 @@ class AIAgent:
# httpx timeout (default 1800s) with zero feedback. The stale
# detector kills the connection early so the main retry loop can
# apply richer recovery (credential rotation, provider fallback).
_stale_timeout = self._compute_non_stream_stale_timeout(
api_kwargs.get("messages", [])
)
_stale_timeout = self._compute_non_stream_stale_timeout(api_kwargs)
_call_start = time.time()
self._touch_activity("waiting for non-streaming API response")
@ -5759,7 +5788,7 @@ class AIAgent:
# arrives within the configured timeout.
_elapsed = time.time() - _call_start
if _elapsed > _stale_timeout:
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
_est_ctx = self._estimate_request_context_tokens(api_kwargs)
logger.warning(
"Non-streaming API call stale for %.0fs (threshold %.0fs). "
"model=%s context=~%s tokens. Killing connection.",
@ -6593,13 +6622,13 @@ class AIAgent:
# when the context is large. Without this, the stale detector kills
# healthy connections during the model's thinking phase, producing
# spurious RemoteProtocolError ("peer closed connection").
_est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
if _est_tokens > 100_000:
_stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
elif _est_tokens > 50_000:
_stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
else:
_stream_stale_timeout = _stream_stale_timeout_base
_est_tokens = self._estimate_request_context_tokens(api_kwargs)
_stream_stale_timeout = self._scale_stale_timeout_for_context(
_stream_stale_timeout_base,
_est_tokens,
medium_timeout=240.0,
large_timeout=300.0,
)
t = threading.Thread(target=_call, daemon=True)
t.start()
@ -6629,7 +6658,7 @@ class AIAgent:
# inner retry loop can start a fresh connection.
_stale_elapsed = time.time() - last_chunk_time["t"]
if _stale_elapsed > _stream_stale_timeout:
_est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
_est_ctx = self._estimate_request_context_tokens(api_kwargs)
logger.warning(
"Stream stale for %.0fs (threshold %.0fs) — no chunks received. "
"model=%s context=~%s tokens. Killing connection.",

View file

@ -438,6 +438,80 @@ class TestBuildApiKwargsCodex:
assert "function" not in tools[0]
class TestEstimateRequestContextTokens:
    """Sizing estimates must cover chat-completions, Responses, and Anthropic payload shapes."""

    def test_chat_completions_counts_messages_and_tools(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        payload = {
            "model": "anthropic/claude-sonnet-4-20250514",
            "messages": [
                {"role": "system", "content": "a" * 4000},
                {"role": "user", "content": "b" * 4000},
            ],
            "tools": _tool_defs("web_search", "terminal"),
        }
        # ~8k chars of messages alone should clear 2k estimated tokens.
        assert agent._estimate_request_context_tokens(payload) > 2000

    def test_codex_responses_counts_input_and_instructions(self, monkeypatch):
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        payload = {
            "model": "gpt-5.4",
            "instructions": "system:" + ("y" * 10000),
            "input": [{"role": "user", "content": "x" * 410000}],
            "tools": [
                {
                    "type": "function",
                    "name": "web_search",
                    "description": "search",
                    "parameters": {"type": "object", "properties": {}},
                }
            ],
        }
        # Responses payloads carry no "messages" key — sizing must not rely on it.
        assert "messages" not in payload
        estimate = agent._estimate_request_context_tokens(payload)
        assert estimate > 100000
        non_stream_timeout = agent._scale_stale_timeout_for_context(
            300.0,
            estimate,
            medium_timeout=450.0,
            large_timeout=600.0,
        )
        assert non_stream_timeout == 600.0
        stream_timeout = agent._scale_stale_timeout_for_context(
            180.0,
            estimate,
            medium_timeout=240.0,
            large_timeout=300.0,
        )
        assert stream_timeout == 300.0

    def test_anthropic_counts_top_level_system(self, monkeypatch):
        agent = _make_agent(monkeypatch, "anthropic", api_mode="anthropic_messages")
        payload = {
            "model": "claude-sonnet-4.6",
            "system": "policy:" + ("s" * 240000),
            "messages": [{"role": "user", "content": "hi"}],
            "tools": _tool_defs("web_search"),
        }
        estimate = agent._estimate_request_context_tokens(payload)
        # The top-level "system" string dominates: 240k chars ≈ 60k tokens.
        assert estimate > 60000
        assert agent._scale_stale_timeout_for_context(
            300.0,
            estimate,
            medium_timeout=450.0,
            large_timeout=600.0,
        ) == 450.0
# ── Message conversion tests ────────────────────────────────────────────────
class TestChatMessagesToResponsesInput: