From 0cc63043e085dc6c12bc80007b6e6e3fafb7b3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E6=B3=A5=E8=B1=86?= <1243352777@qq.com> Date: Sun, 26 Apr 2026 14:37:12 +0800 Subject: [PATCH] fix(delegation): increase heartbeat stale thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The heartbeat stale detection was too aggressive: - idle: 5 * 30s = 150s — LLM inference on slow providers (Zhipu/GLM) frequently exceeds 150s, causing heartbeat to stop prematurely - in-tool: 20 * 30s = 600s — borderline for long tool calls When heartbeat stops, parent._last_activity_ts freezes, eventually triggering gateway timeout and killing the entire delegation. New thresholds: - idle: 15 * 30s = 450s — accommodates slow LLM inference - in-tool: 40 * 30s = 1200s — accommodates long-running tool calls child_timeout_seconds (config: delegation.child_timeout_seconds) remains the hard cap for total delegation duration. --- tools/delegate_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 5655631662..c288e7b28a 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -483,8 +483,8 @@ _HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during de # The idle ceiling stays tight so genuinely stuck children don't mask the gateway # timeout. The in-tool ceiling is much higher so legit long-running tools get # time to finish; child_timeout_seconds (default 600s) is still the hard cap. -_HEARTBEAT_STALE_CYCLES_IDLE = 5 # 5 * 30s = 150s idle between turns → stale -_HEARTBEAT_STALE_CYCLES_IN_TOOL = 20 # 20 * 30s = 600s stuck on same tool → stale +_HEARTBEAT_STALE_CYCLES_IDLE = 15 # 15 * 30s = 450s idle between turns → stale +_HEARTBEAT_STALE_CYCLES_IN_TOOL = 40 # 40 * 30s = 1200s stuck on same tool → stale DEFAULT_TOOLSETS = ["terminal", "file", "web"]