mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
A child running a legitimately long-running tool (terminal command, browser fetch, big file read) holds current_tool set and keeps api_call_count frozen while the tool runs. The previous stale check treated that as idle after 5 heartbeat cycles (~150s), stopped touching the parent, and let the gateway kill the session. Split the threshold in two: - _HEARTBEAT_STALE_CYCLES_IDLE=5 (~150s) — applied only when current_tool is None (child wedged between turns) - _HEARTBEAT_STALE_CYCLES_IN_TOOL=20 (~600s) — applied when the child is inside a tool call Stale counter also resets when current_tool changes (new tool = progress). The hard child_timeout_seconds (default 600s) is still the final cap, so genuinely stuck tools don't get to block forever.
This commit is contained in:
parent
1840c6a57d
commit
fcc05284fc
2 changed files with 146 additions and 14 deletions
|
|
@ -1319,6 +1319,112 @@ class TestDelegateHeartbeat(unittest.TestCase):
|
|||
any("API call #5 completed" in desc for desc in touch_calls),
|
||||
f"Heartbeat should include last_activity_desc: {touch_calls}")
|
||||
|
||||
def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
|
||||
"""A long-running tool (no iteration advance, but current_tool set)
|
||||
must not be flagged stale at the idle threshold.
|
||||
|
||||
Bug #13041: when a child is legitimately busy inside a slow tool
|
||||
(terminal command, browser fetch), api_call_count does not advance.
|
||||
The previous stale check treated this as idle and stopped the
|
||||
heartbeat after 5 cycles (~150s), letting the gateway kill the
|
||||
session. The fix uses a much higher in-tool threshold and only
|
||||
applies the tight idle threshold when current_tool is None.
|
||||
"""
|
||||
from tools.delegate_tool import _run_single_child
|
||||
|
||||
parent = _make_mock_parent()
|
||||
touch_calls = []
|
||||
parent._touch_activity = lambda desc: touch_calls.append(desc)
|
||||
|
||||
child = MagicMock()
|
||||
# Child is stuck inside a single terminal call for the whole run.
|
||||
# api_call_count never advances, current_tool is always set.
|
||||
child.get_activity_summary.return_value = {
|
||||
"current_tool": "terminal",
|
||||
"api_call_count": 1,
|
||||
"max_iterations": 50,
|
||||
"last_activity_desc": "executing tool: terminal",
|
||||
}
|
||||
|
||||
def slow_run(**kwargs):
|
||||
# Long enough to exceed the OLD idle threshold (5 cycles) at
|
||||
# the patched interval, but shorter than the new in-tool
|
||||
# threshold.
|
||||
time.sleep(0.4)
|
||||
return {"final_response": "done", "completed": True, "api_calls": 1}
|
||||
|
||||
child.run_conversation.side_effect = slow_run
|
||||
|
||||
# Patch both the interval AND the idle ceiling so the test proves
|
||||
# the in-tool branch takes effect: with a 0.05s interval and the
|
||||
# default _HEARTBEAT_STALE_CYCLES_IDLE=5, the old behavior would
|
||||
# trip after 0.25s and stop firing. We should see heartbeats
|
||||
# continuing through the full 0.4s run.
|
||||
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
|
||||
_run_single_child(
|
||||
task_index=0,
|
||||
goal="Test long-running tool",
|
||||
child=child,
|
||||
parent_agent=parent,
|
||||
)
|
||||
|
||||
# With the old idle threshold (5 cycles = 0.25s), touch_calls
|
||||
# would cap at ~5. With the in-tool threshold (20 cycles = 1.0s),
|
||||
# we should see substantially more heartbeats over 0.4s.
|
||||
self.assertGreater(
|
||||
len(touch_calls), 6,
|
||||
f"Heartbeat stopped too early while child was inside a tool; "
|
||||
f"got {len(touch_calls)} touches over 0.4s at 0.05s interval",
|
||||
)
|
||||
|
||||
def test_heartbeat_still_trips_idle_stale_when_no_tool(self):
|
||||
"""A wedged child with no current_tool still trips the idle threshold.
|
||||
|
||||
Regression guard: the fix for #13041 must not disable stale
|
||||
detection entirely. A child that's hung between turns (no tool
|
||||
running, no iteration progress) must still stop touching the
|
||||
parent so the gateway timeout can fire.
|
||||
"""
|
||||
from tools.delegate_tool import _run_single_child
|
||||
|
||||
parent = _make_mock_parent()
|
||||
touch_calls = []
|
||||
parent._touch_activity = lambda desc: touch_calls.append(desc)
|
||||
|
||||
child = MagicMock()
|
||||
# Wedged child: no tool running, iteration frozen.
|
||||
child.get_activity_summary.return_value = {
|
||||
"current_tool": None,
|
||||
"api_call_count": 3,
|
||||
"max_iterations": 50,
|
||||
"last_activity_desc": "waiting for API response",
|
||||
}
|
||||
|
||||
def slow_run(**kwargs):
|
||||
time.sleep(0.6)
|
||||
return {"final_response": "done", "completed": True, "api_calls": 3}
|
||||
|
||||
child.run_conversation.side_effect = slow_run
|
||||
|
||||
# At interval 0.05s, idle threshold (5 cycles) trips at ~0.25s.
|
||||
# We should see the heartbeat stop firing well before 0.6s.
|
||||
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
|
||||
_run_single_child(
|
||||
task_index=0,
|
||||
goal="Test wedged child",
|
||||
child=child,
|
||||
parent_agent=parent,
|
||||
)
|
||||
|
||||
# With idle threshold=5 + interval=0.05s, touches should cap
|
||||
# around 5. Bound loosely to avoid timing flakes.
|
||||
self.assertLess(
|
||||
len(touch_calls), 9,
|
||||
f"Idle stale detection did not fire: got {len(touch_calls)} "
|
||||
f"touches over 0.6s — expected heartbeat to stop after "
|
||||
f"~5 stale cycles",
|
||||
)
|
||||
|
||||
|
||||
class TestDelegationReasoningEffort(unittest.TestCase):
|
||||
"""Tests for delegation.reasoning_effort config override."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue