mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
When agent.interrupt() fires during an active LLM call, the main poll loop force-closes the worker-local httpx client to stop token generation. That raises a transport error (RemoteProtocolError) on the worker thread — the EXPECTED consequence of our own close, not a network bug. The streaming retry loop misclassified it as a transient connection error and retried; each doomed retry stalled for the full stream-stale timeout (up to 300s). Because the gateway caches AIAgent instances per session, the stale worker outlived the interrupted turn and raced the next turn's request on shared client state — the root of the multi-minute cascading-interrupt hang reported in the wild. Fix: a request-local _request_cancelled token set by the poll loop right before the force-close, in both interruptible_api_call (non-streaming) and interruptible_streaming_api_call. The worker's exception handler checks the token and exits cleanly — no retry, no fallback, no 'reconnecting' status — instead of treating the forced error as transient. The token is request-local (not agent._interrupt_requested, which is cleared at turn boundaries) so a stale worker outliving its turn still recognizes its own forced close. Original diagnosis and fix by @kristianvast (PR #6600), against the then-inline methods in run_agent.py. Those have since been extracted into agent/chat_completion_helpers.py, so the fix is reapplied there. Co-authored-by: Kristian Vastveit <kristianvast@users.noreply.github.com>
134 lines
5.5 KiB
Python
134 lines
5.5 KiB
Python
"""Regression guard for the cascading-interrupt hang (PR #6600).
|
|
|
|
Original diagnosis and fix by Kristian Vastveit (@kristianvast) in PR #6600,
|
|
against the then-inline ``_interruptible_api_call`` /
|
|
``_interruptible_streaming_api_call`` methods in run_agent.py. Those methods
|
|
have since been extracted into ``agent/chat_completion_helpers.py``, so the
|
|
fix is reapplied there and these tests target the extracted functions.
|
|
|
|
The bug: when ``agent.interrupt()`` fires during an active LLM call, the main
|
|
poll loop force-closes the worker-local httpx client to stop token generation.
|
|
That raises a transport error (RemoteProtocolError) on the worker — the
|
|
EXPECTED consequence of our own close, not a network bug. The streaming retry
|
|
loop misclassified it as a transient connection error and retried, each doomed
|
|
retry stalling for the full stream-stale timeout (up to 300s). Because the
|
|
gateway caches AIAgent instances per session, the stale worker outlived the
|
|
turn and raced the next turn's request — the root of the multi-minute
|
|
cascading-interrupt hang.
|
|
|
|
The fix: a request-local ``_request_cancelled`` token set by the poll loop
|
|
right before the force-close. The worker's exception handler checks it and
|
|
exits cleanly (no retry, no fallback, no "reconnecting" status) instead of
|
|
treating the forced error as transient.
|
|
"""
|
|
import threading
|
|
import time
|
|
import types
|
|
from unittest.mock import MagicMock
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
from agent import chat_completion_helpers as cch
|
|
|
|
|
|
class _FakeInterruptError(Exception):
    """Stand-in for the transport error a force-close raises on the worker.

    NOTE(review): none of the tests visible in this section reference this
    class (they raise ``httpx.RemoteProtocolError`` directly) — confirm
    whether it is still needed.
    """
|
|
|
|
|
|
def _make_agent():
|
|
"""A MagicMock agent wired with just enough surface for the helpers."""
|
|
agent = MagicMock()
|
|
agent.api_mode = "chat_completions"
|
|
agent._interrupt_requested = False
|
|
agent.verbose_logging = False
|
|
# _compute_non_stream_stale_timeout / streaming setup helpers return
|
|
# benign values; the real call path is mocked per-test.
|
|
agent._compute_non_stream_stale_timeout.return_value = 5.0
|
|
return agent
|
|
|
|
|
|
def test_non_streaming_cancel_does_not_surface_network_error():
    """A force-close during a non-streaming call must raise InterruptedError.

    The fake client's ``create`` flips ``agent._interrupt_requested`` while
    the call is in flight and then raises ``httpx.RemoteProtocolError``, the
    transport error a force-close produces on the worker. The helper must
    report that as ``InterruptedError`` instead of surfacing the transport
    error, must call ``create`` exactly once, and must return promptly.
    """
    agent = _make_agent()

    create_calls = {"n": 0}
    fake_client = MagicMock()

    def _create(**kwargs):
        create_calls["n"] += 1
        # Simulate the main thread firing an interrupt mid-call, then the
        # force-close raising a transport error on this worker.
        agent._interrupt_requested = True
        time.sleep(0.3)  # let the poll loop observe the interrupt + force-close
        raise httpx.RemoteProtocolError("peer closed connection")

    fake_client.chat.completions.create.side_effect = _create
    agent._create_request_openai_client.return_value = fake_client
    agent._close_request_openai_client = MagicMock()
    agent._abort_request_openai_client = MagicMock()

    # Use the monotonic clock for the duration check: time.time() follows the
    # wall clock, which can jump (NTP step, manual change) and make the
    # elapsed-time assertion spuriously fail or pass.
    started = time.monotonic()
    # pytest.raises is what proves the forced RemoteProtocolError did NOT
    # surface as the raised error.
    with pytest.raises(InterruptedError):
        cch.interruptible_api_call(agent, {"model": "x", "messages": []})
    elapsed = time.monotonic() - started

    # The request must have been attempted exactly once.
    assert create_calls["n"] == 1
    # And the interrupt must be honoured quickly rather than after a long stall.
    assert elapsed < 3.0, f"interrupt took {elapsed:.1f}s — should be near-instant"
|
|
|
|
|
|
def test_normal_transient_error_still_raises_when_not_cancelled():
    """A transport error with no interrupt pending must reach the caller.

    Counterpart to the cancellation tests: with ``_interrupt_requested`` left
    False, ``httpx.RemoteProtocolError`` raised by the client has to propagate
    unchanged (so the outer retry loop can recover).
    """
    agent = _make_agent()
    agent._interrupt_requested = False
    agent._close_request_openai_client = MagicMock()
    agent._abort_request_openai_client = MagicMock()

    # Client whose create() fails immediately with a real-looking drop.
    network_drop = httpx.RemoteProtocolError("genuine network drop")
    dropping_client = MagicMock()
    dropping_client.chat.completions.create.side_effect = network_drop
    agent._create_request_openai_client.return_value = dropping_client

    request = {"model": "x", "messages": []}
    with pytest.raises(httpx.RemoteProtocolError):
        cch.interruptible_api_call(agent, request)
|
|
|
|
|
|
def test_request_cancelled_token_is_request_local():
    """Cancellation state from one call must not leak into the next call.

    Turn A is interrupted mid-call and must raise ``InterruptedError``.
    Turn B reuses the same agent after ``_interrupt_requested`` has been reset
    to False (as a turn boundary would) and hits a genuine transport error,
    which must still surface as ``httpx.RemoteProtocolError``. Together the
    two calls confirm that independent requests do not share cancellation
    state.
    """
    agent = _make_agent()
    agent._close_request_openai_client = MagicMock()
    agent._abort_request_openai_client = MagicMock()

    # --- Turn A: interrupted while the request is in flight. ---
    def _interrupted_create(**kwargs):
        agent._interrupt_requested = True
        time.sleep(0.3)
        raise httpx.RemoteProtocolError("forced close turn A")

    turn_a_client = MagicMock()
    turn_a_client.chat.completions.create.side_effect = _interrupted_create
    agent._create_request_openai_client.return_value = turn_a_client

    with pytest.raises(InterruptedError):
        cch.interruptible_api_call(agent, {"model": "x", "messages": []})

    # --- Turn B: flag cleared, so a genuine error must surface; nothing ---
    # --- from turn A's cancellation may carry over.                     ---
    agent._interrupt_requested = False
    turn_b_client = MagicMock()
    turn_b_client.chat.completions.create.side_effect = httpx.RemoteProtocolError(
        "genuine drop turn B"
    )
    agent._create_request_openai_client.return_value = turn_b_client

    with pytest.raises(httpx.RemoteProtocolError):
        cch.interruptible_api_call(agent, {"model": "x", "messages": []})
|