# hermes-agent/tests/agent/test_cascading_interrupt_6600.py

"""Regression guard for the cascading-interrupt hang (PR #6600).

Original diagnosis and fix by Kristian Vastveit (@kristianvast) in PR #6600,
against the then-inline ``_interruptible_api_call`` /
``_interruptible_streaming_api_call`` methods in run_agent.py. Those methods
have since been extracted into ``agent/chat_completion_helpers.py``, so the
fix is reapplied there and these tests target the extracted functions.

The bug: when ``agent.interrupt()`` fires during an active LLM call, the main
poll loop force-closes the worker-local httpx client to stop token generation.
That raises a transport error (RemoteProtocolError) on the worker — the
EXPECTED consequence of our own close, not a network bug. The streaming retry
loop misclassified it as a transient connection error and retried, each doomed
retry stalling for the full stream-stale timeout (up to 300s). Because the
gateway caches AIAgent instances per session, the stale worker outlived the
turn and raced the next turn's request — the root of the multi-minute
cascading-interrupt hang.

The fix: a request-local ``_request_cancelled`` token set by the poll loop
right before the force-close. The worker's exception handler checks it and
exits cleanly (no retry, no fallback, no "reconnecting" status) instead of
treating the forced error as transient.
"""
import threading
import time
import types
from unittest.mock import MagicMock

import httpx
import pytest

from agent import chat_completion_helpers as cch


class _FakeInterruptError(Exception):
    """Placeholder exception type modelling the transport failure that a
    forced client close produces on the worker side."""

    pass


def _make_agent():
    """Build a ``MagicMock`` that stands in for the agent.

    Only the attributes configured below are pinned to concrete values; any
    other attribute is an auto-created MagicMock child. The real API call
    path is mocked separately by each test.
    """
    stub = MagicMock()
    stub.configure_mock(
        **{
            "api_mode": "chat_completions",
            "_interrupt_requested": False,
            "verbose_logging": False,
            # Stale-timeout computation answers with a benign fixed value.
            "_compute_non_stream_stale_timeout.return_value": 5.0,
        }
    )
    return stub


def test_non_streaming_cancel_does_not_surface_network_error():
    """A force-close during a non-streaming call must raise InterruptedError,
    not the swallowed transport error.

    The fake ``create`` sets ``agent._interrupt_requested`` from inside the
    call and then raises ``httpx.RemoteProtocolError``, imitating an
    interrupt that arrives mid-request followed by the transport error a
    forced close produces. The helper is expected to report the interruption,
    call ``create`` exactly once (no retry), and return promptly.
    """
    agent = _make_agent()
    fake_client = MagicMock()

    def _create(**kwargs):
        # Simulate the main thread firing an interrupt mid-call, then the
        # force-close raising a transport error on this worker.
        agent._interrupt_requested = True
        # Let the poll loop observe the interrupt + force-close.
        # NOTE(review): timing-based; presumes the helper polls the flag
        # well within 0.3s — confirm against the helper's poll interval.
        time.sleep(0.3)
        raise httpx.RemoteProtocolError("peer closed connection")

    create = fake_client.chat.completions.create
    create.side_effect = _create
    agent._create_request_openai_client.return_value = fake_client
    agent._close_request_openai_client = MagicMock()
    agent._abort_request_openai_client = MagicMock()

    # Monotonic clock: a wall-clock adjustment during the test must not be
    # able to distort the measured duration.
    started = time.monotonic()
    # The forced RemoteProtocolError must NOT surface as the raised error.
    with pytest.raises(InterruptedError):
        cch.interruptible_api_call(agent, {"model": "x", "messages": []})
    elapsed = time.monotonic() - started

    # Exactly one attempt: the forced error must not trigger a retry.
    assert create.call_count == 1
    assert elapsed < 3.0, f"interrupt took {elapsed:.1f}s — should be near-instant"


def test_normal_transient_error_still_raises_when_not_cancelled():
    """Without any interrupt, a genuine transport failure has to propagate.

    Guards against over-suppression: the error must reach the caller so the
    outer retry loop can recover from it.
    """
    agent = _make_agent()
    # No interrupt is pending at any point during this call.
    agent._interrupt_requested = False
    agent._close_request_openai_client = MagicMock()
    agent._abort_request_openai_client = MagicMock()

    # The client fails immediately with a real-looking network error.
    network_failure = httpx.RemoteProtocolError("genuine network drop")
    client = MagicMock()
    client.chat.completions.create.side_effect = network_failure
    agent._create_request_openai_client.return_value = client

    request = {"model": "x", "messages": []}
    with pytest.raises(httpx.RemoteProtocolError):
        cch.interruptible_api_call(agent, request)


def test_request_cancelled_token_is_request_local():
    """Cancellation state must belong to a single call, not to the agent.

    Two consecutive calls are made on the same agent. Turn A is interrupted
    from inside ``create`` and must end in ``InterruptedError``. Turn B runs
    with the interrupt flag cleared and a client that fails with a genuine
    transport error; that error must surface unchanged, showing that turn
    A's cancellation did not carry over into the next call.
    """
    agent = _make_agent()
    agent._close_request_openai_client = MagicMock()
    agent._abort_request_openai_client = MagicMock()

    # --- Turn A: interrupt raised while the request is in flight. ---
    def _interrupted_create(**kwargs):
        agent._interrupt_requested = True
        time.sleep(0.3)
        raise httpx.RemoteProtocolError("forced close turn A")

    turn_a_client = MagicMock()
    turn_a_client.chat.completions.create.side_effect = _interrupted_create
    agent._create_request_openai_client.return_value = turn_a_client

    with pytest.raises(InterruptedError):
        cch.interruptible_api_call(agent, {"model": "x", "messages": []})

    # --- Turn B: flag cleared at the turn boundary, real network error. ---
    # The failure must propagate; turn A's cancellation must not leak here.
    agent._interrupt_requested = False
    genuine_drop = httpx.RemoteProtocolError("genuine drop turn B")
    turn_b_client = MagicMock()
    turn_b_client.chat.completions.create.side_effect = genuine_drop
    agent._create_request_openai_client.return_value = turn_b_client

    with pytest.raises(httpx.RemoteProtocolError):
        cch.interruptible_api_call(agent, {"model": "x", "messages": []})