hermes-agent/tests/agent/test_stream_read_timeout_floor.py

"""Stream read timeout must never preempt the stale-stream detector.

Reasoning models (e.g. Opus) routinely pause mid-stream for minutes during
extended thinking.  The stale-stream detector is deliberately scaled up to
tolerate this (180s base, raised to 240s/300s for large contexts).  The httpx
socket read timeout, however, defaulted to a flat 120s for cloud providers and
fired *first* — tearing down a healthy reasoning stream before the stale
detector (which owns retry + diagnostics) could act.

These tests pin the invariant: for a cloud provider on the default read
timeout, the httpx socket read timeout is floored at the stale-stream timeout
so it can never fire before the detector.  They mirror the inline logic in
``agent/chat_completion_helpers.py`` (the real builder lives deep inside a
worker thread, so — like ``test_local_stream_timeout.py`` — the resolution is
reproduced here rather than driven end-to-end).
"""

import os

import pytest

from agent.model_metadata import is_local_endpoint


def _resolve_stale_timeout(base_url, est_tokens, stale_base=180.0):
    """Mirror of the stale-stream detector resolution."""
    if stale_base == 180.0 and base_url and is_local_endpoint(base_url):
        return float("inf")  # detector disabled for local providers
    if est_tokens > 100_000:
        return max(stale_base, 300.0)
    if est_tokens > 50_000:
        return max(stale_base, 240.0)
    return stale_base


def _resolve_read_timeout(base_url, stale_timeout, base_timeout=1800.0):
    """Mirror of the httpx socket read-timeout builder (cloud branch)."""
    read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
    if read_timeout == 120.0 and base_url and is_local_endpoint(base_url):
        read_timeout = base_timeout
    elif (
        read_timeout == 120.0
        and stale_timeout is not None
        and stale_timeout != float("inf")
        and stale_timeout > read_timeout
    ):
        read_timeout = stale_timeout
    return read_timeout


CLOUD_URLS = [
    "https://api.githubcopilot.com",
    "https://api.openai.com",
    "https://openrouter.ai/api",
    "https://api.anthropic.com",
]


class TestCloudReadTimeoutFloor:
    @pytest.fixture(autouse=True)
    def _clear_env(self):
        with pytest.MonkeyPatch.context() as mp:
            mp.delenv("HERMES_STREAM_READ_TIMEOUT", raising=False)
            yield

    @pytest.mark.parametrize("base_url", CLOUD_URLS)
    @pytest.mark.parametrize("est_tokens", [0, 10_000, 60_000, 150_000])
    def test_read_timeout_never_below_stale(self, base_url, est_tokens):
        """Core invariant: the socket read timeout >= the stale detector."""
        stale = _resolve_stale_timeout(base_url, est_tokens)
        read = _resolve_read_timeout(base_url, stale)
        assert read >= stale

    @pytest.mark.parametrize("base_url", CLOUD_URLS)
    def test_small_context_floored_to_stale_base(self, base_url):
        """Reported case: ~120s timeouts on Copilot are raised to the 180s base."""
        stale = _resolve_stale_timeout(base_url, est_tokens=37_000)
        read = _resolve_read_timeout(base_url, stale)
        assert read == 180.0

    @pytest.mark.parametrize("base_url", CLOUD_URLS)
    def test_large_context_tracks_scaled_stale(self, base_url):
        """Big contexts scale the stale detector; the read timeout follows."""
        assert _resolve_read_timeout(base_url, _resolve_stale_timeout(base_url, 60_000)) == 240.0
        assert _resolve_read_timeout(base_url, _resolve_stale_timeout(base_url, 150_000)) == 300.0

    def test_user_override_is_respected(self):
        """An explicit HERMES_STREAM_READ_TIMEOUT is never overridden by the floor."""
        with pytest.MonkeyPatch.context() as mp:
            mp.setenv("HERMES_STREAM_READ_TIMEOUT", "90")
            stale = _resolve_stale_timeout("https://api.githubcopilot.com", est_tokens=0)
            assert _resolve_read_timeout("https://api.githubcopilot.com", stale) == 90.0


class TestLocalUnaffected:
    @pytest.fixture(autouse=True)
    def _clear_env(self):
        with pytest.MonkeyPatch.context() as mp:
            mp.delenv("HERMES_STREAM_READ_TIMEOUT", raising=False)
            yield

    def test_local_still_raised_to_base(self):
        """Local providers keep their existing behavior (raise to base timeout)."""
        stale = _resolve_stale_timeout("http://localhost:11434", est_tokens=0)
        assert stale == float("inf")  # detector disabled for local
        read = _resolve_read_timeout("http://localhost:11434", stale)
        assert read == 1800.0  # not clamped by inf

    def test_stale_none_falls_back_to_default(self):
        """If the stale value is unresolved, the read timeout keeps its default."""
        assert _resolve_read_timeout("https://api.githubcopilot.com", None) == 120.0