hermes-agent/tests/agent/test_local_stream_timeout.py
ismell0992-afk 6513138f26 fix(agent): recognize Tailscale CGNAT (100.64.0.0/10) as local for Ollama timeouts
`is_local_endpoint()` leaned on `ipaddress.is_private`, which classifies
RFC-1918 ranges and link-local as private but deliberately excludes the
RFC 6598 CGNAT block (100.64.0.0/10) — the range Tailscale uses for its
mesh IPs. As a result, Ollama reached over Tailscale (e.g.
`http://100.77.243.5:11434`) was treated as remote and missed the
automatic stream-read / stale-stream timeout bumps, so cold model load
plus long prefill would trip the 300 s watchdog before the first token.

Add a module-level `_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")`
(built once) and extend `is_local_endpoint()` to match the block both
via the parsed-`IPv4Address` path and the existing bare-string fallback
(for symmetry with the 10/172/192 checks). Also hoist the previously
function-local `import ipaddress` to module scope now that it's used by
the constant.

Extend `TestIsLocalEndpoint` with a CGNAT positive set (lower bound,
representative host, MagicDNS anchor, upper bound) and a near-miss
negative set (just below 100.64.0.0, just above 100.127.255.255, well
outside the block, and first-octet-wrong).
2026-04-22 14:46:10 -07:00

130 lines
5.6 KiB
Python

"""Tests for local provider stream read timeout auto-detection.
When a local LLM provider is detected (Ollama, llama.cpp, vLLM, etc.),
the httpx stream read timeout should be automatically increased from the
default 60s to HERMES_API_TIMEOUT (1800s) to avoid premature connection
kills during long prefill phases.
"""
import os
import pytest
from unittest.mock import patch
from agent.model_metadata import is_local_endpoint
class TestLocalStreamReadTimeout:
"""Verify stream read timeout auto-detection logic."""
@pytest.mark.parametrize("base_url", [
"http://localhost:11434",
"http://127.0.0.1:8080",
"http://0.0.0.0:5000",
"http://192.168.1.100:8000",
"http://10.0.0.5:1234",
"http://host.docker.internal:11434",
"http://host.containers.internal:11434",
"http://host.lima.internal:11434",
])
def test_local_endpoint_bumps_read_timeout(self, base_url):
"""Local endpoint + default timeout -> bumps to base_timeout."""
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_STREAM_READ_TIMEOUT", None)
_base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
_stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url):
_stream_read_timeout = _base_timeout
assert _stream_read_timeout == 1800.0
def test_user_override_respected_for_local(self):
"""User sets HERMES_STREAM_READ_TIMEOUT -> keep their value even for local."""
with patch.dict(os.environ, {"HERMES_STREAM_READ_TIMEOUT": "300"}, clear=False):
_base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
_stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
base_url = "http://localhost:11434"
if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url):
_stream_read_timeout = _base_timeout
assert _stream_read_timeout == 300.0
@pytest.mark.parametrize("base_url", [
"https://api.openai.com",
"https://openrouter.ai/api",
"https://api.anthropic.com",
])
def test_remote_endpoint_keeps_default(self, base_url):
"""Remote endpoint -> keep 120s default."""
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_STREAM_READ_TIMEOUT", None)
_base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
_stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url):
_stream_read_timeout = _base_timeout
assert _stream_read_timeout == 120.0
def test_empty_base_url_keeps_default(self):
"""No base_url set -> keep 120s default."""
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_STREAM_READ_TIMEOUT", None)
_base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
_stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0))
base_url = ""
if _stream_read_timeout == 120.0 and base_url and is_local_endpoint(base_url):
_stream_read_timeout = _base_timeout
assert _stream_read_timeout == 120.0
class TestIsLocalEndpoint:
"""Direct unit tests for is_local_endpoint."""
@pytest.mark.parametrize("url", [
"http://localhost:11434",
"http://127.0.0.1:8080",
"http://0.0.0.0:5000",
"http://[::1]:11434",
"http://192.168.1.100:8000",
"http://10.0.0.5:1234",
"http://172.17.0.1:11434",
])
def test_classic_local_addresses(self, url):
assert is_local_endpoint(url) is True
@pytest.mark.parametrize("url", [
"http://host.docker.internal:11434",
"http://host.docker.internal:8080/v1",
"http://gateway.docker.internal:11434",
"http://host.containers.internal:11434",
"http://host.lima.internal:11434",
])
def test_container_dns_names(self, url):
assert is_local_endpoint(url) is True
@pytest.mark.parametrize("url", [
"https://api.openai.com",
"https://openrouter.ai/api",
"https://api.anthropic.com",
"https://evil.docker.internal.example.com",
])
def test_remote_endpoints(self, url):
assert is_local_endpoint(url) is False
@pytest.mark.parametrize("url", [
"http://100.64.0.0:11434", # lower bound of CGNAT block
"http://100.64.0.1:11434/v1", # lower bound +1
"http://100.77.243.5:11434", # representative Tailscale host
"https://100.100.100.100:443", # Tailscale MagicDNS anchor
"https://100.127.255.254:443", # upper bound -1
"http://100.127.255.255:11434", # upper bound of CGNAT block
])
def test_tailscale_cgnat_is_local(self, url):
"""Tailscale 100.64.0.0/10 should be treated as local for timeout bumps."""
assert is_local_endpoint(url) is True
@pytest.mark.parametrize("url", [
"http://100.63.255.255:11434", # just below CGNAT block
"http://100.128.0.1:11434", # just above CGNAT block
"http://100.200.0.1:11434", # well outside CGNAT
"http://99.64.0.1:11434", # first octet wrong
])
def test_near_but_not_cgnat_is_remote(self, url):
"""Hosts adjacent to but outside 100.64.0.0/10 must not match."""
assert is_local_endpoint(url) is False