feat: add network.force_ipv4 config to fix IPv6 timeout issues (#8196)

On servers with broken or unreachable IPv6, Python's socket.getaddrinfo
returns AAAA records first. urllib/httpx/requests all try IPv6 connections
first and hang for the full TCP timeout before falling back to IPv4. This
affects web_extract, web_search, the OpenAI SDK, and all HTTP tools.

Adds network.force_ipv4 config option (default: false) that monkey-patches
socket.getaddrinfo to resolve as AF_INET when the caller didn't specify a
family. Falls back to full resolution if no A record exists, so pure-IPv6
hosts still work.

Applied early at all three entry points (CLI, gateway, cron scheduler)
before any HTTP clients are created.

Reported by user @29n — Chinese Ubuntu server with unreachable IPv6 causing
timeouts on lobste.rs and other IPv6-enabled sites while Google/GitHub
worked fine (IPv4-only resolution).
This commit is contained in:
Teknium 2026-04-11 23:12:11 -07:00 committed by GitHub
parent 1cec910b6a
commit 1ca9b19750
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 197 additions and 0 deletions

View file

@ -626,6 +626,15 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
except Exception as e: except Exception as e:
logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e) logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)
# Apply IPv4 preference if configured. Best-effort: any exception raised
# in this block (failed import, unexpected config shape) is swallowed below.
try:
from hermes_constants import apply_ipv4_preference
_net_cfg = _cfg.get("network", {})
# Only act when "network" is a mapping; any other value is ignored.
if isinstance(_net_cfg, dict) and _net_cfg.get("force_ipv4"):
apply_ipv4_preference(force=True)
except Exception:
pass
# Reasoning config from config.yaml # Reasoning config from config.yaml
from hermes_constants import parse_reasoning_effort from hermes_constants import parse_reasoning_effort
effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip() effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()

View file

@ -206,6 +206,15 @@ if _config_path.exists():
except Exception: except Exception:
pass # Non-fatal; gateway can still run with .env values pass # Non-fatal; gateway can still run with .env values
# Apply IPv4 preference if configured (before any HTTP clients are created).
# Best-effort: any exception raised in this block is swallowed below.
try:
from hermes_constants import apply_ipv4_preference
# `'_cfg' in dir()` tests whether the name is bound in the current scope;
# an empty dict stands in when it is not (presumably because config.yaml
# was absent or failed to load — verify against the preceding code).
_network_cfg = (_cfg if '_cfg' in dir() else {}).get("network", {})
# Only act when "network" is a mapping; any other value is ignored.
if isinstance(_network_cfg, dict) and _network_cfg.get("force_ipv4"):
apply_ipv4_preference(force=True)
except Exception:
pass
# Validate config structure early — log warnings so gateway operators see problems # Validate config structure early — log warnings so gateway operators see problems
try: try:
from hermes_cli.config import print_config_warnings from hermes_cli.config import print_config_warnings

View file

@ -706,6 +706,14 @@ DEFAULT_CONFIG = {
"backup_count": 3, # Number of rotated backup files to keep "backup_count": 3, # Number of rotated backup files to keep
}, },
# Network settings — workarounds for connectivity issues.
"network": {
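# Force IPv4 connections. On servers with broken or unreachable IPv6,
# Python tries AAAA records first and hangs for the full TCP timeout
# before falling back to IPv4. Set to true to resolve hostnames as IPv4
# first; IPv6 is still used when a host has no A record or when a caller
# explicitly requests an address family.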
"force_ipv4": False,
},
# Config schema version - bump this when adding new required fields # Config schema version - bump this when adding new required fields
"_config_version": 16, "_config_version": 16,
} }

View file

@ -151,6 +151,18 @@ try:
except Exception: except Exception:
pass # best-effort — don't crash the CLI if logging setup fails pass # best-effort — don't crash the CLI if logging setup fails
# Apply IPv4 preference early, before any HTTP clients are created.
try:
# Imported under underscore-prefixed aliases; these two names are not
# deleted afterwards, only the temporaries below are.
from hermes_cli.config import load_config as _load_config_early
from hermes_constants import apply_ipv4_preference as _apply_ipv4
_early_cfg = _load_config_early()
_net = _early_cfg.get("network", {})
# Only act when "network" is a mapping; any other value is ignored.
if isinstance(_net, dict) and _net.get("force_ipv4"):
_apply_ipv4(force=True)
# Drop the temporaries. This line is skipped if anything above raised.
del _early_cfg, _net
except Exception:
pass # best-effort — don't crash if config isn't available yet
import logging import logging
import time as _time import time as _time
from datetime import datetime from datetime import datetime

View file

@ -216,6 +216,51 @@ def get_env_path() -> Path:
return get_hermes_home() / ".env" return get_hermes_home() / ".env"
# ─── Network Preferences ─────────────────────────────────────────────────────
def apply_ipv4_preference(force: bool = False) -> None:
    """Make ``socket.getaddrinfo`` resolve IPv4 first when *force* is true.

    Motivation: on servers whose IPv6 is broken or unreachable, clients try
    the AAAA results first and hang for the full TCP timeout before falling
    back to IPv4. Anything that resolves names through
    ``socket.getaddrinfo`` is affected.

    With ``force=True`` the module attribute ``socket.getaddrinfo`` is
    replaced by a wrapper that:

    * turns lookups made with the default family (``AF_UNSPEC``) into
      ``AF_INET`` lookups;
    * repeats the lookup unchanged if the ``AF_INET`` attempt raises
      ``socket.gaierror``, so hosts without an A record still resolve;
    * forwards lookups that name any other family untouched.

    With ``force=False`` (the default) nothing happens. Calling this more
    than once is safe: the wrapper carries a marker attribute and is never
    wrapped again.

    Enable with ``network.force_ipv4: true`` in ``config.yaml``.
    """
    if not force:
        return

    import socket

    resolve = socket.getaddrinfo
    if getattr(resolve, "_hermes_ipv4_patched", False):
        # An earlier call already installed the wrapper.
        return

    def _ipv4_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
        # Parameter names mirror socket.getaddrinfo so keyword callers work.
        if family != socket.AF_UNSPEC:
            # The caller asked for a specific family: honour it as-is.
            return resolve(host, port, family, type, proto, flags)
        try:
            return resolve(host, port, socket.AF_INET, type, proto, flags)
        except socket.gaierror:
            # IPv4-only lookup failed: retry with the caller's original,
            # unfiltered request so pure-IPv6 hosts keep working.
            return resolve(host, port, family, type, proto, flags)

    # Marker read by the guard above on subsequent calls.
    setattr(_ipv4_getaddrinfo, "_hermes_ipv4_patched", True)
    socket.getaddrinfo = _ipv4_getaddrinfo  # type: ignore[assignment]
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models" OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"

View file

@ -0,0 +1,114 @@
"""Tests for network.force_ipv4 — the socket.getaddrinfo monkey-patch."""
import importlib
import socket
from unittest.mock import patch, MagicMock
import pytest
def _reload_constants():
    """Re-execute ``hermes_constants`` and hand back the module object.

    Gives a caller a freshly evaluated ``apply_ipv4_preference``.
    """
    import hermes_constants as constants_module

    importlib.reload(constants_module)
    return constants_module
class TestApplyIPv4Preference:
    """Tests for apply_ipv4_preference().

    Each test either lets the function under test patch
    ``socket.getaddrinfo`` or first swaps in a fake resolver; the
    setup/teardown pair restores the saved resolver afterwards so no test
    leaves the ``socket`` module patched.
    """

    def setup_method(self):
        """Save the original getaddrinfo before each test."""
        self._original = socket.getaddrinfo

    def teardown_method(self):
        """Restore the original getaddrinfo after each test."""
        socket.getaddrinfo = self._original

    def test_noop_when_force_false(self):
        """No patch when force=False."""
        from hermes_constants import apply_ipv4_preference

        original = socket.getaddrinfo
        apply_ipv4_preference(force=False)
        assert socket.getaddrinfo is original

    def test_patches_getaddrinfo_when_forced(self):
        """Patches socket.getaddrinfo when force=True."""
        from hermes_constants import apply_ipv4_preference

        original = socket.getaddrinfo
        apply_ipv4_preference(force=True)
        assert socket.getaddrinfo is not original
        assert getattr(socket.getaddrinfo, "_hermes_ipv4_patched", False) is True

    def test_double_patch_is_safe(self):
        """Calling apply twice doesn't double-wrap."""
        from hermes_constants import apply_ipv4_preference

        apply_ipv4_preference(force=True)
        first_patch = socket.getaddrinfo
        apply_ipv4_preference(force=True)
        assert socket.getaddrinfo is first_patch

    def test_af_unspec_becomes_af_inet(self):
        """AF_UNSPEC (default) calls get rewritten to AF_INET."""
        from hermes_constants import apply_ipv4_preference

        calls = []

        def mock_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
            calls.append(family)
            return [(socket.AF_INET, socket.SOCK_STREAM, 6, "", ("93.184.216.34", 80))]

        socket.getaddrinfo = mock_getaddrinfo
        apply_ipv4_preference(force=True)

        # Call with default family (AF_UNSPEC = 0)
        socket.getaddrinfo("example.com", 80)
        # Exactly one underlying lookup, and it must be the IPv4 one.
        assert calls == [socket.AF_INET], "AF_UNSPEC should be rewritten to AF_INET"

    def test_explicit_family_preserved(self):
        """Explicit AF_INET6 requests are not intercepted."""
        from hermes_constants import apply_ipv4_preference

        calls = []

        def mock_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
            calls.append(family)
            return [(family, socket.SOCK_STREAM, 6, "", ("::1", 80))]

        socket.getaddrinfo = mock_getaddrinfo
        apply_ipv4_preference(force=True)

        socket.getaddrinfo("example.com", 80, family=socket.AF_INET6)
        # Exactly one underlying lookup, carrying the caller's family.
        assert calls == [socket.AF_INET6], "Explicit AF_INET6 should pass through"

    def test_fallback_on_gaierror(self):
        """Falls back to AF_UNSPEC if AF_INET resolution fails."""
        from hermes_constants import apply_ipv4_preference

        call_families = []

        def mock_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
            call_families.append(family)
            if family == socket.AF_INET:
                raise socket.gaierror("No A record")
            # AF_UNSPEC fallback returns IPv6
            return [(socket.AF_INET6, socket.SOCK_STREAM, 6, "", ("::1", 80))]

        socket.getaddrinfo = mock_getaddrinfo
        apply_ipv4_preference(force=True)

        result = socket.getaddrinfo("ipv6only.example.com", 80)
        # Should have tried AF_INET first, then fallen back to AF_UNSPEC
        assert call_families == [socket.AF_INET, 0]
        assert result[0][0] == socket.AF_INET6
class TestConfigDefault:
    """Check that DEFAULT_CONFIG ships a ``network`` section."""

    def test_network_section_in_default_config(self):
        """``network.force_ipv4`` is present and is exactly ``False``."""
        from hermes_cli.config import DEFAULT_CONFIG as defaults

        assert "network" in defaults
        network_section = defaults["network"]
        assert network_section["force_ipv4"] is False