diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c1c79b6aad0..dfeec87e12d 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -102,6 +102,7 @@ OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance from agent.credential_pool import load_pool from agent.model_metadata import MINIMUM_CONTEXT_LENGTH, get_model_context_length +from agent.process_bootstrap import build_keepalive_http_client from hermes_cli.config import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL from utils import base_url_host_matches, base_url_hostname, env_float, model_forces_max_completion_tokens, normalize_proxy_env_vars @@ -109,6 +110,23 @@ from utils import base_url_host_matches, base_url_hostname, env_float, model_for logger = logging.getLogger(__name__) +def _openai_http_client_kwargs( + base_url: Optional[str], + *, + async_mode: bool = False, +) -> Dict[str, Any]: + """Inject keepalive httpx client with env-only proxy (not macOS system proxy).""" + client = build_keepalive_http_client(str(base_url or ""), async_mode=async_mode) + if client is None: + return {} + return {"http_client": client} + + +def _create_openai_client(*, api_key: str, base_url: str, **kwargs: Any) -> Any: + kwargs = {**_openai_http_client_kwargs(base_url), **kwargs} + return OpenAI(api_key=api_key, base_url=base_url, **kwargs) + + # ── Interrupt protection for atomic auxiliary tasks ────────────────────── # Some auxiliary tasks must NOT be aborted mid-flight by a gateway interrupt # (e.g. an incoming user message while the agent is busy). Context @@ -1614,7 +1632,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: _merged_aux = _apply_user_default_headers(extra.get("default_headers")) if _merged_aux: extra["default_headers"] = _merged_aux - _client = OpenAI(api_key=api_key, base_url=base_url, **extra) + _client = _create_openai_client(api_key=api_key, base_url=base_url, **extra) _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url) return _client, model @@ -1654,7 +1672,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: _merged_aux2 = _apply_user_default_headers(extra.get("default_headers")) if _merged_aux2: extra["default_headers"] = _merged_aux2 - _client = OpenAI(api_key=api_key, base_url=base_url, **extra) + _client = _create_openai_client(api_key=api_key, base_url=base_url, **extra) _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url) return _client, model @@ -1672,7 +1690,7 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op if or_key: base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL logger.debug("Auxiliary client: OpenRouter via pool") - return OpenAI(api_key=or_key, base_url=base_url, + return _create_openai_client(api_key=or_key, base_url=base_url, default_headers=build_or_headers()), model or _OPENROUTER_MODEL # Pool exists but is exhausted (no usable runtime key) — fall through to # the OPENROUTER_API_KEY env-var path rather than failing outright. @@ -1683,7 +1701,7 @@ def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Op _mark_provider_unhealthy("openrouter", ttl=60) return None, None logger.debug("Auxiliary client: OpenRouter") - return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, + return _create_openai_client(api_key=or_key, base_url=OPENROUTER_BASE_URL, default_headers=build_or_headers()), model or _OPENROUTER_MODEL @@ -1776,7 +1794,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: return None, None base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/") return ( - OpenAI( + _create_openai_client( api_key=api_key, base_url=base_url, ), @@ -2053,7 +2071,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: if _custom_headers: _extra["default_headers"] = _custom_headers if custom_mode == "codex_responses": - real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra) + real_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra) return CodexAuxiliaryClient(real_client, model), model if custom_mode == "anthropic_messages": # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM, @@ -2067,14 +2085,14 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: "Custom endpoint declares api_mode=anthropic_messages but the " "anthropic SDK is not installed — falling back to OpenAI-wire." ) - return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model + return _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra), model return ( AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False), model, ) # URL-based anthropic detection for custom endpoints that didn't set # api_mode explicitly (e.g. kimi.com/coding reached via custom config). - _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra) + _fallback_client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **_extra) _fallback_client = _maybe_wrap_anthropic( _fallback_client, model, custom_key, custom_base, custom_mode, ) @@ -2103,7 +2121,7 @@ def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str return None, None api_key, base_url = resolved logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model) - real_client = OpenAI(api_key=api_key, base_url=base_url) + real_client = _create_openai_client(api_key=api_key, base_url=base_url) return CodexAuxiliaryClient(real_client, model), model @@ -2140,7 +2158,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]: return None, None base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model) - real_client = OpenAI( + real_client = _create_openai_client( api_key=codex_token, base_url=base_url, default_headers=_codex_cloudflare_headers(codex_token), @@ -2240,7 +2258,7 @@ def _try_azure_foundry( if _dq: extra["default_query"] = _dq - client = OpenAI(api_key=api_key, base_url=_clean_base, **extra) + client = _create_openai_client(api_key=api_key, base_url=_clean_base, **extra) if runtime_api_mode == "codex_responses": # GPT-5.x / o-series / codex models on Azure Foundry are @@ -3802,6 +3820,10 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False): _merged_async = _apply_user_default_headers(async_kwargs.get("default_headers")) if _merged_async: async_kwargs["default_headers"] = _merged_async + async_kwargs = { + **_openai_http_client_kwargs(sync_base_url, async_mode=True), + **async_kwargs, + } return AsyncOpenAI(**async_kwargs), model @@ -4012,7 +4034,7 @@ def resolve_provider_client( "but no Codex OAuth token found (run: hermes model)") return None, None final_model = _normalize_resolved_model(model, provider) - raw_client = OpenAI( + raw_client = _create_openai_client( api_key=codex_token, base_url=_CODEX_AUX_BASE_URL, default_headers=_codex_cloudflare_headers(codex_token), @@ -4093,7 +4115,7 @@ def resolve_provider_client( _merged_custom = _apply_user_default_headers(extra.get("default_headers")) if _merged_custom: extra["default_headers"] = _merged_custom - client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra) + client = _create_openai_client(api_key=custom_key, base_url=_clean_base, **extra) client = _wrap_if_needed(client, final_model, custom_base, custom_key) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) @@ -4197,7 +4219,7 @@ def resolve_provider_client( _fb_headers = _apply_user_default_headers(_fb_extra.get("default_headers")) if _fb_headers: _fb_extra["default_headers"] = _fb_headers - client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra) + client = _create_openai_client(api_key=custom_key, base_url=_fb_clean, **_fb_extra) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) sync_anthropic = AnthropicAuxiliaryClient( @@ -4206,7 +4228,7 @@ def resolve_provider_client( if async_mode: return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model return sync_anthropic, final_model - client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2) + client = _create_openai_client(api_key=custom_key, base_url=_clean_base2, **_extra2) # codex_responses or inherited auto-detect (via _wrap_if_needed). # _wrap_if_needed reads the closed-over `api_mode` (the task-level # override). Named-provider entry api_mode=codex_responses also @@ -4348,7 +4370,7 @@ def resolve_provider_client( _merged_main = _apply_user_default_headers(headers) if _merged_main: headers = _merged_main - client = OpenAI(api_key=api_key, base_url=base_url, + client = _create_openai_client(api_key=api_key, base_url=base_url, **({"default_headers": headers} if headers else {})) # Copilot GPT-5+ models (except gpt-5-mini) require the Responses @@ -4884,7 +4906,7 @@ def _refresh_nous_auxiliary_client( return None, model fresh_key, fresh_base_url = runtime - sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url) + sync_client = _create_openai_client(api_key=fresh_key, base_url=fresh_base_url) final_model = model current_loop = None diff --git a/agent/process_bootstrap.py b/agent/process_bootstrap.py index fdd9053f5d8..ce238a9d405 100644 --- a/agent/process_bootstrap.py +++ b/agent/process_bootstrap.py @@ -26,7 +26,7 @@ from __future__ import annotations import os import sys import urllib.request -from typing import Optional +from typing import Any, Optional from utils import base_url_hostname, normalize_proxy_url @@ -142,6 +142,46 @@ def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]: return proxy +def build_keepalive_http_client( + base_url: str = "", + *, + async_mode: bool = False, +) -> Optional[Any]: + """Build an httpx client for OpenAI SDK calls with env-only proxy policy. + + Uses explicit ``HTTPS_PROXY`` / ``NO_PROXY`` env vars via + ``_get_proxy_for_base_url``. A custom transport disables httpx's default + ``trust_env`` path, so macOS system proxy settings from + ``urllib.request.getproxies()`` (which omit the ExceptionsList) are not + applied. Mirrors ``AIAgent._build_keepalive_http_client``. + """ + try: + import httpx + import socket + + if "api.githubcopilot.com" in str(base_url or "").lower(): + client_cls = httpx.AsyncClient if async_mode else httpx.Client + return client_cls() + + sock_opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)] + if hasattr(socket, "TCP_KEEPIDLE"): + sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30)) + sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 10)) + sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3)) + elif hasattr(socket, "TCP_KEEPALIVE"): + sock_opts.append((socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 30)) + + proxy = _get_proxy_for_base_url(base_url) + transport_cls = httpx.AsyncHTTPTransport if async_mode else httpx.HTTPTransport + client_cls = httpx.AsyncClient if async_mode else httpx.Client + return client_cls( + transport=transport_cls(socket_options=sock_opts), + proxy=proxy, + ) + except Exception: + return None + + def _install_safe_stdio() -> None: """Wrap stdout/stderr so best-effort console output cannot crash the agent.""" for stream_name in ("stdout", "stderr"): @@ -164,4 +204,5 @@ __all__ = [ "_install_safe_stdio", "_get_proxy_from_env", "_get_proxy_for_base_url", + "build_keepalive_http_client", ]