From cecf84daf75ab5a3841204e0a96b54a4a696d0b1 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 20 Apr 2026 20:58:01 -0700 Subject: [PATCH] fix: extend hostname-match provider detection across remaining call sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aslaaen's fix in the original PR covered _detect_api_mode_for_url and the two openai/xai sites in run_agent.py. This finishes the sweep: the same substring-match false-positive class (e.g. https://api.openai.com.evil/v1, https://proxy/api.openai.com/v1, https://api.anthropic.com.example/v1) existed in eight more call sites, and the hostname helper was duplicated in two modules. - utils: add shared base_url_hostname() (single source of truth). - hermes_cli/runtime_provider, run_agent: drop local duplicates, import from utils. Reuse the cached AIAgent._base_url_hostname attribute everywhere it's already populated. - agent/auxiliary_client: switch codex-wrap auto-detect, max_completion_tokens gate (auxiliary_max_tokens_param), and custom-endpoint max_tokens kwarg selection to hostname equality. - run_agent: native-anthropic check in the Claude-style model branch and in the AIAgent init provider-auto-detect branch. - agent/model_metadata: Anthropic /v1/models context-length lookup. - hermes_cli/providers.determine_api_mode: anthropic / openai URL heuristics for custom/unknown providers (the /anthropic path-suffix convention for third-party gateways is preserved). - tools/delegate_tool: anthropic detection for delegated subagent runtimes. - hermes_cli/setup, hermes_cli/tools_config: setup-wizard vision-endpoint native-OpenAI detection (paired with deduping the repeated check into a single is_native_openai boolean per branch). Tests: - tests/test_base_url_hostname.py covers the helper directly (path-containing-host, host-suffix, trailing dot, port, case). - tests/hermes_cli/test_determine_api_mode_hostname.py adds the same regression class for determine_api_mode, plus a test that the /anthropic third-party gateway convention still wins. Also: add asslaenn5@gmail.com → Aslaaen to scripts/release.py AUTHOR_MAP. --- agent/auxiliary_client.py | 8 +-- agent/model_metadata.py | 4 +- hermes_cli/providers.py | 7 ++- hermes_cli/runtime_provider.py | 12 +--- hermes_cli/setup.py | 6 +- hermes_cli/tools_config.py | 6 +- run_agent.py | 21 ++----- scripts/release.py | 1 + .../test_determine_api_mode_hostname.py | 43 +++++++++++++++ tests/test_base_url_hostname.py | 55 +++++++++++++++++++ tools/delegate_tool.py | 3 +- utils.py | 22 ++++++++ 12 files changed, 151 insertions(+), 37 deletions(-) create mode 100644 tests/hermes_cli/test_determine_api_mode_hostname.py create mode 100644 tests/test_base_url_hostname.py diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index ea8702cb8..55199e9b9 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -48,6 +48,7 @@ from openai import OpenAI from agent.credential_pool import load_pool from hermes_cli.config import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL +from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -1516,8 +1517,7 @@ def resolve_provider_client( # Auto-detect: api.openai.com + codex model name pattern if api_mode and api_mode != "codex_responses": return False # explicit non-codex mode - normalized_base = (base_url_str or "").strip().lower() - if "api.openai.com" in normalized_base and "openrouter" not in normalized_base: + if base_url_hostname(base_url_str) == "api.openai.com": model_lower = (model_str or "").lower() if "codex" in model_lower: return True @@ -2025,7 +2025,7 @@ def auxiliary_max_tokens_param(value: int) -> dict: # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None - and "api.openai.com" in custom_base.lower()): + and base_url_hostname(custom_base) == "api.openai.com"): return {"max_completion_tokens": value} return {"max_tokens": value} @@ -2460,7 +2460,7 @@ def _build_call_kwargs( # Direct OpenAI api.openai.com with newer models needs max_completion_tokens. if provider == "custom": custom_base = base_url or _current_custom_base_url() - if "api.openai.com" in custom_base.lower(): + if base_url_hostname(custom_base) == "api.openai.com": kwargs["max_completion_tokens"] = max_tokens else: kwargs["max_tokens"] = max_tokens diff --git a/agent/model_metadata.py b/agent/model_metadata.py index c03c5e89c..84cd553c3 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -14,6 +14,8 @@ from urllib.parse import urlparse import requests import yaml +from utils import base_url_hostname + from hermes_constants import OPENROUTER_MODELS_URL logger = logging.getLogger(__name__) @@ -1078,7 +1080,7 @@ def get_model_context_length( # 4. Anthropic /v1/models API (only for regular API keys, not OAuth) if provider == "anthropic" or ( - base_url and "api.anthropic.com" in base_url + base_url and base_url_hostname(base_url) == "api.anthropic.com" ): ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key) if ctx: diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index c701db4d5..ca8b075f5 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -23,6 +23,8 @@ import logging from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple +from utils import base_url_hostname + logger = logging.getLogger(__name__) @@ -434,9 +436,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: # URL-based heuristics for custom / unknown providers if base_url: url_lower = base_url.rstrip("/").lower() - if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + hostname = base_url_hostname(base_url) + if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com": return "anthropic_messages" - if "api.openai.com" in url_lower: + if hostname == "api.openai.com": return "codex_responses" if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower: return "bedrock_converse" diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 57b6873d0..8a7b44fa4 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -6,7 +6,6 @@ import logging import os import re from typing import Any, Dict, Optional -from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -30,20 +29,13 @@ from hermes_cli.auth import ( ) from hermes_cli.config import get_compatible_custom_providers, load_config from hermes_constants import OPENROUTER_BASE_URL +from utils import base_url_hostname def _normalize_custom_provider_name(value: str) -> str: return value.strip().lower().replace(" ", "-") -def _base_url_hostname(base_url: str) -> str: - raw = (base_url or "").strip() - if not raw: - return "" - parsed = urlparse(raw if "://" in raw else f"//{raw}") - return (parsed.hostname or "").lower().rstrip(".") - - def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. @@ -56,7 +48,7 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: ``chat_completions``. """ normalized = (base_url or "").strip().lower().rstrip("/") - hostname = _base_url_hostname(base_url) + hostname = base_url_hostname(base_url) if hostname == "api.x.ai": return "codex_responses" if hostname == "api.openai.com": diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index b4fa877d8..53b0c180a 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -22,6 +22,7 @@ from typing import Optional, Dict, Any from hermes_cli.nous_subscription import get_nous_subscription_features from tools.tool_backend_helpers import managed_nous_tools_enabled +from utils import base_url_hostname from hermes_constants import get_optional_skills_dir logger = logging.getLogger(__name__) @@ -803,7 +804,8 @@ def setup_model_provider(config: dict, *, quick: bool = False): elif _vision_idx == 1: # OpenAI-compatible endpoint _base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" _api_key_label = " API key" - if "api.openai.com" in _base_url.lower(): + _is_native_openai = base_url_hostname(_base_url) == "api.openai.com" + if _is_native_openai: _api_key_label = " OpenAI API key" _oai_key = prompt(_api_key_label, password=True).strip() if _oai_key: @@ -811,7 +813,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): # Save vision base URL to config (not .env — only secrets go there) _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {}) _vaux["base_url"] = _base_url - if "api.openai.com" in _base_url.lower(): + if _is_native_openai: _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"] _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"] _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index ba8849e6f..23a03b3bd 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -25,6 +25,7 @@ from hermes_cli.nous_subscription import ( get_nous_subscription_features, ) from tools.tool_backend_helpers import managed_nous_tools_enabled +from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -1179,7 +1180,8 @@ def _configure_simple_requirements(ts_key: str): _print_warning(" Skipped") elif idx == 1: base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" - key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key" + is_native_openai = base_url_hostname(base_url) == "api.openai.com" + key_label = " OPENAI_API_KEY" if is_native_openai else " API key" api_key = _prompt(key_label, password=True) if api_key and api_key.strip(): save_env_value("OPENAI_API_KEY", api_key.strip()) @@ -1189,7 +1191,7 @@ def _configure_simple_requirements(ts_key: str): _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) _aux["base_url"] = base_url save_config(_cfg) - if "api.openai.com" in base_url.lower(): + if is_native_openai: save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini") _print_success(" Saved") else: diff --git a/run_agent.py b/run_agent.py index 9da4bf93f..cbda3882e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -38,7 +38,6 @@ import threading from types import SimpleNamespace import uuid from typing import List, Dict, Any, Optional -from urllib.parse import urlparse from openai import OpenAI import fire from datetime import datetime @@ -125,15 +124,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write, env_var_enabled - - -def _base_url_hostname(base_url: str) -> str: - raw = (base_url or "").strip() - if not raw: - return "" - parsed = urlparse(raw if "://" in raw else f"//{raw}") - return (parsed.hostname or "").lower().rstrip(".") +from utils import atomic_json_write, base_url_hostname, env_var_enabled @@ -712,7 +703,7 @@ class AIAgent: def base_url(self, value: str) -> None: self._base_url = value self._base_url_lower = value.lower() if value else "" - self._base_url_hostname = _base_url_hostname(value) + self._base_url_hostname = base_url_hostname(value) def __init__( self, @@ -860,7 +851,7 @@ class AIAgent: elif (provider_name is None) and self._base_url_hostname == "api.x.ai": self.api_mode = "codex_responses" self.provider = "xai" - elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): + elif self.provider == "anthropic" or (provider_name is None and self._base_url_hostname == "api.anthropic.com"): self.api_mode = "anthropic_messages" self.provider = "anthropic" elif self._base_url_lower.rstrip("/").endswith("/anthropic"): @@ -2270,9 +2261,9 @@ class AIAgent: def _is_direct_openai_url(self, base_url: str = None) -> bool: """Return True when a base URL targets OpenAI's native API.""" if base_url is not None: - hostname = _base_url_hostname(base_url) + hostname = base_url_hostname(base_url) else: - hostname = getattr(self, "_base_url_hostname", "") or _base_url_hostname( + hostname = getattr(self, "_base_url_hostname", "") or base_url_hostname( getattr(self, "_base_url_lower", "") ) return hostname == "api.openai.com" @@ -2376,7 +2367,7 @@ class AIAgent: is_anthropic_wire = eff_api_mode == "anthropic_messages" is_native_anthropic = ( is_anthropic_wire - and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower) + and (eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com") ) if is_native_anthropic: diff --git a/scripts/release.py b/scripts/release.py index 6c00ec3db..1a5a1ea8a 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -321,6 +321,7 @@ AUTHOR_MAP = { "haileymarshall005@gmail.com": "haileymarshall", "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", "zheng.jerilyn@gmail.com": "jerilynzheng", + "asslaenn5@gmail.com": "Aslaaen", } diff --git a/tests/hermes_cli/test_determine_api_mode_hostname.py b/tests/hermes_cli/test_determine_api_mode_hostname.py new file mode 100644 index 000000000..8b6cd042c --- /dev/null +++ b/tests/hermes_cli/test_determine_api_mode_hostname.py @@ -0,0 +1,43 @@ +"""Regression tests for ``determine_api_mode`` hostname handling. + +Companion to tests/hermes_cli/test_detect_api_mode_for_url.py — the same +false-positive class (custom URLs containing ``api.openai.com`` / +``api.anthropic.com`` as a path segment or host suffix) must be rejected +by ``determine_api_mode`` as well, since it's the code path used by +custom/unknown providers in ``resolve_custom_provider``. +""" + +from __future__ import annotations + +from hermes_cli.providers import determine_api_mode + + +class TestOpenAIHostHardening: + def test_native_openai_url_is_codex_responses(self): + assert determine_api_mode("", "https://api.openai.com/v1") == "codex_responses" + + def test_openai_host_suffix_is_not_codex(self): + assert determine_api_mode("", "https://api.openai.com.example/v1") == "chat_completions" + + def test_openai_path_segment_is_not_codex(self): + assert determine_api_mode("", "https://proxy.example.test/api.openai.com/v1") == "chat_completions" + + +class TestAnthropicHostHardening: + def test_native_anthropic_url_is_anthropic_messages(self): + assert determine_api_mode("", "https://api.anthropic.com") == "anthropic_messages" + + def test_anthropic_host_suffix_is_not_anthropic(self): + assert determine_api_mode("", "https://api.anthropic.com.example/v1") == "chat_completions" + + def test_anthropic_path_segment_is_not_anthropic(self): + # A proxy whose path contains ``api.anthropic.com`` must not be misrouted. + # Note: the ``/anthropic`` convention for third-party gateways still wins + # via explicit path-suffix check — see test_anthropic_path_suffix_still_wins. + assert determine_api_mode("", "https://proxy.example.test/api.anthropic.com/v1") == "chat_completions" + + def test_anthropic_path_suffix_still_wins(self): + # Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, LiteLLM + # proxies) expose the Anthropic protocol under a ``/anthropic`` suffix. + # That convention must still resolve to anthropic_messages. + assert determine_api_mode("", "https://api.minimax.io/anthropic") == "anthropic_messages" diff --git a/tests/test_base_url_hostname.py b/tests/test_base_url_hostname.py new file mode 100644 index 000000000..89842cac2 --- /dev/null +++ b/tests/test_base_url_hostname.py @@ -0,0 +1,55 @@ +"""Targeted tests for ``utils.base_url_hostname``. + +The helper is used across provider routing, auxiliary client, and setup +wizards to avoid the substring-match false-positive class documented in +tests/agent/test_direct_provider_url_detection.py. +""" + +from __future__ import annotations + +from utils import base_url_hostname + + +def test_empty_returns_empty_string(): + assert base_url_hostname("") == "" + assert base_url_hostname(None) == "" # type: ignore[arg-type] + + +def test_plain_host_without_scheme(): + assert base_url_hostname("api.openai.com") == "api.openai.com" + assert base_url_hostname("api.openai.com/v1") == "api.openai.com" + + +def test_https_url_extracts_hostname_only(): + assert base_url_hostname("https://api.openai.com/v1") == "api.openai.com" + assert base_url_hostname("https://api.x.ai/v1") == "api.x.ai" + assert base_url_hostname("https://api.anthropic.com") == "api.anthropic.com" + + +def test_hostname_case_insensitive(): + assert base_url_hostname("https://API.OpenAI.com/v1") == "api.openai.com" + + +def test_trailing_dot_stripped(): + # Fully-qualified hostnames may include a trailing dot. + assert base_url_hostname("https://api.openai.com./v1") == "api.openai.com" + + +def test_path_containing_provider_host_is_not_the_hostname(): + # The key regression — proxy paths must never be misread as the host. + assert base_url_hostname("https://proxy.example.test/api.openai.com/v1") == "proxy.example.test" + assert base_url_hostname("https://proxy.example.test/api.anthropic.com/v1") == "proxy.example.test" + + +def test_host_suffix_is_not_the_provider(): + # A hostname that merely ends with the provider domain is not the provider. + assert base_url_hostname("https://api.openai.com.example/v1") == "api.openai.com.example" + assert base_url_hostname("https://api.x.ai.example/v1") == "api.x.ai.example" + + +def test_port_is_ignored(): + assert base_url_hostname("https://api.openai.com:443/v1") == "api.openai.com" + + +def test_whitespace_stripped(): + assert base_url_hostname(" https://api.openai.com/v1 ") == "api.openai.com" diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 2e6065245..3851bad3f 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -26,6 +26,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any, Dict, List, Optional from toolsets import TOOLSETS +from utils import base_url_hostname # Tools that children must never have access to @@ -1027,7 +1028,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: if "chatgpt.com/backend-api/codex" in base_lower: provider = "openai-codex" api_mode = "codex_responses" - elif "api.anthropic.com" in base_lower: + elif base_url_hostname(configured_base_url) == "api.anthropic.com": provider = "anthropic" api_mode = "anthropic_messages" diff --git a/utils.py b/utils.py index cf2582853..69a18d584 100644 --- a/utils.py +++ b/utils.py @@ -7,6 +7,7 @@ import stat import tempfile from pathlib import Path from typing import Any, Union +from urllib.parse import urlparse import yaml @@ -194,3 +195,24 @@ def env_int(key: str, default: int = 0) -> int: def env_bool(key: str, default: bool = False) -> bool: """Read an environment variable as a boolean.""" return is_truthy_value(os.getenv(key, ""), default=default) + + +# ─── URL Parsing Helpers ────────────────────────────────────────────────────── + + +def base_url_hostname(base_url: str) -> str: + """Return the lowercased hostname for a base URL, or ``""`` if absent. + + Use exact-hostname comparisons against known provider hosts + (``api.openai.com``, ``api.x.ai``, ``api.anthropic.com``) instead of + substring matches on the raw URL. Substring checks treat attacker- or + proxy-controlled paths/hosts like ``https://api.openai.com.example/v1`` + or ``https://proxy.test/api.openai.com/v1`` as native endpoints, which + leads to wrong api_mode / auth routing. + """ + raw = (base_url or "").strip() + if not raw: + return "" + parsed = urlparse(raw if "://" in raw else f"//{raw}") + return (parsed.hostname or "").lower().rstrip(".") +