fix: extend hostname-match provider detection across remaining call sites

Aslaaen's fix in the original PR covered _detect_api_mode_for_url and the
two openai/xai sites in run_agent.py. This finishes the sweep: the same
substring-match false-positive class (e.g. https://api.openai.com.evil/v1,
https://proxy/api.openai.com/v1, https://api.anthropic.com.example/v1)
existed in eight more call sites, and the hostname helper was duplicated
in two modules.

- utils: add shared base_url_hostname() (single source of truth).
- hermes_cli/runtime_provider, run_agent: drop local duplicates, import
  from utils. Reuse the cached AIAgent._base_url_hostname attribute
  everywhere it's already populated.
- agent/auxiliary_client: switch codex-wrap auto-detect, max_completion_tokens
  gate (auxiliary_max_tokens_param), and custom-endpoint max_tokens kwarg
  selection to hostname equality.
- run_agent: native-anthropic check in the Claude-style model branch
  and in the AIAgent init provider-auto-detect branch.
- agent/model_metadata: Anthropic /v1/models context-length lookup.
- hermes_cli/providers.determine_api_mode: anthropic / openai URL
  heuristics for custom/unknown providers (the /anthropic path-suffix
  convention for third-party gateways is preserved).
- tools/delegate_tool: anthropic detection for delegated subagent
  runtimes.
- hermes_cli/setup, hermes_cli/tools_config: setup-wizard vision-endpoint
  native-OpenAI detection (paired with deduping the repeated check into
  a single is_native_openai boolean per branch).

Tests:
- tests/test_base_url_hostname.py covers the helper directly
  (path-containing-host, host-suffix, trailing dot, port, case).
- tests/hermes_cli/test_determine_api_mode_hostname.py adds the same
  regression class for determine_api_mode, plus a test that the
  /anthropic third-party gateway convention still wins.

Also: add asslaenn5@gmail.com → Aslaaen to scripts/release.py AUTHOR_MAP.
This commit is contained in:
Teknium 2026-04-20 20:58:01 -07:00 committed by Teknium
parent 5356797f1b
commit cecf84daf7
12 changed files with 151 additions and 37 deletions

View file

@ -48,6 +48,7 @@ from openai import OpenAI
from agent.credential_pool import load_pool from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_hostname
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -1516,8 +1517,7 @@ def resolve_provider_client(
# Auto-detect: api.openai.com + codex model name pattern # Auto-detect: api.openai.com + codex model name pattern
if api_mode and api_mode != "codex_responses": if api_mode and api_mode != "codex_responses":
return False # explicit non-codex mode return False # explicit non-codex mode
normalized_base = (base_url_str or "").strip().lower() if base_url_hostname(base_url_str) == "api.openai.com":
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
model_lower = (model_str or "").lower() model_lower = (model_str or "").lower()
if "codex" in model_lower: if "codex" in model_lower:
return True return True
@ -2025,7 +2025,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Only use max_completion_tokens for direct OpenAI custom endpoints # Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key if (not or_key
and _read_nous_auth() is None and _read_nous_auth() is None
and "api.openai.com" in custom_base.lower()): and base_url_hostname(custom_base) == "api.openai.com"):
return {"max_completion_tokens": value} return {"max_completion_tokens": value}
return {"max_tokens": value} return {"max_tokens": value}
@ -2460,7 +2460,7 @@ def _build_call_kwargs(
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens. # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom": if provider == "custom":
custom_base = base_url or _current_custom_base_url() custom_base = base_url or _current_custom_base_url()
if "api.openai.com" in custom_base.lower(): if base_url_hostname(custom_base) == "api.openai.com":
kwargs["max_completion_tokens"] = max_tokens kwargs["max_completion_tokens"] = max_tokens
else: else:
kwargs["max_tokens"] = max_tokens kwargs["max_tokens"] = max_tokens

View file

@ -14,6 +14,8 @@ from urllib.parse import urlparse
import requests import requests
import yaml import yaml
from utils import base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -1078,7 +1080,7 @@ def get_model_context_length(
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth) # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
if provider == "anthropic" or ( if provider == "anthropic" or (
base_url and "api.anthropic.com" in base_url base_url and base_url_hostname(base_url) == "api.anthropic.com"
): ):
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key) ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
if ctx: if ctx:

View file

@ -23,6 +23,8 @@ import logging
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from utils import base_url_hostname
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -434,9 +436,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
# URL-based heuristics for custom / unknown providers # URL-based heuristics for custom / unknown providers
if base_url: if base_url:
url_lower = base_url.rstrip("/").lower() url_lower = base_url.rstrip("/").lower()
if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: hostname = base_url_hostname(base_url)
if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
return "anthropic_messages" return "anthropic_messages"
if "api.openai.com" in url_lower: if hostname == "api.openai.com":
return "codex_responses" return "codex_responses"
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower: if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse" return "bedrock_converse"

View file

@ -6,7 +6,6 @@ import logging
import os import os
import re import re
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
from urllib.parse import urlparse
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,20 +29,13 @@ from hermes_cli.auth import (
) )
from hermes_cli.config import get_compatible_custom_providers, load_config from hermes_cli.config import get_compatible_custom_providers, load_config
from hermes_constants import OPENROUTER_BASE_URL from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_hostname
def _normalize_custom_provider_name(value: str) -> str: def _normalize_custom_provider_name(value: str) -> str:
return value.strip().lower().replace(" ", "-") return value.strip().lower().replace(" ", "-")
def _base_url_hostname(base_url: str) -> str:
raw = (base_url or "").strip()
if not raw:
return ""
parsed = urlparse(raw if "://" in raw else f"//{raw}")
return (parsed.hostname or "").lower().rstrip(".")
def _detect_api_mode_for_url(base_url: str) -> Optional[str]: def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
"""Auto-detect api_mode from the resolved base URL. """Auto-detect api_mode from the resolved base URL.
@ -56,7 +48,7 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
``chat_completions``. ``chat_completions``.
""" """
normalized = (base_url or "").strip().lower().rstrip("/") normalized = (base_url or "").strip().lower().rstrip("/")
hostname = _base_url_hostname(base_url) hostname = base_url_hostname(base_url)
if hostname == "api.x.ai": if hostname == "api.x.ai":
return "codex_responses" return "codex_responses"
if hostname == "api.openai.com": if hostname == "api.openai.com":

View file

@ -22,6 +22,7 @@ from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import get_nous_subscription_features from hermes_cli.nous_subscription import get_nous_subscription_features
from tools.tool_backend_helpers import managed_nous_tools_enabled from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
from hermes_constants import get_optional_skills_dir from hermes_constants import get_optional_skills_dir
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -803,7 +804,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
elif _vision_idx == 1: # OpenAI-compatible endpoint elif _vision_idx == 1: # OpenAI-compatible endpoint
_base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" _base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
_api_key_label = " API key" _api_key_label = " API key"
if "api.openai.com" in _base_url.lower(): _is_native_openai = base_url_hostname(_base_url) == "api.openai.com"
if _is_native_openai:
_api_key_label = " OpenAI API key" _api_key_label = " OpenAI API key"
_oai_key = prompt(_api_key_label, password=True).strip() _oai_key = prompt(_api_key_label, password=True).strip()
if _oai_key: if _oai_key:
@ -811,7 +813,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
# Save vision base URL to config (not .env — only secrets go there) # Save vision base URL to config (not .env — only secrets go there)
_vaux = config.setdefault("auxiliary", {}).setdefault("vision", {}) _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {})
_vaux["base_url"] = _base_url _vaux["base_url"] = _base_url
if "api.openai.com" in _base_url.lower(): if _is_native_openai:
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"] _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"] _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0) _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)

View file

@ -25,6 +25,7 @@ from hermes_cli.nous_subscription import (
get_nous_subscription_features, get_nous_subscription_features,
) )
from tools.tool_backend_helpers import managed_nous_tools_enabled from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -1179,7 +1180,8 @@ def _configure_simple_requirements(ts_key: str):
_print_warning(" Skipped") _print_warning(" Skipped")
elif idx == 1: elif idx == 1:
base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key" is_native_openai = base_url_hostname(base_url) == "api.openai.com"
key_label = " OPENAI_API_KEY" if is_native_openai else " API key"
api_key = _prompt(key_label, password=True) api_key = _prompt(key_label, password=True)
if api_key and api_key.strip(): if api_key and api_key.strip():
save_env_value("OPENAI_API_KEY", api_key.strip()) save_env_value("OPENAI_API_KEY", api_key.strip())
@ -1189,7 +1191,7 @@ def _configure_simple_requirements(ts_key: str):
_aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
_aux["base_url"] = base_url _aux["base_url"] = base_url
save_config(_cfg) save_config(_cfg)
if "api.openai.com" in base_url.lower(): if is_native_openai:
save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini") save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
_print_success(" Saved") _print_success(" Saved")
else: else:

View file

@ -38,7 +38,6 @@ import threading
from types import SimpleNamespace from types import SimpleNamespace
import uuid import uuid
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from urllib.parse import urlparse
from openai import OpenAI from openai import OpenAI
import fire import fire
from datetime import datetime from datetime import datetime
@ -125,15 +124,7 @@ from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad, convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file, save_trajectory as _save_trajectory_to_file,
) )
from utils import atomic_json_write, env_var_enabled from utils import atomic_json_write, base_url_hostname, env_var_enabled
def _base_url_hostname(base_url: str) -> str:
raw = (base_url or "").strip()
if not raw:
return ""
parsed = urlparse(raw if "://" in raw else f"//{raw}")
return (parsed.hostname or "").lower().rstrip(".")
@ -712,7 +703,7 @@ class AIAgent:
def base_url(self, value: str) -> None: def base_url(self, value: str) -> None:
self._base_url = value self._base_url = value
self._base_url_lower = value.lower() if value else "" self._base_url_lower = value.lower() if value else ""
self._base_url_hostname = _base_url_hostname(value) self._base_url_hostname = base_url_hostname(value)
def __init__( def __init__(
self, self,
@ -860,7 +851,7 @@ class AIAgent:
elif (provider_name is None) and self._base_url_hostname == "api.x.ai": elif (provider_name is None) and self._base_url_hostname == "api.x.ai":
self.api_mode = "codex_responses" self.api_mode = "codex_responses"
self.provider = "xai" self.provider = "xai"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): elif self.provider == "anthropic" or (provider_name is None and self._base_url_hostname == "api.anthropic.com"):
self.api_mode = "anthropic_messages" self.api_mode = "anthropic_messages"
self.provider = "anthropic" self.provider = "anthropic"
elif self._base_url_lower.rstrip("/").endswith("/anthropic"): elif self._base_url_lower.rstrip("/").endswith("/anthropic"):
@ -2270,9 +2261,9 @@ class AIAgent:
def _is_direct_openai_url(self, base_url: str = None) -> bool: def _is_direct_openai_url(self, base_url: str = None) -> bool:
"""Return True when a base URL targets OpenAI's native API.""" """Return True when a base URL targets OpenAI's native API."""
if base_url is not None: if base_url is not None:
hostname = _base_url_hostname(base_url) hostname = base_url_hostname(base_url)
else: else:
hostname = getattr(self, "_base_url_hostname", "") or _base_url_hostname( hostname = getattr(self, "_base_url_hostname", "") or base_url_hostname(
getattr(self, "_base_url_lower", "") getattr(self, "_base_url_lower", "")
) )
return hostname == "api.openai.com" return hostname == "api.openai.com"
@ -2376,7 +2367,7 @@ class AIAgent:
is_anthropic_wire = eff_api_mode == "anthropic_messages" is_anthropic_wire = eff_api_mode == "anthropic_messages"
is_native_anthropic = ( is_native_anthropic = (
is_anthropic_wire is_anthropic_wire
and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower) and (eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com")
) )
if is_native_anthropic: if is_native_anthropic:

View file

@ -321,6 +321,7 @@ AUTHOR_MAP = {
"haileymarshall005@gmail.com": "haileymarshall", "haileymarshall005@gmail.com": "haileymarshall",
"aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80",
"zheng.jerilyn@gmail.com": "jerilynzheng", "zheng.jerilyn@gmail.com": "jerilynzheng",
"asslaenn5@gmail.com": "Aslaaen",
} }

View file

@ -0,0 +1,43 @@
"""Regression tests for ``determine_api_mode`` hostname handling.
Companion to tests/hermes_cli/test_detect_api_mode_for_url.py — the same
false-positive class (custom URLs containing ``api.openai.com`` /
``api.anthropic.com`` as a path segment or host suffix) must be rejected
by ``determine_api_mode`` as well, since it's the code path used by
custom/unknown providers in ``resolve_custom_provider``.
"""
from __future__ import annotations
from hermes_cli.providers import determine_api_mode
class TestOpenAIHostHardening:
    """``determine_api_mode`` results for real and look-alike OpenAI URLs."""

    def test_native_openai_url_is_codex_responses(self):
        # The genuine host is the only URL here expected to map to codex.
        mode = determine_api_mode("", "https://api.openai.com/v1")
        assert mode == "codex_responses"

    def test_openai_host_suffix_is_not_codex(self):
        # The provider domain is only a prefix of a longer, different host.
        mode = determine_api_mode("", "https://api.openai.com.example/v1")
        assert mode == "chat_completions"

    def test_openai_path_segment_is_not_codex(self):
        # The provider domain appears in the path, not in the host.
        mode = determine_api_mode(
            "", "https://proxy.example.test/api.openai.com/v1"
        )
        assert mode == "chat_completions"
class TestAnthropicHostHardening:
    """``determine_api_mode`` results for real and look-alike Anthropic URLs."""

    def test_native_anthropic_url_is_anthropic_messages(self):
        mode = determine_api_mode("", "https://api.anthropic.com")
        assert mode == "anthropic_messages"

    def test_anthropic_host_suffix_is_not_anthropic(self):
        # The provider domain is only a prefix of a longer, different host.
        mode = determine_api_mode("", "https://api.anthropic.com.example/v1")
        assert mode == "chat_completions"

    def test_anthropic_path_segment_is_not_anthropic(self):
        # A proxy URL that carries ``api.anthropic.com`` in its path must not
        # be misrouted. The ``/anthropic`` path-suffix convention is a separate
        # rule, exercised by test_anthropic_path_suffix_still_wins.
        mode = determine_api_mode(
            "", "https://proxy.example.test/api.anthropic.com/v1"
        )
        assert mode == "chat_completions"

    def test_anthropic_path_suffix_still_wins(self):
        # Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM,
        # LiteLLM proxies) serve the Anthropic protocol under a trailing
        # ``/anthropic`` path; that must keep resolving to anthropic_messages.
        mode = determine_api_mode("", "https://api.minimax.io/anthropic")
        assert mode == "anthropic_messages"

View file

@ -0,0 +1,55 @@
"""Targeted tests for ``utils.base_url_hostname``.
The helper is used across provider routing, auxiliary client, and setup
wizards to avoid the substring-match false-positive class documented in
tests/agent/test_direct_provider_url_detection.py.
"""
from __future__ import annotations
from utils import base_url_hostname
def test_empty_returns_empty_string():
    """Both the empty string and ``None`` produce an empty hostname."""
    for blank in ("", None):
        assert base_url_hostname(blank) == ""  # type: ignore[arg-type]
def test_plain_host_without_scheme():
    """Scheme-less input, with or without a path, still yields the host."""
    for bare in ("api.openai.com", "api.openai.com/v1"):
        assert base_url_hostname(bare) == "api.openai.com"
def test_https_url_extracts_hostname_only():
    """Full https URLs reduce to just their host component."""
    expected_by_url = {
        "https://api.openai.com/v1": "api.openai.com",
        "https://api.x.ai/v1": "api.x.ai",
        "https://api.anthropic.com": "api.anthropic.com",
    }
    for url, expected in expected_by_url.items():
        assert base_url_hostname(url) == expected
def test_hostname_case_insensitive():
    """A mixed-case host comes back lowercased."""
    mixed_case_url = "https://API.OpenAI.com/v1"
    assert base_url_hostname(mixed_case_url) == "api.openai.com"
def test_trailing_dot_stripped():
    """The trailing dot of a fully-qualified hostname is dropped."""
    fqdn_url = "https://api.openai.com./v1"
    assert base_url_hostname(fqdn_url) == "api.openai.com"
def test_path_containing_provider_host_is_not_the_hostname():
    """A provider domain inside the path is never reported as the host."""
    # The key regression: only the proxy's own host may be returned.
    proxied_urls = (
        "https://proxy.example.test/api.openai.com/v1",
        "https://proxy.example.test/api.anthropic.com/v1",
    )
    for url in proxied_urls:
        assert base_url_hostname(url) == "proxy.example.test"
def test_host_suffix_is_not_the_provider():
    """A host that extends a provider domain is returned in full."""
    # The extra label must survive so equality checks against the real
    # provider host fail.
    lookalikes = {
        "https://api.openai.com.example/v1": "api.openai.com.example",
        "https://api.x.ai.example/v1": "api.x.ai.example",
    }
    for url, expected in lookalikes.items():
        assert base_url_hostname(url) == expected
def test_port_is_ignored():
    """An explicit port is not part of the returned hostname."""
    url_with_port = "https://api.openai.com:443/v1"
    assert base_url_hostname(url_with_port) == "api.openai.com"
def test_whitespace_stripped():
    """Leading and trailing whitespace around the URL is ignored."""
    padded_url = " https://api.openai.com/v1 "
    assert base_url_hostname(padded_url) == "api.openai.com"

View file

@ -26,6 +26,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from toolsets import TOOLSETS from toolsets import TOOLSETS
from utils import base_url_hostname
# Tools that children must never have access to # Tools that children must never have access to
@ -1027,7 +1028,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
if "chatgpt.com/backend-api/codex" in base_lower: if "chatgpt.com/backend-api/codex" in base_lower:
provider = "openai-codex" provider = "openai-codex"
api_mode = "codex_responses" api_mode = "codex_responses"
elif "api.anthropic.com" in base_lower: elif base_url_hostname(configured_base_url) == "api.anthropic.com":
provider = "anthropic" provider = "anthropic"
api_mode = "anthropic_messages" api_mode = "anthropic_messages"

View file

@ -7,6 +7,7 @@ import stat
import tempfile import tempfile
from pathlib import Path from pathlib import Path
from typing import Any, Union from typing import Any, Union
from urllib.parse import urlparse
import yaml import yaml
@ -194,3 +195,24 @@ def env_int(key: str, default: int = 0) -> int:
def env_bool(key: str, default: bool = False) -> bool: def env_bool(key: str, default: bool = False) -> bool:
"""Read an environment variable as a boolean.""" """Read an environment variable as a boolean."""
return is_truthy_value(os.getenv(key, ""), default=default) return is_truthy_value(os.getenv(key, ""), default=default)
# ─── URL Parsing Helpers ──────────────────────────────────────────────────────
def base_url_hostname(base_url: str) -> str:
    """Return the lowercased hostname for a base URL, or ``""`` if absent.

    Use exact-hostname comparisons against known provider hosts
    (``api.openai.com``, ``api.x.ai``, ``api.anthropic.com``) instead of
    substring matches on the raw URL. Substring checks treat attacker- or
    proxy-controlled paths/hosts like ``https://api.openai.com.example/v1``
    or ``https://proxy.test/api.openai.com/v1`` as native endpoints, which
    leads to wrong api_mode / auth routing.

    Args:
        base_url: A full URL or a scheme-less ``host[:port][/path]`` string.
            ``None`` and empty/whitespace-only values are accepted.

    Returns:
        The hostname, lowercased, without port and without a trailing dot.
        ``""`` when the input is empty, has no host component, or cannot be
        parsed as a URL.
    """
    raw = (base_url or "").strip()
    if not raw:
        return ""
    try:
        # Prefix scheme-less input with "//" so urlparse reads the leading
        # segment as the network location rather than as a path.
        parsed = urlparse(raw if "://" in raw else f"//{raw}")
        host = parsed.hostname
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced "[" in
        # the netloc). A URL that cannot be parsed cannot be a known provider
        # host, so report "no hostname" instead of crashing the caller.
        return ""
    return (host or "").lower().rstrip(".")