fix: extend hostname-match provider detection across remaining call sites

Aslaaen's fix in the original PR covered _detect_api_mode_for_url and the
two openai/xai sites in run_agent.py. This finishes the sweep: the same
substring-match false-positive class (e.g. https://api.openai.com.evil/v1,
https://proxy/api.openai.com/v1, https://api.anthropic.com.example/v1)
existed in eight more call sites, and the hostname helper was duplicated
in two modules.

- utils: add shared base_url_hostname() (single source of truth).
- hermes_cli/runtime_provider, run_agent: drop local duplicates, import
  from utils. Reuse the cached AIAgent._base_url_hostname attribute
  everywhere it's already populated.
- agent/auxiliary_client: switch codex-wrap auto-detect, max_completion_tokens
  gate (auxiliary_max_tokens_param), and custom-endpoint max_tokens kwarg
  selection to hostname equality.
- run_agent: switch the native-anthropic check in the Claude-style model branch
  and in the AIAgent init provider-auto-detect branch to hostname equality.
- agent/model_metadata: gate the Anthropic /v1/models context-length lookup on
  hostname equality.
- hermes_cli/providers.determine_api_mode: anthropic / openai URL
  heuristics for custom/unknown providers (the /anthropic path-suffix
  convention for third-party gateways is preserved).
- tools/delegate_tool: switch anthropic detection for delegated subagent
  runtimes to hostname equality.
- hermes_cli/setup, hermes_cli/tools_config: setup-wizard vision-endpoint
  native-OpenAI detection (paired with deduping the repeated check into
  a single is_native_openai boolean per branch).

Tests:
- tests/test_base_url_hostname.py covers the helper directly
  (path-containing-host, host-suffix, trailing dot, port, case).
- tests/hermes_cli/test_determine_api_mode_hostname.py adds the same
  regression class for determine_api_mode, plus a test that the
  /anthropic third-party gateway convention still wins.

Also: add asslaenn5@gmail.com → Aslaaen to scripts/release.py AUTHOR_MAP.
This commit is contained in:
Teknium 2026-04-20 20:58:01 -07:00 committed by Teknium
parent 5356797f1b
commit cecf84daf7
12 changed files with 151 additions and 37 deletions

View file

@ -48,6 +48,7 @@ from openai import OpenAI
from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_hostname
logger = logging.getLogger(__name__)
@ -1516,8 +1517,7 @@ def resolve_provider_client(
# Auto-detect: api.openai.com + codex model name pattern
if api_mode and api_mode != "codex_responses":
return False # explicit non-codex mode
normalized_base = (base_url_str or "").strip().lower()
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
if base_url_hostname(base_url_str) == "api.openai.com":
model_lower = (model_str or "").lower()
if "codex" in model_lower:
return True
@ -2025,7 +2025,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and "api.openai.com" in custom_base.lower()):
and base_url_hostname(custom_base) == "api.openai.com"):
return {"max_completion_tokens": value}
return {"max_tokens": value}
@ -2460,7 +2460,7 @@ def _build_call_kwargs(
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom":
custom_base = base_url or _current_custom_base_url()
if "api.openai.com" in custom_base.lower():
if base_url_hostname(custom_base) == "api.openai.com":
kwargs["max_completion_tokens"] = max_tokens
else:
kwargs["max_tokens"] = max_tokens

View file

@ -14,6 +14,8 @@ from urllib.parse import urlparse
import requests
import yaml
from utils import base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
@ -1078,7 +1080,7 @@ def get_model_context_length(
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
if provider == "anthropic" or (
base_url and "api.anthropic.com" in base_url
base_url and base_url_hostname(base_url) == "api.anthropic.com"
):
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
if ctx:

View file

@ -23,6 +23,8 @@ import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from utils import base_url_hostname
logger = logging.getLogger(__name__)
@ -434,9 +436,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
# URL-based heuristics for custom / unknown providers
if base_url:
url_lower = base_url.rstrip("/").lower()
if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
hostname = base_url_hostname(base_url)
if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
return "anthropic_messages"
if "api.openai.com" in url_lower:
if hostname == "api.openai.com":
return "codex_responses"
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"

View file

@ -6,7 +6,6 @@ import logging
import os
import re
from typing import Any, Dict, Optional
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
@ -30,20 +29,13 @@ from hermes_cli.auth import (
)
from hermes_cli.config import get_compatible_custom_providers, load_config
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_hostname
def _normalize_custom_provider_name(value: str) -> str:
return value.strip().lower().replace(" ", "-")
def _base_url_hostname(base_url: str) -> str:
raw = (base_url or "").strip()
if not raw:
return ""
parsed = urlparse(raw if "://" in raw else f"//{raw}")
return (parsed.hostname or "").lower().rstrip(".")
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
"""Auto-detect api_mode from the resolved base URL.
@ -56,7 +48,7 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
``chat_completions``.
"""
normalized = (base_url or "").strip().lower().rstrip("/")
hostname = _base_url_hostname(base_url)
hostname = base_url_hostname(base_url)
if hostname == "api.x.ai":
return "codex_responses"
if hostname == "api.openai.com":

View file

@ -22,6 +22,7 @@ from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import get_nous_subscription_features
from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
from hermes_constants import get_optional_skills_dir
logger = logging.getLogger(__name__)
@ -803,7 +804,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
elif _vision_idx == 1: # OpenAI-compatible endpoint
_base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
_api_key_label = " API key"
if "api.openai.com" in _base_url.lower():
_is_native_openai = base_url_hostname(_base_url) == "api.openai.com"
if _is_native_openai:
_api_key_label = " OpenAI API key"
_oai_key = prompt(_api_key_label, password=True).strip()
if _oai_key:
@ -811,7 +813,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
# Save vision base URL to config (not .env — only secrets go there)
_vaux = config.setdefault("auxiliary", {}).setdefault("vision", {})
_vaux["base_url"] = _base_url
if "api.openai.com" in _base_url.lower():
if _is_native_openai:
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)

View file

@ -25,6 +25,7 @@ from hermes_cli.nous_subscription import (
get_nous_subscription_features,
)
from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
logger = logging.getLogger(__name__)
@ -1179,7 +1180,8 @@ def _configure_simple_requirements(ts_key: str):
_print_warning(" Skipped")
elif idx == 1:
base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key"
is_native_openai = base_url_hostname(base_url) == "api.openai.com"
key_label = " OPENAI_API_KEY" if is_native_openai else " API key"
api_key = _prompt(key_label, password=True)
if api_key and api_key.strip():
save_env_value("OPENAI_API_KEY", api_key.strip())
@ -1189,7 +1191,7 @@ def _configure_simple_requirements(ts_key: str):
_aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
_aux["base_url"] = base_url
save_config(_cfg)
if "api.openai.com" in base_url.lower():
if is_native_openai:
save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
_print_success(" Saved")
else:

View file

@ -38,7 +38,6 @@ import threading
from types import SimpleNamespace
import uuid
from typing import List, Dict, Any, Optional
from urllib.parse import urlparse
from openai import OpenAI
import fire
from datetime import datetime
@ -125,15 +124,7 @@ from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
)
from utils import atomic_json_write, env_var_enabled
def _base_url_hostname(base_url: str) -> str:
raw = (base_url or "").strip()
if not raw:
return ""
parsed = urlparse(raw if "://" in raw else f"//{raw}")
return (parsed.hostname or "").lower().rstrip(".")
from utils import atomic_json_write, base_url_hostname, env_var_enabled
@ -712,7 +703,7 @@ class AIAgent:
def base_url(self, value: str) -> None:
self._base_url = value
self._base_url_lower = value.lower() if value else ""
self._base_url_hostname = _base_url_hostname(value)
self._base_url_hostname = base_url_hostname(value)
def __init__(
self,
@ -860,7 +851,7 @@ class AIAgent:
elif (provider_name is None) and self._base_url_hostname == "api.x.ai":
self.api_mode = "codex_responses"
self.provider = "xai"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
elif self.provider == "anthropic" or (provider_name is None and self._base_url_hostname == "api.anthropic.com"):
self.api_mode = "anthropic_messages"
self.provider = "anthropic"
elif self._base_url_lower.rstrip("/").endswith("/anthropic"):
@ -2270,9 +2261,9 @@ class AIAgent:
def _is_direct_openai_url(self, base_url: str = None) -> bool:
"""Return True when a base URL targets OpenAI's native API."""
if base_url is not None:
hostname = _base_url_hostname(base_url)
hostname = base_url_hostname(base_url)
else:
hostname = getattr(self, "_base_url_hostname", "") or _base_url_hostname(
hostname = getattr(self, "_base_url_hostname", "") or base_url_hostname(
getattr(self, "_base_url_lower", "")
)
return hostname == "api.openai.com"
@ -2376,7 +2367,7 @@ class AIAgent:
is_anthropic_wire = eff_api_mode == "anthropic_messages"
is_native_anthropic = (
is_anthropic_wire
and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower)
and (eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com")
)
if is_native_anthropic:

View file

@ -321,6 +321,7 @@ AUTHOR_MAP = {
"haileymarshall005@gmail.com": "haileymarshall",
"aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80",
"zheng.jerilyn@gmail.com": "jerilynzheng",
"asslaenn5@gmail.com": "Aslaaen",
}

View file

@ -0,0 +1,43 @@
"""Regression tests for ``determine_api_mode`` hostname handling.
Companion to tests/hermes_cli/test_detect_api_mode_for_url.py: the same
false-positive class (custom URLs containing ``api.openai.com`` /
``api.anthropic.com`` as a path segment or host suffix) must be rejected
by ``determine_api_mode`` as well, since it's the code path used by
custom/unknown providers in ``resolve_custom_provider``.
"""
from __future__ import annotations
from hermes_cli.providers import determine_api_mode
class TestOpenAIHostHardening:
    """Only the exact ``api.openai.com`` host may resolve to ``codex_responses``."""

    def test_native_openai_url_is_codex_responses(self):
        mode = determine_api_mode("", "https://api.openai.com/v1")
        assert mode == "codex_responses"

    def test_openai_host_suffix_is_not_codex(self):
        # The provider domain appears only as a prefix of a longer hostname.
        mode = determine_api_mode("", "https://api.openai.com.example/v1")
        assert mode == "chat_completions"

    def test_openai_path_segment_is_not_codex(self):
        # The provider domain appears only inside the URL path.
        mode = determine_api_mode("", "https://proxy.example.test/api.openai.com/v1")
        assert mode == "chat_completions"
class TestAnthropicHostHardening:
    """Only the exact ``api.anthropic.com`` host or a ``/anthropic`` path suffix
    may resolve to ``anthropic_messages``."""

    def test_native_anthropic_url_is_anthropic_messages(self):
        mode = determine_api_mode("", "https://api.anthropic.com")
        assert mode == "anthropic_messages"

    def test_anthropic_host_suffix_is_not_anthropic(self):
        mode = determine_api_mode("", "https://api.anthropic.com.example/v1")
        assert mode == "chat_completions"

    def test_anthropic_path_segment_is_not_anthropic(self):
        # A proxy that merely carries ``api.anthropic.com`` in its path must
        # not be misrouted. The ``/anthropic`` suffix convention for
        # third-party gateways is a separate, explicit check; see
        # test_anthropic_path_suffix_still_wins.
        mode = determine_api_mode("", "https://proxy.example.test/api.anthropic.com/v1")
        assert mode == "chat_completions"

    def test_anthropic_path_suffix_still_wins(self):
        # Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM,
        # LiteLLM proxies) expose the Anthropic protocol under a
        # ``/anthropic`` suffix, which must keep resolving to
        # anthropic_messages.
        mode = determine_api_mode("", "https://api.minimax.io/anthropic")
        assert mode == "anthropic_messages"

View file

@ -0,0 +1,55 @@
"""Targeted tests for ``utils.base_url_hostname``.
The helper is used across provider routing, auxiliary client, and setup
wizards to avoid the substring-match false-positive class documented in
tests/agent/test_direct_provider_url_detection.py.
"""
from __future__ import annotations
from utils import base_url_hostname
def test_empty_returns_empty_string():
    """Both the empty string and ``None`` yield an empty hostname."""
    for blank in ("", None):
        assert base_url_hostname(blank) == ""  # type: ignore[arg-type]
def test_plain_host_without_scheme():
    """Scheme-less inputs are still parsed as host (plus optional path)."""
    expected = "api.openai.com"
    for bare in ("api.openai.com", "api.openai.com/v1"):
        assert base_url_hostname(bare) == expected
def test_https_url_extracts_hostname_only():
    """Full HTTPS URLs reduce to their hostname, with or without a path."""
    cases = [
        ("https://api.openai.com/v1", "api.openai.com"),
        ("https://api.x.ai/v1", "api.x.ai"),
        ("https://api.anthropic.com", "api.anthropic.com"),
    ]
    for url, host in cases:
        assert base_url_hostname(url) == host
def test_hostname_case_insensitive():
    """Mixed-case hostnames come back lowercased."""
    mixed_case_url = "https://API.OpenAI.com/v1"
    assert base_url_hostname(mixed_case_url) == "api.openai.com"
def test_trailing_dot_stripped():
    """A fully-qualified hostname's trailing dot is removed."""
    fqdn_url = "https://api.openai.com./v1"
    assert base_url_hostname(fqdn_url) == "api.openai.com"
def test_path_containing_provider_host_is_not_the_hostname():
    """The key regression: a provider domain in the path is never the host."""
    proxy_urls = (
        "https://proxy.example.test/api.openai.com/v1",
        "https://proxy.example.test/api.anthropic.com/v1",
    )
    for url in proxy_urls:
        assert base_url_hostname(url) == "proxy.example.test"
def test_host_suffix_is_not_the_provider():
    """A longer hostname that embeds the provider domain is returned in full."""
    cases = [
        ("https://api.openai.com.example/v1", "api.openai.com.example"),
        ("https://api.x.ai.example/v1", "api.x.ai.example"),
    ]
    for url, host in cases:
        assert base_url_hostname(url) == host
def test_port_is_ignored():
    """An explicit port does not appear in the extracted hostname."""
    url_with_port = "https://api.openai.com:443/v1"
    assert base_url_hostname(url_with_port) == "api.openai.com"
def test_whitespace_stripped():
    """Leading and trailing whitespace around the URL is ignored."""
    padded_url = " https://api.openai.com/v1 "
    assert base_url_hostname(padded_url) == "api.openai.com"

View file

@ -26,6 +26,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional
from toolsets import TOOLSETS
from utils import base_url_hostname
# Tools that children must never have access to
@ -1027,7 +1028,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
if "chatgpt.com/backend-api/codex" in base_lower:
provider = "openai-codex"
api_mode = "codex_responses"
elif "api.anthropic.com" in base_lower:
elif base_url_hostname(configured_base_url) == "api.anthropic.com":
provider = "anthropic"
api_mode = "anthropic_messages"

View file

@ -7,6 +7,7 @@ import stat
import tempfile
from pathlib import Path
from typing import Any, Union
from urllib.parse import urlparse
import yaml
@ -194,3 +195,24 @@ def env_int(key: str, default: int = 0) -> int:
def env_bool(key: str, default: bool = False) -> bool:
"""Read an environment variable as a boolean."""
return is_truthy_value(os.getenv(key, ""), default=default)
# ─── URL Parsing Helpers ──────────────────────────────────────────────────────
def base_url_hostname(base_url: str) -> str:
    """Return the lowercased hostname for a base URL, or ``""`` if absent.

    Use exact-hostname comparisons against known provider hosts
    (``api.openai.com``, ``api.x.ai``, ``api.anthropic.com``) instead of
    substring matches on the raw URL. Substring checks treat attacker- or
    proxy-controlled paths/hosts like ``https://api.openai.com.example/v1``
    or ``https://proxy.test/api.openai.com/v1`` as native endpoints, which
    leads to wrong api_mode / auth routing.

    Args:
        base_url: A URL or bare ``host[:port][/path]`` string. ``None`` and
            empty/whitespace-only values are accepted.

    Returns:
        The hostname, lowercased and without any trailing dot. Returns
        ``""`` when the input is empty, has no host component, or cannot be
        parsed (for example, unbalanced IPv6 brackets).
    """
    raw = (base_url or "").strip()
    if not raw:
        return ""
    # Without a scheme, urlparse would treat the whole string as a path;
    # prefixing "//" makes it parse the leading component as the netloc.
    candidate = raw if "://" in raw else f"//{raw}"
    try:
        host = urlparse(candidate).hostname
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. "https://[::1"). A URL we
        # cannot parse has no trustworthy hostname, so report "absent"
        # rather than letting the exception escape into provider routing.
        return ""
    return (host or "").lower().rstrip(".")