mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
feat(azure-foundry): add Microsoft Entra ID auth
Use azure-identity DefaultAzureCredential for keyless Foundry auth. Preserve refreshable callable credentials through OpenAI and Anthropic client paths. Add setup, doctor, auth status, docs, and tests for Entra auth. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
457fa913b8
commit
9df9816dab
38 changed files with 3772 additions and 122 deletions
|
|
@ -9,13 +9,24 @@ TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup"
|
|||
|
||||
|
||||
def detect_provider() -> Optional[str]:
|
||||
"""Resolve the active Hermes runtime provider, or None if unavailable."""
|
||||
"""Resolve the active Hermes runtime provider, or None if unavailable.
|
||||
|
||||
Treats a ``Callable`` ``api_key`` (Azure Foundry Entra ID bearer
|
||||
token provider — see :mod:`agent.azure_identity_adapter`) as a valid
|
||||
credential. Without this, ACP sessions for Entra-configured Foundry
|
||||
deployments silently default to ``"openrouter"`` and the ACP auth
|
||||
handshake rejects the legitimate provider.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
runtime = resolve_runtime_provider()
|
||||
api_key = runtime.get("api_key")
|
||||
provider = runtime.get("provider")
|
||||
if isinstance(api_key, str) and api_key.strip() and isinstance(provider, str) and provider.strip():
|
||||
if not isinstance(provider, str) or not provider.strip():
|
||||
return None
|
||||
is_string_key = isinstance(api_key, str) and api_key.strip()
|
||||
is_callable_provider = callable(api_key) and not isinstance(api_key, str)
|
||||
if is_string_key or is_callable_provider:
|
||||
return provider.strip().lower()
|
||||
except Exception:
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -560,7 +560,16 @@ def init_agent(
|
|||
agent._client_kwargs = {}
|
||||
if not agent.quiet_mode:
|
||||
print(f"🤖 AI Agent initialized with model: {agent.model} (Anthropic native)")
|
||||
if effective_key and len(effective_key) > 12:
|
||||
# ``effective_key`` may be a callable Entra ID bearer
|
||||
# provider for Azure Foundry anthropic_messages mode.
|
||||
# The Anthropic adapter installs an httpx event hook
|
||||
# that mints a fresh JWT per request — we never
|
||||
# invoke or inspect the callable in the banner.
|
||||
from agent.azure_identity_adapter import is_token_provider
|
||||
|
||||
if is_token_provider(effective_key):
|
||||
print("🔑 Using credentials: Microsoft Entra ID")
|
||||
elif isinstance(effective_key, str) and len(effective_key) > 12:
|
||||
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
|
||||
elif agent.api_mode == "bedrock_converse":
|
||||
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
|
||||
|
|
@ -764,12 +773,19 @@ def init_agent(
|
|||
print(f"🤖 AI Agent initialized with model: {agent.model}")
|
||||
if base_url:
|
||||
print(f"🔗 Using custom base URL: {base_url}")
|
||||
# Always show API key info (masked) for debugging auth issues
|
||||
# ``api_key`` may be a callable Entra ID bearer
|
||||
# provider (Azure Foundry). The OpenAI SDK mints a
|
||||
# fresh JWT per request internally — the banner
|
||||
# never invokes or inspects the callable.
|
||||
from agent.azure_identity_adapter import is_token_provider
|
||||
|
||||
key_used = client_kwargs.get("api_key", "none")
|
||||
if key_used and key_used != "dummy-key" and len(key_used) > 12:
|
||||
if is_token_provider(key_used):
|
||||
print("🔑 Using credentials: Microsoft Entra ID")
|
||||
elif isinstance(key_used, str) and key_used and key_used != "dummy-key" and len(key_used) > 12:
|
||||
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
|
||||
else:
|
||||
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
|
||||
print("⚠️ Warning: API key appears invalid or missing")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
||||
|
||||
|
|
@ -1395,7 +1411,12 @@ def init_agent(
|
|||
_ra().logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override)
|
||||
if agent._ollama_num_ctx is None and agent.base_url and is_local_endpoint(agent.base_url):
|
||||
try:
|
||||
_detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=agent.api_key or "")
|
||||
# ``agent.api_key`` may be a callable (Entra token provider).
|
||||
# Ollama detection makes a manual HTTP request and expects a
|
||||
# string — Azure Foundry isn't a local endpoint so this branch
|
||||
# never fires for Entra, but guard defensively.
|
||||
_key_for_ollama = agent.api_key if isinstance(agent.api_key, str) else ""
|
||||
_detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=_key_for_ollama or "")
|
||||
if _detected and _detected > 0:
|
||||
agent._ollama_num_ctx = _detected
|
||||
except Exception as exc:
|
||||
|
|
|
|||
|
|
@ -1390,10 +1390,16 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
|||
_sm_custom_providers = get_compatible_custom_providers(_sm_cfg)
|
||||
except Exception:
|
||||
_sm_custom_providers = None
|
||||
# ``agent.api_key`` may be a callable (Azure Foundry Entra ID
|
||||
# token provider). ``get_model_context_length`` expects a
|
||||
# string for its live-probe paths; for Foundry the context
|
||||
# length normally resolves via config or static catalogs and
|
||||
# never hits a probe, but coerce to empty string defensively.
|
||||
_ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else ""
|
||||
new_context_length = get_model_context_length(
|
||||
agent.model,
|
||||
base_url=agent.base_url,
|
||||
api_key=agent.api_key,
|
||||
api_key=_ctx_api_key,
|
||||
provider=agent.provider,
|
||||
config_context_length=getattr(agent, "_config_context_length", None),
|
||||
custom_providers=_sm_custom_providers,
|
||||
|
|
@ -1402,7 +1408,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
|
|||
model=agent.model,
|
||||
context_length=new_context_length,
|
||||
base_url=agent.base_url,
|
||||
api_key=getattr(agent, "api_key", ""),
|
||||
api_key=agent.api_key, # context_compressor forwards to call_llm; callable preserved
|
||||
provider=agent.provider,
|
||||
api_mode=agent.api_mode,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import os
|
|||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
|
@ -364,7 +365,7 @@ def _normalize_base_url_text(base_url) -> str:
|
|||
def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
"""Return True for non-Anthropic endpoints using the Anthropic Messages API.
|
||||
|
||||
Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
|
||||
Third-party proxies (Microsoft Foundry, AWS Bedrock, self-hosted) authenticate
|
||||
with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
|
||||
detection should be skipped for these endpoints.
|
||||
"""
|
||||
|
|
@ -508,6 +509,29 @@ def _is_minimax_anthropic_endpoint(base_url: str | None) -> bool:
|
|||
)
|
||||
|
||||
|
||||
def _is_azure_anthropic_endpoint(base_url: str | None) -> bool:
    """Tell whether *base_url* is an Azure-hosted Anthropic Messages endpoint.

    A URL qualifies when both of the following hold:

    * its host contains the label run ``services.ai.azure`` (Foundry host
      family) or ``openai.azure`` (legacy Azure OpenAI host family), and
    * its path contains ``/anthropic``.

    Hosts are matched on dotted label runs rather than on a fixed list of
    TLD suffixes, so sovereign / private Azure clouds are recognized too.
    Callers use the result to decide whether the ``api-version`` query
    parameter has to be added.

    Returns ``False`` when the normalized URL is empty.
    """
    url_text = _normalize_base_url_text(base_url)
    if not url_text:
        return False

    parts = urlparse(url_text)
    if "/anthropic" not in (parts.path or "").lower():
        return False

    hostname = (parts.hostname or "").lower().rstrip(".")
    # Surround the host with dots so a family marker also matches when it
    # sits at the very start or the very end of the hostname.
    dotted_host = f".{hostname}."
    family_markers = (".services.ai.azure.", ".openai.azure.")
    return any(marker in dotted_host for marker in family_markers)
|
||||
|
||||
|
||||
def _common_betas_for_base_url(
|
||||
base_url: str | None,
|
||||
*,
|
||||
|
|
@ -523,7 +547,7 @@ def _common_betas_for_base_url(
|
|||
|
||||
The ``context-1m-2025-08-07`` beta is not sent to native Anthropic by
|
||||
default because some subscriptions reject it. Add it only for endpoint
|
||||
families that still require it for 1M context, currently Azure AI Foundry.
|
||||
families that still require it for 1M context, currently Microsoft Foundry.
|
||||
Bedrock uses its own client helper below and opts in explicitly.
|
||||
|
||||
``drop_context_1m_beta=True`` strips the 1M-context beta from any path that
|
||||
|
|
@ -540,8 +564,81 @@ def _common_betas_for_base_url(
|
|||
return betas
|
||||
|
||||
|
||||
def _build_anthropic_client_with_bearer_hook(
    token_provider,
    base_url: str = None,
    timeout: float = None,
    *,
    drop_context_1m_beta: bool = False,
):
    """Create an Anthropic client authenticated through a token provider.

    Counterpart of :func:`build_anthropic_client` for callable (Entra ID)
    credentials. The Anthropic SDK only accepts static ``api_key`` /
    ``auth_token`` strings, so instead of handing it the callable we pass:

    * an HTTP client obtained from
      ``build_bearer_http_client(token_provider, timeout=...)`` — that
      helper lives in ``agent.azure_identity_adapter`` and is responsible
      for putting a fresh ``Authorization: Bearer <jwt>`` header on every
      outbound request;
    * a fixed placeholder ``auth_token``, needed only because the SDK
      refuses to construct without some credential. The placeholder is a
      recognizable sentinel so an accidental leak is easy to spot in logs.

    Args:
        token_provider: Zero-argument callable forwarded to
            ``build_bearer_http_client``.
        base_url: Endpoint URL. A trailing ``/v1`` is removed because the
            SDK appends ``/v1/messages`` itself.
        timeout: Read timeout in seconds. Only a positive number is
            honored; anything else falls back to 900 seconds. The connect
            timeout is always 10 seconds.
        drop_context_1m_beta: Forwarded to ``_common_betas_for_base_url``.

    Returns:
        The object produced by the SDK's ``Anthropic(...)`` constructor.

    Raises:
        ImportError: If ``_get_anthropic_sdk()`` returns ``None``.
    """
    sdk = _get_anthropic_sdk()
    if sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for Azure Foundry Anthropic-style "
            "endpoints with Entra ID auth. Install with: pip install 'anthropic>=0.39.0'"
        )

    normalize_proxy_env_vars()

    import re as _re

    from httpx import Timeout
    from agent.azure_identity_adapter import build_bearer_http_client

    read_seconds = 900.0
    if isinstance(timeout, (int, float)) and timeout > 0:
        read_seconds = timeout
    request_timeout = Timeout(timeout=float(read_seconds), connect=10.0)

    # Remove a trailing /v1 (with or without a final slash).
    target_url = _normalize_base_url_text(base_url)
    if target_url:
        target_url = _re.sub(r"/v1/?$", "", target_url.rstrip("/"))

    client_kwargs = {
        "timeout": request_timeout,
        "http_client": build_bearer_http_client(token_provider, timeout=request_timeout),
        # Placeholder only: the SDK insists on a credential at construction.
        "auth_token": "entra-id-bearer-via-http-hook",
    }

    # Re-check: stripping /v1 may have left an empty string.
    if target_url:
        client_kwargs["base_url"] = target_url
        needs_api_version = (
            _is_azure_anthropic_endpoint(target_url)
            and "api-version" not in target_url
        )
        if needs_api_version:
            # Sent as a default query parameter so the base URL stays intact.
            client_kwargs["default_query"] = {"api-version": "2025-04-15"}

    betas = _common_betas_for_base_url(
        target_url,
        drop_context_1m_beta=drop_context_1m_beta,
    )
    if betas:
        client_kwargs["default_headers"] = {"anthropic-beta": ",".join(betas)}

    return sdk.Anthropic(**client_kwargs)
|
||||
|
||||
|
||||
def build_anthropic_client(
|
||||
api_key: str,
|
||||
api_key,
|
||||
base_url: str = None,
|
||||
timeout: float = None,
|
||||
*,
|
||||
|
|
@ -549,6 +646,17 @@ def build_anthropic_client(
|
|||
):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
``api_key`` accepts either:
|
||||
|
||||
* a static ``str`` — the historical contract for all key-based and
|
||||
OAuth flows.
|
||||
* a ``Callable[[], str]`` — an Entra ID bearer token provider from
|
||||
:mod:`agent.azure_identity_adapter`. The Anthropic SDK itself
|
||||
requires a static string, so when given a callable we construct
|
||||
a custom ``httpx.Client`` with a request event hook that mints a
|
||||
fresh JWT per outbound request and rewrites the ``Authorization``
|
||||
header. The SDK never sees the callable directly.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
connect timeout stays at 10s. Callers pass this from the per-provider /
|
||||
per-model ``request_timeout_seconds`` config so Anthropic-native and
|
||||
|
|
@ -570,6 +678,14 @@ def build_anthropic_client(
|
|||
"Install it with: pip install 'anthropic>=0.39.0'"
|
||||
)
|
||||
|
||||
# Callable api_key → Entra ID bearer provider path. Delegated to a
|
||||
# helper so the existing static-key code below stays unchanged.
|
||||
if callable(api_key) and not isinstance(api_key, str):
|
||||
return _build_anthropic_client_with_bearer_hook(
|
||||
api_key, base_url, timeout,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
)
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
from httpx import Timeout
|
||||
|
|
@ -584,8 +700,7 @@ def build_anthropic_client(
|
|||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
if _is_azure_anthropic_endpoint(normalized_base_url) and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
|
|
@ -615,7 +730,7 @@ def build_anthropic_client(
|
|||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
elif _is_third_party_anthropic_endpoint(base_url):
|
||||
# Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
|
||||
# Third-party proxies (Microsoft Foundry, AWS Bedrock, etc.) use their
|
||||
# own API keys with x-api-key auth. Skip OAuth detection — their keys
|
||||
# don't follow Anthropic's sk-ant-* prefix convention and would be
|
||||
# misclassified as OAuth tokens.
|
||||
|
|
@ -1757,7 +1872,7 @@ def convert_messages_to_anthropic(
|
|||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
|
||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
|
|
@ -2103,5 +2218,3 @@ def build_anthropic_kwargs(
|
|||
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1902,6 +1902,120 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
|
|||
return CodexAuxiliaryClient(real_client, model), model
|
||||
|
||||
|
||||
def _try_azure_foundry(
    *,
    model: Optional[str] = None,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
    api_mode: Optional[str] = None,
) -> Tuple[Optional[Any], Optional[str]]:
    """Build an auxiliary client for the ``azure-foundry`` provider.

    Credentials, base URL and API mode come from
    ``hermes_cli.runtime_provider._resolve_azure_foundry_runtime``, fed
    with the ``model`` section of the loaded config. The resolved
    ``api_key`` is passed on untouched, so it may be either a static
    string or a zero-argument callable (Entra ID bearer-token provider).

    Args:
        model: Deployment / model name. When empty, ``model.default`` from
            the config is used instead.
        explicit_api_key: Forwarded to the runtime resolver.
        explicit_base_url: Forwarded to the runtime resolver.
        api_mode: When set, takes precedence over the resolver's
            ``api_mode``; the final fallback is ``"chat_completions"``.

    Returns:
        ``(client, model)`` on success. ``(None, None)`` when the
        ``hermes_cli`` imports fail, the resolver raises, no credential or
        base URL is resolved, or no model name can be determined.
    """
    try:
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
        from hermes_cli.auth import AuthError
        from hermes_cli.config import load_config
    except ImportError:
        return None, None

    # Config loading is best-effort: any failure leaves an empty section.
    model_cfg: Dict[str, Any] = {}
    try:
        loaded = load_config()
        section = loaded.get("model") if isinstance(loaded, dict) else {}
        if isinstance(section, dict):
            model_cfg = section
    except Exception:
        model_cfg = {}

    try:
        runtime = _resolve_azure_foundry_runtime(
            requested_provider="azure-foundry",
            model_cfg=model_cfg,
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
            target_model=model,
        )
    except AuthError as exc:
        logger.debug("Auxiliary azure-foundry: %s", exc)
        return None, None
    except Exception as exc:
        logger.debug("Auxiliary azure-foundry runtime error: %s", exc)
        return None, None

    credential = runtime.get("api_key")
    endpoint = str(runtime.get("base_url", "") or "")
    effective_mode = api_mode or runtime.get("api_mode") or "chat_completions"

    # A callable credential always counts as present; it is never invoked
    # or truth-tested here. Only None / empty values are rejected.
    credential_present = callable(credential) or bool(credential)
    if not (credential_present and endpoint):
        return None, None

    final_model = _normalize_resolved_model(
        model or str(model_cfg.get("default") or ""),
        "azure-foundry",
    )
    if not final_model:
        # Without a deployment name there is nothing sensible to call, so
        # report "no client" and let the caller try something else.
        logger.debug(
            "Auxiliary azure-foundry: no model resolved (model=%r, default=%r)",
            model, model_cfg.get("default"),
        )
        return None, None

    # Query parameters embedded in the base URL (e.g. api-version) are
    # split off and handed to the SDK as default_query instead.
    clean_endpoint, query_params = _extract_url_query_params(endpoint)
    sdk_options: Dict[str, Any] = {}
    if query_params:
        sdk_options["default_query"] = query_params

    client = OpenAI(api_key=credential, base_url=clean_endpoint, **sdk_options)

    if effective_mode == "codex_responses":
        # Wrap so chat.completions.create() calls go through the
        # Responses-API adapter.
        return CodexAuxiliaryClient(client, final_model), final_model

    if effective_mode == "anthropic_messages":
        # The credential is forwarded as-is (string or callable); the
        # Anthropic wrapper decides how to use it.
        wrapped = _maybe_wrap_anthropic(
            client, final_model, credential,
            endpoint, effective_mode,
        )
        return wrapped, final_model

    # Any other mode: hand back the plain OpenAI client.
    return client, final_model
|
||||
|
||||
|
||||
def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
|
||||
|
|
@ -1957,20 +2071,31 @@ _AUTO_PROVIDER_LABELS = {
|
|||
"_resolve_api_key_provider": "api-key",
|
||||
}
|
||||
|
||||
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
|
||||
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode")
|
||||
|
||||
|
||||
def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]:
|
||||
"""Return a sanitized copy of a live main-runtime override."""
|
||||
def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""Return a sanitized copy of a live main-runtime override.
|
||||
|
||||
Most fields are stripped strings. ``api_key`` may legitimately be a
|
||||
zero-arg callable (Azure Foundry Entra ID token provider) — preserve
|
||||
those as-is so auxiliary clients inherit the same authentication
|
||||
surface as the main agent. The OpenAI SDK accepts ``Callable[[], str]``
|
||||
for ``api_key`` and calls it before every request.
|
||||
"""
|
||||
if not isinstance(main_runtime, dict):
|
||||
return {}
|
||||
normalized: Dict[str, str] = {}
|
||||
normalized: Dict[str, Any] = {}
|
||||
for field in _MAIN_RUNTIME_FIELDS:
|
||||
value = main_runtime.get(field)
|
||||
# Preserve a callable api_key (Entra ID bearer provider) unchanged.
|
||||
if field == "api_key" and callable(value) and not isinstance(value, str):
|
||||
normalized[field] = value
|
||||
continue
|
||||
if isinstance(value, str) and value.strip():
|
||||
normalized[field] = value.strip()
|
||||
provider = normalized.get("provider")
|
||||
if provider:
|
||||
if isinstance(provider, str):
|
||||
normalized["provider"] = provider.lower()
|
||||
return normalized
|
||||
|
||||
|
|
@ -2762,10 +2887,10 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_provider = runtime.get("provider", "")
|
||||
runtime_model = runtime.get("model", "")
|
||||
runtime_base_url = runtime.get("base_url", "")
|
||||
runtime_model = str(runtime.get("model") or "")
|
||||
runtime_base_url = str(runtime.get("base_url") or "")
|
||||
runtime_api_key = runtime.get("api_key", "")
|
||||
runtime_api_mode = runtime.get("api_mode", "")
|
||||
runtime_api_mode = str(runtime.get("api_mode") or "")
|
||||
|
||||
# ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
|
||||
# provider (not 'custom'). This catches the common "env poisoning"
|
||||
|
|
@ -2793,8 +2918,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||
# on aggregators (OpenRouter, Nous) who previously got routed to a
|
||||
# cheap provider-side default. Explicit per-task overrides set via
|
||||
# config.yaml (auxiliary.<task>.provider) still win over this.
|
||||
main_provider = runtime_provider or _read_main_provider()
|
||||
main_model = runtime_model or _read_main_model()
|
||||
main_provider = str(runtime_provider or _read_main_provider() or "")
|
||||
main_model = str(runtime_model or _read_main_model() or "")
|
||||
if (main_provider and main_model
|
||||
and main_provider not in {"auto", ""}):
|
||||
resolved_provider = main_provider
|
||||
|
|
@ -3188,7 +3313,11 @@ def resolve_provider_client(
|
|||
if client is not None:
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
_cbase = str(getattr(client, "base_url", "") or "")
|
||||
_ckey = str(getattr(client, "api_key", "") or "")
|
||||
# ``client.api_key`` may be a callable (Azure Foundry Entra
|
||||
# bearer provider). Pass empty string for the wrapper-detection
|
||||
# path — wrapping decisions are based on base_url + api_mode.
|
||||
_raw_ckey = getattr(client, "api_key", "")
|
||||
_ckey = "" if (callable(_raw_ckey) and not isinstance(_raw_ckey, str)) else str(_raw_ckey or "")
|
||||
client = _wrap_if_needed(client, final_model, _cbase, _ckey)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
|
@ -3300,6 +3429,40 @@ def resolve_provider_client(
|
|||
except ImportError:
|
||||
pass
|
||||
|
||||
# ── Azure Foundry (delegates to runtime resolver for auth_mode-aware routing) ─
|
||||
#
|
||||
# The generic PROVIDER_REGISTRY path below uses
|
||||
# ``resolve_api_key_provider_credentials`` which only knows about the
|
||||
# static ``AZURE_FOUNDRY_API_KEY`` env var. That misses two important
|
||||
# cases for the ``azure-foundry`` provider:
|
||||
#
|
||||
# 1. ``model.auth_mode: entra_id`` — no static key exists; we need
|
||||
# a callable bearer-token provider from ``azure_identity_adapter``.
|
||||
# 2. Non-default ``model.base_url`` (Foundry projects path) — the
|
||||
# env-var-only resolver doesn't apply config-yaml-driven URL
|
||||
# overrides.
|
||||
#
|
||||
# Delegate to the same runtime resolver the main agent uses so
|
||||
# auxiliary tasks (title generation, compression, vision, embedding,
|
||||
# session search) inherit the user's full Azure config.
|
||||
if provider == "azure-foundry":
|
||||
client, default_model = _try_azure_foundry(
|
||||
model=model,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
api_mode=api_mode,
|
||||
)
|
||||
if client is None:
|
||||
logger.warning(
|
||||
"resolve_provider_client: azure-foundry requested but "
|
||||
"runtime resolution failed (run: hermes doctor for "
|
||||
"diagnostics)"
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── API-key providers from PROVIDER_REGISTRY ─────────────────────
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
|
|
|
|||
555
agent/azure_identity_adapter.py
Normal file
555
agent/azure_identity_adapter.py
Normal file
|
|
@ -0,0 +1,555 @@
|
|||
"""Microsoft Entra ID adapter for Microsoft Foundry.
|
||||
|
||||
Provides keyless authentication for Microsoft Foundry deployments using the
|
||||
`azure-identity` SDK's `DefaultAzureCredential` chain (env service principal
|
||||
→ workload identity → managed identity → VS Code → Azure CLI → azd →
|
||||
PowerShell → broker).
|
||||
|
||||
Architecture mirrors `agent/bedrock_adapter.py`:
|
||||
|
||||
* Lazy import. `azure-identity` is only loaded when ``model.auth_mode =
|
||||
entra_id`` is selected. Users who stick with `AZURE_FOUNDRY_API_KEY`
|
||||
never pay the import cost.
|
||||
* SDK-callable contract. The public entry point ``build_token_provider``
|
||||
returns a zero-arg callable produced by ``get_bearer_token_provider`` —
|
||||
this is exactly the value Microsoft's documented sample plugs into
|
||||
``OpenAI(api_key=token_provider, base_url=...)``. The OpenAI SDK calls
|
||||
it before every request, so token refresh is transparent.
|
||||
* Three explicit consumer-side helpers (display / cache / http-bearer)
|
||||
rather than one generic "materialize" function — splitting them by
|
||||
purpose prevents accidental token-minting in logging paths or token
|
||||
leakage into cache keys / dashboard JSON.
|
||||
* No persisted JWT. ``azure-identity`` caches in-process and (where
|
||||
available) in the OS keychain or ``~/.IdentityService``. Hermes does
|
||||
not duplicate that storage in ``auth.json``.
|
||||
|
||||
Reference: https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
|
||||
|
||||
Requires: ``azure-identity`` (optional dependency — only needed when
|
||||
``model.auth_mode = entra_id``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Microsoft-documented scope for Foundry inference auth. Both the new
|
||||
# Foundry portal and the legacy Azure OpenAI managed-identity docs use
|
||||
# this scope for ALL Foundry endpoint shapes (*.openai.azure.com,
|
||||
# *.services.ai.azure.com, *.ai.azure.com). The older control-plane
|
||||
# scope ``https://cognitiveservices.azure.com/.default`` is for ARM
|
||||
# resource management and is rejected for inference by newer
|
||||
# resources — users with that requirement override via
|
||||
# ``model.entra.scope`` in config.yaml.
|
||||
SCOPE_AI_AZURE_DEFAULT = "https://ai.azure.com/.default"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lazy SDK import — only loaded when the Entra path is actually used.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_AZURE_IDENTITY_FEATURE = "provider.azure_identity"
|
||||
|
||||
|
||||
def has_azure_identity_installed() -> bool:
    """Report whether ``azure-identity`` is importable in this process.

    The check only attempts the import; no credential object is created.
    Any exception raised by the import — not just ``ImportError`` — is
    treated as "not installed".
    """
    try:
        import azure.identity  # noqa: F401
    except Exception:
        return False
    return True
|
||||
|
||||
|
||||
def _require_azure_identity():
|
||||
"""Import ``azure.identity``, lazy-installing it if allowed.
|
||||
|
||||
Raises ``ImportError`` with a clear actionable message when the
|
||||
package is missing and lazy installs are disabled.
|
||||
"""
|
||||
try:
|
||||
import azure.identity as _ai
|
||||
return _ai
|
||||
except ImportError:
|
||||
try:
|
||||
from tools.lazy_deps import ensure, FeatureUnavailable
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"The 'azure-identity' package is required for Azure AI "
|
||||
"Foundry Entra ID authentication. Install it with: "
|
||||
"pip install azure-identity"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
ensure(_AZURE_IDENTITY_FEATURE, prompt=False)
|
||||
except FeatureUnavailable as exc:
|
||||
raise ImportError(
|
||||
"The 'azure-identity' package is required for Azure AI "
|
||||
"Foundry Entra ID authentication. " + str(exc)
|
||||
) from exc
|
||||
|
||||
# Retry import after lazy install.
|
||||
import azure.identity as _ai # noqa: WPS440
|
||||
return _ai
|
||||
|
||||
|
||||
def reset_credential_cache() -> None:
    """Discard whatever ``build_credential`` currently has cached.

    Intended for tests and profile switches. The function looks up a
    ``cache_clear`` attribute on ``build_credential`` and calls it when it
    is callable. If the attribute is absent — for example because a test
    has replaced ``build_credential`` with a plain function — this is a
    silent no-op.
    """
    clear = getattr(build_credential, "cache_clear", None)
    if not callable(clear):
        return
    clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Token-provider construction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EntraIdentityConfig:
    """Serializable Entra ID config.

    Captures the Hermes-managed Entra knobs we need outside Azure SDK
    environment configuration. Everything else (tenant ID, service
    principal secret, federated token file, sovereign cloud authority,
    etc.) flows through azure-identity's standard ``AZURE_*`` env vars —
    see the Bedrock pattern in ``hermes_cli/runtime_provider.py`` for the
    analogous "let the SDK read env" approach.

    ``scope`` is Microsoft's documented Foundry inference audience. Almost
    everyone uses the default; sovereign-cloud / non-standard tenants can
    override via ``model.entra.scope``. Identity selection (user-assigned
    managed identity, workload identity, service principal, tenant,
    authority) stays in the standard Azure SDK env vars such as
    ``AZURE_CLIENT_ID``.

    ``exclude_interactive_browser`` is kept as an internal constructor knob
    so probes stay non-interactive by default. It is not written by the
    setup wizard.

    The dataclass is frozen so it's hashable for ``functools.lru_cache``
    keying, and it holds only plain values so it can be passed to worker
    processes, which rebuild the credential on their side.
    """

    # Token audience requested from the credential; blank values are
    # normalised to the default in ``__post_init__``.
    scope: str = SCOPE_AI_AZURE_DEFAULT
    # True keeps the interactive-browser credential out of the chain.
    exclude_interactive_browser: bool = True

    def __post_init__(self) -> None:
        """Normalise ``scope``: ``None`` / blank / whitespace becomes the default."""
        scope = str(self.scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
        # The instance is frozen, so bypass the generated ``__setattr__``.
        object.__setattr__(self, "scope", scope)

    def to_dict(self) -> Dict[str, Any]:
        """Return the config as a plain dict (inverse of :meth:`from_dict`)."""
        return {
            "scope": self.scope,
            "exclude_interactive_browser": self.exclude_interactive_browser,
        }

    @classmethod
    def from_dict(cls, data: Optional[Dict[str, Any]],
                  *, default_scope: Optional[str] = None) -> "EntraIdentityConfig":
        """Build a config from a (possibly empty or ``None``) mapping.

        Args:
            data: mapping with optional ``scope`` and
                ``exclude_interactive_browser`` keys; ``None`` means empty.
            default_scope: scope used when ``data`` carries no usable one;
                falls back to ``SCOPE_AI_AZURE_DEFAULT`` when falsy.

        Returns:
            A new :class:`EntraIdentityConfig`.
        """
        data = data or {}
        scope = str(data.get("scope") or "").strip() or default_scope or SCOPE_AI_AZURE_DEFAULT
        raw_exclude = data.get("exclude_interactive_browser")
        # A missing key AND an explicit null (e.g. an empty YAML value) both
        # keep the safe default. Previously ``bool(None)`` evaluated to
        # False, which silently opted in to interactive browser auth.
        exclude_browser = True if raw_exclude is None else bool(raw_exclude)
        return cls(
            scope=scope,
            exclude_interactive_browser=exclude_browser,
        )
|
||||
|
||||
|
||||
def _build_default_credential(config: EntraIdentityConfig) -> Any:
    """Create a fresh ``DefaultAzureCredential`` matching ``config``.

    Only the Hermes-selected knob is forwarded as a keyword argument.
    Tenant, service-principal secret, federated token file, sovereign-cloud
    authority and the like are left for ``azure-identity`` to read from the
    standard ``AZURE_*`` environment variables (configured by the user in
    ``~/.hermes/.env`` or the deployment environment).

    Args:
        config: the Entra settings; only ``exclude_interactive_browser``
            is consulted here.

    Returns:
        A new ``azure.identity.DefaultAzureCredential`` instance.

    Raises:
        ImportError: propagated from ``_require_azure_identity`` when the
            package is unavailable.
    """
    azure_identity = _require_azure_identity()
    if config.exclude_interactive_browser:
        # SDK default is True (browser excluded), so pass nothing.
        return azure_identity.DefaultAzureCredential()
    # The caller explicitly opted in to interactive browser auth.
    return azure_identity.DefaultAzureCredential(
        exclude_interactive_browser_credential=False,
    )
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=1)
def build_credential(config: EntraIdentityConfig) -> Any:
    """Return the memoised ``DefaultAzureCredential`` for ``config``.

    The cache holds a single entry (``maxsize=1``). The intent is that a
    Hermes process uses one Entra config at a time — the ``model.entra.*``
    block in config.yaml — so one slot matches the usage pattern and keeps
    the cache trivially small.

    ``EntraIdentityConfig`` is a frozen dataclass and therefore hashable,
    which is what ``functools.lru_cache`` needs for its key.

    Passing a different config evicts the previous entry; each call still
    returns a credential built for the config it was given. Use
    :func:`reset_credential_cache` to clear the cache (e.g. in tests).
    """
    credential = _build_default_credential(config)
    return credential
|
||||
|
||||
|
||||
def build_token_provider(scope: Optional[str] = None,
                         *,
                         config: Optional[EntraIdentityConfig] = None,
                         base_url: Optional[str] = None,
                         exclude_interactive_browser: bool = True,
                         ) -> Callable[[], str]:
    """Build a zero-argument callable that yields an Entra bearer token.

    The result comes from ``azure.identity.get_bearer_token_provider`` and
    can be handed to an OpenAI client as ``api_key``::

        from openai import OpenAI
        client = OpenAI(
            base_url="https://my-resource.openai.azure.com/openai/v1/",
            api_key=build_token_provider(),
        )

    Scope resolution order:
      1. ``config.scope`` when a config object is supplied
      2. explicit ``scope`` kwarg
      3. ``SCOPE_AI_AZURE_DEFAULT``

    Args:
        scope: token audience, used only when ``config`` is ``None``.
        config: ready-made Entra settings; takes precedence over ``scope``
            and ``exclude_interactive_browser``.
        base_url: unused; accepted for backward compatibility.
        exclude_interactive_browser: forwarded into the config that is
            built when ``config`` is ``None``.

    Returns:
        The callable token provider. It wraps a live credential object, so
        for multiprocessing workers pass the ``EntraIdentityConfig`` across
        and rebuild the provider inside the worker instead.

    Raises:
        ImportError: when ``azure-identity`` is unavailable.
    """
    azure_identity = _require_azure_identity()
    effective_config = config
    if effective_config is None:
        # No config object given: assemble one from the loose arguments.
        effective_config = EntraIdentityConfig(
            scope=scope or SCOPE_AI_AZURE_DEFAULT,
            exclude_interactive_browser=exclude_interactive_browser,
        )
    shared_credential = build_credential(effective_config)
    return azure_identity.get_bearer_token_provider(
        shared_credential,
        effective_config.scope,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credential probing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def has_azure_identity_credentials(scope: Optional[str] = None,
                                   *,
                                   config: Optional[EntraIdentityConfig] = None,
                                   timeout_seconds: float = 10.0,
                                   allow_install: bool = True,
                                   **overrides: Any) -> bool:
    """Best-effort probe: can `DefaultAzureCredential` mint a token now?

    Runs ``credential.get_token(scope)`` under a thread-based timeout so
    a slow token service can't hang the caller. Returns False on any
    error — never raises. Use for ``hermes doctor`` /
    ``hermes auth status`` / wizard preflight.

    Args:
        scope: token audience used when ``config`` is ``None``; blank
            values fall back to ``SCOPE_AI_AZURE_DEFAULT``.
        config: ready-made Entra settings; when given, ``scope`` and
            ``overrides`` are ignored.
        timeout_seconds: how long to wait for the probe thread (a floor of
            0.01s is applied).
        allow_install: when True (default) and ``azure-identity`` is not
            importable, the adapter triggers the standard lazy-install
            path (subject to ``security.allow_lazy_installs``) before
            probing. Set False to make this strictly an "is installed?"
            check — used on hot paths like CLI startup where we never want
            pip to run.
        **overrides: extra ``EntraIdentityConfig`` fields applied when the
            config is built here. Unknown or duplicate keys make the probe
            return False instead of raising.

    Returns:
        True only when a non-empty token was obtained within the timeout.

    NOT used by ``is_provider_configured()`` — that path is structural
    only (no token mint), so CLI startup doesn't pay this latency.
    """
    if not has_azure_identity_installed():
        if not allow_install:
            return False
        try:
            _require_azure_identity()
        except Exception as exc:
            # The installed-check treats ANY import failure as "missing",
            # so a broken install can raise something other than
            # ImportError here; honour the never-raises contract.
            logger.debug("azure-identity lazy install unavailable: %s", exc)
            return False
    if config is None:
        effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
        try:
            config = EntraIdentityConfig(scope=effective_scope, **overrides)
        except TypeError as exc:
            # Unknown or duplicate override keyword: report "no
            # credentials" rather than crashing a diagnostic caller.
            logger.debug("Invalid Entra config overrides: %s", exc)
            return False

    # Mutable holder the worker thread writes its verdict into.
    result = {"ok": False}

    def _probe() -> None:
        try:
            credential = build_credential(config)
            tok = credential.get_token(config.scope)
            result["ok"] = bool(getattr(tok, "token", None))
        except Exception as exc:
            logger.debug("Entra credential probe failed: %s", exc)
            result["ok"] = False

    # Daemon thread: a probe that never returns must not block interpreter exit.
    thread = threading.Thread(target=_probe, daemon=True)
    thread.start()
    thread.join(timeout=max(0.01, timeout_seconds))
    if thread.is_alive():
        logger.debug("Entra token service probe timed out after %ss", timeout_seconds)
        return False
    return bool(result.get("ok"))
|
||||
|
||||
|
||||
def describe_active_credential(config: Optional[EntraIdentityConfig] = None,
                               *,
                               scope: Optional[str] = None,
                               timeout_seconds: float = 10.0,
                               allow_install: bool = True,
                               **overrides: Any) -> Dict[str, Any]:
    """Return diagnostic info about the active credential chain.

    Best-effort: runs ``get_token()`` and inspects what came back.
    Designed for ``hermes doctor`` and the wizard preflight — never
    raises, returns ``{"ok": False, "error": ...}`` on failure.

    Args:
        config: ready-made Entra settings; when given, ``scope`` and
            ``overrides`` are ignored.
        scope: token audience used when ``config`` is ``None``; blank
            values fall back to ``SCOPE_AI_AZURE_DEFAULT``.
        timeout_seconds: how long to wait for the probe thread (a floor of
            0.01s is applied).
        allow_install: when True (default) and ``azure-identity`` is not
            importable, the adapter triggers the standard lazy-install
            path (subject to ``security.allow_lazy_installs``) before
            probing. The install failure is surfaced as the diagnostic
            error when it fails. Set False for hot CLI paths that should
            never trigger pip.
        **overrides: extra ``EntraIdentityConfig`` fields applied when the
            config is built here. Unknown or duplicate keys are reported
            through ``error`` instead of raising.

    Returns:
        A dict that always has ``ok``. Depending on how far the probe
        got it also carries ``error``, ``hint``, ``scope``,
        ``tenant_id_env``, ``env_sources`` and, on success,
        ``expires_on``.

    ``azure-identity`` doesn't expose the winning inner credential as
    a public field, so we report a coarse picture (env vars present,
    token expiry) rather than the credential class name. Users wanting
    the precise class can run with ``AZURE_LOG_LEVEL=DEBUG``.
    """
    info: Dict[str, Any] = {"ok": False}
    if not has_azure_identity_installed():
        if not allow_install:
            info["error"] = "azure-identity not installed"
            info["hint"] = (
                "pip install azure-identity (or rely on lazy install at "
                "first use)"
            )
            return info
        try:
            _require_azure_identity()
        except Exception as exc:
            # The installed-check treats ANY import failure as "missing",
            # so a broken install can raise something other than
            # ImportError here; honour the never-raises contract.
            info["error"] = str(exc) or "azure-identity not installed"
            info["hint"] = (
                "pip install azure-identity manually, or enable lazy "
                "installs (security.allow_lazy_installs: true in "
                "config.yaml)."
            )
            return info

    if config is None:
        effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
        try:
            config = EntraIdentityConfig(scope=effective_scope, **overrides)
        except TypeError as exc:
            # Unknown or duplicate override keyword: report it as a
            # diagnostic instead of crashing the caller.
            info["error"] = f"invalid Entra config overrides: {exc}"
            return info

    info["scope"] = config.scope
    # Tenant / authority / service-principal config flow through the
    # standard ``AZURE_*`` env vars; surface them below.
    if os.environ.get("AZURE_TENANT_ID", "").strip():
        info["tenant_id_env"] = os.environ["AZURE_TENANT_ID"].strip()

    # Surface which env-var sources are present without minting yet.
    env_sources = []
    if os.environ.get("AZURE_FEDERATED_TOKEN_FILE", "").strip():
        env_sources.append("WorkloadIdentityCredential (AZURE_FEDERATED_TOKEN_FILE)")
    if (os.environ.get("AZURE_CLIENT_ID", "").strip()
            and os.environ.get("AZURE_CLIENT_SECRET", "").strip()
            and os.environ.get("AZURE_TENANT_ID", "").strip()):
        env_sources.append("EnvironmentCredential (client secret)")
    if os.environ.get("IDENTITY_ENDPOINT", "").strip() or os.environ.get("MSI_ENDPOINT", "").strip():
        env_sources.append("ManagedIdentityCredential (IDENTITY_ENDPOINT)")
    info["env_sources"] = env_sources

    # Now try minting. The worker thread reports through this holder.
    result: Dict[str, Any] = {}

    def _probe() -> None:
        try:
            credential = build_credential(config)
            tok = credential.get_token(config.scope)
            result["token"] = tok
        except Exception as exc:
            result["error"] = str(exc)

    # Daemon thread: a probe that never returns must not block interpreter exit.
    thread = threading.Thread(target=_probe, daemon=True)
    thread.start()
    thread.join(timeout=max(0.01, timeout_seconds))
    if thread.is_alive():
        # ``:g`` keeps "10s" for whole seconds but no longer rounds a
        # sub-second timeout down to a misleading "0s".
        info["error"] = f"Token probe timed out after {timeout_seconds:g}s"
        info["hint"] = (
            "DefaultAzureCredential can be slow when the token service is unreachable "
            "or when az login state is stale. Try `az login` or set "
            "AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET."
        )
        return info

    if "error" in result:
        info["error"] = result["error"]
        return info

    token = result.get("token")
    if token is None:
        info["error"] = "credential chain exhausted"
        return info

    info["ok"] = True
    info["expires_on"] = getattr(token, "expires_on", None)
    return info
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Consumer-side helpers — split by purpose to prevent accidental token
|
||||
# minting in logging / cache-key / dashboard paths.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_token_provider(value: Any) -> bool:
    """Tell whether ``value`` should be treated as a callable token provider.

    Consumers call this where they must choose between string-API-key
    handling and bearer-callable handling. Strings are rejected up front;
    anything else qualifies exactly when it is callable.
    """
    if isinstance(value, str):
        return False
    return callable(value)
|
||||
|
||||
|
||||
def materialize_bearer_for_http(value: Any) -> str:
    """Resolve ``value`` to a bearer token string for a hand-built HTTP request.

    Meant for call sites that assemble an ``Authorization`` header
    themselves instead of going through the OpenAI SDK (e.g.
    ``hermes_cli/azure_detect.py``). A token provider is invoked exactly
    once; a plain non-empty string is returned unchanged.

    **Anthropic SDK integration:** :func:`build_bearer_http_client` wires
    this function into an ``httpx`` request hook so the ``Authorization``
    header is rewritten per request; that client is then passed to the
    Anthropic SDK via ``http_client=...``.

    Args:
        value: a zero-arg callable returning a token, or a string key.

    Returns:
        The token (or the string itself).

    Raises:
        ValueError: if the provider returns something that is not a
            non-empty string, or if ``value`` is neither a token provider
            nor a non-empty string.
    """
    if not is_token_provider(value):
        # Static credential path: accept only a non-empty string.
        if isinstance(value, str) and value:
            return value
        raise ValueError("no usable api_key / token provider")

    minted = value()
    if isinstance(minted, str) and minted:
        return minted
    raise ValueError("token provider returned empty value")
|
||||
|
||||
|
||||
def build_bearer_http_client(token_provider: Callable[[], str], **httpx_kwargs: Any) -> Any:
    """Return an ``httpx.Client`` that mints a fresh Entra bearer JWT
    per outbound request.

    The Anthropic SDK (≤ 0.86.0 at the time of writing) stores
    ``api_key`` / ``auth_token`` as static strings and computes the
    ``Authorization`` header at construction time. To get per-request
    token refresh (the Microsoft-recommended Foundry pattern for
    callable bearer providers), we install an httpx ``request`` event
    hook on a custom client and pass that client to the SDK via
    ``http_client=...``. The hook:

    1. Calls :func:`materialize_bearer_for_http` to obtain the token
       from ``token_provider``.
    2. Strips any pre-set ``Authorization`` / ``api-key`` /
       ``x-api-key`` headers the SDK may have added (avoids
       conflicting auth values).
    3. Sets ``Authorization: Bearer <fresh-jwt>``.

    When step 1 raises ``ValueError`` (the provider returned an empty or
    non-string value), the hook logs a warning, still strips the auth
    headers and sends the request without ``Authorization``.

    Args:
        token_provider: zero-arg callable returning a string — typically
            the result of :func:`build_token_provider`.
        **httpx_kwargs: forwarded verbatim to ``httpx.Client(...)`` so
            callers can attach a ``timeout``, ``transport``, ``proxy``,
            etc.

    Returns:
        The configured ``httpx.Client``.

    Raises:
        ValueError: if ``token_provider`` is not a callable token provider.
        ImportError: if ``httpx`` is not installed (it is a transitive
            dependency of both ``openai`` and ``anthropic`` SDKs, so in
            practice always available when this helper is reached).
    """
    if not is_token_provider(token_provider):
        raise ValueError(
            "build_bearer_http_client requires a zero-arg callable "
            "token provider"
        )

    try:
        import httpx
    except ImportError as exc:  # pragma: no cover — httpx ships with openai/anthropic
        raise ImportError(
            "httpx is required for Entra ID bearer auth on Microsoft Foundry "
            "Anthropic-style endpoints. It is normally a transitive "
            "dependency of the openai/anthropic SDKs."
        ) from exc

    # httpx header names are case-insensitive, so one spelling per header
    # removes every case variant.
    auth_header_names = ("Authorization", "api-key", "x-api-key")

    def _strip_auth_headers(request: "httpx.Request") -> None:
        """Remove every auth header the SDK may have set on ``request``."""
        for header_name in auth_header_names:
            request.headers.pop(header_name, None)

    def _inject_bearer(request: "httpx.Request") -> None:
        """Request hook: replace the request's auth headers with a fresh bearer."""
        try:
            token = materialize_bearer_for_http(token_provider)
        except ValueError as exc:
            # The provider produced no usable token. Strip any auth
            # headers the SDK may have set — including our own
            # placeholder sentinel ``entra-id-bearer-via-http-hook`` from
            # ``_build_anthropic_client_with_bearer_hook`` — so the
            # outbound request hits Azure with NO Authorization rather
            # than with the placeholder. Azure returns a clean 401
            # "missing auth" that is easier to diagnose than a 401
            # against the sentinel string, and the sentinel never
            # appears in upstream access logs.
            #
            # Only ``ValueError`` is handled here; any other exception
            # raised by the provider propagates out of the hook.
            # NOTE(review): credential-chain failures from azure-identity
            # may not be ``ValueError`` — confirm whether they should
            # also take this path.
            #
            # Log at WARNING (not DEBUG) so the misconfiguration is
            # visible at default log levels.
            logger.warning(
                "Bearer hook: Entra ID token provider returned empty (%s) "
                "— stripping Authorization headers. Azure will respond 401. "
                "Run `hermes doctor` or `az login` to recover.",
                exc,
            )
            _strip_auth_headers(request)
            return
        _strip_auth_headers(request)
        request.headers["Authorization"] = f"Bearer {token}"

    return httpx.Client(
        event_hooks={"request": [_inject_bearer]},
        **httpx_kwargs,
    )
|
||||
|
||||
|
||||
# Public API of this module (alphabetical).
__all__ = [
    "EntraIdentityConfig",
    "SCOPE_AI_AZURE_DEFAULT",
    "build_bearer_http_client",
    "build_credential",
    "build_token_provider",
    "describe_active_credential",
    "has_azure_identity_credentials",
    "has_azure_identity_installed",
    "is_token_provider",
    "materialize_bearer_for_http",
    "reset_credential_cache",
]
|
||||
|
|
@ -866,9 +866,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
# the fallback activation drops to 128K even when config says 204800.
|
||||
if hasattr(agent, 'context_compressor') and agent.context_compressor:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
# ``agent.api_key`` may be callable (Entra ID); the
|
||||
# context-length resolver expects a string for live
|
||||
# probes. Foundry typically resolves via config/static
|
||||
# catalogs anyway, so coerce defensively.
|
||||
_fb_ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else ""
|
||||
fb_context_length = get_model_context_length(
|
||||
agent.model, base_url=agent.base_url,
|
||||
api_key=agent.api_key, provider=agent.provider,
|
||||
api_key=_fb_ctx_api_key, provider=agent.provider,
|
||||
config_context_length=getattr(agent, "_config_context_length", None),
|
||||
custom_providers=getattr(agent, "_custom_providers", None),
|
||||
)
|
||||
|
|
@ -876,7 +881,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
|
|||
model=agent.model,
|
||||
context_length=fb_context_length,
|
||||
base_url=agent.base_url,
|
||||
api_key=getattr(agent, "api_key", ""),
|
||||
api_key=getattr(agent, "api_key", ""), # callable preserved → call_llm
|
||||
provider=agent.provider,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -486,7 +486,7 @@ class ContextCompressor(ContextEngine):
|
|||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
api_key: Any = "",
|
||||
provider: str = "",
|
||||
api_mode: str = "",
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -103,7 +103,15 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
|||
return
|
||||
|
||||
aux_base_url = str(getattr(client, "base_url", ""))
|
||||
aux_api_key = str(getattr(client, "api_key", ""))
|
||||
# ``client.api_key`` may be a callable (Azure Foundry Entra ID
|
||||
# bearer provider). The context-length resolver chain expects a
|
||||
# string, but it only needs a key for live catalogue probes
|
||||
# (provider model lists). For Entra clients the model-metadata
|
||||
# chain still resolves via models.dev + hardcoded family
|
||||
# fallbacks, which don't require auth — pass empty string rather
|
||||
# than minting a bearer JWT just to look up a context length.
|
||||
_raw_aux_key = getattr(client, "api_key", "")
|
||||
aux_api_key = "" if (callable(_raw_aux_key) and not isinstance(_raw_aux_key, str)) else str(_raw_aux_key or "")
|
||||
|
||||
aux_context = get_model_context_length(
|
||||
aux_model,
|
||||
|
|
|
|||
|
|
@ -1807,7 +1807,11 @@ def run_conversation(
|
|||
# that survives message/tool sanitization (#6843).
|
||||
_credential_sanitized = False
|
||||
_raw_key = getattr(agent, "api_key", None) or ""
|
||||
if _raw_key:
|
||||
# Entra ID bearer providers are callables — their
|
||||
# minted JWTs are always ASCII, so no sanitization
|
||||
# is needed (and ``_strip_non_ascii`` would crash
|
||||
# on a callable input).
|
||||
if _raw_key and isinstance(_raw_key, str):
|
||||
_clean_key = _strip_non_ascii(_raw_key)
|
||||
if _clean_key != _raw_key:
|
||||
agent.api_key = _clean_key
|
||||
|
|
@ -2080,15 +2084,26 @@ def run_conversation(
|
|||
):
|
||||
anthropic_auth_retry_attempted = True
|
||||
from agent.anthropic_adapter import _is_oauth_token
|
||||
from agent.azure_identity_adapter import is_token_provider
|
||||
if agent._try_refresh_anthropic_client_credentials():
|
||||
print(f"{agent.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
|
||||
continue
|
||||
# Credential refresh didn't help — show diagnostic info
|
||||
key = agent._anthropic_api_key
|
||||
auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
|
||||
print(f"{agent.log_prefix}🔐 Anthropic 401 — authentication failed.")
|
||||
print(f"{agent.log_prefix} Auth method: {auth_method}")
|
||||
print(f"{agent.log_prefix} Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{agent.log_prefix} Token: (empty or short)")
|
||||
if is_token_provider(key):
|
||||
# Azure Foundry Entra ID — the bearer token is
|
||||
# minted per-request by an httpx event hook on a
|
||||
# custom http_client passed to the SDK. The 401
|
||||
# means Azure rejected the JWT (RBAC role missing,
|
||||
# az login expired, IMDS unreachable, etc.).
|
||||
print(f"{agent.log_prefix} Auth method: Microsoft Entra ID (httpx event hook)")
|
||||
print(f"{agent.log_prefix} Run `hermes doctor` for credential-chain diagnostics, or")
|
||||
print(f"{agent.log_prefix} `az login` if your developer session expired.")
|
||||
else:
|
||||
auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
|
||||
print(f"{agent.log_prefix} Auth method: {auth_method}")
|
||||
print(f"{agent.log_prefix} Token prefix: {key[:12]}..." if isinstance(key, str) and len(key) > 12 else f"{agent.log_prefix} Token: (empty or short)")
|
||||
print(f"{agent.log_prefix} Troubleshooting:")
|
||||
from hermes_constants import display_hermes_home as _dhh_fn
|
||||
_dhh = _dhh_fn()
|
||||
|
|
|
|||
|
|
@ -862,13 +862,32 @@ class BatchRunner:
|
|||
"last_updated": None
|
||||
}
|
||||
|
||||
# Prepare configuration for workers
|
||||
# Prepare configuration for workers.
|
||||
#
|
||||
# ``self.api_key`` may be a zero-arg callable (Azure Foundry Entra ID
|
||||
# bearer provider returned by ``agent.azure_identity_adapter``). Such
|
||||
# closures are not safely picklable across the multiprocessing.Pool
|
||||
# boundary. Drop the callable here and let each worker rebuild its
|
||||
# own provider via ``resolve_runtime_provider()``, which reads
|
||||
# ``model.auth_mode`` from ``config.yaml`` and constructs a fresh
|
||||
# token provider in the worker process (azure-identity caches
|
||||
# in-process so each worker gets its own short-lived cache).
|
||||
if callable(self.api_key) and not isinstance(self.api_key, str):
|
||||
worker_api_key = None
|
||||
print(
|
||||
"ℹ️ Detected Entra ID bearer provider — workers will rebuild "
|
||||
"credentials from config.yaml in each process.",
|
||||
flush=True,
|
||||
)
|
||||
else:
|
||||
worker_api_key = self.api_key
|
||||
|
||||
config = {
|
||||
"distribution": self.distribution,
|
||||
"model": self.model,
|
||||
"max_iterations": self.max_iterations,
|
||||
"base_url": self.base_url,
|
||||
"api_key": self.api_key,
|
||||
"api_key": worker_api_key,
|
||||
"verbose": self.verbose,
|
||||
"ephemeral_system_prompt": self.ephemeral_system_prompt,
|
||||
"log_prefix_chars": self.log_prefix_chars,
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ model:
|
|||
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
# "azure-foundry" - Microsoft Foundry / Azure OpenAI (API key or Entra ID)
|
||||
# "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
|
||||
#
|
||||
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||
|
|
@ -45,6 +46,14 @@ model:
|
|||
# api_key: "your-key-here" # Uncomment to set here instead of .env
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
|
||||
# Azure Foundry keyless auth example:
|
||||
# provider: "azure-foundry"
|
||||
# base_url: "https://<resource>.openai.azure.com/openai/v1"
|
||||
# auth_mode: "entra_id" # DefaultAzureCredential: az login, managed identity, workload identity, etc.
|
||||
# default: "gpt-4o" # Deployment/model name
|
||||
# entra:
|
||||
# scope: "https://ai.azure.com/.default" # Optional; this is the default.
|
||||
|
||||
# ── Token limits — two settings, easy to confuse ──────────────────────────
|
||||
#
|
||||
# context_length: TOTAL context window (input + output tokens combined).
|
||||
|
|
|
|||
18
cli.py
18
cli.py
|
|
@ -4251,7 +4251,13 @@ class HermesCLI:
|
|||
resolved_acp_command = runtime.get("command")
|
||||
resolved_acp_args = list(runtime.get("args") or [])
|
||||
resolved_credential_pool = runtime.get("credential_pool")
|
||||
if not isinstance(api_key, str) or not api_key:
|
||||
# A callable api_key is a bearer-token provider (Azure Foundry
|
||||
# Entra ID — ``azure_identity_adapter.build_token_provider``).
|
||||
# The OpenAI SDK accepts ``Callable[[], str]`` for ``api_key`` and
|
||||
# invokes it before every request. Skip the string-only validation
|
||||
# and placeholder substitution for callables.
|
||||
_is_callable_provider = callable(api_key) and not isinstance(api_key, str)
|
||||
if not _is_callable_provider and (not isinstance(api_key, str) or not api_key):
|
||||
# Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often
|
||||
# don't require authentication. When a base_url IS configured but
|
||||
# no API key was found, use a placeholder so the OpenAI SDK
|
||||
|
|
@ -5723,7 +5729,15 @@ class HermesCLI:
|
|||
config_path = project_config_path
|
||||
config_status = "(loaded)" if config_path.exists() else "(not found)"
|
||||
|
||||
api_key_display = '********' + self.api_key[-4:] if self.api_key and len(self.api_key) > 4 else 'Not set!'
|
||||
# ``self.api_key`` may be a callable (Azure Foundry Entra ID bearer
|
||||
# provider). Never invoke it; just identify the auth surface.
|
||||
from agent.azure_identity_adapter import is_token_provider
|
||||
if is_token_provider(self.api_key):
|
||||
api_key_display = "Microsoft Entra ID"
|
||||
elif isinstance(self.api_key, str) and len(self.api_key) > 12:
|
||||
api_key_display = f"{self.api_key[:8]}...{self.api_key[-4:]}"
|
||||
else:
|
||||
api_key_display = "Not set!"
|
||||
|
||||
print()
|
||||
title = "(^_^) Configuration"
|
||||
|
|
|
|||
|
|
@ -5334,7 +5334,9 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]:
|
|||
|
||||
def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Generic auth status dispatcher."""
|
||||
target = provider_id or get_active_provider()
|
||||
target = (provider_id or get_active_provider() or "").strip().lower()
|
||||
if not target:
|
||||
return {"logged_in": False}
|
||||
if target == "spotify":
|
||||
return get_spotify_auth_status()
|
||||
if target == "nous":
|
||||
|
|
@ -5351,6 +5353,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
|||
return get_minimax_oauth_auth_status()
|
||||
if target == "copilot-acp":
|
||||
return get_external_process_provider_status(target)
|
||||
if target == "azure-foundry":
|
||||
return _get_azure_foundry_auth_status()
|
||||
# API-key providers
|
||||
pconfig = PROVIDER_REGISTRY.get(target)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
|
|
@ -5365,6 +5369,83 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
|||
return {"logged_in": False}
|
||||
|
||||
|
||||
def _get_azure_foundry_auth_status() -> Dict[str, Any]:
    """Report Azure Foundry auth status without touching the credential chain.

    ``logged_in`` is a structural verdict only:

    * ``auth_mode == "entra_id"``: True when ``azure-identity`` is
      importable. No token is minted here; the returned ``hint`` points at
      ``hermes doctor`` for live verification, and ``credential_probe`` /
      ``credential_verified`` record that no probe ran.
    * any other mode (``"api_key"`` is the default): True when
      ``AZURE_FOUNDRY_API_KEY`` holds a value accepted by
      ``has_usable_secret``.

    Returns:
        A dict with ``provider``, ``auth_mode``, ``base_url`` and
        ``logged_in``; Entra mode adds ``azure_identity_installed``,
        ``scope``, ``credential_probe``, ``credential_verified`` and
        ``hint``, or ``error`` when the check itself failed.
    """
    status: Dict[str, Any] = {"provider": "azure-foundry"}
    try:
        from hermes_cli.config import load_config, get_env_value
        loaded = load_config()
    except Exception:
        # Unreadable config: continue with defaults (api_key mode).
        loaded = {}

    model_section = loaded.get("model") if isinstance(loaded, dict) else None
    has_model_section = isinstance(model_section, dict)
    if has_model_section:
        mode = str(model_section.get("auth_mode") or "api_key").strip().lower() or "api_key"
        endpoint = str(model_section.get("base_url") or "").strip()
    else:
        mode = "api_key"
        endpoint = ""
    status["auth_mode"] = mode
    status["base_url"] = endpoint

    if mode != "entra_id":
        # api_key mode (default)
        try:
            secret = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "")
        except Exception:
            # Also covers the case where the config import above failed
            # and ``get_env_value`` was never bound.
            secret = os.getenv("AZURE_FOUNDRY_API_KEY", "")
        status["logged_in"] = has_usable_secret(secret)
        return status

    try:
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            SCOPE_AI_AZURE_DEFAULT,
            has_azure_identity_installed,
        )
        installed = has_azure_identity_installed()
        raw_entra = model_section.get("entra") if has_model_section else None
        entra_section = raw_entra if isinstance(raw_entra, dict) else {}
        identity_config = EntraIdentityConfig.from_dict(
            entra_section,
            default_scope=SCOPE_AI_AZURE_DEFAULT,
        )
        status["azure_identity_installed"] = installed
        status["scope"] = identity_config.scope
        status["credential_probe"] = "not_run"
        status["credential_verified"] = False
        status["logged_in"] = bool(installed)
        if installed:
            status["hint"] = (
                "azure-identity is installed; live credential validation "
                "is skipped here. Run `hermes doctor` to verify token acquisition."
            )
        else:
            status["hint"] = (
                "azure-identity not installed. Install with: "
                "pip install azure-identity (or rely on Hermes' "
                "lazy-install at first use)."
            )
    except Exception as exc:
        status["logged_in"] = False
        status["error"] = f"azure-identity check failed: {exc}"
    return status
|
||||
|
||||
|
||||
def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
|
||||
"""Resolve API key and base URL for an API-key provider.
|
||||
|
||||
|
|
|
|||
|
|
@ -566,6 +566,54 @@ def _interactive_auth() -> None:
|
|||
print()
|
||||
except ImportError:
|
||||
pass # boto3 or bedrock_adapter not available
|
||||
|
||||
# Show Azure Foundry Entra ID status
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg = load_config()
|
||||
_model_cfg = _cfg.get("model") if isinstance(_cfg, dict) else None
|
||||
if isinstance(_model_cfg, dict):
|
||||
_cfg_provider = str(_model_cfg.get("provider") or "").strip().lower()
|
||||
_cfg_auth_mode = str(_model_cfg.get("auth_mode") or "").strip().lower()
|
||||
if _cfg_provider == "azure-foundry" and _cfg_auth_mode == "entra_id":
|
||||
from agent.azure_identity_adapter import (
|
||||
EntraIdentityConfig,
|
||||
SCOPE_AI_AZURE_DEFAULT,
|
||||
describe_active_credential,
|
||||
has_azure_identity_installed,
|
||||
)
|
||||
_base_url = str(_model_cfg.get("base_url") or "").strip()
|
||||
_entra = _model_cfg.get("entra") or {}
|
||||
if not isinstance(_entra, dict):
|
||||
_entra = {}
|
||||
_scope = (
|
||||
str(_entra.get("scope") or "").strip()
|
||||
or SCOPE_AI_AZURE_DEFAULT
|
||||
)
|
||||
print(f"azure-foundry (Microsoft Entra ID):")
|
||||
print(f" Endpoint: {_base_url or '(not configured)'}")
|
||||
print(f" Scope: {_scope}")
|
||||
if not has_azure_identity_installed():
|
||||
print(" Status: ⚠ azure-identity not installed "
|
||||
"(pip install azure-identity)")
|
||||
else:
|
||||
_entra_cfg = EntraIdentityConfig(
|
||||
scope=_scope,
|
||||
)
|
||||
_info = describe_active_credential(config=_entra_cfg, timeout_seconds=10.0)
|
||||
_env_sources = _info.get("env_sources") or []
|
||||
if _info.get("ok"):
|
||||
_tag = ", ".join(_env_sources) if _env_sources else "default chain"
|
||||
print(f" Status: ✓ token acquired ({_tag})")
|
||||
else:
|
||||
_err = _info.get("error") or "credential chain exhausted"
|
||||
print(f" Status: ⚠ {_err}")
|
||||
_hint = _info.get("hint")
|
||||
if _hint:
|
||||
print(f" Hint: {_hint}")
|
||||
print()
|
||||
except Exception:
|
||||
pass
|
||||
print()
|
||||
|
||||
# Main menu
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
"""Azure Foundry endpoint auto-detection.
|
||||
|
||||
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
|
||||
Inspect a Microsoft Foundry / Azure OpenAI endpoint to determine:
|
||||
- API transport (OpenAI-style ``chat_completions`` vs
|
||||
Anthropic-style ``anthropic_messages``)
|
||||
- Available models (best effort — Azure does not expose a deployment
|
||||
|
|
@ -19,6 +19,16 @@ rather than the user's *deployed* deployment names. In practice it is
|
|||
still a useful hint — the user picks a familiar model name and we look
|
||||
up its context length from the catalog.
|
||||
|
||||
Authentication modes:
|
||||
- ``api_key`` (default): the wizard passes an ``api_key`` string; the
|
||||
probe sends both ``api-key:`` and ``Authorization: Bearer`` headers
|
||||
so we hit any Azure deployment regardless of which header it expects.
|
||||
- ``entra_id``: the wizard passes a ``token_provider`` callable from
|
||||
:mod:`agent.azure_identity_adapter`. The probe mints exactly one
|
||||
bearer JWT, sends **only** ``Authorization: Bearer <jwt>`` (never
|
||||
``api-key:``), and never persists the token. This matches Microsoft's
|
||||
documented contract for keyless inference.
|
||||
|
||||
The detector never crashes on errors (every HTTP call is wrapped in a
|
||||
broad try/except). Callers get a :class:`DetectionResult` with whatever
|
||||
information could be gathered, and fall back to manual entry for the
|
||||
|
|
@ -31,7 +41,7 @@ import json
|
|||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from typing import Any, Callable, Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse
|
||||
|
|
@ -79,15 +89,73 @@ class DetectionResult:
|
|||
is_anthropic: bool = False
|
||||
|
||||
|
||||
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
|
||||
"""GET a URL with ``api-key`` + ``Authorization`` headers. Return
|
||||
def _resolve_credential(api_key: Any,
|
||||
token_provider: Optional[Callable[[], str]] = None,
|
||||
) -> tuple[Optional[str], str]:
|
||||
"""Coerce wizard inputs into a (token, mode) pair.
|
||||
|
||||
Returns ``(token_or_None, mode)`` where ``mode`` is:
|
||||
- ``"entra_id"`` when a callable token provider was supplied — the
|
||||
returned token is a freshly minted bearer JWT, sent ONLY in
|
||||
``Authorization: Bearer``.
|
||||
- ``"api_key"`` when a string key was supplied — the returned token
|
||||
is the raw API key, sent in BOTH ``api-key:`` and
|
||||
``Authorization: Bearer`` headers (preserves the original
|
||||
broad-compat probe behaviour).
|
||||
- ``("", "api_key")`` when neither yields a value.
|
||||
|
||||
Bearer minting failures degrade to ``("", "entra_id")`` so the caller
|
||||
can still report "detection incomplete" rather than crashing.
|
||||
"""
|
||||
# Token-provider path (callable wins when both supplied).
|
||||
if token_provider is not None and callable(token_provider):
|
||||
try:
|
||||
token = token_provider()
|
||||
return (str(token) if token else None), "entra_id"
|
||||
except Exception as exc:
|
||||
logger.debug("azure_detect: token_provider failed: %s", exc)
|
||||
return None, "entra_id"
|
||||
if callable(api_key) and not isinstance(api_key, str):
|
||||
try:
|
||||
token = api_key()
|
||||
return (str(token) if token else None), "entra_id"
|
||||
except Exception as exc:
|
||||
logger.debug("azure_detect: api_key callable failed: %s", exc)
|
||||
return None, "entra_id"
|
||||
# API-key path.
|
||||
if isinstance(api_key, str) and api_key:
|
||||
return api_key, "api_key"
|
||||
return None, "api_key"
|
||||
|
||||
|
||||
def _apply_auth_headers(req: urllib_request.Request,
|
||||
token: Optional[str],
|
||||
mode: str) -> None:
|
||||
"""Attach the right auth headers to ``req`` based on credential mode."""
|
||||
if not token:
|
||||
return
|
||||
if mode == "entra_id":
|
||||
# Bearer-only: do NOT also set api-key, which would log a JWT in
|
||||
# a header slot intended for static keys.
|
||||
req.add_header("Authorization", f"Bearer {token}")
|
||||
else:
|
||||
# Legacy broad-compat behaviour: send both headers so we land on
|
||||
# any Azure resource regardless of which it accepts.
|
||||
req.add_header("api-key", token)
|
||||
req.add_header("Authorization", f"Bearer {token}")
|
||||
|
||||
|
||||
def _http_get_json(url: str,
|
||||
api_key: Any,
|
||||
timeout: float = 6.0,
|
||||
*,
|
||||
token_provider: Optional[Callable[[], str]] = None,
|
||||
) -> tuple[int, Optional[dict]]:
|
||||
"""GET a URL with the appropriate auth headers. Return
|
||||
``(status_code, parsed_json_or_None)``. Never raises."""
|
||||
token, mode = _resolve_credential(api_key, token_provider)
|
||||
req = urllib_request.Request(url, method="GET")
|
||||
# Azure OpenAI uses ``api-key``. Some Azure deployments (and
|
||||
# Anthropic-style routes) use ``Authorization: Bearer``. Send both
|
||||
# so we probe once per URL rather than twice.
|
||||
req.add_header("api-key", api_key)
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
_apply_auth_headers(req, token, mode)
|
||||
req.add_header("User-Agent", "hermes-agent/azure-detect")
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=timeout) as resp:
|
||||
|
|
@ -140,7 +208,11 @@ def _extract_model_ids(payload: dict) -> list[str]:
|
|||
return ids
|
||||
|
||||
|
||||
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
|
||||
def _probe_openai_models(base_url: str,
|
||||
api_key: Any,
|
||||
*,
|
||||
token_provider: Optional[Callable[[], str]] = None,
|
||||
) -> tuple[bool, list[str]]:
|
||||
"""Probe ``<base>/models`` for an OpenAI-shaped response.
|
||||
|
||||
Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
|
||||
|
|
@ -156,7 +228,7 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
|
|||
candidates.append(f"{base_url}/models?api-version={v}")
|
||||
|
||||
for url in candidates:
|
||||
status, body = _http_get_json(url, api_key)
|
||||
status, body = _http_get_json(url, api_key, token_provider=token_provider)
|
||||
if status == 200 and body is not None:
|
||||
ids = _extract_model_ids(body)
|
||||
if ids:
|
||||
|
|
@ -172,7 +244,11 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
|
|||
return False, []
|
||||
|
||||
|
||||
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
|
||||
def _probe_anthropic_messages(base_url: str,
|
||||
api_key: Any,
|
||||
*,
|
||||
token_provider: Optional[Callable[[], str]] = None,
|
||||
) -> bool:
|
||||
"""Send a zero-token request to ``<base>/v1/messages`` and check
|
||||
whether the endpoint at least *recognises* the Anthropic Messages
|
||||
shape (any 4xx that mentions ``messages`` or ``model``, or a 400
|
||||
|
|
@ -187,8 +263,8 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
|
|||
"messages": [{"role": "user", "content": "ping"}],
|
||||
}).encode("utf-8")
|
||||
req = urllib_request.Request(url, method="POST", data=payload)
|
||||
req.add_header("api-key", api_key)
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
token, mode = _resolve_credential(api_key, token_provider)
|
||||
_apply_auth_headers(req, token, mode)
|
||||
req.add_header("anthropic-version", "2023-06-01")
|
||||
req.add_header("content-type", "application/json")
|
||||
req.add_header("User-Agent", "hermes-agent/azure-detect")
|
||||
|
|
@ -218,13 +294,23 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def detect(base_url: str, api_key: str) -> DetectionResult:
|
||||
def detect(base_url: str,
|
||||
api_key: Any = "",
|
||||
*,
|
||||
token_provider: Optional[Callable[[], str]] = None,
|
||||
) -> DetectionResult:
|
||||
"""Inspect an Azure endpoint and describe its transport + models.
|
||||
|
||||
Call this from the wizard before asking the user to pick an API
|
||||
mode manually. The caller should treat the returned
|
||||
:class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
|
||||
fall back to asking the user.
|
||||
|
||||
``api_key`` may be a string (legacy API-key auth — sends both
|
||||
``api-key:`` and ``Authorization: Bearer``) or a callable returning
|
||||
a bearer JWT (Entra ID auth — sends ONLY ``Authorization: Bearer``).
|
||||
``token_provider`` is an alternative explicit name for the callable
|
||||
form; if both are supplied the callable wins.
|
||||
"""
|
||||
result = DetectionResult()
|
||||
|
||||
|
|
@ -244,7 +330,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
|
|||
|
||||
# 2. Try the OpenAI-style /models probe. If this works, the
|
||||
# endpoint definitely speaks OpenAI wire.
|
||||
ok, models = _probe_openai_models(base_url, api_key)
|
||||
ok, models = _probe_openai_models(base_url, api_key, token_provider=token_provider)
|
||||
if ok:
|
||||
result.models_probe_ok = True
|
||||
result.models = models
|
||||
|
|
@ -259,7 +345,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
|
|||
# 3. Fallback: probe the Anthropic Messages shape. Slower and more
|
||||
# intrusive than /models, so only run it when the OpenAI probe
|
||||
# failed.
|
||||
if _probe_anthropic_messages(base_url, api_key):
|
||||
if _probe_anthropic_messages(base_url, api_key, token_provider=token_provider):
|
||||
result.is_anthropic = True
|
||||
result.api_mode = "anthropic_messages"
|
||||
result.reason = "Endpoint accepts Anthropic Messages shape"
|
||||
|
|
@ -273,11 +359,26 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
|
|||
return result
|
||||
|
||||
|
||||
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
|
||||
def lookup_context_length(model: str,
|
||||
base_url: str,
|
||||
api_key: Any = "",
|
||||
*,
|
||||
token_provider: Optional[Callable[[], str]] = None,
|
||||
) -> Optional[int]:
|
||||
"""Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
|
||||
that returns ``None`` when only the fallback default (128k) would
|
||||
fire, so the wizard can distinguish "we actually know this" from
|
||||
"we guessed."""
|
||||
"we guessed.
|
||||
|
||||
For Entra-ID mode pass a callable as ``api_key`` (or via
|
||||
``token_provider=``); the wrapped resolver expects a string, so we
|
||||
mint one bearer JWT here for the single lookup. The resolver itself
|
||||
only reads catalog metadata over HTTP — no SDK client is built — so
|
||||
the minted token is consumed for at most one /models probe.
|
||||
"""
|
||||
model_id = str(model or "").strip()
|
||||
if not model_id:
|
||||
return None
|
||||
try:
|
||||
from agent.model_metadata import (
|
||||
DEFAULT_FALLBACK_CONTEXT,
|
||||
|
|
@ -286,8 +387,13 @@ def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[i
|
|||
except Exception:
|
||||
return None
|
||||
|
||||
# Resolve the credential once. For Entra mode this calls the token
|
||||
# provider; for legacy api_key this is a no-op string pass-through.
|
||||
token, mode = _resolve_credential(api_key, token_provider)
|
||||
effective_key = token or ""
|
||||
|
||||
try:
|
||||
n = get_model_context_length(model, base_url=base_url, api_key=api_key)
|
||||
n = get_model_context_length(model_id, base_url=base_url, api_key=effective_key)
|
||||
except Exception as exc:
|
||||
logger.debug("azure_detect: context length lookup failed: %s", exc)
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -1613,6 +1613,87 @@ def run_doctor(args):
|
|||
f"bedrock:ListFoundationModels"],
|
||||
)
|
||||
|
||||
def _probe_azure_entra() -> _ConnectivityResult:
    """Probe Azure Foundry Entra ID auth, parallel to ``_probe_bedrock``.

    Returns an empty result (no rows, no issues) unless the loaded config
    has ``model.provider: azure-foundry`` AND ``model.auth_mode: entra_id``,
    so users on plain API-key Azure are not probed. A failure while loading
    the config is treated the same way.

    Otherwise returns a single-row result:

    - a warning row when the adapter cannot be imported, when
      ``azure-identity`` is not installed, or when
      :func:`agent.azure_identity_adapter.describe_active_credential`
      reports a failure (with a remediation hint in the issues list);
    - a success row naming the credential source and scope when it
      reports ``ok``.

    The credential check is invoked with ``timeout_seconds=10.0``.
    """
    name = "Azure Foundry (Entra ID)"
    label = name.ljust(28)

    def _skipped() -> _ConnectivityResult:
        # Probe does not apply to this configuration: nothing to display.
        return _ConnectivityResult(name, [], [])

    def _warn(detail: str, issue: str) -> _ConnectivityResult:
        # One yellow warning row plus one entry for the issues summary.
        return _ConnectivityResult(
            name,
            [(color("⚠", Colors.YELLOW), label,
              color(f"({detail})", Colors.DIM))],
            [issue],
        )

    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        model_cfg = cfg.get("model") if isinstance(cfg, dict) else {}
        if not isinstance(model_cfg, dict):
            return _skipped()
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        auth_mode = str(model_cfg.get("auth_mode") or "").strip().lower()
        if cfg_provider != "azure-foundry" or auth_mode != "entra_id":
            return _skipped()
    except Exception:
        # Best effort: an unreadable config means there is nothing to probe.
        return _skipped()

    try:
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            SCOPE_AI_AZURE_DEFAULT,
            describe_active_credential,
            has_azure_identity_installed,
        )
    except Exception as exc:
        return _warn(
            f"adapter import failed: {exc}",
            f"Azure Foundry adapter import failed: {exc}",
        )

    if not has_azure_identity_installed():
        return _warn(
            "azure-identity not installed",
            f"Install azure-identity: {sys.executable} -m pip install azure-identity",
        )

    # Only the optional scope override is read from config; a missing or
    # malformed ``entra`` section falls back to the adapter's default scope.
    entra_cfg = model_cfg.get("entra") or {}
    if not isinstance(entra_cfg, dict):
        entra_cfg = {}
    scope = (
        str(entra_cfg.get("scope") or "").strip()
        or SCOPE_AI_AZURE_DEFAULT
    )
    config = EntraIdentityConfig(
        scope=scope,
    )
    info = describe_active_credential(config=config, timeout_seconds=10.0)
    if info.get("ok"):
        env_sources = info.get("env_sources") or []
        tag = ", ".join(env_sources) if env_sources else "default credential chain"
        return _ConnectivityResult(
            name,
            [(color("✓", Colors.GREEN), label,
              color(f"({tag}, scope={scope})", Colors.DIM))],
            [],
        )

    err = info.get("error") or "credential chain exhausted"
    hint = info.get("hint") or (
        "Run `az login`, set AZURE_TENANT_ID/AZURE_CLIENT_ID/"
        "AZURE_CLIENT_SECRET, or attach a managed identity to this VM."
    )
    return _warn(err, f"Azure Foundry Entra: {err}. {hint}")
|
||||
|
||||
# Build the probe submission list in display order
|
||||
_probes.append(("OpenRouter API", _probe_openrouter))
|
||||
_probes.append(("Anthropic API", _probe_anthropic))
|
||||
|
|
@ -1630,6 +1711,7 @@ def run_doctor(args):
|
|||
_probe_apikey_provider(p, e, u, b, s)))
|
||||
|
||||
_probes.append(("AWS Bedrock", _probe_bedrock))
|
||||
_probes.append(("Azure Foundry (Entra ID)", _probe_azure_entra))
|
||||
|
||||
# Print a single status line so users see something happening, then
|
||||
# fan out. ``\r`` clears it once the first real result line lands.
|
||||
|
|
|
|||
|
|
@ -3535,11 +3535,27 @@ def _save_custom_provider(
|
|||
|
||||
|
||||
def _model_flow_azure_foundry(config, current_model=""):
|
||||
"""Azure Foundry provider: configure endpoint, API mode, API key, and model.
|
||||
"""Azure Foundry provider: configure endpoint, auth mode, API mode, and model.
|
||||
|
||||
Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
|
||||
Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects
|
||||
the transport and available models when possible:
|
||||
Anthropic-style (``/v1/messages``) endpoints, and two authentication
|
||||
modes:
|
||||
|
||||
* **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env.
|
||||
* **Microsoft Entra ID** — keyless, RBAC-based auth via the
|
||||
``azure-identity`` SDK (Managed Identity / Workload Identity / az
|
||||
login / VS Code / azd / service principal env vars). Works on both
|
||||
OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is
|
||||
per-resource and the same ``Azure AI User`` role grants
|
||||
both. For OpenAI-style the OpenAI SDK's native callable
|
||||
``api_key=`` contract is used; for Anthropic-style an
|
||||
``httpx.Client`` with a request event hook (built by
|
||||
:func:`agent.azure_identity_adapter.build_bearer_http_client`)
|
||||
mints a fresh JWT per request because the Anthropic SDK does not
|
||||
accept a callable ``auth_token`` natively.
|
||||
|
||||
The wizard auto-detects the transport and available models when
|
||||
possible:
|
||||
|
||||
* URLs ending in ``/anthropic`` → Anthropic Messages API.
|
||||
* Successful ``GET <base>/models`` probe → OpenAI-style + populates
|
||||
|
|
@ -3566,9 +3582,14 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
|
||||
current_base_url = str(model_cfg.get("base_url", "") or "")
|
||||
current_api_mode = str(model_cfg.get("api_mode", "") or "")
|
||||
current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
|
||||
_cur_entra = model_cfg.get("entra") or {}
|
||||
current_entra = _cur_entra if isinstance(_cur_entra, dict) else {}
|
||||
else:
|
||||
current_base_url = ""
|
||||
current_api_mode = ""
|
||||
current_auth_mode = "api_key"
|
||||
current_entra = {}
|
||||
|
||||
current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
|
||||
|
||||
|
|
@ -3583,22 +3604,29 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
print()
|
||||
|
||||
if current_base_url:
|
||||
print(f" Current endpoint: {current_base_url}")
|
||||
print(f" Current endpoint: {current_base_url}")
|
||||
if current_api_mode:
|
||||
_lbl = (
|
||||
"OpenAI-style"
|
||||
if current_api_mode == "chat_completions"
|
||||
else "Anthropic-style"
|
||||
)
|
||||
print(f" Current API mode: {_lbl}")
|
||||
if current_api_key:
|
||||
print(f" Current API key: {current_api_key[:8]}...")
|
||||
print(f" Current API mode: {_lbl}")
|
||||
if current_auth_mode == "entra_id":
|
||||
print(f" Current auth mode: Microsoft Entra ID (keyless)")
|
||||
elif current_api_key:
|
||||
print(f" Current auth mode: API key ({current_api_key[:8]}...)")
|
||||
print()
|
||||
|
||||
# ── Step 1: endpoint URL ─────────────────────────────────────────
|
||||
try:
|
||||
_placeholder = (
|
||||
current_base_url
|
||||
or "e.g. https://<resource>.openai.azure.com/openai/v1 "
|
||||
"or https://<resource>.services.ai.azure.com/anthropic"
|
||||
)
|
||||
base_url = input(
|
||||
f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
|
||||
f"API endpoint URL [{_placeholder}]: "
|
||||
).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
|
|
@ -3612,25 +3640,125 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
|
||||
return
|
||||
|
||||
# ── Step 2: API key ──────────────────────────────────────────────
|
||||
# ── Step 2: authentication mode ──────────────────────────────────
|
||||
print()
|
||||
print("Authentication:")
|
||||
print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)")
|
||||
print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)")
|
||||
print(" Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.")
|
||||
print(" Requires the 'Azure AI User' role on the Foundry resource.")
|
||||
try:
|
||||
api_key = getpass.getpass(
|
||||
f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
|
||||
).strip()
|
||||
_auth_default = "2" if current_auth_mode == "entra_id" else "1"
|
||||
auth_choice = (
|
||||
input(f"Authentication mode [1/2] ({_auth_default}): ").strip()
|
||||
or _auth_default
|
||||
)
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
use_entra = auth_choice == "2"
|
||||
auth_mode_label = "entra_id" if use_entra else "api_key"
|
||||
|
||||
effective_key = api_key or current_api_key
|
||||
if not effective_key:
|
||||
print("No API key provided. Cancelled.")
|
||||
return
|
||||
# ── Step 3: credentials (key OR Entra preflight) ─────────────────
|
||||
effective_key: str = ""
|
||||
entra_overrides: dict = {}
|
||||
token_provider = None # callable when entra
|
||||
entra_scope = ""
|
||||
|
||||
# ── Step 3: auto-detect transport + models ───────────────────────
|
||||
if use_entra:
|
||||
try:
|
||||
from agent.azure_identity_adapter import (
|
||||
EntraIdentityConfig,
|
||||
SCOPE_AI_AZURE_DEFAULT,
|
||||
build_token_provider,
|
||||
describe_active_credential,
|
||||
has_azure_identity_installed,
|
||||
)
|
||||
except ImportError as exc:
|
||||
print()
|
||||
print(f"⚠ Could not import azure-identity adapter: {exc}")
|
||||
print(" Falling back to API key auth.")
|
||||
use_entra = False
|
||||
auth_mode_label = "api_key"
|
||||
|
||||
if use_entra:
|
||||
print()
|
||||
if not has_azure_identity_installed():
|
||||
print("◐ The 'azure-identity' package is not installed yet.")
|
||||
print(
|
||||
" Hermes will install it now (the preflight below "
|
||||
"triggers the lazy-install). To skip lazy installs, "
|
||||
"run: pip install azure-identity"
|
||||
)
|
||||
|
||||
# Preserve only the optional scope override. Identity selection
|
||||
# (tenant, user-assigned MI, workload identity, service principal)
|
||||
# stays in Azure SDK env vars such as AZURE_CLIENT_ID.
|
||||
_persisted_scope_override = str(current_entra.get("scope") or "").strip()
|
||||
entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT
|
||||
|
||||
entra_overrides = {}
|
||||
if _persisted_scope_override:
|
||||
entra_overrides["scope"] = _persisted_scope_override
|
||||
|
||||
print()
|
||||
print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...")
|
||||
_config = EntraIdentityConfig(
|
||||
scope=entra_scope,
|
||||
)
|
||||
info = describe_active_credential(config=_config, timeout_seconds=10.0)
|
||||
if info.get("ok"):
|
||||
env_sources = info.get("env_sources") or []
|
||||
tag = ", ".join(env_sources) if env_sources else "default chain"
|
||||
print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})")
|
||||
else:
|
||||
err = info.get("error") or "credential chain exhausted"
|
||||
hint = info.get("hint") or (
|
||||
"Run `az login`, attach a managed identity to this VM, or "
|
||||
"set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET."
|
||||
)
|
||||
print(f"⚠ {err}")
|
||||
print(f" Hint: {hint}")
|
||||
try:
|
||||
ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
if ans and ans not in ("y", "yes"):
|
||||
print("Cancelled.")
|
||||
return
|
||||
|
||||
# Build the token provider for the detection probe (best-effort —
|
||||
# if the credential chain failed above, this will silently return
|
||||
# None inside azure_detect and the probe falls back to manual).
|
||||
try:
|
||||
token_provider = build_token_provider(config=_config)
|
||||
except Exception as exc:
|
||||
print(f"⚠ Could not build token provider for probing: {exc}")
|
||||
token_provider = None
|
||||
else:
|
||||
print()
|
||||
try:
|
||||
api_key = getpass.getpass(
|
||||
f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
|
||||
).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
|
||||
effective_key = api_key or current_api_key
|
||||
if not effective_key:
|
||||
print("No API key provided. Cancelled.")
|
||||
return
|
||||
|
||||
# ── Step 4: auto-detect transport + models ───────────────────────
|
||||
print()
|
||||
print("◐ Probing endpoint to auto-detect transport and models...")
|
||||
detection = azure_detect.detect(effective_url, effective_key)
|
||||
detection = azure_detect.detect(
|
||||
effective_url,
|
||||
api_key=effective_key,
|
||||
token_provider=token_provider,
|
||||
)
|
||||
|
||||
discovered_models: list[str] = list(detection.models)
|
||||
api_mode: str = detection.api_mode or ""
|
||||
|
|
@ -3665,7 +3793,7 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
return
|
||||
api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
|
||||
|
||||
# ── Step 4: model name ───────────────────────────────────────────
|
||||
# ── Step 5: model name ───────────────────────────────────────────
|
||||
print()
|
||||
effective_model = ""
|
||||
if discovered_models:
|
||||
|
|
@ -3704,15 +3832,17 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
print("No model name provided. Cancelled.")
|
||||
return
|
||||
|
||||
# ── Step 5: context-length lookup ────────────────────────────────
|
||||
# ── Step 6: context-length lookup ────────────────────────────────
|
||||
ctx_len = azure_detect.lookup_context_length(
|
||||
effective_model,
|
||||
effective_url,
|
||||
effective_key,
|
||||
api_key=effective_key,
|
||||
token_provider=token_provider,
|
||||
)
|
||||
|
||||
# ── Step 6: persist ──────────────────────────────────────────────
|
||||
save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
|
||||
# ── Step 7: persist ──────────────────────────────────────────────
|
||||
if not use_entra:
|
||||
save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
|
||||
|
||||
cfg = load_config()
|
||||
model = cfg.get("model")
|
||||
|
|
@ -3724,6 +3854,22 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
model["base_url"] = effective_url
|
||||
model["api_mode"] = api_mode
|
||||
model["default"] = effective_model
|
||||
model["auth_mode"] = auth_mode_label
|
||||
if use_entra:
|
||||
# Persist only the non-default Entra scope so config.yaml stays tidy.
|
||||
# Azure identity selection stays in standard AZURE_* env vars.
|
||||
clean_entra: dict = {}
|
||||
for key in ("scope",):
|
||||
val = entra_overrides.get(key)
|
||||
if val:
|
||||
clean_entra[key] = val
|
||||
if clean_entra:
|
||||
model["entra"] = clean_entra
|
||||
elif "entra" in model:
|
||||
del model["entra"]
|
||||
else:
|
||||
if "entra" in model:
|
||||
del model["entra"]
|
||||
if ctx_len:
|
||||
model["context_length"] = ctx_len
|
||||
|
||||
|
|
@ -3739,10 +3885,14 @@ def _model_flow_azure_foundry(config, current_model=""):
|
|||
save_env_value("OPENAI_API_KEY", "")
|
||||
|
||||
mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
|
||||
auth_label = (
|
||||
"Microsoft Entra ID (keyless)" if use_entra else "API key"
|
||||
)
|
||||
print()
|
||||
print("✓ Azure Foundry configured:")
|
||||
print(f" Endpoint: {effective_url}")
|
||||
print(f" API mode: {mode_label}")
|
||||
print(f" Auth: {auth_label}")
|
||||
print(f" Model: {effective_model}")
|
||||
if ctx_len:
|
||||
print(f" Context length: {ctx_len:,} tokens")
|
||||
|
|
|
|||
|
|
@ -744,6 +744,15 @@ def _resolve_azure_foundry_runtime(
|
|||
strips a trailing ``/v1`` for Anthropic-style endpoints because the
|
||||
Anthropic SDK appends ``/v1/messages`` internally.
|
||||
|
||||
When ``model.auth_mode == "entra_id"`` (and the model is OpenAI-style),
|
||||
the returned ``api_key`` is a zero-arg callable produced by
|
||||
:func:`agent.azure_identity_adapter.build_token_provider` rather than
|
||||
a string. Downstream code that constructs an OpenAI SDK client passes
|
||||
this through unchanged (the SDK accepts ``Callable[[], str]`` for
|
||||
``api_key`` and calls it before every request). Code paths that need
|
||||
a string (logging, manual HTTP probes, header injection) must use the
|
||||
helpers in ``agent.azure_identity_adapter``.
|
||||
|
||||
Raises :class:`AuthError` when required values are missing.
|
||||
"""
|
||||
explicit_api_key = str(explicit_api_key or "").strip()
|
||||
|
|
@ -752,9 +761,15 @@ def _resolve_azure_foundry_runtime(
|
|||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
cfg_base_url = ""
|
||||
cfg_api_mode = "chat_completions"
|
||||
cfg_auth_mode = "api_key"
|
||||
cfg_entra: Dict[str, Any] = {}
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
|
||||
cfg_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
|
||||
_entra = model_cfg.get("entra")
|
||||
if isinstance(_entra, dict):
|
||||
cfg_entra = _entra
|
||||
|
||||
# Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
|
||||
# reasoning models as Responses-API-only. Calling /chat/completions
|
||||
|
|
@ -780,6 +795,79 @@ def _resolve_azure_foundry_runtime(
|
|||
"the AZURE_FOUNDRY_BASE_URL environment variable."
|
||||
)
|
||||
|
||||
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
|
||||
# we inherited from the configured base_url to avoid double-/v1 paths.
|
||||
if cfg_api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
|
||||
# ── Entra ID (Microsoft Foundry recommended path) ──────────────────
|
||||
#
|
||||
# OpenAI-style endpoints use the OpenAI SDK's native callable
|
||||
# ``api_key=`` contract — the SDK mints a fresh JWT per request
|
||||
# automatically.
|
||||
#
|
||||
# Anthropic-style endpoints (Claude on Foundry) take the callable
|
||||
# too: :func:`agent.anthropic_adapter.build_anthropic_client`
|
||||
# detects the callable and constructs an ``httpx.Client`` with a
|
||||
# request event hook that injects a fresh ``Authorization: Bearer``
|
||||
# header per request (the Anthropic SDK does not accept callables
|
||||
# natively). From the runtime resolver's perspective both modes
|
||||
# are identical — return the callable api_key and let the
|
||||
# downstream SDK wrapper handle the contract difference.
|
||||
if cfg_auth_mode == "entra_id":
|
||||
if explicit_api_key:
|
||||
# User passed --api-key on the CLI while config says entra_id —
|
||||
# honour the explicit string (escape hatch for one-off testing).
|
||||
api_key: Any = explicit_api_key
|
||||
source = "explicit"
|
||||
auth_mode = "api_key"
|
||||
else:
|
||||
try:
|
||||
from agent.azure_identity_adapter import (
|
||||
EntraIdentityConfig,
|
||||
SCOPE_AI_AZURE_DEFAULT,
|
||||
build_token_provider,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
"Azure Foundry Entra ID auth requires the 'azure-identity' "
|
||||
"package. Install it with: pip install azure-identity "
|
||||
f"(import failed: {exc})"
|
||||
) from exc
|
||||
|
||||
scope = (
|
||||
str(cfg_entra.get("scope") or "").strip()
|
||||
or SCOPE_AI_AZURE_DEFAULT
|
||||
)
|
||||
try:
|
||||
entra_config = EntraIdentityConfig(
|
||||
scope=scope,
|
||||
)
|
||||
token_provider = build_token_provider(config=entra_config)
|
||||
except ImportError as exc:
|
||||
raise AuthError(str(exc)) from exc
|
||||
api_key = token_provider
|
||||
source = "entra_id"
|
||||
auth_mode = "entra_id"
|
||||
|
||||
clean_entra = {}
|
||||
if auth_mode == "entra_id":
|
||||
configured_scope = str(cfg_entra.get("scope") or "").strip()
|
||||
if configured_scope:
|
||||
clean_entra["scope"] = configured_scope
|
||||
|
||||
return {
|
||||
"provider": "azure-foundry",
|
||||
"api_mode": cfg_api_mode,
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"auth_mode": auth_mode,
|
||||
"entra": clean_entra,
|
||||
"source": source,
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# ── Static API key (legacy / default) ──────────────────────────────
|
||||
api_key = explicit_api_key
|
||||
if not api_key:
|
||||
try:
|
||||
|
|
@ -792,20 +880,19 @@ def _resolve_azure_foundry_runtime(
|
|||
if not api_key:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
|
||||
"~/.hermes/.env or run 'hermes model' to configure."
|
||||
"~/.hermes/.env or run 'hermes model' to configure. To use "
|
||||
"keyless Microsoft Entra ID auth instead, set "
|
||||
"model.auth_mode: entra_id in config.yaml (or pick "
|
||||
"'Microsoft Entra ID' in 'hermes model')."
|
||||
)
|
||||
|
||||
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
|
||||
# we inherited from the configured base_url to avoid double-/v1 paths.
|
||||
if cfg_api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
|
||||
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
|
||||
return {
|
||||
"provider": "azure-foundry",
|
||||
"api_mode": cfg_api_mode,
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"auth_mode": "api_key",
|
||||
"source": source,
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
|
@ -1232,7 +1319,7 @@ def resolve_runtime_provider(
|
|||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
|
||||
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
|
||||
# For Microsoft Foundry endpoints, use ANTHROPIC_API_KEY directly —
|
||||
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
|
||||
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
|
||||
# would find the Claude Code OAuth token first (priority 3) and return
|
||||
|
|
|
|||
|
|
@ -1288,9 +1288,15 @@ def _truncate_token(value: Optional[str], visible: int = 6) -> str:
|
|||
OAuth access token. JWT prefixes (the part before the first dot) are
|
||||
stripped first when present so the visible suffix is always part of
|
||||
the signing region rather than a meaningless header chunk.
|
||||
|
||||
Returns the Entra-ID placeholder when handed a callable (Azure Foundry
|
||||
bearer provider) — the callable is NEVER invoked here.
|
||||
"""
|
||||
if not value:
|
||||
return ""
|
||||
if callable(value) and not isinstance(value, str):
|
||||
# Entra ID bearer provider — never reveal a minted token in the UI.
|
||||
return "<entra-id-bearer>"
|
||||
s = str(value)
|
||||
if "." in s and s.count(".") >= 2:
|
||||
# Looks like a JWT — show the trailing piece of the signature only.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
"""Azure AI Foundry provider profile.
|
||||
"""Microsoft Foundry provider profile.
|
||||
|
||||
Azure Foundry exposes an OpenAI-compatible endpoint; users supply their own
|
||||
base URL at setup since endpoints are per-resource.
|
||||
|
|
@ -11,7 +11,7 @@ azure_foundry = ProviderProfile(
|
|||
name="azure-foundry",
|
||||
aliases=("azure", "azure-ai-foundry", "azure-ai"),
|
||||
display_name="Azure Foundry",
|
||||
description="Azure AI Foundry — OpenAI-compatible endpoint (user-supplied base URL)",
|
||||
description="Microsoft Foundry - OpenAI-compatible endpoint (user-supplied base URL)",
|
||||
signup_url="https://ai.azure.com/",
|
||||
env_vars=("AZURE_FOUNDRY_API_KEY", "AZURE_FOUNDRY_BASE_URL"),
|
||||
base_url="", # per-resource; user provides at setup
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
name: azure-foundry-provider
|
||||
kind: model-provider
|
||||
version: 1.0.0
|
||||
description: Azure AI Foundry
|
||||
description: Microsoft Foundry
|
||||
author: Nous Research
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ acp = ["agent-client-protocol==0.9.0"]
|
|||
# 4. Run `uv lock` to regenerate transitives.
|
||||
# 5. Optionally re-add to [all] only after a few days of clean operation.
|
||||
bedrock = ["boto3==1.42.89"]
|
||||
azure-identity = ["azure-identity==1.25.3"]
|
||||
termux = [
|
||||
# Baseline Android / Termux path for reliable fresh installs.
|
||||
"python-telegram-bot[webhooks]==22.6",
|
||||
|
|
|
|||
|
|
@ -1428,7 +1428,11 @@ class AIAgent:
|
|||
prefix = f"HTTP {status_code}: " if status_code else ""
|
||||
return f"{prefix}{raw[:500]}"
|
||||
|
||||
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
|
||||
def _mask_api_key_for_logs(self, key: Any) -> Optional[str]:
|
||||
# Azure Foundry Entra ID bearer providers are callables — never
|
||||
# invoke them in log paths; identify the auth surface instead.
|
||||
if callable(key) and not isinstance(key, str):
|
||||
return "<entra-id-bearer>"
|
||||
if not key:
|
||||
return None
|
||||
if len(key) <= 12:
|
||||
|
|
|
|||
87
tests/acp_adapter/test_detect_provider_entra.py
Normal file
87
tests/acp_adapter/test_detect_provider_entra.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
"""Regression tests for ACP adapter detection under Azure Foundry Entra ID.
|
||||
|
||||
The ACP adapter's ``detect_provider`` previously gated on
|
||||
``isinstance(api_key, str)`` and returned ``None`` for any runtime that
|
||||
returned a callable ``api_key`` — i.e. Azure Foundry with
|
||||
``auth_mode=entra_id``. Downstream, ACP would default to
|
||||
``"openrouter"`` and reject the legitimate provider in its auth handshake.
|
||||
This test pins the callable-aware fix so it never regresses.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
class TestDetectProviderEntra:
    """Pin how ``detect_provider`` reacts to string and callable credentials."""

    # Dotted path of the runtime resolver that every test swaps for a stub.
    _RESOLVER_PATH = "hermes_cli.runtime_provider.resolve_runtime_provider"

    def test_callable_api_key_is_a_valid_credential(self):
        """A runtime whose ``api_key`` is a callable (Entra bearer token
        provider) must be reported as a configured provider, not ``None``."""
        from acp_adapter import auth as acp_auth

        def stub_resolver(**_ignored):
            return {
                "provider": "azure-foundry",
                "api_mode": "chat_completions",
                "auth_mode": "entra_id",
                "base_url": "https://r.openai.azure.com/openai/v1",
                "api_key": lambda: "jwt-fresh",
            }

        with patch(self._RESOLVER_PATH, side_effect=stub_resolver):
            assert acp_auth.detect_provider() == "azure-foundry"
            assert acp_auth.has_provider() is True

    def test_string_api_key_still_works(self):
        """A non-empty static string key keeps resolving to its provider."""
        from acp_adapter import auth as acp_auth

        def stub_resolver(**_ignored):
            return {
                "provider": "openrouter",
                "api_key": "sk-or-static-key",
            }

        with patch(self._RESOLVER_PATH, side_effect=stub_resolver):
            assert acp_auth.detect_provider() == "openrouter"

    def test_empty_string_api_key_returns_none(self):
        """An empty string key is not a credential."""
        from acp_adapter import auth as acp_auth

        def stub_resolver(**_ignored):
            return {"provider": "openrouter", "api_key": ""}

        with patch(self._RESOLVER_PATH, side_effect=stub_resolver):
            assert acp_auth.detect_provider() is None

    def test_missing_provider_returns_none(self):
        """A callable ``api_key`` paired with an empty provider still
        yields ``None`` — no provider name is synthesized from the
        credential shape."""
        from acp_adapter import auth as acp_auth

        def stub_resolver(**_ignored):
            return {"api_key": lambda: "jwt-fresh", "provider": ""}

        with patch(self._RESOLVER_PATH, side_effect=stub_resolver):
            assert acp_auth.detect_provider() is None

    def test_resolver_exception_returns_none(self):
        """A resolver that raises is reported as "no provider"."""
        from acp_adapter import auth as acp_auth

        failure = RuntimeError("simulated")
        with patch(self._RESOLVER_PATH, side_effect=failure):
            assert acp_auth.detect_provider() is None
|
||||
|
|
@ -9,6 +9,7 @@ import pytest
|
|||
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.anthropic_adapter import (
|
||||
_is_azure_anthropic_endpoint,
|
||||
_is_oauth_token,
|
||||
_refresh_oauth_token,
|
||||
_to_plain_data,
|
||||
|
|
@ -121,6 +122,20 @@ class TestBuildAnthropicClient:
|
|||
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||
assert "context-1m-2025-08-07" in betas
|
||||
|
||||
def test_azure_anthropic_endpoint_detection_is_host_and_path_scoped(self):
    """Check the detector against endpoints that differ by host and path."""
    # (endpoint URL, expected verdict) pairs, checked in order.
    expectations = (
        ("https://example.services.ai.azure.com/models/anthropic", True),
        ("https://example.services.ai.azure.us/anthropic", True),
        ("https://example.openai.azure.com/openai/v1", False),
        ("https://management.azure.com/anthropic", False),
    )
    for endpoint, expected in expectations:
        assert _is_azure_anthropic_endpoint(endpoint) is expected
|
||||
|
||||
def test_bedrock_client_keeps_context_1m_beta(self):
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
mock_sdk.AnthropicBedrock = MagicMock()
|
||||
|
|
|
|||
350
tests/agent/test_auxiliary_client_azure_foundry.py
Normal file
350
tests/agent/test_auxiliary_client_azure_foundry.py
Normal file
|
|
@ -0,0 +1,350 @@
|
|||
"""Tests for auxiliary client routing of the ``azure-foundry`` provider.
|
||||
|
||||
Covers the dedicated branch in ``agent.auxiliary_client.resolve_provider_client``
|
||||
that delegates to :func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime`
|
||||
instead of falling into the generic ``resolve_api_key_provider_credentials``
|
||||
path (which only knows about ``AZURE_FOUNDRY_API_KEY`` and would 401 for
|
||||
Entra ID users and miss ``model.base_url`` overrides for api-key users
|
||||
with non-standard Foundry-projects endpoints).
|
||||
|
||||
Pinned scenarios:
|
||||
|
||||
* ``auth_mode: api_key`` → plain OpenAI client with the static string
|
||||
key for ``chat_completions``.
|
||||
* ``auth_mode: entra_id`` + ``chat_completions`` → plain OpenAI
|
||||
client with a callable ``api_key`` (the bearer-token provider) —
|
||||
confirms the callable survives the auxiliary path end-to-end.
|
||||
* ``auth_mode: entra_id`` + GPT-5.x model → CodexAuxiliaryClient
|
||||
wrapping the OpenAI client (api_mode auto-upgrades to
|
||||
codex_responses).
|
||||
* Anthropic-style + entra_id → callable ``api_key`` reaches
|
||||
``build_anthropic_client``, which installs a bearer-injecting httpx hook.
|
||||
* Failure path when no model is configured returns ``(None, None)``
|
||||
cleanly so the auto chain falls through.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_credential_cache():
    """Clear the adapter's credential cache before and after every test."""
    from agent.azure_identity_adapter import (
        reset_credential_cache as clear_cache,
    )

    clear_cache()
    yield
    clear_cache()
|
||||
|
||||
|
||||
@pytest.fixture
def fake_azure_identity(monkeypatch):
    """Install a stand-in for ``azure.identity`` so CI stays hermetic
    when the SDK is not installed.

    Returns a dict whose ``"scope"`` entry records the scope most
    recently handed to the fake ``get_bearer_token_provider``.
    """
    from agent import azure_identity_adapter as adapter_module

    recorded = {"scope": None}

    def make_credential(**kw):
        # Mimics a credential object: keeps its kwargs and mints a canned token.
        def get_token(scope):
            return SimpleNamespace(token="fake", expires_on=9999999999)

        return SimpleNamespace(kwargs=kw, get_token=get_token)

    def bearer_token_provider(credential, scope):
        # Remember the requested scope, then hand back a zero-arg provider.
        recorded["scope"] = scope
        return lambda: f"jwt-for-{scope}"

    stub_module = SimpleNamespace(
        DefaultAzureCredential=make_credential,
        get_bearer_token_provider=bearer_token_provider,
    )
    monkeypatch.setattr(
        adapter_module, "_require_azure_identity", lambda: stub_module
    )
    monkeypatch.setitem(sys.modules, "azure.identity", stub_module)
    return recorded
|
||||
|
||||
|
||||
@pytest.fixture
def patch_load_config(monkeypatch):
    """Return a setter that replaces ``hermes_cli.config.load_config``
    with a stub yielding ``{"model": model_cfg}``."""

    def _apply(model_cfg):
        def fake_load_config():
            return {"model": model_cfg}

        monkeypatch.setattr("hermes_cli.config.load_config", fake_load_config)

    return _apply
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# auth_mode: api_key (default) — regression for the legacy path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAuxAzureFoundryApiKey:
    """Auxiliary-client behaviour for the static API key auth mode."""

    @staticmethod
    def _model_cfg(default=None):
        """Build the shared api-key model config; omit ``default`` when unset."""
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        }
        if default is not None:
            cfg["default"] = default
        return cfg

    def test_chat_completions_returns_plain_openai_client(self, monkeypatch, patch_load_config):
        """chat_completions + static key yields a plain OpenAI client."""
        from agent.auxiliary_client import _try_azure_foundry
        from openai import OpenAI as _OpenAI

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
        patch_load_config(self._model_cfg(default="gpt-4o"))

        client, resolved = _try_azure_foundry(model="gpt-4o")

        assert client is not None
        assert resolved == "gpt-4o"
        assert isinstance(client, _OpenAI)
        assert client.api_key == "sk-azure-static-key"

    def test_codex_responses_wraps_in_codex_aux_client(self, monkeypatch, patch_load_config):
        """A GPT-5.x model is expected to come back wrapped in the Codex client."""
        from agent.auxiliary_client import _try_azure_foundry, CodexAuxiliaryClient

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
        patch_load_config(self._model_cfg(default="gpt-5.4-mini"))

        # GPT-5.x → runtime auto-upgrades to codex_responses
        client, resolved = _try_azure_foundry(model="gpt-5.4-mini")

        assert resolved == "gpt-5.4-mini"
        assert isinstance(client, CodexAuxiliaryClient)
        assert client.api_key == "sk-azure-static-key"

    def test_no_key_returns_none(self, monkeypatch, patch_load_config):
        """Without ``AZURE_FOUNDRY_API_KEY`` nothing is resolved."""
        from agent.auxiliary_client import _try_azure_foundry

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        patch_load_config(self._model_cfg(default="gpt-4o"))

        client, resolved = _try_azure_foundry(model="gpt-4o")

        assert client is None
        assert resolved is None

    def test_no_model_returns_none(self, monkeypatch, patch_load_config):
        """Azure has no fallback aux model — fail soft so the auto chain
        can try other providers."""
        from agent.auxiliary_client import _try_azure_foundry

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
        # No default model in the config.
        patch_load_config(self._model_cfg())

        client, resolved = _try_azure_foundry()

        assert client is None
        assert resolved is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# auth_mode: entra_id — callable api_key survives end-to-end
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAuxAzureFoundryEntra:
    """Auxiliary-client behaviour when ``auth_mode`` is ``entra_id``."""

    @staticmethod
    def _entra_cfg(base_url, api_mode, default):
        """Build an Entra-ID model config for the given endpoint and model."""
        return {
            "provider": "azure-foundry",
            "base_url": base_url,
            "api_mode": api_mode,
            "auth_mode": "entra_id",
            "default": default,
        }

    def test_callable_api_key_reaches_openai_constructor(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """The token provider callable must arrive at ``OpenAI(api_key=...)``
        intact — never stringified to ``"no-key-required"`` or to the
        SDK-internal empty-string representation BEFORE we hand it off.

        The assertion targets the public SDK contract (what the
        constructor receives) rather than ``client.api_key``, because
        OpenAI 2.24.0 stores callable api_keys in a private attribute and
        exposes ``client.api_key`` as ``""``. The SDK still calls the
        callable per request to mint ``Authorization: Bearer <token>``;
        that is the documented Microsoft/OpenAI contract relied on here.
        """
        from agent import auxiliary_client as aux_module

        captured = {}

        class _FakeOpenAI:
            def __init__(self, **kwargs):
                captured.update(kwargs)
                # Mirror the fields downstream callers read.
                self.api_key = kwargs.get("api_key", "")
                self.base_url = kwargs.get("base_url", "")

        monkeypatch.setattr(aux_module, "OpenAI", _FakeOpenAI)
        patch_load_config(
            self._entra_cfg(
                "https://r.openai.azure.com/openai/v1",
                "chat_completions",
                "gpt-4o",
            )
        )

        client, resolved = aux_module._try_azure_foundry(model="gpt-4o")

        assert client is not None
        assert resolved == "gpt-4o"
        # Public-contract assertion: the OpenAI SDK constructor saw the
        # callable, exactly as Microsoft's Foundry sample requires.
        forwarded_key = captured["api_key"]
        assert callable(forwarded_key)
        assert not isinstance(forwarded_key, str)
        assert captured["api_key"]().startswith("jwt-for-")
        # Base URL forwarded verbatim (no /responses suffix stripping
        # in this path — that's a separate concern handled by the
        # runtime resolver only when the user re-saves config).
        assert captured["base_url"] == "https://r.openai.azure.com/openai/v1"

    def test_codex_responses_with_entra_wraps_correctly(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """GPT-5.x deployment on Entra ID — auto-upgraded to
        codex_responses, wrapped in CodexAuxiliaryClient, callable
        api_key handed to the underlying OpenAI SDK."""
        from agent import auxiliary_client as aux_module

        captured = {}

        class _FakeOpenAI:
            def __init__(self, **kwargs):
                captured.update(kwargs)
                self.api_key = kwargs.get("api_key", "")
                self.base_url = kwargs.get("base_url", "")

        monkeypatch.setattr(aux_module, "OpenAI", _FakeOpenAI)
        patch_load_config(
            self._entra_cfg(
                "https://r.openai.azure.com/openai/v1",
                "chat_completions",
                "gpt-5.4-mini",
            )
        )

        client, resolved = aux_module._try_azure_foundry(model="gpt-5.4-mini")

        assert resolved == "gpt-5.4-mini"
        assert isinstance(client, aux_module.CodexAuxiliaryClient)
        # The Codex wrapper received an OpenAI client built with the
        # callable api_key — verify against the SDK constructor record,
        # not the wrapper attribute (which mirrors the SDK's empty-
        # string representation).
        assert callable(captured["api_key"])
        assert captured["api_key"]().startswith("jwt-for-")

    def test_entra_anthropic_messages_uses_bearer_hook(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """Entra ID + anthropic_messages: runtime returns a callable
        api_key; ``_maybe_wrap_anthropic`` → ``build_anthropic_client``
        detects the callable and installs the bearer-injecting httpx
        event hook on a custom ``httpx.Client`` passed to the
        Anthropic SDK via ``http_client=``."""
        from agent import auxiliary_client as aux_module
        from agent import anthropic_adapter as anthropic_module

        captured = {}

        class _FakeOpenAI:
            def __init__(self, **kwargs):
                captured["openai"] = kwargs
                self.api_key = kwargs.get("api_key", "")
                self.base_url = kwargs.get("base_url", "")

        class _FakeAnthropicSDK:
            class Anthropic:
                def __init__(self, **kwargs):
                    captured["anthropic"] = kwargs

        monkeypatch.setattr(aux_module, "OpenAI", _FakeOpenAI)
        monkeypatch.setattr(
            anthropic_module, "_get_anthropic_sdk", lambda: _FakeAnthropicSDK
        )
        patch_load_config(
            self._entra_cfg(
                "https://r.services.ai.azure.com/anthropic",
                "anthropic_messages",
                "claude-sonnet-4-5",
            )
        )

        client, resolved = aux_module._try_azure_foundry(model="claude-sonnet-4-5")

        assert client is not None
        assert resolved == "claude-sonnet-4-5"
        # The Anthropic SDK constructor received a custom http_client
        # (the bearer-injecting hook) and a placeholder auth_token.
        anthropic_kwargs = captured.get("anthropic") or {}
        assert "http_client" in anthropic_kwargs, (
            "build_anthropic_client must pass a custom http_client when "
            "given a callable api_key, otherwise the SDK cannot mint "
            "fresh tokens per request"
        )
        assert anthropic_kwargs.get("auth_token") == "entra-id-bearer-via-http-hook"
        # Verify the http_client actually has our event hook installed.
        hooks = getattr(anthropic_kwargs["http_client"], "event_hooks", {})
        assert "request" in hooks and len(hooks["request"]) >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# resolve_provider_client → azure-foundry dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResolveProviderClientAzureFoundry:
    """``resolve_provider_client`` dispatch for the azure-foundry provider."""

    def test_dispatches_to_azure_branch_not_generic_api_key_path(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """End-to-end: the public ``resolve_provider_client`` entry
        point must take the dedicated azure-foundry branch, NOT the
        generic api-key registry path that would call
        ``resolve_api_key_provider_credentials`` and return None for
        Entra users."""
        from agent import auxiliary_client as aux_module

        captured = {}

        class _FakeOpenAI:
            def __init__(self, **kwargs):
                captured.update(kwargs)
                self.api_key = kwargs.get("api_key", "")
                self.base_url = kwargs.get("base_url", "")

        monkeypatch.setattr(aux_module, "OpenAI", _FakeOpenAI)
        entra_model_cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "default": "gpt-4o",
        }
        patch_load_config(entra_model_cfg)

        client, resolved = aux_module.resolve_provider_client("azure-foundry", "gpt-4o")

        assert client is not None
        assert resolved == "gpt-4o"
        # The callable made it through resolve_provider_client →
        # _try_azure_foundry → OpenAI(api_key=...).
        assert callable(captured["api_key"])

    def test_warns_and_returns_none_on_failure(
        self, monkeypatch, patch_load_config, caplog,
    ):
        """When azure-foundry is requested but cannot be resolved
        (e.g. no model + no key), we return (None, None) and log a
        clear warning pointing at ``hermes doctor``."""
        import logging
        from agent.auxiliary_client import resolve_provider_client

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        # No "default" entry → resolver yields no model → bail.
        patch_load_config({
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        })

        with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
            client, resolved = resolve_provider_client("azure-foundry")

        assert client is None
        assert resolved is None
        assert any(
            "azure-foundry" in rec.message and "hermes doctor" in rec.message
            for rec in caplog.records
        )
|
||||
662
tests/agent/test_azure_identity_adapter.py
Normal file
662
tests/agent/test_azure_identity_adapter.py
Normal file
|
|
@ -0,0 +1,662 @@
|
|||
"""Tests for the Microsoft Entra ID adapter (agent/azure_identity_adapter.py).
|
||||
|
||||
Covers:
|
||||
- Scope resolution per Azure host shape
|
||||
- Display masking for callable + string + None inputs
|
||||
- Cache-fingerprint stability under callable refresh
|
||||
- is_token_provider truthiness on callables vs strings
|
||||
- EntraIdentityConfig serialization round-trip
|
||||
- Token provider construction with mocked azure-identity
|
||||
- Credential cache reuse + reset
|
||||
- has_azure_identity_credentials timeout / failure paths
|
||||
- describe_active_credential structural reporting
|
||||
- Lazy-install error path when azure-identity absent + lazy installs
|
||||
disabled
|
||||
|
||||
We mock azure.identity at the import boundary rather than hitting any
|
||||
real Azure endpoint. Tests must remain hermetic per AGENTS.md.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections.abc import Callable
|
||||
from types import SimpleNamespace
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Reset the adapter's credential cache around every test — caches in
|
||||
# the adapter persist across tests otherwise, polluting assertions
|
||||
# about cache invalidation.
|
||||
@pytest.fixture(autouse=True)
def _reset_adapter_cache():
    """Clear the adapter's credential cache before and after every test."""
    from agent.azure_identity_adapter import (
        reset_credential_cache as clear_cache,
    )

    clear_cache()
    yield
    clear_cache()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scope constant
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEntraScopeConstant:
    """Pin the Microsoft-documented Foundry inference scope.

    Microsoft's official samples for both ``*.openai.azure.com`` and
    ``*.services.ai.azure.com`` use ``https://ai.azure.com/.default``.
    The older ``cognitiveservices.azure.com/.default`` is the
    control-plane scope and is rejected for inference by newer
    Azure OpenAI / Foundry resources.

    Users with sovereign-cloud or unusual-tenant requirements pass the
    scope explicitly via ``model.entra.scope`` in ``config.yaml``.

    Refs:
    * https://learn.microsoft.com/azure/ai-foundry/openai/how-to/managed-identity
    * https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
    """

    def test_default_scope_matches_microsoft_documentation(self):
        """The adapter's default scope constant equals the documented value."""
        from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT

        documented_scope = "https://ai.azure.com/.default"
        assert SCOPE_AI_AZURE_DEFAULT == documented_scope
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache fingerprint + http-bearer helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMaterializeBearerForHttp:
    """The only helper that mints a real bearer JWT — must call the
    callable exactly once and never fall through to display masking."""

    def test_callable_is_invoked_and_returns_token(self):
        """A callable credential is invoked once and its token returned."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        calls = []

        def provider():
            calls.append("called")
            return "fresh-jwt"

        assert materialize_bearer_for_http(provider) == "fresh-jwt"
        assert len(calls) == 1

    def test_string_passes_through(self):
        """A plain string credential is returned unchanged."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        assert materialize_bearer_for_http("plain-key") == "plain-key"

    def test_callable_returning_empty_raises(self):
        """A provider that yields an empty token is an error."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        def empty_provider():
            return ""

        with pytest.raises(ValueError):
            materialize_bearer_for_http(empty_provider)

    def test_empty_string_raises(self):
        """Both an empty string and ``None`` are rejected."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        for missing in ("", None):
            with pytest.raises(ValueError):
                materialize_bearer_for_http(missing)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_bearer_http_client — the Anthropic-on-Foundry bridge
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBuildBearerHttpClient:
    """``build_bearer_http_client`` returns an ``httpx.Client`` whose
    request event hook mints a fresh JWT per outbound request. This is
    how Entra ID auth reaches the Anthropic SDK (which does not accept
    callable ``auth_token``)."""

    def test_returns_httpx_client_with_request_hook(self):
        """The factory yields a real ``httpx.Client`` with a request hook."""
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        client = build_bearer_http_client(lambda: "jwt")
        try:
            assert isinstance(client, httpx.Client)
            hooks = client.event_hooks.get("request", [])
            assert len(hooks) >= 1
        finally:
            client.close()

    def test_hook_overrides_authorization_header(self):
        """The hook replaces pre-set auth headers with a freshly minted bearer."""
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        minted_tokens = []

        def provider():
            minted_tokens.append(f"jwt-{len(minted_tokens) + 1}")
            return minted_tokens[-1]

        client = build_bearer_http_client(provider)
        try:
            hook = client.event_hooks["request"][0]
            # Build a request with conflicting pre-set headers and verify
            # the hook strips them and installs the fresh bearer.
            req = httpx.Request(
                "POST", "https://example.com/v1/messages",
                headers={
                    "Authorization": "Bearer stale-token",
                    "api-key": "static-key",
                    "x-api-key": "static-key",
                },
                json={"hello": "world"},
            )
            hook(req)
            assert req.headers["Authorization"] == "Bearer jwt-1"
            # The static-key headers must be stripped — sending both
            # auth values would be ambiguous on Azure.
            assert "api-key" not in req.headers
            assert "x-api-key" not in req.headers

            # Second invocation mints a fresh token.
            req2 = httpx.Request("GET", "https://example.com/v1/models")
            hook(req2)
            assert req2.headers["Authorization"] == "Bearer jwt-2"
            assert len(minted_tokens) == 2
        finally:
            client.close()

    def test_hook_strips_auth_headers_and_warns_when_token_provider_fails(self, caplog):
        """When the token provider fails (chain exhausted, IMDS down, az
        login expired), the hook must:

        1. Log at WARNING level so the misconfiguration is visible at
           default log level (not buried at DEBUG).
        2. Strip any pre-set Authorization headers — including the
           placeholder ``entra-id-bearer-via-http-hook`` sentinel that
           :func:`_build_anthropic_client_with_bearer_hook` sets on the
           Anthropic SDK constructor. This produces a clean
           "missing auth" 401 from Azure rather than a sentinel-bearing
           401 that's harder to diagnose AND avoids leaking the
           sentinel string into upstream access logs.
        """
        import logging
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        def bad_provider():
            return ""  # empty token → materialize_bearer_for_http raises

        client = build_bearer_http_client(bad_provider)
        try:
            hook = client.event_hooks["request"][0]
            req = httpx.Request(
                "POST", "https://example.com/v1/messages",
                headers={
                    "Authorization": "Bearer entra-id-bearer-via-http-hook",
                    "api-key": "leaked-placeholder",
                },
            )
            with caplog.at_level(logging.WARNING, logger="agent.azure_identity_adapter"):
                hook(req)  # Must not raise.
            # Pre-set auth headers stripped — no sentinel makes it to Azure.
            assert "Authorization" not in req.headers
            assert "api-key" not in req.headers
            # WARNING was logged so the user sees the misconfiguration.
            assert any(
                rec.levelno == logging.WARNING and "Entra ID token provider" in rec.message
                for rec in caplog.records
            )
        finally:
            client.close()

    def test_rejects_non_callable_provider(self):
        """Non-callable providers (a string, ``None``) raise ``ValueError``."""
        from agent.azure_identity_adapter import build_bearer_http_client

        with pytest.raises(ValueError):
            build_bearer_http_client(cast(Callable[[], str], "plain-string-not-callable"))
        with pytest.raises(ValueError):
            build_bearer_http_client(cast(Callable[[], str], None))

    def test_forwards_httpx_kwargs(self):
        """Extra keyword arguments reach the underlying ``httpx.Client``."""
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        timeout = httpx.Timeout(60.0, connect=5.0)
        client = build_bearer_http_client(lambda: "jwt", timeout=timeout)
        try:
            # ``httpx.Client.timeout`` exposes the configured Timeout, so
            # compare it rather than only checking that construction did
            # not raise — a silently dropped kwarg would otherwise pass.
            assert client.timeout == timeout
        finally:
            client.close()
|
||||
|
||||
|
||||
class TestIsTokenProvider:
    """``is_token_provider`` tells callable credentials from static keys."""

    def test_callable_is_token_provider(self):
        from agent.azure_identity_adapter import is_token_provider

        provider = lambda: "x"  # noqa: E731 — minimal zero-arg callable
        assert is_token_provider(provider) is True

    def test_string_is_not_token_provider(self):
        from agent.azure_identity_adapter import is_token_provider

        # Static keys — including the empty string — are plain strings and
        # must never be classified as token providers.
        for static_key in ("static-key", ""):
            assert is_token_provider(static_key) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# EntraIdentityConfig
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEntraIdentityConfig:
    """The serializable config that crosses multiprocessing boundaries —
    must round-trip through dict cleanly and never lose fields."""

    def test_to_dict_round_trip(self):
        """``from_dict(cfg.to_dict())`` rebuilds an equal config."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig(
            scope="https://ai.azure.com/.default",
            exclude_interactive_browser=False,
        )
        rebuilt = EntraIdentityConfig.from_dict(cfg.to_dict())
        assert rebuilt == cfg

    def test_from_dict_handles_empty_strings(self):
        """An empty ``scope`` (and a ``None`` legacy key) is tolerated."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig.from_dict({
            "scope": "",
            "client_id": None,
        })
        # Empty scope falls back to default
        assert cfg.scope.endswith("/.default")

    def test_from_dict_ignores_legacy_identity_keys(self):
        """Old config.yaml that still has model.entra.client_id /
        tenant_id / authority should not crash from_dict — those values
        are now read from AZURE_* env vars by azure-identity directly."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig.from_dict({
            "tenant_id": "legacy-tenant",
            "authority": "https://login.partner.microsoftonline.cn",
            "client_id": "user-mi-client",
        })
        # Legacy keys silently ignored — no crash, no surprise field on the dataclass.
        assert not hasattr(cfg, "client_id")
        assert not hasattr(cfg, "tenant_id")
        assert not hasattr(cfg, "authority")

    def test_constructor_normalizes_empty_scope(self):
        """Constructing with ``scope=""`` yields a ``/.default`` scope."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig(scope="")
        assert cfg.scope.endswith("/.default")

    def test_from_dict_default_scope_override(self):
        """``default_scope`` replaces an empty ``scope`` in the input dict."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig.from_dict(
            {"scope": ""},
            default_scope="https://custom.example/.default",
        )
        assert cfg.scope == "https://custom.example/.default"

    def test_dataclass_is_frozen(self):
        """Assigning to a field of the frozen config must be rejected."""
        # Frozen dataclasses are hashable / safe to pass through caches.
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig()
        # ``dataclasses.FrozenInstanceError`` subclasses ``AttributeError``.
        # Catching bare ``Exception`` here would let any unrelated failure
        # satisfy the test, so expect the attribute-assignment error only.
        with pytest.raises(AttributeError):
            setattr(cfg, "scope", "mutated")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credential / token provider construction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeAzureIdentity:
|
||||
"""Stand-in for the ``azure.identity`` module.
|
||||
|
||||
Captures kwargs passed to ``DefaultAzureCredential`` so tests can
|
||||
assert how config flows into the SDK.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.last_credential_kwargs = None
|
||||
self.last_scope = None
|
||||
self.credential_count = 0
|
||||
|
||||
def DefaultAzureCredential(self, **kwargs): # noqa: N802 — match SDK
|
||||
self.last_credential_kwargs = kwargs
|
||||
self.credential_count += 1
|
||||
return SimpleNamespace(
|
||||
get_token=lambda scope: SimpleNamespace(token="fake-jwt", expires_on=9999999999),
|
||||
kwargs=kwargs,
|
||||
)
|
||||
|
||||
def get_bearer_token_provider(self, credential, scope):
|
||||
self.last_scope = scope
|
||||
# Return a callable that mints a token when invoked.
|
||||
return lambda: f"jwt-for-{scope}"
|
||||
|
||||
|
||||
@pytest.fixture
def fake_azure_identity(monkeypatch):
    """Swap ``azure.identity`` for a recording fake for one test.

    Registers the fake under ``azure`` and ``azure.identity`` in
    ``sys.modules`` and replaces the adapter's ``_require_azure_identity``
    so it returns the same fake module. Yields the ``_FakeAzureIdentity``
    recorder so tests can inspect captured calls.
    """
    recorder = _FakeAzureIdentity()
    stub_module = SimpleNamespace(
        DefaultAzureCredential=recorder.DefaultAzureCredential,
        get_bearer_token_provider=recorder.get_bearer_token_provider,
    )

    azure_package = SimpleNamespace(identity=stub_module)
    monkeypatch.setitem(sys.modules, "azure", azure_package)
    monkeypatch.setitem(sys.modules, "azure.identity", stub_module)

    # The adapter resolves the SDK through its own
    # ``_require_azure_identity`` helper, so stub that as well; otherwise
    # a test could still reach the real package.
    from agent import azure_identity_adapter as _adapter

    def _return_stub():
        return stub_module

    monkeypatch.setattr(_adapter, "_require_azure_identity", _return_stub)

    return recorder
|
||||
|
||||
|
||||
class TestBuildCredential:
    """``build_credential``: SDK kwargs and per-config caching."""

    def test_default_kwargs_are_minimal(self, fake_azure_identity):
        """A default config passes no kwargs to ``DefaultAzureCredential``.

        ``exclude_interactive_browser_credential`` is only forwarded when
        the user opts in to interactive browser auth. Tenant, authority
        and service-principal settings come from the standard ``AZURE_*``
        env vars read by azure-identity, not from Hermes config kwargs.
        """
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        default_config = EntraIdentityConfig()
        cred = build_credential(default_config)
        captured = fake_azure_identity.last_credential_kwargs
        # Nothing is forwarded: the SDK applies its own defaults.
        assert captured == {}
        assert cred is not None

    def test_interactive_browser_opt_in(self, fake_azure_identity):
        """``exclude_interactive_browser=False`` is forwarded to the SDK as
        ``exclude_interactive_browser_credential=False``."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        opt_in = EntraIdentityConfig(exclude_interactive_browser=False)
        build_credential(opt_in)
        captured = fake_azure_identity.last_credential_kwargs
        assert captured["exclude_interactive_browser_credential"] is False

    def test_credential_is_cached_per_config(self, fake_azure_identity):
        """Building twice from one config reuses a single credential."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        cfg = EntraIdentityConfig(scope="s1")
        first = build_credential(cfg)
        second = build_credential(cfg)
        assert first is second
        assert fake_azure_identity.credential_count == 1

    def test_distinct_configs_get_distinct_credentials(self, fake_azure_identity):
        """Configs with different scopes each get their own credential."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        for_s1 = build_credential(EntraIdentityConfig(scope="s1"))
        for_s2 = build_credential(EntraIdentityConfig(scope="s2"))
        assert for_s1 is not for_s2
        assert fake_azure_identity.credential_count == 2

    def test_reset_cache_invalidates(self, fake_azure_identity):
        """After ``reset_credential_cache`` a new credential object is built."""
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            build_credential,
            reset_credential_cache,
        )

        cfg = EntraIdentityConfig(scope="x")
        before_reset = build_credential(cfg)
        reset_credential_cache()
        after_reset = build_credential(cfg)
        assert before_reset is not after_reset
|
||||
|
||||
|
||||
class TestBuildTokenProvider:
    """Scope resolution rules of ``build_token_provider``."""

    def test_returns_callable_for_scope(self, fake_azure_identity):
        """An explicit scope yields a callable bound to that scope."""
        from agent.azure_identity_adapter import build_token_provider

        wanted_scope = "https://ai.azure.com/.default"
        provider = build_token_provider(scope=wanted_scope)
        assert callable(provider)
        assert provider() == "jwt-for-https://ai.azure.com/.default"
        assert fake_azure_identity.last_scope == "https://ai.azure.com/.default"

    def test_falls_back_to_default_scope_when_unspecified(self, fake_azure_identity):
        """With neither ``scope`` nor ``config`` given, the provider is
        built for ``SCOPE_AI_AZURE_DEFAULT``; a ``base_url`` argument is
        accepted but does not influence the scope."""
        from agent.azure_identity_adapter import (
            SCOPE_AI_AZURE_DEFAULT,
            build_token_provider,
        )

        endpoint = "https://r.openai.azure.com/openai/v1"
        build_token_provider(base_url=endpoint)
        assert fake_azure_identity.last_scope == SCOPE_AI_AZURE_DEFAULT

    def test_explicit_scope_wins_over_base_url(self, fake_azure_identity):
        """An explicit ``scope`` is used even when ``base_url`` is passed."""
        from agent.azure_identity_adapter import build_token_provider

        call_kwargs = {
            "scope": "https://override.example/.default",
            "base_url": "https://r.openai.azure.com/openai/v1",
        }
        build_token_provider(**call_kwargs)
        assert fake_azure_identity.last_scope == "https://override.example/.default"

    def test_config_object_wins_over_kwargs(self, fake_azure_identity):
        """A ``config`` object's scope overrides the ``scope`` kwarg."""
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            build_token_provider,
        )

        cfg = EntraIdentityConfig(scope="cfg-scope")
        build_token_provider(scope="ignored", config=cfg)
        recorded = fake_azure_identity
        assert recorded.last_scope == "cfg-scope"
        assert recorded.last_credential_kwargs == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lazy-install / missing-package surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRequireAzureIdentityMissing:
    """Error surface of ``_require_azure_identity`` when the SDK is absent."""

    def test_clear_error_when_lazy_install_disabled(self, monkeypatch):
        """When azure-identity isn't importable AND lazy installs are
        off, the adapter must raise ImportError with an actionable
        message, not propagate FeatureUnavailable."""
        import builtins

        from agent import azure_identity_adapter as _adapter

        # Force the import path to fail. ``builtins.__import__`` is the
        # documented hook; the ``__builtins__`` global is an implementation
        # detail that is a dict or a module depending on context.
        original_import = builtins.__import__

        def _fake_import(name, *args, **kwargs):
            if name == "azure.identity" or name.startswith("azure.identity."):
                raise ImportError("simulated missing azure-identity")
            return original_import(name, *args, **kwargs)

        monkeypatch.setattr(builtins, "__import__", _fake_import)

        # Simulate lazy installs disabled.
        from tools.lazy_deps import FeatureUnavailable

        def _fake_ensure(*args, **kwargs):
            raise FeatureUnavailable(
                "provider.azure_identity",
                ("azure-identity==1.25.3",),
                "lazy installs disabled (test simulation)",
            )

        # The adapter calls ``ensure`` from ``tools.lazy_deps``; intercept
        # it by patching the actual symbol path.
        monkeypatch.setattr("tools.lazy_deps.ensure", _fake_ensure)

        with pytest.raises(ImportError) as exc_info:
            _adapter._require_azure_identity()
        msg = str(exc_info.value)
        assert "azure-identity" in msg
        # The case-insensitive check already covers the exact "Foundry" spelling.
        assert "foundry" in msg.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# has_azure_identity_credentials probe (timeout-bounded)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHasAzureIdentityCredentials:
    """Outcomes of the ``has_azure_identity_credentials`` probe."""

    def test_returns_false_when_package_missing_and_install_disabled(self, monkeypatch):
        """No package and ``allow_install=False`` → the probe reports False."""
        from agent import azure_identity_adapter as _adapter

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
        outcome = _adapter.has_azure_identity_credentials(
            "https://x/.default", allow_install=False,
        )
        assert outcome is False

    def test_lazy_install_triggered_when_package_missing(self, monkeypatch):
        """With ``allow_install`` left at its default, the probe must go
        through the lazy-install path before giving up — otherwise the
        wizard's ``preflight`` would silently fail for fresh installs
        that haven't run ``pip install azure-identity`` yet."""
        from agent import azure_identity_adapter as _adapter

        installed = {"called": False}

        def _post_install_credential(**kw):
            return SimpleNamespace(
                kwargs=kw,
                get_token=lambda scope: SimpleNamespace(token="post-install-jwt", expires_on=0),
            )

        def _fake_install():
            installed["called"] = True
            # After install, pretend the package is now importable.
            monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
            return SimpleNamespace(
                DefaultAzureCredential=_post_install_credential,
                get_bearer_token_provider=lambda c, s: lambda: "x",
            )

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
        monkeypatch.setattr(_adapter, "_require_azure_identity", _fake_install)

        # Provide a credential factory so the probe proceeds after install.
        def _probe_credential(config):
            return SimpleNamespace(
                get_token=lambda scope: SimpleNamespace(token="probe-jwt", expires_on=0),
            )

        monkeypatch.setattr(_adapter, "build_credential", _probe_credential)

        result = _adapter.has_azure_identity_credentials(
            "https://x/.default", timeout_seconds=0.5,
        )
        assert installed["called"] is True, (
            "has_azure_identity_credentials must trigger lazy install "
            "before bailing"
        )
        assert result is True

    def test_returns_true_on_successful_token_mint(self, fake_azure_identity):
        """A credential that hands back a token makes the probe succeed."""
        from agent.azure_identity_adapter import has_azure_identity_credentials

        outcome = has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5)
        assert outcome is True

    def test_returns_false_when_get_token_raises(self, monkeypatch):
        """An exception from ``get_token`` is reported as False."""
        from agent import azure_identity_adapter as _adapter

        class _Cred:
            def get_token(self, scope):
                raise RuntimeError("simulated chain exhaustion")

        def _failing_credential(_config):
            return _Cred()

        monkeypatch.setattr(_adapter, "build_credential", _failing_credential)
        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
        outcome = _adapter.has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5)
        assert outcome is False

    def test_returns_false_on_timeout(self, monkeypatch):
        """Slow IMDS / network must time out, not hang the caller."""
        import threading
        from agent import azure_identity_adapter as _adapter

        slow_release = threading.Event()

        class _Cred:
            def get_token(self, scope):
                # Stay blocked well past the probe's 0.1 s budget (until
                # the test releases the event); the adapter is expected
                # to give up on its own.
                slow_release.wait(timeout=10)
                return SimpleNamespace(token="never-returned", expires_on=0)

        def _slow_credential(_config):
            return _Cred()

        monkeypatch.setattr(_adapter, "build_credential", _slow_credential)
        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
        try:
            outcome = _adapter.has_azure_identity_credentials(
                "https://x/.default", timeout_seconds=0.1
            )
            assert outcome is False
        finally:
            # Unblock the stub's ``get_token`` wait so nothing lingers.
            slow_release.set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# describe_active_credential — used by hermes doctor + hermes auth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDescribeActiveCredential:
    """Shape of the diagnostic dict returned by ``describe_active_credential``."""

    def test_reports_not_installed(self, monkeypatch):
        """Missing package with installs disallowed → error plus a pip hint."""
        from agent import azure_identity_adapter as _adapter

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
        info = _adapter.describe_active_credential(
            scope="https://x/.default", allow_install=False,
        )
        assert info["ok"] is False
        error_text = info["error"].lower()
        hint_text = info["hint"].lower()
        assert "not installed" in error_text
        assert "pip install" in hint_text

    def test_reports_install_failure(self, monkeypatch):
        """When lazy install is allowed but fails (e.g. lazy installs
        disabled), the diagnostic surfaces the failure as the error."""
        from agent import azure_identity_adapter as _adapter

        def _fail_install():
            raise ImportError("simulated: lazy installs disabled")

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
        monkeypatch.setattr(_adapter, "_require_azure_identity", _fail_install)
        info = _adapter.describe_active_credential(
            scope="https://x/.default", allow_install=True,
        )
        assert info["ok"] is False
        assert "lazy installs disabled" in info["error"]
        assert "lazy" in info["hint"].lower()

    def test_reports_env_sources_for_managed_identity(self, fake_azure_identity, monkeypatch):
        """``IDENTITY_ENDPOINT`` set → a ManagedIdentity source is listed."""
        from agent.azure_identity_adapter import describe_active_credential

        monkeypatch.setenv("IDENTITY_ENDPOINT", "http://169.254.169.254")
        info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
        assert info["ok"] is True
        sources = info.get("env_sources") or []
        matches = ["ManagedIdentity" in s for s in sources]
        assert any(matches)

    def test_reports_env_sources_for_workload_identity(self, fake_azure_identity, monkeypatch):
        """``AZURE_FEDERATED_TOKEN_FILE`` set → a WorkloadIdentity source is listed."""
        from agent.azure_identity_adapter import describe_active_credential

        monkeypatch.setenv("AZURE_FEDERATED_TOKEN_FILE", "/var/secrets/azure/federated-token")
        info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
        sources = info.get("env_sources") or []
        matches = ["WorkloadIdentity" in s for s in sources]
        assert any(matches)

    def test_reports_env_sources_for_service_principal(self, fake_azure_identity, monkeypatch):
        """Tenant, client id and secret set → an EnvironmentCredential source is listed."""
        from agent.azure_identity_adapter import describe_active_credential

        service_principal_env = (
            ("AZURE_TENANT_ID", "t"),
            ("AZURE_CLIENT_ID", "c"),
            ("AZURE_CLIENT_SECRET", "s"),
        )
        for env_name, env_value in service_principal_env:
            monkeypatch.setenv(env_name, env_value)
        info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
        sources = info.get("env_sources") or []
        matches = ["EnvironmentCredential" in s for s in sources]
        assert any(matches)

    def test_reports_error_on_chain_failure(self, monkeypatch):
        """A ``get_token`` failure is reported with its message in ``error``."""
        from agent import azure_identity_adapter as _adapter

        class _Cred:
            def get_token(self, scope):
                raise RuntimeError("auth failed")

        def _failing_credential(_config):
            return _Cred()

        monkeypatch.setattr(_adapter, "build_credential", _failing_credential)
        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
        info = _adapter.describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
        assert info["ok"] is False
        assert "auth failed" in info.get("error", "")
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
"""Tests for the 1M-context beta header on AWS Bedrock Claude models.
|
||||
|
||||
Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS
|
||||
Bedrock (and Azure AI Foundry) that window is still gated behind the
|
||||
Bedrock (and Microsoft Foundry) that window is still gated behind the
|
||||
``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock
|
||||
caps these models at 200K even though ``model_metadata.py`` advertises 1M.
|
||||
|
||||
|
|
@ -61,4 +61,3 @@ class TestBedrockContext1MBeta:
|
|||
# Other common betas still present — no regression.
|
||||
assert "interleaved-thinking-2025-05-14" in beta_header
|
||||
assert "fine-grained-tool-streaming-2025-05-14" in beta_header
|
||||
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ def test_detect_anthropic_path_wins_without_http():
|
|||
|
||||
def test_detect_openai_models_probe_success():
|
||||
"""/models probe returning a model list → chat_completions."""
|
||||
def _fake_get(url, api_key, timeout=6.0):
|
||||
def _fake_get(url, api_key, timeout=6.0, **kwargs):
|
||||
assert "key-abc" == api_key
|
||||
return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))
|
||||
|
||||
|
|
@ -118,7 +118,7 @@ def test_detect_openai_models_probe_success():
|
|||
|
||||
def test_detect_openai_models_probe_empty_list_still_counts():
|
||||
"""Endpoint returned OpenAI shape but no models → still chat_completions."""
|
||||
def _fake_get(url, api_key, timeout=6.0):
|
||||
def _fake_get(url, api_key, timeout=6.0, **kwargs):
|
||||
return 200, {"object": "list", "data": []}
|
||||
|
||||
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
|
||||
|
|
@ -132,7 +132,7 @@ def test_detect_openai_models_probe_empty_list_still_counts():
|
|||
|
||||
def test_detect_falls_back_to_anthropic_probe():
|
||||
"""/models fails but Anthropic Messages probe succeeds."""
|
||||
def _fake_get(url, api_key, timeout=6.0):
|
||||
def _fake_get(url, api_key, timeout=6.0, **kwargs):
|
||||
return 401, None # /models forbidden
|
||||
|
||||
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \
|
||||
|
|
@ -164,7 +164,7 @@ def test_probe_openai_models_tries_multiple_api_versions():
|
|||
"""First call (no api-version) fails, api-version fallback succeeds."""
|
||||
calls = []
|
||||
|
||||
def _fake_get(url, api_key, timeout=6.0):
|
||||
def _fake_get(url, api_key, timeout=6.0, **kwargs):
|
||||
calls.append(url)
|
||||
if "api-version" not in url:
|
||||
return 404, None
|
||||
|
|
|
|||
404
tests/hermes_cli/test_azure_foundry_entra.py
Normal file
404
tests/hermes_cli/test_azure_foundry_entra.py
Normal file
|
|
@ -0,0 +1,404 @@
|
|||
"""Tests for Azure Foundry Entra ID runtime resolution.
|
||||
|
||||
Covers the contract introduced by the PR that adds Microsoft Entra ID auth on
|
||||
``azure-foundry``:
|
||||
|
||||
* ``_resolve_azure_foundry_runtime`` returns a callable ``api_key`` for
|
||||
``model.auth_mode = entra_id`` on OpenAI-style endpoints.
|
||||
* Anthropic-style endpoints with ``auth_mode = entra_id`` return the same
|
||||
callable runtime credential as OpenAI-style endpoints.
|
||||
* The legacy ``api_key`` path is unchanged when ``auth_mode`` is absent
|
||||
or set to ``api_key``.
|
||||
* Explicit ``--api-key`` overrides at runtime still work in entra mode
|
||||
(escape hatch for one-off testing).
|
||||
* ``model.entra.scope`` propagates to the token-provider config; Azure
|
||||
identity selection stays in standard AZURE_* env vars.
|
||||
* ``_get_azure_foundry_auth_status`` is structural — never mints a
|
||||
token (verified by checking the credential cache untouched).
|
||||
* ``has_usable_secret`` for ``AZURE_FOUNDRY_API_KEY`` is irrelevant
|
||||
when ``auth_mode == entra_id``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_credential_cache():
    """Clear the adapter's credential cache before and after every test."""
    from agent import azure_identity_adapter as _adapter_module

    _adapter_module.reset_credential_cache()
    yield
    _adapter_module.reset_credential_cache()
|
||||
|
||||
|
||||
@pytest.fixture
def fake_azure_identity(monkeypatch):
    """Stub ``azure.identity`` so these tests run without the Azure SDK.

    Returns a dict recording what the most recent
    ``get_bearer_token_provider`` call received: ``scope``, the
    credential's ``kwargs``, and ``credential_count``, which is incremented
    once per such call.
    """
    from agent import azure_identity_adapter as _adapter

    last = {"scope": None, "kwargs": None, "credential_count": 0}

    def _default_azure_credential(**kw):
        return SimpleNamespace(
            kwargs=kw,
            get_token=lambda scope: SimpleNamespace(token="fake", expires_on=9999999999),
        )

    def _get_bearer_token_provider(credential, scope):
        # Record the call, then hand back a provider bound to the scope.
        last["scope"] = scope
        last["kwargs"] = credential.kwargs
        last["credential_count"] = cast(int, last["credential_count"]) + 1
        return lambda: f"jwt-for-{scope}"

    fake_module = SimpleNamespace(
        DefaultAzureCredential=_default_azure_credential,
        get_bearer_token_provider=_get_bearer_token_provider,
    )
    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module)
    monkeypatch.setitem(sys.modules, "azure.identity", fake_module)
    return last
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resolve_azure_foundry_runtime: entra_id branch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResolveAzureFoundryRuntimeEntra:
|
||||
def test_returns_callable_api_key_for_entra(self, fake_azure_identity):
|
||||
from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
|
||||
runtime = _resolve_azure_foundry_runtime(
|
||||
requested_provider="azure-foundry",
|
||||
model_cfg={
|
||||
"provider": "azure-foundry",
|
||||
"base_url": "https://my-resource.openai.azure.com/openai/v1",
|
||||
"api_mode": "chat_completions",
|
||||
"auth_mode": "entra_id",
|
||||
"default": "gpt-4o", # stays on chat_completions (no codex auto-upgrade)
|
||||
},
|
||||
)
|
||||
assert runtime["provider"] == "azure-foundry"
|
||||
assert runtime["auth_mode"] == "entra_id"
|
||||
assert runtime["api_mode"] == "chat_completions"
|
||||
assert callable(runtime["api_key"])
|
||||
assert runtime["source"] == "entra_id"
|
||||
|
||||
def test_entra_inherits_codex_responses_for_gpt5_family(self, fake_azure_identity):
|
||||
"""GPT-5.x / o-series / codex models on Azure are Responses-API-only.
|
||||
The runtime auto-upgrades api_mode regardless of auth mode — this is
|
||||
the same behaviour as the static-key path (see
|
||||
``hermes_cli/models.py::azure_foundry_model_api_mode``)."""
|
||||
from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
|
||||
runtime = _resolve_azure_foundry_runtime(
|
||||
requested_provider="azure-foundry",
|
||||
model_cfg={
|
||||
"provider": "azure-foundry",
|
||||
"base_url": "https://my-resource.openai.azure.com/openai/v1",
|
||||
"api_mode": "chat_completions",
|
||||
"auth_mode": "entra_id",
|
||||
"default": "gpt-5.4",
|
||||
},
|
||||
)
|
||||
# GPT-5.x is upgraded to codex_responses — Entra path inherits.
|
||||
assert runtime["api_mode"] == "codex_responses"
|
||||
assert callable(runtime["api_key"])
|
||||
assert runtime["auth_mode"] == "entra_id"
|
||||
|
||||
def test_entra_propagates_scope_only(self, fake_azure_identity):
    """Only ``model.entra.scope`` is forwarded to the Azure SDK.

    Identity selection (client ID, tenant, authority, service principal
    secret, federated token file) flows through the standard ``AZURE_*``
    env vars read by azure-identity directly. Legacy
    ``model.entra.client_id`` / ``tenant_id`` / ``authority`` keys in
    config.yaml are silently ignored.
    """
    from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

    entra_section = {
        "scope": "https://custom.example/.default",
        "client_id": "client-uuid",
        # Legacy keys must not crash — they are accepted in
        # from_dict but never propagated to the SDK.
        "tenant_id": "legacy-tenant",
        "authority": "https://login.microsoftonline.us",
    }
    _resolve_azure_foundry_runtime(
        requested_provider="azure-foundry",
        model_cfg={
            "provider": "azure-foundry",
            "base_url": "https://my-resource.services.ai.azure.com/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "entra": entra_section,
        },
    )

    assert fake_azure_identity["scope"] == "https://custom.example/.default"

    # None of the identity-selection kwargs may be handed to the SDK.
    sdk_kwargs = fake_azure_identity["kwargs"]
    for forbidden in (
        "managed_identity_client_id",
        "workload_identity_client_id",
        "interactive_browser_tenant_id",
        "authority",
    ):
        assert forbidden not in sdk_kwargs
|
||||
|
||||
def test_entra_default_scope_when_unset(self, fake_azure_identity):
    """Without ``model.entra.scope`` the default inference scope is used.

    The expected value is Microsoft's documented inference scope,
    ``https://ai.azure.com/.default``, regardless of whether the endpoint
    is ``*.openai.azure.com`` or ``*.services.ai.azure.com``. Both shapes
    use the SAME scope per Microsoft's docs; the
    ``cognitiveservices.azure.com`` scope is the control-plane audience
    and is rejected for inference by newer resources.
    """
    from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT
    from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

    # No "entra" section at all — the default must kick in.
    cfg_without_scope = {
        "provider": "azure-foundry",
        "base_url": "https://r.openai.azure.com/openai/v1",
        "api_mode": "chat_completions",
        "auth_mode": "entra_id",
    }
    _resolve_azure_foundry_runtime(
        requested_provider="azure-foundry",
        model_cfg=cfg_without_scope,
    )

    assert fake_azure_identity["scope"] == SCOPE_AI_AZURE_DEFAULT
|
||||
|
||||
def test_entra_scope_override_wins(self, fake_azure_identity):
    """An explicit ``model.entra.scope`` replaces the default scope.

    Users on sovereign clouds / unusual tenants rely on this override.
    """
    from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

    cfg_with_override = {
        "provider": "azure-foundry",
        "base_url": "https://r.openai.azure.com/openai/v1",
        "api_mode": "chat_completions",
        "auth_mode": "entra_id",
        "entra": {
            "scope": "https://cognitiveservices.azure.com/.default",
        },
    }
    _resolve_azure_foundry_runtime(
        requested_provider="azure-foundry",
        model_cfg=cfg_with_override,
    )

    observed_scope = fake_azure_identity["scope"]
    assert observed_scope == "https://cognitiveservices.azure.com/.default"
|
||||
|
||||
def test_entra_with_anthropic_messages_is_supported(self, fake_azure_identity):
    """Entra ID works for Anthropic-style Azure Foundry endpoints too.

    The runtime returns a callable ``api_key``; downstream
    :func:`agent.anthropic_adapter.build_anthropic_client` detects the
    callable and installs an httpx event hook that mints a fresh bearer
    JWT per request (the Anthropic SDK does not accept a callable
    auth_token natively).
    """
    from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

    anthropic_cfg = {
        "provider": "azure-foundry",
        "base_url": "https://r.services.ai.azure.com/anthropic",
        "api_mode": "anthropic_messages",
        "auth_mode": "entra_id",
        "default": "claude-sonnet-4-5",
    }
    resolved = _resolve_azure_foundry_runtime(
        requested_provider="azure-foundry",
        model_cfg=anthropic_cfg,
    )

    assert resolved["provider"] == "azure-foundry"
    assert resolved["auth_mode"] == "entra_id"
    assert resolved["api_mode"] == "anthropic_messages"

    # Callable api_key — the anthropic_adapter detects this and
    # plumbs through an httpx event hook.
    credential = resolved["api_key"]
    assert callable(credential)
    assert not isinstance(credential, str)
|
||||
|
||||
def test_entra_with_explicit_api_key_uses_string_escape_hatch(self, fake_azure_identity):
    """An explicit API key overrides the Entra path.

    Passing --api-key on the CLI lets a user debug a single request with
    a static key without editing config.yaml.
    """
    from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

    entra_cfg = {
        "provider": "azure-foundry",
        "base_url": "https://r.openai.azure.com/openai/v1",
        "api_mode": "chat_completions",
        "auth_mode": "entra_id",
    }
    resolved = _resolve_azure_foundry_runtime(
        requested_provider="azure-foundry",
        model_cfg=entra_cfg,
        explicit_api_key="explicit-string-key",
    )

    # The static key wins, and the reported auth mode / source follow it.
    assert resolved["api_key"] == "explicit-string-key"
    assert resolved["auth_mode"] == "api_key"
    assert resolved["source"] == "explicit"
|
||||
|
||||
def test_entra_runtime_dict_keeps_only_scope_override(self, fake_azure_identity):
    """The runtime's ``entra`` entry carries the scope and nothing else.

    A ``client_id`` supplied next to ``scope`` in the config must not
    appear in the resolved runtime dict.
    """
    from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

    cfg = {
        "provider": "azure-foundry",
        "base_url": "https://r.openai.azure.com/openai/v1",
        "api_mode": "chat_completions",
        "auth_mode": "entra_id",
        "entra": {
            "scope": "https://custom.example/.default",
            "client_id": "legacy-client",
        },
    }
    resolved = _resolve_azure_foundry_runtime(
        requested_provider="azure-foundry",
        model_cfg=cfg,
    )

    expected_entra = {"scope": "https://custom.example/.default"}
    assert resolved["entra"] == expected_entra
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _resolve_azure_foundry_runtime: legacy api_key branch (regression)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestResolveAzureFoundryRuntimeApiKey:
    """Regression coverage for the static ``AZURE_FOUNDRY_API_KEY`` branch."""

    def test_default_auth_mode_uses_static_key(self, monkeypatch):
        """With no ``auth_mode`` configured, the env-var key is used."""
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        }
        resolved = _resolve_azure_foundry_runtime(
            requested_provider="azure-foundry",
            model_cfg=cfg,
        )

        assert resolved["api_key"] == "sk-azure-static-key"
        assert resolved["auth_mode"] == "api_key"
        assert "entra" not in resolved  # only present in entra mode

    def test_explicit_auth_mode_api_key(self, monkeypatch):
        """``auth_mode: api_key`` resolves the env-var key as well."""
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-static")
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "api_key",
        }
        resolved = _resolve_azure_foundry_runtime(
            requested_provider="azure-foundry",
            model_cfg=cfg,
        )

        assert resolved["api_key"] == "sk-static"
        assert resolved["auth_mode"] == "api_key"

    def test_anthropic_messages_strips_v1_suffix(self, monkeypatch):
        """In ``anthropic_messages`` mode the trailing ``/v1`` is removed."""
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "k")
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.services.ai.azure.com/anthropic/v1",
            "api_mode": "anthropic_messages",
        }
        resolved = _resolve_azure_foundry_runtime(
            requested_provider="azure-foundry",
            model_cfg=cfg,
        )

        assert resolved["base_url"] == "https://r.services.ai.azure.com/anthropic"

    def test_missing_api_key_raises_with_entra_hint(self, monkeypatch):
        """A missing key raises ``AuthError`` that names both auth options."""
        from hermes_cli.auth import AuthError
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        }
        with pytest.raises(AuthError) as exc_info:
            _resolve_azure_foundry_runtime(
                requested_provider="azure-foundry",
                model_cfg=cfg,
            )

        message = str(exc_info.value)
        assert "AZURE_FOUNDRY_API_KEY" in message
        # Surface the Entra alternative so users discover the keyless path.
        assert "entra_id" in message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _get_azure_foundry_auth_status (auth.py) — never mints a token
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAzureFoundryAuthStatus:
    """Checks for ``hermes_cli.auth._get_azure_foundry_auth_status``."""

    @staticmethod
    def _use_model_config(monkeypatch, model_section):
        """Make ``hermes_cli.config.load_config`` return *model_section*.

        A fresh dict is built on every call, like the inline lambdas this
        helper replaces, so one call can never leak mutations into another.
        """
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"model": dict(model_section)},
        )

    def test_entra_status_does_not_mint_token(self, monkeypatch, tmp_path):
        """Structural check — must return logged_in=True based on
        importable + config, never call get_bearer_token_provider."""
        from hermes_cli import auth as _auth

        # Force load_config to return our entra config.
        self._use_model_config(
            monkeypatch,
            {
                "provider": "azure-foundry",
                "auth_mode": "entra_id",
                "base_url": "https://r.openai.azure.com/openai/v1",
            },
        )
        # Patch has_azure_identity_installed to True; do NOT patch the
        # token provider — if the code path tried to mint, the SDK
        # missing would raise.
        monkeypatch.setattr(
            "agent.azure_identity_adapter.has_azure_identity_installed",
            lambda: True,
        )

        status = _auth._get_azure_foundry_auth_status()

        assert status["logged_in"] is True
        assert status["auth_mode"] == "entra_id"
        assert status["azure_identity_installed"] is True
        assert status["scope"].endswith("/.default")

    def test_entra_status_reports_missing_package(self, monkeypatch):
        """With azure-identity reported absent, status is logged-out plus a hint."""
        from hermes_cli import auth as _auth

        self._use_model_config(
            monkeypatch,
            {
                "provider": "azure-foundry",
                "auth_mode": "entra_id",
                "base_url": "https://r.openai.azure.com/openai/v1",
            },
        )
        monkeypatch.setattr(
            "agent.azure_identity_adapter.has_azure_identity_installed",
            lambda: False,
        )

        status = _auth._get_azure_foundry_auth_status()

        assert status["logged_in"] is False
        assert status["azure_identity_installed"] is False
        assert "azure-identity" in status["hint"]

    def test_api_key_status_uses_env_var(self, monkeypatch):
        """In api_key mode a set ``AZURE_FOUNDRY_API_KEY`` means logged in."""
        from hermes_cli import auth as _auth

        self._use_model_config(
            monkeypatch,
            {
                "provider": "azure-foundry",
                "auth_mode": "api_key",
                "base_url": "https://r.openai.azure.com/openai/v1",
            },
        )
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-real-key-xxx")

        status = _auth._get_azure_foundry_auth_status()

        assert status["auth_mode"] == "api_key"
        assert status["logged_in"] is True

    def test_api_key_status_false_when_missing(self, monkeypatch):
        """In api_key mode an unset env var means not logged in."""
        from hermes_cli import auth as _auth

        self._use_model_config(
            monkeypatch,
            {
                "provider": "azure-foundry",
                "auth_mode": "api_key",
            },
        )
        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)

        status = _auth._get_azure_foundry_auth_status()

        assert status["logged_in"] is False
|
||||
375
tests/run_agent/test_callable_api_key.py
Normal file
375
tests/run_agent/test_callable_api_key.py
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
"""Tests that callable api_key (Entra ID bearer provider) flows through
|
||||
the agent stack without coercion.
|
||||
|
||||
The OpenAI Python SDK accepts ``api_key: str | None | Callable[[], str]``,
|
||||
and ``azure-identity``'s ``get_bearer_token_provider`` returns a callable.
|
||||
Hermes preserves the callable end-to-end so the SDK refreshes tokens
|
||||
transparently. This file pins the contract at the high-risk seams the
|
||||
rubber-duck audit identified.
|
||||
|
||||
Covered:
|
||||
* ``_create_openai_client`` passes a callable ``api_key`` straight
|
||||
through to ``openai.OpenAI(...)``.
|
||||
* ``_normalize_main_runtime`` preserves the callable so auxiliary
|
||||
clients inherit Entra auth.
|
||||
* ``_truncate_token`` (dashboard preview) renders ``"<entra-id-bearer>"``
|
||||
instead of ``"<function ...>"`` and never invokes the callable.
|
||||
* ``run_agent.py`` masked-banner path renders the Entra placeholder
|
||||
and never tries to slice/len the callable.
|
||||
* Serialization scrub: dumping a runtime dict via ``json.dumps`` with
|
||||
a callable api_key raises (default behaviour) — guards against
|
||||
silently leaking ``"<function ...>"`` strings into event logs.
|
||||
* ``batch_runner`` strips the callable from the worker config dict
|
||||
so multiprocessing.Pool can pickle the rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenAI SDK construction preserves the callable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCreateOpenAIClientCallable:
    """``AIAgent._create_openai_client`` must hand a callable ``api_key``
    to ``openai.OpenAI(...)`` exactly as it received it."""

    def test_callable_api_key_passed_to_openai_constructor(self, monkeypatch):
        """Build the smallest possible AIAgent surface and check that the
        OpenAI constructor receives the very same callable object."""
        seen = {}

        def recording_openai(**kwargs):
            # Record the constructor kwargs for the assertions below.
            seen["kwargs"] = kwargs
            return MagicMock(api_key=kwargs.get("api_key"))

        # Patch the module-level OpenAI proxy used by ``_create_openai_client``.
        monkeypatch.setattr("run_agent.OpenAI", recording_openai)

        from run_agent import AIAgent

        # ``__new__`` skips the full ``__init__`` cost; only the attributes
        # consulted by _create_openai_client / _client_log_context are set.
        agent = AIAgent.__new__(AIAgent)
        agent.provider = "azure-foundry"
        agent.model = "gpt-4o"
        agent.base_url = "https://r.openai.azure.com/openai/v1"
        agent._client_kwargs = {}

        def token_provider():
            return "fresh-jwt"

        client = agent._create_openai_client(
            {
                "api_key": token_provider,
                "base_url": "https://r.openai.azure.com/openai/v1",
            },
            reason="test",
            shared=False,
        )

        # The OpenAI constructor must receive the *callable*, not a string.
        forwarded = seen["kwargs"]["api_key"]
        assert callable(forwarded)
        assert not isinstance(forwarded, str)
        assert forwarded is token_provider, (
            "_create_openai_client must not wrap or coerce the callable"
        )
        assert client is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auxiliary runtime preserves the callable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNormalizeMainRuntimePreservesCallable:
    """The aux client orchestrator must keep the callable on the runtime
    dict so compression / vision / embedding / title-gen clients inherit
    Entra ID auth from the main agent."""

    def test_callable_api_key_survives_normalization(self):
        """A callable ``api_key`` comes back as the same object."""
        from agent.auxiliary_client import _normalize_main_runtime

        def provider():
            return "jwt"

        main_runtime = {
            "provider": "azure-foundry",
            "model": "gpt-4o",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_key": provider,
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
        }
        result = _normalize_main_runtime(main_runtime)

        assert result["api_key"] is provider
        assert result["auth_mode"] == "entra_id"

    def test_string_api_key_still_works(self):
        """A plain string key is passed through with its value intact."""
        from agent.auxiliary_client import _normalize_main_runtime

        result = _normalize_main_runtime(
            {"provider": "azure-foundry", "api_key": "sk-static"}
        )

        assert result["api_key"] == "sk-static"

    def test_normalization_drops_empty_string_but_preserves_callable(self):
        """Empty string fields are dropped while a callable is kept."""
        from agent.auxiliary_client import _normalize_main_runtime

        def provider():
            return ""

        # Empty string fields are dropped, but a callable is preserved
        # even if it would mint an empty token (we don't invoke during
        # normalization).
        result = _normalize_main_runtime(
            {
                "provider": "azure-foundry",
                "api_key": provider,
                "model": "",
            }
        )

        assert result["api_key"] is provider
        assert "model" not in result

    def test_unknown_field_dropped(self):
        """Fields outside the allowlist are not carried over."""
        from agent.auxiliary_client import _MAIN_RUNTIME_FIELDS, _normalize_main_runtime

        result = _normalize_main_runtime(
            {
                "provider": "azure-foundry",
                "api_key": "k",
                "secret_field_we_dont_want": "leak",
            }
        )

        assert "secret_field_we_dont_want" not in result
        # auth_mode IS in the field allowlist (rubber-duck blocker fix).
        assert "auth_mode" in _MAIN_RUNTIME_FIELDS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Display surfaces never invoke the callable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTruncateTokenCallable:
    """``hermes_cli.web_server._truncate_token`` behaviour for callables,
    JWT-shaped strings and empty values."""

    def test_callable_returns_placeholder(self):
        """Dashboard preview must render the Entra placeholder, NOT
        ``"<function ...>"``."""
        from hermes_cli.web_server import _truncate_token

        calls = {"count": 0}

        def provider():
            # Any invocation is counted so the test can prove none happened.
            calls["count"] += 1
            return "should-not-appear-in-ui"

        # The cast only satisfies the type checker; at runtime the value
        # is still the callable.
        rendered = _truncate_token(cast(str | None, provider))

        assert rendered == "<entra-id-bearer>"
        assert calls["count"] == 0

    def test_string_jwt_still_truncated_to_signature_tail(self):
        """A JWT-shaped string shows only the tail of its signature."""
        from hermes_cli.web_server import _truncate_token

        # JWT shape: header.payload.signature → only signature tail shown.
        rendered = _truncate_token("aaaa.bbbb.cccccccsig", visible=4)

        assert rendered == "…csig"

    def test_empty_returns_empty(self):
        """``None`` and the empty string both render as the empty string."""
        from hermes_cli.web_server import _truncate_token

        assert _truncate_token(None) == ""
        assert _truncate_token("") == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization scrub — runtime dicts with callables must NOT silently
|
||||
# JSON-encode as ``"<function ...>"`` (would leak garbage into events).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRuntimeDictSerializationGuard:
    """Pins the loud-fail behaviour of serializing a runtime dict whose
    ``api_key`` is a callable."""

    def test_json_dumps_default_str_does_not_silently_stringify_callable(self):
        """A runtime dict with a callable api_key must either raise on
        plain ``json.dumps`` (good — fail loud) or be sanitized BEFORE
        serialization. This pins the loud-fail behaviour so that a future
        ``json.dumps(..., default=str)`` over a runtime dict is caught by
        a regression here."""

        def provider():
            return "jwt"

        runtime_with_callable = {
            "provider": "azure-foundry",
            "api_key": provider,
            "auth_mode": "entra_id",
        }

        # Plain json.dumps — must raise, not silently produce
        # ``"<function provider at 0x...>"``.
        with pytest.raises(TypeError):
            json.dumps(runtime_with_callable)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# batch_runner strips callables from the worker config dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBatchRunnerCallableHandling:
    """Contract guards for how ``batch_runner`` treats a callable api_key."""

    def test_callable_api_key_stripped_from_worker_config(self, capsys, monkeypatch, tmp_path):
        """``BatchRunner._run_batches`` (or the equivalent code path)
        must replace a callable api_key with None before pickling the
        worker config dict — otherwise multiprocessing.Pool fails."""
        # We can't easily run BatchRunner end-to-end in a unit test
        # (it spawns subprocesses), but we CAN inline the same logic:
        # the production code uses ``callable(self.api_key) and not
        # isinstance(self.api_key, str)`` to gate the substitution.
        # Re-execute the same predicate here as a contract guard.

        def provider():
            return "jwt"

        api_key = provider
        worker_api_key = None if (callable(api_key) and not isinstance(api_key, str)) else api_key
        assert worker_api_key is None, (
            "BatchRunner must replace callable api_key with None so "
            "multiprocessing.Pool can pickle the worker config"
        )

        # And a string passes through unchanged.
        api_key_str = "sk-static"
        worker_api_key_str = None if (callable(api_key_str) and not isinstance(api_key_str, str)) else api_key_str
        assert worker_api_key_str == "sk-static"

    def test_batch_runner_source_uses_the_correct_predicate(self):
        """Pin the predicate string in batch_runner so refactors that
        change it are caught here. Reading the source rather than
        importing avoids spinning up the full BatchRunner."""
        from pathlib import Path

        repo_root = Path(__file__).resolve().parent.parent.parent
        # Decode explicitly as UTF-8: the locale default (e.g. cp1252 on
        # Windows) can fail or mis-decode non-ASCII bytes in the source.
        src = (repo_root / "batch_runner.py").read_text(encoding="utf-8")
        assert "callable(self.api_key) and not isinstance(self.api_key, str)" in src, (
            "BatchRunner.api_key callable check changed — update test or "
            "verify the new predicate still routes Entra token providers "
            "to the worker-rebuild path."
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inline masked-banner / display sites (callable-aware)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCliEnsureRuntimeCredentialsCallable:
    """Regression: ``cli.py:_ensure_runtime_credentials`` previously
    treated a callable ``api_key`` as "not a string" and overwrote it
    with the ``"no-key-required"`` placeholder, which then got sent as
    ``Authorization: Bearer no-key-required`` and rejected by Azure
    with a 401. This is the most subtle of the callable-api_key audit
    sites — gated by ``not isinstance(api_key, str)`` rather than the
    cleaner ``callable(...)`` check used elsewhere.

    We verify the source pattern (rather than spinning up a real
    ``HermesCLI`` instance) — the predicate change is the load-bearing
    fix and is invariant under the surrounding orchestration code."""

    def test_callable_predicate_present_in_cli_runtime_validation(self):
        """``cli.py`` must contain the ``_is_callable_provider`` guard."""
        from pathlib import Path

        repo_root = Path(__file__).resolve().parent.parent.parent
        # Decode explicitly as UTF-8 so the check does not depend on the
        # platform's locale encoding.
        src = (repo_root / "cli.py").read_text(encoding="utf-8")
        # The fix introduces ``_is_callable_provider`` which gates the
        # string-only check so callable token providers survive.
        assert "_is_callable_provider = callable(api_key)" in src, (
            "cli.py:_ensure_runtime_credentials must preserve a callable "
            "api_key (Entra ID bearer provider). Without the guard, the "
            "callable is stringified to 'no-key-required' and Azure 401s."
        )
|
||||
|
||||
|
||||
class TestInlinedDisplayMasks:
    """The masked-credential display sites are now inlined per-site (no
    shared helper). Each site uses the ``is_token_provider`` predicate
    to short-circuit on callables and print a static
    ``"Microsoft Entra ID"`` label, then falls through to its own
    context-appropriate string mask. This replaces a unified helper
    that would have forced one mask shape across sites with legitimately
    different display needs (banner vs diagnostic vs UI vs preview)."""

    @staticmethod
    def _read_repo_source(*relative_parts):
        """Return the text of a file addressed relative to the repo root.

        The repo root is taken to be three directories above this test
        file. The file is decoded explicitly as UTF-8: the assertions
        below search for non-ASCII literals (the key emoji), which would
        fail to decode or never match under a non-UTF-8 locale default
        such as cp1252 on Windows.
        """
        from pathlib import Path

        repo_root = Path(__file__).resolve().parent.parent.parent
        return repo_root.joinpath(*relative_parts).read_text(encoding="utf-8")

    def test_run_agent_banner_uses_is_token_provider_guard(self):
        """The masked-banner sites live in ``agent/agent_init.py``
        (the ``__init__`` body was extracted into ``init_agent`` after
        this feature was first written). Both the OpenAI and Anthropic
        client init paths must guard their banner prints with
        ``is_token_provider`` so a callable Entra ID provider doesn't
        crash ``len(api_key)``."""
        src = self._read_repo_source("agent", "agent_init.py")
        assert src.count("is_token_provider(") >= 2, (
            "agent/agent_init.py must guard BOTH masked-banner paths "
            "(chat_completions and anthropic_messages) with "
            "is_token_provider()."
        )
        assert src.count('"🔑 Using credentials: Microsoft Entra ID"') >= 2, (
            "agent/agent_init.py banner blocks should print a static "
            "'Microsoft Entra ID' label for callable api_keys — no "
            "placeholder plumbing, no describe-mask fallback."
        )

    def test_cli_show_config_handles_callable(self):
        """``cli.HermesCLI.show_config`` previously did
        ``self.api_key[-4:]`` / ``len(self.api_key)`` which crashes on
        callable Entra ID providers. The inlined version uses
        ``is_token_provider`` and prints the same static label as the
        run_agent banners."""
        src = self._read_repo_source("cli.py")
        assert "is_token_provider(self.api_key)" in src, (
            "cli.HermesCLI.show_config must guard self.api_key via "
            "is_token_provider so callable Entra ID providers don't "
            "crash /config."
        )
        assert '"Microsoft Entra ID"' in src, (
            "cli.HermesCLI.show_config must print the static "
            "'Microsoft Entra ID' label (matching run_agent banners) "
            "instead of attempting to slice the callable."
        )

    def test_mask_api_key_for_logs_handles_callable(self):
        """``run_agent._mask_api_key_for_logs`` is called from the
        request-dump JSON path. For Entra users, ``self.client.api_key``
        is the SDK's empty string (callable stashed privately) — but
        defensively the helper must also accept a callable directly
        and return the placeholder rather than crashing on
        ``len(callable)``."""
        src = self._read_repo_source("run_agent.py")
        # The function now starts with a callable check.
        assert (
            "if callable(key) and not isinstance(key, str):" in src
            and '"<entra-id-bearer>"' in src
        ), (
            "run_agent._mask_api_key_for_logs must short-circuit for "
            "callable api_keys to avoid len(callable) crashes in "
            "request-dump paths."
        )

    def test_anthropic_401_diagnostic_handles_callable(self):
        """The Anthropic 401 diagnostic path lives in
        ``agent/conversation_loop.py`` (the ``run_conversation`` body
        was extracted after this feature was first written). It used
        to do ``key[:12]`` on ``self._anthropic_api_key``. For Entra ID +
        Anthropic-style mode that's a callable; slicing crashes."""
        src = self._read_repo_source("agent", "conversation_loop.py")
        # The Anthropic 401 block now branches on is_token_provider
        # before slicing the key.
        assert "Microsoft Entra ID (httpx event hook)" in src, (
            "agent/conversation_loop.py Anthropic 401 diagnostic must "
            "surface a Microsoft Entra ID branch before slicing the "
            "key prefix."
        )
|
||||
|
|
@ -81,6 +81,11 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
|
|||
"provider.anthropic": ("anthropic==0.87.0",), # CVE-2026-34450, CVE-2026-34452
|
||||
# AWS Bedrock provider
|
||||
"provider.bedrock": ("boto3==1.42.89",),
|
||||
# Microsoft Foundry — Entra ID auth (managed identity, workload identity,
|
||||
# service principal, az login, VS Code, azd, PowerShell). Only loaded
|
||||
# when model.auth_mode=entra_id is selected; key-based azure-foundry
|
||||
# users never pay this import.
|
||||
"provider.azure_identity": ("azure-identity==1.25.3",),
|
||||
|
||||
# ─── Web search backends ───────────────────────────────────────────────
|
||||
"search.exa": ("exa-py==2.10.2",),
|
||||
|
|
|
|||
|
|
@ -1087,7 +1087,16 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
|
|||
current_provider = str(runtime.get("provider", "") or "")
|
||||
current_model = _resolve_model()
|
||||
current_base_url = str(runtime.get("base_url", "") or "")
|
||||
current_api_key = str(runtime.get("api_key", "") or "")
|
||||
# Preserve a callable api_key (Azure Foundry Entra ID bearer
|
||||
# provider) unchanged — ``str(...)`` would produce
|
||||
# ``"<function ...>"`` and poison downstream switch_model
|
||||
# validation. Match the agent-present branch's behavior at the
|
||||
# top of this block.
|
||||
_runtime_key = runtime.get("api_key", "")
|
||||
if callable(_runtime_key) and not isinstance(_runtime_key, str):
|
||||
current_api_key = _runtime_key
|
||||
else:
|
||||
current_api_key = str(_runtime_key or "")
|
||||
|
||||
# Load user-defined providers so switch_model can resolve named custom
|
||||
# endpoints (e.g. "ollama-launch") and validate against saved model lists.
|
||||
|
|
|
|||
61
uv.lock
generated
61
uv.lock
generated
|
|
@ -500,6 +500,35 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "azure-core"
|
||||
version = "1.41.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "requests" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a6/f3/b416179e408990df5db0d516283022dde0f5d0111d98c1a848e41853e81c/azure_core-1.41.0.tar.gz", hash = "sha256:f46ff5dfcd230f25cf1c19e8a34b8dc08a337b2503e268bb600a16c00db8ad5a", size = 381042, upload-time = "2026-05-07T23:30:54.302Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5b/db/325c6d7312d2200251c52323878281045aaffcb5586612296484e4280eaa/azure_core-1.41.0-py3-none-any.whl", hash = "sha256:522b4011e8180b1a3dcd2024396a4e7fe9ac37fb8597db47163d230b5efe892d", size = 220920, upload-time = "2026-05-07T23:30:56.357Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "azure-identity"
|
||||
version = "1.25.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "azure-core" },
|
||||
{ name = "cryptography" },
|
||||
{ name = "msal" },
|
||||
{ name = "msal-extensions" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", size = 192138, upload-time = "2026-03-13T01:12:22.951Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base58"
|
||||
version = "2.1.1"
|
||||
|
|
@ -1618,6 +1647,9 @@ all = [
|
|||
anthropic = [
|
||||
{ name = "anthropic" },
|
||||
]
|
||||
azure-identity = [
|
||||
{ name = "azure-identity" },
|
||||
]
|
||||
bedrock = [
|
||||
{ name = "boto3" },
|
||||
]
|
||||
|
|
@ -1767,6 +1799,7 @@ requires-dist = [
|
|||
{ name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" },
|
||||
{ name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" },
|
||||
{ name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" },
|
||||
{ name = "azure-identity", marker = "extra == 'azure-identity'", specifier = "==1.25.3" },
|
||||
{ name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" },
|
||||
{ name = "brotlicffi", marker = "extra == 'messaging'", specifier = "==1.2.0.1" },
|
||||
{ name = "croniter", specifier = "==6.0.0" },
|
||||
|
|
@ -1855,7 +1888,7 @@ requires-dist = [
|
|||
{ name = "vercel", marker = "extra == 'vercel'", specifier = "==0.5.7" },
|
||||
{ name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" },
|
||||
]
|
||||
provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
|
||||
provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "azure-identity", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
|
||||
|
||||
[[package]]
|
||||
name = "hf-xet"
|
||||
|
|
@ -2421,6 +2454,32 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "msal"
|
||||
version = "1.36.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
{ name = "pyjwt", extra = ["crypto"] },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/de/cb/b02b0f748ac668922364ccb3c3bff5b71628a05f5adfec2ba2a5c3031483/msal-1.36.0.tar.gz", hash = "sha256:3f6a4af2b036b476a4215111c4297b4e6e236ed186cd804faefba23e4990978b", size = 174217, upload-time = "2026-04-09T10:20:33.525Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/d3/414d1f0a5f6f4fe5313c2b002c54e78a3332970feb3f5fed14237aa17064/msal-1.36.0-py3-none-any.whl", hash = "sha256:36ecac30e2ff4322d956029aabce3c82301c29f0acb1ad89b94edcabb0e58ec4", size = 121547, upload-time = "2026-04-09T10:20:32.336Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "msal-extensions"
|
||||
version = "1.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "msal" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "msgpack"
|
||||
version = "1.1.2"
|
||||
|
|
|
|||
|
|
@ -1,23 +1,23 @@
|
|||
---
|
||||
sidebar_position: 15
|
||||
title: "Azure AI Foundry"
|
||||
description: "Use Hermes Agent with Azure AI Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models"
|
||||
title: "Microsoft Foundry"
|
||||
description: "Use Hermes Agent with Microsoft Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models"
|
||||
---
|
||||
|
||||
# Azure AI Foundry
|
||||
# Microsoft Foundry
|
||||
|
||||
Hermes Agent supports Azure AI Foundry (and Azure OpenAI) as a first-class provider. A single Azure resource can host models with two different wire formats:
|
||||
Hermes Agent's `azure-foundry` provider supports Microsoft Foundry (formerly Azure AI Foundry) and Azure OpenAI. A single Foundry resource can host models with two different wire formats:
|
||||
|
||||
- **OpenAI-style** — `POST /v1/chat/completions` on endpoints like `https://<resource>.openai.azure.com/openai/v1`. Used for GPT-4.x, GPT-5.x, Llama, Mistral, and most open-weight models.
|
||||
- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://<resource>.services.ai.azure.com/anthropic`. Used when Azure Foundry serves Claude models via the Anthropic Messages API format.
|
||||
- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://<resource>.services.ai.azure.com/anthropic`. Used when Microsoft Foundry serves Claude models via the Anthropic Messages API format.
|
||||
|
||||
The setup wizard probes your endpoint and auto-detects which transport it uses, which deployments are available, and each model's context length.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- An Azure AI Foundry or Azure OpenAI resource with at least one deployment
|
||||
- An API key for that resource (available in the Azure Portal under "Keys and Endpoint")
|
||||
- A Microsoft Foundry or Azure OpenAI resource with at least one deployment
|
||||
- The deployment's endpoint URL
|
||||
- **Either** an API key (from the Azure Portal under "Keys and Endpoint") **or** the **Azure AI User** RBAC role on the Foundry resource if you plan to use Microsoft Entra ID (the keyless path Microsoft recommends). Some tenants may show the role as **Foundry User** during Microsoft's rename rollout.
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
|
@ -25,20 +25,172 @@ The setup wizard probes your endpoint and auto-detects which transport it uses,
|
|||
hermes model
|
||||
# → Select "Azure Foundry"
|
||||
# → Enter your endpoint URL
|
||||
# → Enter your API key
|
||||
# → Choose Authentication:
|
||||
# 1. API key
|
||||
# 2. Microsoft Entra ID (managed identity / workload identity / az login)
|
||||
# → (Entra) Hermes probes DefaultAzureCredential; on success it never asks for a key
|
||||
# → (API key) Enter your API key
|
||||
# Hermes probes the endpoint and auto-detects transport + models
|
||||
# → Pick a model from the list (or type a deployment name manually)
|
||||
```
|
||||
|
||||
The wizard will:
|
||||
|
||||
1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Azure Foundry Claude routes.
|
||||
1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Microsoft Foundry Claude routes.
|
||||
2. **Probe `GET <base>/models`** — if the endpoint returns an OpenAI-shaped model list, Hermes switches to `chat_completions` and prefills a picker with the returned deployment IDs.
|
||||
3. **Probe Anthropic Messages shape** — fallback for endpoints that do not expose `/models` but do accept the Anthropic Messages format.
|
||||
4. **Fall back to manual entry** — private/gated endpoints that reject every probe still work; you pick the API mode and type a deployment name by hand.
|
||||
|
||||
Context length for the chosen model is resolved via Hermes' standard metadata chain (`models.dev`, provider metadata, and hardcoded family fallbacks) and stored in `config.yaml` so the model can size its own context window correctly.
|
||||
|
||||
## Microsoft Entra ID (keyless, RBAC) — recommended
|
||||
|
||||
Microsoft recommends [keyless authentication with Microsoft Entra ID](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) for production Foundry workloads. Hermes supports Entra ID for **both** API surfaces:
|
||||
|
||||
- **OpenAI-style** (`api_mode: chat_completions` / `codex_responses`) — GPT-4/5, Llama, Mistral, DeepSeek, etc.
|
||||
- **Anthropic-style** (`api_mode: anthropic_messages`) — Claude models on Microsoft Foundry.
|
||||
|
||||
Foundry's RBAC is per-resource (`Azure AI User` grants both surfaces; some tenants may display `Foundry User`) and Microsoft documents the same inference scope (`https://ai.azure.com/.default`) for both. Under the hood:
|
||||
|
||||
- OpenAI-style uses the OpenAI Python SDK's native callable `api_key=` contract — the SDK mints a fresh JWT per request automatically.
|
||||
- Anthropic-style uses an `httpx.Client` with a request event hook installed by `agent.azure_identity_adapter.build_bearer_http_client`, because the Anthropic SDK does not accept callable `auth_token` natively. The hook rewrites `Authorization: Bearer <fresh-jwt>` per outbound request. Same Microsoft RBAC, same Foundry scope — the SDK contract is the only difference.
|
||||
|
||||
### Why use Entra ID?
|
||||
|
||||
- No long-lived API keys to rotate or revoke.
|
||||
- RBAC-driven access — grant or remove `Azure AI User` on the Foundry resource, no config rewrite needed.
|
||||
- Access and audit logs are segmented by assignee instead of all callers sharing one static key.
|
||||
- Single auth surface for Azure VMs, AKS pods, App Service, Functions, Container Apps, and Foundry Agent Service via managed identity.
|
||||
- Workload identity and service-principal flows for CI/CD pipelines.
|
||||
|
||||
### One-time setup (Azure side)
|
||||
|
||||
1. In the Azure Portal, open your Foundry resource → **Access control (IAM)** → **Add → Add role assignment**.
|
||||
2. Pick the **Azure AI User** role (or **Foundry User** if your tenant has the renamed role).
|
||||
3. Assign it to:
|
||||
- **Your user account** for local development with `az login`.
|
||||
- **A managed identity or workload identity** for Azure-hosted compute (recommended for production).
|
||||
- **A Foundry Agent Service hosted agent's agent identity** when Hermes runs inside a hosted agent.
|
||||
- **A service principal** for CI/CD pipelines when workload identity is not available.
|
||||
4. Wait ~5 minutes for the role to propagate.
|
||||
|
||||
Azure CLI equivalent:
|
||||
|
||||
```bash
|
||||
az role assignment create \
|
||||
--assignee <principal-or-agent-identity-client-id> \
|
||||
--role "Azure AI User" \
|
||||
--scope <foundry-resource-id>
|
||||
```
|
||||
|
||||
### One-time setup (Hermes side)
|
||||
|
||||
```bash
|
||||
hermes model
|
||||
# → Select "Azure Foundry"
|
||||
# → Enter your endpoint URL
|
||||
# → Authentication: 2 (Microsoft Entra ID)
|
||||
# → (optional) user-assigned managed identity client ID
|
||||
# → (optional) Azure tenant ID
|
||||
# → Hermes probes DefaultAzureCredential() and reports which inner
|
||||
# credential succeeded (e.g. AzureCliCredential, ManagedIdentityCredential)
|
||||
```
|
||||
|
||||
The wizard runs a bounded preflight probe (10 s timeout). On failure it offers to "save anyway, validate later" — useful when configuring on a machine that doesn't yet have credentials but will at runtime (e.g. preparing config for a managed-identity deployment).
|
||||
|
||||
`azure-identity` is installed automatically on first use via Hermes' lazy-install path. To pre-install:
|
||||
|
||||
```bash
|
||||
pip install azure-identity
|
||||
```
|
||||
|
||||
### Configuration written to `config.yaml`
|
||||
|
||||
```yaml
|
||||
model:
|
||||
provider: azure-foundry
|
||||
base_url: https://my-resource.openai.azure.com/openai/v1
|
||||
api_mode: chat_completions
|
||||
auth_mode: entra_id
|
||||
default: gpt-4o
|
||||
context_length: 128000
|
||||
entra:
|
||||
scope: https://ai.azure.com/.default # only when overriding the default
|
||||
```
|
||||
|
||||
Hermes only manages one Entra-specific knob in `config.yaml`:
|
||||
|
||||
- **`scope`** — the OAuth resource scope. Defaults to Microsoft's documented inference scope (`https://ai.azure.com/.default`). Override only if your resource was provisioned against a non-standard audience.
|
||||
|
||||
Everything else (tenant, service principal secret, federated token file, sovereign cloud authority, broker preferences) is read by `azure-identity` directly from the standard `AZURE_*` environment variables — see the [credential resolution order](#credential-resolution-order) below. Set those in `~/.hermes/.env` or your deployment environment, exactly as Microsoft's SDK reference describes.
|
||||
|
||||
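Hermes itself writes no secrets to `~/.hermes/.env` in Entra mode (a service-principal secret such as `AZURE_CLIENT_SECRET` only ends up there if you add it yourself) — `azure-identity` caches tokens in-process (and where available, in your OS keychain / `~/.IdentityService`).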
|
||||
|
||||
### Credential resolution order
|
||||
|
||||
`azure-identity`'s `DefaultAzureCredential` walks this chain on the first token request, stopping at the first credential that returns a token; that credential is then reused for subsequent token requests in the same process:
|
||||
|
||||
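1. **Environment credential** — `AZURE_TENANT_ID` + `AZURE_CLIENT_ID` + `AZURE_CLIENT_SECRET` (or `AZURE_CLIENT_CERTIFICATE_PATH` in place of the secret). `AZURE_FEDERATED_TOKEN_FILE` is not read here; it is consumed by the Workload Identity credential in the next step.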
|
||||
2. **Workload Identity** — `AZURE_FEDERATED_TOKEN_FILE` (AKS federated tokens / OIDC).
|
||||
3. **Managed Identity** — IMDS endpoint (`169.254.169.254`) for virtual machines; `IDENTITY_ENDPOINT` for App Service / Functions / Container Apps. Foundry Agent Service hosted agents use the hosted agent's agent identity.
|
||||
4. **Visual Studio Code** — Azure account extension.
|
||||
5. **Azure CLI** — `az login` session.
|
||||
6. **Azure Developer CLI** — `azd auth login`.
|
||||
7. **Azure PowerShell** — `Connect-AzAccount`.
|
||||
8. **Broker** (Windows / WSL only) — Web Account Manager.
|
||||
|
||||
Interactive browser credential is excluded by default for unattended Hermes runs; use Azure CLI, Azure Developer CLI, managed identity, workload identity, or service principal credentials instead.
|
||||
|
||||
### Deployment patterns
|
||||
|
||||
**Local development:**
|
||||
```bash
|
||||
az login
|
||||
hermes model # pick Azure Foundry → Entra ID
|
||||
hermes # uses your az login token
|
||||
```
|
||||
|
||||
**Azure VM / Functions / App Service / Container Apps (system-assigned managed identity):**
|
||||
1. Enable system-assigned identity on the compute resource.
|
||||
2. Grant the identity `Azure AI User` (or `Foundry User`) on the Foundry resource.
|
||||
3. Set `model.auth_mode: entra_id` in config.yaml — no env vars needed.
|
||||
|
||||
**Azure VM / Functions / App Service / Container Apps (user-assigned managed identity):**
|
||||
- Set `AZURE_CLIENT_ID` to the user-assigned identity's client ID so `DefaultAzureCredential` picks the right one.
|
||||
|
||||
**Foundry Agent Service hosted agent:**
|
||||
- Create the hosted agent and grant that agent's identity `Azure AI User` (or `Foundry User`) on the Foundry resource. Hermes uses `ManagedIdentityCredential` from inside the hosted agent; role assignment belongs on the agent identity, not just the parent project or your user.
|
||||
|
||||
**AKS Workload Identity (replaces AAD Pod Identity):**
|
||||
- Annotate the pod's service account with the workload identity client ID.
|
||||
- The pod's federated token file is auto-detected via `AZURE_FEDERATED_TOKEN_FILE`.
|
||||
- `model.auth_mode: entra_id` works without further config changes.
|
||||
|
||||
**Service principal in CI:**
|
||||
- Set `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET` in the runner env.
|
||||
|
||||
**Sovereign clouds (Government, China):**
|
||||
- Export `AZURE_AUTHORITY_HOST` (e.g. `https://login.microsoftonline.us` for Azure Government, `https://login.partner.microsoftonline.cn` for Azure China). `azure-identity` reads it directly.
|
||||
|
||||
### Health checks
|
||||
|
||||
`hermes doctor` runs a 10 s probe against `DefaultAzureCredential` when `model.auth_mode: entra_id`, reporting which inner credential won (env vars present, managed identity endpoint reachable, etc.).
|
||||
|
||||
`hermes auth` shows a structured status block:
|
||||
|
||||
```
|
||||
azure-foundry (Microsoft Entra ID):
|
||||
Endpoint: https://my-resource.openai.azure.com/openai/v1
|
||||
Scope: https://ai.azure.com/.default
|
||||
Status: configured; live token probe is skipped here
|
||||
```
|
||||
|
||||
### Limitations
|
||||
|
||||
- **Anthropic-style endpoints use an httpx event hook.** The Anthropic Python SDK does not accept a callable `auth_token` natively (≤ 0.86.0). Hermes installs a request event hook on a custom `httpx.Client` that mints a fresh JWT per outbound request and rewrites `Authorization: Bearer <jwt>`. This is functionally equivalent to the OpenAI SDK's native `Callable[[], str]` contract but adds one indirection layer. If the Anthropic SDK adds first-class callable-auth support in a future release, Hermes will switch to it transparently.
|
||||
- **Batch jobs and `multiprocessing.Pool`.** The Entra token provider is a closure that cannot be pickled across process boundaries. `batch_runner.py` automatically drops the callable from the worker config and lets each worker process rebuild its own provider from `config.yaml` — no user action required, but each worker pays one chain walk at startup.
|
||||
- **No bearer JWT persistence in `auth.json`.** Hermes does not duplicate `azure-identity`'s internal token cache; cold starts walk the credential chain on first inference.
|
||||
|
||||
## Configuration (written to `config.yaml`)
|
||||
|
||||
After running the wizard you'll see something like this:
|
||||
|
|
@ -72,11 +224,11 @@ model:
|
|||
|
||||
Important behaviour:
|
||||
|
||||
- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Azure Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`.
|
||||
- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Microsoft Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`.
|
||||
- **`max_completion_tokens` is used automatically.** Azure OpenAI (like direct OpenAI) requires `max_completion_tokens` for gpt-4o, o-series, and gpt-5.x models. Hermes sends the right parameter based on the endpoint.
|
||||
- **Pre-v1 endpoints that require `api-version`.** If you have a legacy base URL like `https://<resource>.openai.azure.com/openai?api-version=2025-04-01-preview`, Hermes extracts the query string and forwards it via `default_query` on every request (the OpenAI SDK otherwise drops it when joining paths).
|
||||
|
||||
## Anthropic-style endpoints (Claude via Azure Foundry)
|
||||
## Anthropic-style endpoints (Claude via Microsoft Foundry)
|
||||
|
||||
For Claude deployments, use the Anthropic-style route:
|
||||
|
||||
|
|
@ -96,7 +248,7 @@ Important behaviour:
|
|||
|
||||
## Alternative: `provider: anthropic` + Azure base URL
|
||||
|
||||
If you already have `provider: anthropic` configured and just want to point it at Azure AI Foundry for Claude, you can skip the `azure-foundry` provider entirely:
|
||||
If you already have `provider: anthropic` configured and just want to point it at Microsoft Foundry for Claude, you can skip the `azure-foundry` provider entirely:
|
||||
|
||||
```yaml
|
||||
model:
|
||||
|
|
@ -117,7 +269,7 @@ Azure does **not** expose a pure-API-key endpoint to list your *deployed* model
|
|||
What Hermes can do:
|
||||
|
||||
- Azure OpenAI v1 endpoints (`<resource>.openai.azure.com/openai/v1`) expose `GET /models` with the resource's **available** model catalog. Hermes uses this list to prefill the model picker.
|
||||
- Azure Foundry `/anthropic` routes: detected via URL path, model name entered manually.
|
||||
- Microsoft Foundry `/anthropic` routes: detected via URL path, model name entered manually.
|
||||
- Private / firewalled endpoints: manual entry with a friendly "couldn't probe" message.
|
||||
|
||||
You can always type a deployment name directly — Hermes does not validate against the returned list.
|
||||
|
|
@ -126,9 +278,18 @@ You can always type a deployment name directly — Hermes does not validate agai
|
|||
|
||||
| Variable | Purpose |
|
||||
|----------|---------|
|
||||
| `AZURE_FOUNDRY_API_KEY` | Primary API key for Azure AI Foundry / Azure OpenAI |
|
||||
| `AZURE_FOUNDRY_API_KEY` | Primary API key for Microsoft Foundry / Azure OpenAI (api_key mode) |
|
||||
| `AZURE_FOUNDRY_BASE_URL` | Endpoint URL (set via `hermes model`; env var is used as a fallback) |
|
||||
| `AZURE_ANTHROPIC_KEY` | Used by `provider: anthropic` + Azure base URL (alternative to `ANTHROPIC_API_KEY`) |
|
||||
| `AZURE_TENANT_ID` | Entra ID tenant for service-principal flows |
|
||||
| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) |
|
||||
| `AZURE_CLIENT_SECRET` | Service principal secret |
|
||||
| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal cert (alternative to secret) |
|
||||
| `AZURE_FEDERATED_TOKEN_FILE` | Workload Identity federated token path (AKS) |
|
||||
| `AZURE_AUTHORITY_HOST` | Sovereign cloud authority host override |
|
||||
| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead |
|
||||
|
||||
The Azure SDK reads the `AZURE_*` env vars directly. Hermes never inspects them other than to report which sources are present in `hermes doctor` output.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
|
|
@ -150,8 +311,21 @@ model:
|
|||
api_mode: anthropic_messages # or chat_completions
|
||||
```
|
||||
|
||||
**Entra ID: "credential chain exhausted" or 401 Unauthorized after switching to `auth_mode: entra_id`.**
|
||||
- Run `az login` to refresh your developer session (the cached token may have expired).
|
||||
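- Verify the `Azure AI User` (or `Foundry User`) role assignment took effect: `az role assignment list --assignee <user-or-identity-id> --all` should list it on your Foundry resource (without `--all` or an explicit `--scope <foundry-resource-id>`, the command only shows subscription-level assignments). Role propagation can take up to 5 minutes.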
|
||||
- For user-assigned managed identities, double-check `AZURE_CLIENT_ID` matches the identity attached to the compute resource.
|
||||
- Run `hermes doctor` — the Azure Entra probe reports whether token acquisition succeeded and includes a remediation hint.
|
||||
|
||||
**Entra ID: wizard preflight hangs or times out.**
|
||||
The 10 s preflight is a soft check. Choose "Save anyway and validate later" and run `hermes doctor` after deploying to the target environment. Common causes include an unreachable token service or stale local login state — prefer workload identity in CI, set `AZURE_TENANT_ID`+`AZURE_CLIENT_ID`+`AZURE_CLIENT_SECRET` when using a service principal, or run `az login` for local development.
|
||||
|
||||
**401 on Anthropic-style endpoint with Entra ID.**
|
||||
Verify the same `Azure AI User` (or `Foundry User`) role is assigned on the Foundry resource (it covers both `/openai/v1` and `/anthropic` paths). If the OpenAI-style probe works during the wizard but `claude-*` requests fail at runtime, the most common cause is a stale `model.entra.scope` left over from an earlier wizard run — delete the `entra.scope` line from `config.yaml` so the runtime falls back to the default `https://ai.azure.com/.default` scope.
|
||||
|
||||
## Related
|
||||
|
||||
- [Environment variables](/docs/reference/environment-variables)
|
||||
- [Configuration](/docs/user-guide/configuration)
|
||||
- [AWS Bedrock](/docs/guides/aws-bedrock) — the other major cloud provider integration
|
||||
- [Microsoft: Configure Entra ID for Foundry](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) — upstream documentation for the keyless path
|
||||
|
|
|
|||
|
|
@ -50,9 +50,16 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
|||
| `XIAOMI_BASE_URL` | Override Xiaomi MiMo base URL (default: `https://api.xiaomimimo.com/v1`) |
|
||||
| `TOKENHUB_API_KEY` | Tencent TokenHub API key ([tokenhub.tencentmaas.com](https://tokenhub.tencentmaas.com)) |
|
||||
| `TOKENHUB_BASE_URL` | Override Tencent TokenHub base URL (default: `https://tokenhub.tencentmaas.com/v1`) |
|
||||
| `AZURE_FOUNDRY_API_KEY` | Azure AI Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)) |
|
||||
| `AZURE_FOUNDRY_BASE_URL` | Azure AI Foundry endpoint URL (e.g. `https://<resource>.openai.azure.com/openai/v1` for OpenAI-style, or `https://<resource>.services.ai.azure.com/anthropic` for Anthropic-style) |
|
||||
| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at an Azure Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) |
|
||||
| `AZURE_FOUNDRY_API_KEY` | Microsoft Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)). Not needed when `model.auth_mode: entra_id` |
|
||||
| `AZURE_FOUNDRY_BASE_URL` | Microsoft Foundry endpoint URL (e.g. `https://<resource>.openai.azure.com/openai/v1` for OpenAI-style, or `https://<resource>.services.ai.azure.com/anthropic` for Anthropic-style) |
|
||||
| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at a Microsoft Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) |
|
||||
| `AZURE_TENANT_ID` | Entra ID tenant ID (service-principal flows; honored by `azure-identity` when `model.auth_mode: entra_id`) |
|
||||
| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) |
|
||||
| `AZURE_CLIENT_SECRET` | Service principal secret used by `EnvironmentCredential` |
|
||||
| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal certificate (alternative to `AZURE_CLIENT_SECRET`) |
|
||||
| `AZURE_FEDERATED_TOKEN_FILE` | Federated token file path for AKS Workload Identity / OIDC flows |
|
||||
| `AZURE_AUTHORITY_HOST` | Sovereign-cloud authority override (e.g. `https://login.microsoftonline.us` for Azure Government). See the "Sovereign clouds" entry under [Deployment patterns in the Azure Foundry guide](/docs/guides/azure-foundry#deployment-patterns) |
|
||||
| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead and do not set these |
|
||||
| `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) |
|
||||
| `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) |
|
||||
| `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) |
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
|
|||
| Kimi / Moonshot (China) | `kimi-coding-cn` | `KIMI_CN_API_KEY` |
|
||||
| StepFun | `stepfun` | `STEPFUN_API_KEY` |
|
||||
| Tencent TokenHub | `tencent-tokenhub` | `TOKENHUB_API_KEY` |
|
||||
| Azure AI Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
|
||||
| Microsoft Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
|
||||
| LM Studio (local) | `lmstudio` | `LM_API_KEY` (or none for local) + `LM_BASE_URL` |
|
||||
| Hugging Face | `huggingface` | `HF_TOKEN` |
|
||||
| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue