feat(azure-foundry): add Microsoft Entra ID auth

Use azure-identity DefaultAzureCredential for keyless Foundry auth.

Preserve refreshable callable credentials through OpenAI and Anthropic client paths.

Add setup, doctor, auth status, docs, and tests for Entra auth.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
glennc 2026-05-15 14:36:18 -07:00 committed by Teknium
parent 457fa913b8
commit 9df9816dab
38 changed files with 3772 additions and 122 deletions

View file

@ -9,13 +9,24 @@ TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup"
def detect_provider() -> Optional[str]:
    """Resolve the active Hermes runtime provider, or None if unavailable.

    Treats a ``Callable`` ``api_key`` (Azure Foundry Entra ID bearer
    token provider — see :mod:`agent.azure_identity_adapter`) as a valid
    credential. Without this, ACP sessions for Entra-configured Foundry
    deployments silently default to ``"openrouter"`` and the ACP auth
    handshake rejects the legitimate provider.

    Returns:
        The stripped, lower-cased provider name when the runtime reports a
        non-blank provider together with either a non-blank string key or
        a callable key; ``None`` otherwise. Any exception raised while
        resolving the runtime is swallowed and reported as ``None``.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        runtime = resolve_runtime_provider()
        api_key = runtime.get("api_key")
        provider = runtime.get("provider")
        # A provider name is mandatory regardless of the credential shape.
        if not isinstance(provider, str) or not provider.strip():
            return None
        is_string_key = isinstance(api_key, str) and bool(api_key.strip())
        # ``str`` is never callable, but keep the exclusion explicit so the
        # two credential shapes stay visibly disjoint.
        is_callable_provider = callable(api_key) and not isinstance(api_key, str)
        if is_string_key or is_callable_provider:
            return provider.strip().lower()
        return None
    except Exception:
        # Best-effort detection: an unresolvable runtime means "no provider".
        return None

View file

@ -560,7 +560,16 @@ def init_agent(
agent._client_kwargs = {}
if not agent.quiet_mode:
print(f"🤖 AI Agent initialized with model: {agent.model} (Anthropic native)")
if effective_key and len(effective_key) > 12:
# ``effective_key`` may be a callable Entra ID bearer
# provider for Azure Foundry anthropic_messages mode.
# The Anthropic adapter installs an httpx event hook
# that mints a fresh JWT per request — we never
# invoke or inspect the callable in the banner.
from agent.azure_identity_adapter import is_token_provider
if is_token_provider(effective_key):
print("🔑 Using credentials: Microsoft Entra ID")
elif isinstance(effective_key, str) and len(effective_key) > 12:
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
elif agent.api_mode == "bedrock_converse":
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
@ -764,12 +773,19 @@ def init_agent(
print(f"🤖 AI Agent initialized with model: {agent.model}")
if base_url:
print(f"🔗 Using custom base URL: {base_url}")
# Always show API key info (masked) for debugging auth issues
# ``api_key`` may be a callable Entra ID bearer
# provider (Azure Foundry). The OpenAI SDK mints a
# fresh JWT per request internally — the banner
# never invokes or inspects the callable.
from agent.azure_identity_adapter import is_token_provider
key_used = client_kwargs.get("api_key", "none")
if key_used and key_used != "dummy-key" and len(key_used) > 12:
if is_token_provider(key_used):
print("🔑 Using credentials: Microsoft Entra ID")
elif isinstance(key_used, str) and key_used and key_used != "dummy-key" and len(key_used) > 12:
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
else:
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
print("⚠️ Warning: API key appears invalid or missing")
except Exception as e:
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
@ -1395,7 +1411,12 @@ def init_agent(
_ra().logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override)
if agent._ollama_num_ctx is None and agent.base_url and is_local_endpoint(agent.base_url):
try:
_detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=agent.api_key or "")
# ``agent.api_key`` may be a callable (Entra token provider).
# Ollama detection makes a manual HTTP request and expects a
# string — Azure Foundry isn't a local endpoint so this branch
# never fires for Entra, but guard defensively.
_key_for_ollama = agent.api_key if isinstance(agent.api_key, str) else ""
_detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=_key_for_ollama or "")
if _detected and _detected > 0:
agent._ollama_num_ctx = _detected
except Exception as exc:

View file

@ -1390,10 +1390,16 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
_sm_custom_providers = get_compatible_custom_providers(_sm_cfg)
except Exception:
_sm_custom_providers = None
# ``agent.api_key`` may be a callable (Azure Foundry Entra ID
# token provider). ``get_model_context_length`` expects a
# string for its live-probe paths; for Foundry the context
# length normally resolves via config or static catalogs and
# never hits a probe, but coerce to empty string defensively.
_ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else ""
new_context_length = get_model_context_length(
agent.model,
base_url=agent.base_url,
api_key=agent.api_key,
api_key=_ctx_api_key,
provider=agent.provider,
config_context_length=getattr(agent, "_config_context_length", None),
custom_providers=_sm_custom_providers,
@ -1402,7 +1408,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
model=agent.model,
context_length=new_context_length,
base_url=agent.base_url,
api_key=getattr(agent, "api_key", ""),
api_key=agent.api_key, # context_compressor forwards to call_llm; callable preserved
provider=agent.provider,
api_mode=agent.api_mode,
)

View file

@ -17,6 +17,7 @@ import os
import platform
import subprocess
from pathlib import Path
from urllib.parse import urlparse
from hermes_constants import get_hermes_home
from typing import Any, Dict, List, Optional, Tuple
@ -364,7 +365,7 @@ def _normalize_base_url_text(base_url) -> str:
def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
"""Return True for non-Anthropic endpoints using the Anthropic Messages API.
Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
Third-party proxies (Microsoft Foundry, AWS Bedrock, self-hosted) authenticate
with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
detection should be skipped for these endpoints.
"""
@ -508,6 +509,29 @@ def _is_minimax_anthropic_endpoint(base_url: str | None) -> bool:
)
def _is_azure_anthropic_endpoint(base_url: str | None) -> bool:
    """Return True for Azure-hosted Anthropic Messages endpoints.

    Covers both the modern Foundry host family (``*.services.ai.azure.*``)
    and the legacy Azure OpenAI host family (``*.openai.azure.*``) when
    serving Anthropic's ``/anthropic`` route. Used to opt-in those hosts
    to the ``api-version`` query-param plumbing required by Azure.

    Intentionally avoids a finite allow-list of TLD suffixes so it works
    across sovereign / private Azure clouds.

    Args:
        base_url: Endpoint URL as configured by the user; may be ``None``.

    Returns:
        ``True`` only when the host contains one of the Azure host-family
        markers and the path contains ``/anthropic``. Empty, unparsable,
        or scheme-less URLs (no hostname) yield ``False``.
    """
    normalized = _normalize_base_url_text(base_url)
    if not normalized:
        return False
    try:
        parsed = urlparse(normalized)
        host = (parsed.hostname or "").lower().rstrip(".")
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. unbalanced
        # IPv6 brackets). A predicate should answer "no", not crash the
        # client construction that called it.
        return False
    path = (parsed.path or "").lower()
    # Pad with dots so the markers only match whole DNS labels, including
    # when the marker is the very first or last part of the host.
    host_padded = f".{host}."
    is_foundry_host = ".services.ai.azure." in host_padded
    is_legacy_azoai_host = ".openai.azure." in host_padded
    return (is_foundry_host or is_legacy_azoai_host) and "/anthropic" in path
def _common_betas_for_base_url(
base_url: str | None,
*,
@ -523,7 +547,7 @@ def _common_betas_for_base_url(
The ``context-1m-2025-08-07`` beta is not sent to native Anthropic by
default because some subscriptions reject it. Add it only for endpoint
families that still require it for 1M context, currently Azure AI Foundry.
families that still require it for 1M context, currently Microsoft Foundry.
Bedrock uses its own client helper below and opts in explicitly.
``drop_context_1m_beta=True`` strips the 1M-context beta from any path that
@ -540,8 +564,81 @@ def _common_betas_for_base_url(
return betas
def _build_anthropic_client_with_bearer_hook(
    token_provider,
    base_url: str = None,
    timeout: float = None,
    *,
    drop_context_1m_beta: bool = False,
):
    """Build an Anthropic client whose requests carry an Entra ID bearer token.

    The Anthropic SDK is given a custom ``httpx`` client produced by
    ``build_bearer_http_client(token_provider, ...)``; per the adapter's
    contract that client is expected to set ``Authorization: Bearer <jwt>``
    on each outbound request. A sentinel ``auth_token`` is still passed to
    the SDK constructor because it needs some credential value to be
    present; the sentinel is meant to be overridden by the HTTP hook.

    Args:
        token_provider: Zero-arg callable returning a bearer token.
        base_url: Optional endpoint URL. A trailing ``/v1`` is removed
            because the SDK appends ``/v1/messages`` itself.
        timeout: Read timeout in seconds. Non-numeric or non-positive
            values fall back to 900 seconds; connect timeout is 10 seconds.
        drop_context_1m_beta: Forwarded to ``_common_betas_for_base_url``.

    Returns:
        A configured ``anthropic.Anthropic`` client.

    Raises:
        ImportError: If the ``anthropic`` package is unavailable.
    """
    sdk = _get_anthropic_sdk()
    if sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for Azure Foundry Anthropic-style "
            "endpoints with Entra ID auth. Install with: pip install 'anthropic>=0.39.0'"
        )

    normalize_proxy_env_vars()

    from httpx import Timeout
    from agent.azure_identity_adapter import build_bearer_http_client

    # Only a positive number is accepted as an override of the default.
    if isinstance(timeout, (int, float)) and timeout > 0:
        read_seconds = timeout
    else:
        read_seconds = 900.0
    timeout_obj = Timeout(timeout=float(read_seconds), connect=10.0)

    # Strip any trailing /v1 — the Anthropic SDK appends /v1/messages.
    endpoint = _normalize_base_url_text(base_url)
    if endpoint:
        import re as _re
        endpoint = _re.sub(r"/v1/?$", "", endpoint.rstrip("/"))

    client_kwargs = {
        "timeout": timeout_obj,
        "http_client": build_bearer_http_client(token_provider, timeout=timeout_obj),
        # Placeholder only: the SDK insists on api_key/auth_token being
        # set. A recognisable sentinel makes an accidental leak easy to
        # spot in logs.
        "auth_token": "entra-id-bearer-via-http-hook",
    }

    if endpoint:
        client_kwargs["base_url"] = endpoint
        # Azure needs api-version as a query parameter; pass it through
        # default_query unless the URL already carries one.
        if _is_azure_anthropic_endpoint(endpoint) and "api-version" not in endpoint:
            client_kwargs["default_query"] = {"api-version": "2025-04-15"}

    betas = _common_betas_for_base_url(
        endpoint,
        drop_context_1m_beta=drop_context_1m_beta,
    )
    if betas:
        client_kwargs["default_headers"] = {"anthropic-beta": ",".join(betas)}

    return sdk.Anthropic(**client_kwargs)
def build_anthropic_client(
api_key: str,
api_key,
base_url: str = None,
timeout: float = None,
*,
@ -549,6 +646,17 @@ def build_anthropic_client(
):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
``api_key`` accepts either:
* a static ``str`` — the historical contract for all key-based and
OAuth flows.
* a ``Callable[[], str]`` — an Entra ID bearer token provider from
:mod:`agent.azure_identity_adapter`. The Anthropic SDK itself
requires a static string, so when given a callable we construct
a custom ``httpx.Client`` with a request event hook that mints a
fresh JWT per outbound request and rewrites the ``Authorization``
header. The SDK never sees the callable directly.
If *timeout* is provided it overrides the default 900s read timeout. The
connect timeout stays at 10s. Callers pass this from the per-provider /
per-model ``request_timeout_seconds`` config so Anthropic-native and
@ -570,6 +678,14 @@ def build_anthropic_client(
"Install it with: pip install 'anthropic>=0.39.0'"
)
# Callable api_key → Entra ID bearer provider path. Delegated to a
# helper so the existing static-key code below stays unchanged.
if callable(api_key) and not isinstance(api_key, str):
return _build_anthropic_client_with_bearer_hook(
api_key, base_url, timeout,
drop_context_1m_beta=drop_context_1m_beta,
)
normalize_proxy_env_vars()
from httpx import Timeout
@ -584,8 +700,7 @@ def build_anthropic_client(
# Pass it via default_query so the SDK appends it to every request URL
# without corrupting the base_url (appending it directly produces
# malformed paths like /anthropic?api-version=.../v1/messages).
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
if _is_azure_endpoint and "api-version" not in normalized_base_url:
if _is_azure_anthropic_endpoint(normalized_base_url) and "api-version" not in normalized_base_url:
kwargs["base_url"] = normalized_base_url.rstrip("/")
kwargs["default_query"] = {"api-version": "2025-04-15"}
else:
@ -615,7 +730,7 @@ def build_anthropic_client(
if common_betas:
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
elif _is_third_party_anthropic_endpoint(base_url):
# Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
# Third-party proxies (Microsoft Foundry, AWS Bedrock, etc.) use their
# own API keys with x-api-key auth. Skip OAuth detection — their keys
# don't follow Anthropic's sk-ant-* prefix convention and would be
# misclassified as OAuth tokens.
@ -1757,7 +1872,7 @@ def convert_messages_to_anthropic(
# causing HTTP 400 "Invalid signature in thinking block".
#
# Signatures are Anthropic-proprietary. Third-party endpoints
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
# them and will reject them outright. When targeting a third-party
# endpoint, strip ALL thinking/redacted_thinking blocks from every
# assistant message — the third-party will generate its own
@ -2103,5 +2218,3 @@ def build_anthropic_kwargs(
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
return kwargs

View file

@ -1902,6 +1902,120 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
return CodexAuxiliaryClient(real_client, model), model
def _try_azure_foundry(
*,
model: Optional[str] = None,
explicit_api_key: Optional[str] = None,
explicit_base_url: Optional[str] = None,
api_mode: Optional[str] = None,
) -> Tuple[Optional[Any], Optional[str]]:
"""Resolve an Azure Foundry auxiliary client via the runtime resolver.
Mirrors the ``_try_anthropic`` / ``_try_nous`` shape but delegates to
:func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime`
the same resolver the main agent uses so:
* ``auth_mode: api_key`` (default) gets the static
``AZURE_FOUNDRY_API_KEY`` string.
* ``auth_mode: entra_id`` gets a callable bearer-token provider
(``Callable[[], str]`` from
:mod:`agent.azure_identity_adapter`).
* Per-model ``api_mode`` auto-routing for GPT-5.x / o-series /
codex models works.
* ``model.entra.{tenant_id,client_id,authority,scope}`` config
fields propagate.
* Non-default ``model.base_url`` overrides are honored.
The OpenAI SDK accepts both shapes for ``api_key`` so the caller
can forward the result without coercion.
Returns ``(client, model)`` or ``(None, None)`` on failure.
"""
try:
from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
from hermes_cli.auth import AuthError
from hermes_cli.config import load_config
except ImportError:
return None, None
try:
cfg = load_config()
model_cfg = cfg.get("model") if isinstance(cfg, dict) else {}
if not isinstance(model_cfg, dict):
model_cfg = {}
except Exception:
model_cfg = {}
try:
runtime = _resolve_azure_foundry_runtime(
requested_provider="azure-foundry",
model_cfg=model_cfg,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
target_model=model,
)
except AuthError as exc:
logger.debug("Auxiliary azure-foundry: %s", exc)
return None, None
except Exception as exc:
logger.debug("Auxiliary azure-foundry runtime error: %s", exc)
return None, None
api_key = runtime.get("api_key")
base_url = str(runtime.get("base_url", "") or "")
runtime_api_mode = api_mode or runtime.get("api_mode") or "chat_completions"
# Empty-string check on api_key here would be wrong for callable
# token providers (callables are truthy and non-empty by definition).
# Bail only when api_key is None / empty string.
_has_key = bool(api_key) if not callable(api_key) else True
if not _has_key or not base_url:
return None, None
final_model = _normalize_resolved_model(
model or str(model_cfg.get("default") or ""),
"azure-foundry",
)
if not final_model:
# No fallback aux model for Azure — the user must have a
# deployment name. Surface that as "no client" so the auto
# chain falls through to the next provider rather than 404ing.
logger.debug(
"Auxiliary azure-foundry: no model resolved (model=%r, default=%r)",
model, model_cfg.get("default"),
)
return None, None
# Azure pre-v1 endpoints sometimes carry api-version query params
# in the base URL; the OpenAI SDK drops them when joining paths,
# so lift them out and pass via default_query.
extra: Dict[str, Any] = {}
_clean_base, _dq = _extract_url_query_params(base_url)
if _dq:
extra["default_query"] = _dq
client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)
if runtime_api_mode == "codex_responses":
# GPT-5.x / o-series / codex models on Azure Foundry are
# Responses-API-only — wrap so chat.completions.create() is
# translated to /responses behind the scenes.
return CodexAuxiliaryClient(client, final_model), final_model
if runtime_api_mode == "anthropic_messages":
# Forward ``api_key`` verbatim — for static keys it's a string,
# for Entra ID it's a callable. ``_maybe_wrap_anthropic`` →
# ``build_anthropic_client`` detects the callable and installs
# the bearer-injecting httpx hook.
return _maybe_wrap_anthropic(
client, final_model, api_key,
base_url, runtime_api_mode,
), final_model
# chat_completions — return the plain OpenAI client.
return client, final_model
def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
try:
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
@ -1957,20 +2071,31 @@ _AUTO_PROVIDER_LABELS = {
"_resolve_api_key_provider": "api-key",
}
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode")
def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]:
"""Return a sanitized copy of a live main-runtime override."""
def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, Any]:
"""Return a sanitized copy of a live main-runtime override.
Most fields are stripped strings. ``api_key`` may legitimately be a
zero-arg callable (Azure Foundry Entra ID token provider) preserve
those as-is so auxiliary clients inherit the same authentication
surface as the main agent. The OpenAI SDK accepts ``Callable[[], str]``
for ``api_key`` and calls it before every request.
"""
if not isinstance(main_runtime, dict):
return {}
normalized: Dict[str, str] = {}
normalized: Dict[str, Any] = {}
for field in _MAIN_RUNTIME_FIELDS:
value = main_runtime.get(field)
# Preserve a callable api_key (Entra ID bearer provider) unchanged.
if field == "api_key" and callable(value) and not isinstance(value, str):
normalized[field] = value
continue
if isinstance(value, str) and value.strip():
normalized[field] = value.strip()
provider = normalized.get("provider")
if provider:
if isinstance(provider, str):
normalized["provider"] = provider.lower()
return normalized
@ -2762,10 +2887,10 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
runtime = _normalize_main_runtime(main_runtime)
runtime_provider = runtime.get("provider", "")
runtime_model = runtime.get("model", "")
runtime_base_url = runtime.get("base_url", "")
runtime_model = str(runtime.get("model") or "")
runtime_base_url = str(runtime.get("base_url") or "")
runtime_api_key = runtime.get("api_key", "")
runtime_api_mode = runtime.get("api_mode", "")
runtime_api_mode = str(runtime.get("api_mode") or "")
# ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
# provider (not 'custom'). This catches the common "env poisoning"
@ -2793,8 +2918,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
# on aggregators (OpenRouter, Nous) who previously got routed to a
# cheap provider-side default. Explicit per-task overrides set via
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
main_provider = str(runtime_provider or _read_main_provider() or "")
main_model = str(runtime_model or _read_main_model() or "")
if (main_provider and main_model
and main_provider not in {"auto", ""}):
resolved_provider = main_provider
@ -3188,7 +3313,11 @@ def resolve_provider_client(
if client is not None:
final_model = _normalize_resolved_model(model or default, provider)
_cbase = str(getattr(client, "base_url", "") or "")
_ckey = str(getattr(client, "api_key", "") or "")
# ``client.api_key`` may be a callable (Azure Foundry Entra
# bearer provider). Pass empty string for the wrapper-detection
# path — wrapping decisions are based on base_url + api_mode.
_raw_ckey = getattr(client, "api_key", "")
_ckey = "" if (callable(_raw_ckey) and not isinstance(_raw_ckey, str)) else str(_raw_ckey or "")
client = _wrap_if_needed(client, final_model, _cbase, _ckey)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
@ -3300,6 +3429,40 @@ def resolve_provider_client(
except ImportError:
pass
# ── Azure Foundry (delegates to runtime resolver for auth_mode-aware routing) ─
#
# The generic PROVIDER_REGISTRY path below uses
# ``resolve_api_key_provider_credentials`` which only knows about the
# static ``AZURE_FOUNDRY_API_KEY`` env var. That misses two important
# cases for the ``azure-foundry`` provider:
#
# 1. ``model.auth_mode: entra_id`` — no static key exists; we need
# a callable bearer-token provider from ``azure_identity_adapter``.
# 2. Non-default ``model.base_url`` (Foundry projects path) — the
# env-var-only resolver doesn't apply config-yaml-driven URL
# overrides.
#
# Delegate to the same runtime resolver the main agent uses so
# auxiliary tasks (title generation, compression, vision, embedding,
# session search) inherit the user's full Azure config.
if provider == "azure-foundry":
client, default_model = _try_azure_foundry(
model=model,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
api_mode=api_mode,
)
if client is None:
logger.warning(
"resolve_provider_client: azure-foundry requested but "
"runtime resolution failed (run: hermes doctor for "
"diagnostics)"
)
return None, None
final_model = _normalize_resolved_model(model or default_model, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── API-key providers from PROVIDER_REGISTRY ─────────────────────
try:
from hermes_cli.auth import (

View file

@ -0,0 +1,555 @@
"""Microsoft Entra ID adapter for Microsoft Foundry.
Provides keyless authentication for Microsoft Foundry deployments using the
`azure-identity` SDK's `DefaultAzureCredential` chain (env service principal →
workload identity → managed identity → VS Code → Azure CLI → azd →
PowerShell → broker).
Architecture mirrors `agent/bedrock_adapter.py`:
* Lazy import. `azure-identity` is only loaded when ``model.auth_mode =
entra_id`` is selected. Users who stick with `AZURE_FOUNDRY_API_KEY`
never pay the import cost.
* SDK-callable contract. The public entry point ``build_token_provider``
returns a zero-arg callable produced by ``get_bearer_token_provider`` —
this is exactly the value Microsoft's documented sample plugs into
``OpenAI(api_key=token_provider, base_url=...)``. The OpenAI SDK calls
it before every request, so token refresh is transparent.
* Three explicit consumer-side helpers (display / cache / http-bearer)
rather than one generic "materialize" function — splitting them by
purpose prevents accidental token-minting in logging paths or token
leakage into cache keys / dashboard JSON.
* No persisted JWT. ``azure-identity`` caches in-process and (where
available) in the OS keychain or ``~/.IdentityService``. Hermes does
not duplicate that storage in ``auth.json``.
Reference: https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
Requires: ``azure-identity`` (optional dependency — only needed when
``model.auth_mode = entra_id``).
"""
from __future__ import annotations
import functools
import logging
import os
import threading
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional
logger = logging.getLogger(__name__)
# Microsoft-documented scope for Foundry inference auth. Both the new
# Foundry portal and the legacy Azure OpenAI managed-identity docs use
# this scope for ALL Foundry endpoint shapes (*.openai.azure.com,
# *.services.ai.azure.com, *.ai.azure.com). The older control-plane
# scope ``https://cognitiveservices.azure.com/.default`` is for ARM
# resource management and is rejected for inference by newer
# resources — users with that requirement override via
# ``model.entra.scope`` in config.yaml.
SCOPE_AI_AZURE_DEFAULT = "https://ai.azure.com/.default"
# ---------------------------------------------------------------------------
# Lazy SDK import — only loaded when the Entra path is actually used.
# ---------------------------------------------------------------------------
# Feature key handed to ``tools.lazy_deps.ensure`` when ``azure-identity``
# is missing and has to be lazy-installed.
_AZURE_IDENTITY_FEATURE = "provider.azure_identity"
def has_azure_identity_installed() -> bool:
    """Report whether ``azure.identity`` is importable in this process.

    Only attempts the import; no credential is constructed and no token
    is requested. Any exception raised by the import counts as "not
    installed".
    """
    try:
        import azure.identity  # noqa: F401
    except Exception:
        return False
    return True
def _require_azure_identity():
"""Import ``azure.identity``, lazy-installing it if allowed.
Raises ``ImportError`` with a clear actionable message when the
package is missing and lazy installs are disabled.
"""
try:
import azure.identity as _ai
return _ai
except ImportError:
try:
from tools.lazy_deps import ensure, FeatureUnavailable
except ImportError as exc:
raise ImportError(
"The 'azure-identity' package is required for Azure AI "
"Foundry Entra ID authentication. Install it with: "
"pip install azure-identity"
) from exc
try:
ensure(_AZURE_IDENTITY_FEATURE, prompt=False)
except FeatureUnavailable as exc:
raise ImportError(
"The 'azure-identity' package is required for Azure AI "
"Foundry Entra ID authentication. " + str(exc)
) from exc
# Retry import after lazy install.
import azure.identity as _ai # noqa: WPS440
return _ai
def reset_credential_cache() -> None:
    """Drop the credential cached by ``build_credential``.

    Intended for tests and profile switches. If ``build_credential`` has
    been replaced by an object without a callable ``cache_clear``
    attribute (for example a plain function installed by a test patch),
    this is a no-op.
    """
    clear = getattr(build_credential, "cache_clear", None)
    if not callable(clear):
        return
    clear()
# ---------------------------------------------------------------------------
# Token-provider construction
# ---------------------------------------------------------------------------
@dataclass(frozen=True)
class EntraIdentityConfig:
    """Serializable Entra ID config.

    Captures the Hermes-managed Entra knobs we need outside Azure SDK
    environment configuration. Everything else (tenant ID, service
    principal secret, federated token file, sovereign cloud authority,
    etc.) flows through azure-identity's standard ``AZURE_*`` env vars.

    ``scope`` is the token audience requested for Foundry inference.
    Almost everyone uses the default; sovereign-cloud / non-standard
    tenants can override via ``model.entra.scope``. A blank or ``None``
    scope is normalised to ``SCOPE_AI_AZURE_DEFAULT``.

    ``exclude_interactive_browser`` is kept as an internal constructor
    knob so probes stay non-interactive by default. It is not written by
    the setup wizard.

    The dataclass is frozen so it's hashable for ``functools.lru_cache``
    keying, and it holds only a ``str`` and a ``bool`` so it can be
    rebuilt from ``to_dict()`` output in another process.
    """

    scope: str = SCOPE_AI_AZURE_DEFAULT
    exclude_interactive_browser: bool = True

    def __post_init__(self) -> None:
        """Normalise ``scope``: strip whitespace, fall back to the default."""
        scope = str(self.scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
        # Frozen dataclasses reject normal attribute assignment.
        object.__setattr__(self, "scope", scope)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form accepted by :meth:`from_dict`."""
        return {
            "scope": self.scope,
            "exclude_interactive_browser": self.exclude_interactive_browser,
        }

    @classmethod
    def from_dict(cls, data: Optional[Dict[str, Any]],
                  *, default_scope: Optional[str] = None) -> "EntraIdentityConfig":
        """Build a config from a mapping such as the ``model.entra`` block.

        Args:
            data: Mapping with optional ``scope`` and
                ``exclude_interactive_browser`` keys; ``None`` or empty
                means "all defaults".
            default_scope: Scope used when ``data`` has no usable scope,
                before falling back to ``SCOPE_AI_AZURE_DEFAULT``.

        Returns:
            A new ``EntraIdentityConfig``.
        """
        data = data or {}
        scope = str(data.get("scope") or "").strip() or default_scope or SCOPE_AI_AZURE_DEFAULT
        # A key that is present but null must behave like an absent key.
        # ``bool(None)`` would otherwise flip the flag to False and quietly
        # enable interactive browser auth.
        raw_exclude = data.get("exclude_interactive_browser")
        exclude_browser = True if raw_exclude is None else bool(raw_exclude)
        return cls(
            scope=scope,
            exclude_interactive_browser=exclude_browser,
        )
def _build_default_credential(config: EntraIdentityConfig) -> Any:
    """Create a fresh ``DefaultAzureCredential`` for ``config``.

    The only Hermes-controlled knob forwarded to the SDK is the
    interactive-browser opt-in. Tenant, service-principal and similar
    settings are left for ``azure-identity`` to pick up from its
    standard ``AZURE_*`` environment variables.
    """
    azure_identity = _require_azure_identity()
    if config.exclude_interactive_browser:
        # No kwargs are passed here, so the SDK default applies. The
        # original notes that default already excludes the browser.
        return azure_identity.DefaultAzureCredential()
    # Explicit opt-in to interactive browser auth.
    return azure_identity.DefaultAzureCredential(
        exclude_interactive_browser_credential=False,
    )
@functools.lru_cache(maxsize=1)
def build_credential(config: EntraIdentityConfig) -> Any:
    """Return a ``DefaultAzureCredential`` for ``config``, memoised.

    The cache holds a single entry keyed on ``config`` (a frozen, hashable
    dataclass). Calling with a different config builds a new credential
    and evicts the previous one, so callers always receive a credential
    that matches the config they passed. One slot is deliberate: a Hermes
    process is expected to use a single Entra config at a time.

    Call :func:`reset_credential_cache` to drop the cached credential
    (e.g. in tests).
    """
    credential = _build_default_credential(config)
    return credential
def build_token_provider(scope: Optional[str] = None,
                         *,
                         config: Optional[EntraIdentityConfig] = None,
                         base_url: Optional[str] = None,
                         exclude_interactive_browser: bool = True,
                         ) -> Callable[[], str]:
    """Return a zero-arg callable that yields an Entra bearer token.

    The callable comes from ``azure.identity.get_bearer_token_provider``
    and is meant to be passed straight to an SDK as ``api_key``::

        from openai import OpenAI
        client = OpenAI(
            base_url="https://my-resource.openai.azure.com/openai/v1/",
            api_key=build_token_provider(),
        )

    Scope resolution order:

    1. ``config.scope`` when a config object is supplied
    2. explicit ``scope`` kwarg
    3. ``SCOPE_AI_AZURE_DEFAULT``

    When ``config`` is supplied, both ``scope`` and
    ``exclude_interactive_browser`` are ignored in favour of the config's
    own values. ``base_url`` is accepted but not used.

    The returned provider wraps a live credential object. To use it in
    another process, pass the ``EntraIdentityConfig`` and rebuild the
    provider there.
    """
    azure_identity = _require_azure_identity()
    effective = config
    if effective is None:
        effective = EntraIdentityConfig(
            scope=scope or SCOPE_AI_AZURE_DEFAULT,
            exclude_interactive_browser=exclude_interactive_browser,
        )
    credential = build_credential(effective)
    return azure_identity.get_bearer_token_provider(credential, effective.scope)
# ---------------------------------------------------------------------------
# Credential probing
# ---------------------------------------------------------------------------
def has_azure_identity_credentials(scope: Optional[str] = None,
                                   *,
                                   config: Optional[EntraIdentityConfig] = None,
                                   timeout_seconds: float = 10.0,
                                   allow_install: bool = True,
                                   **overrides: Any) -> bool:
    """Best-effort check that a token can be minted right now.

    The mint (``build_credential(config).get_token(config.scope)``) runs
    on a daemon thread that is joined for at most ``timeout_seconds``
    (floored at 0.01s), so a slow token service cannot hang the caller.
    Every failure mode — missing package, probe exception, timeout,
    empty token — yields ``False``; probe errors are only logged at
    DEBUG. Intended for ``hermes doctor`` / ``hermes auth status`` /
    wizard preflight.

    ``allow_install``: when True (default) and ``azure-identity`` is not
    importable, :func:`_require_azure_identity` is called first (the
    standard lazy-install path, subject to
    ``security.allow_lazy_installs``). Pass False to make this strictly
    an "is installed?" check for hot paths like CLI startup where pip
    must never run.

    When ``config`` is omitted one is built from ``scope`` (blank falls
    back to ``SCOPE_AI_AZURE_DEFAULT``) plus ``overrides``.

    NOT used by ``is_provider_configured()`` — that path is structural
    only (no token mint), so CLI startup doesn't pay this latency.
    """
    if not has_azure_identity_installed():
        if not allow_install:
            return False
        try:
            _require_azure_identity()
        except ImportError as exc:
            logger.debug("azure-identity lazy install unavailable: %s", exc)
            return False

    probe_config = config
    if probe_config is None:
        probe_config = EntraIdentityConfig(
            scope=(scope or "").strip() or SCOPE_AI_AZURE_DEFAULT,
            **overrides,
        )

    # Shared slot the worker thread writes into; stays False on timeout.
    outcome = {"ok": False}

    def _mint_once() -> None:
        try:
            access_token = build_credential(probe_config).get_token(probe_config.scope)
            outcome["ok"] = bool(getattr(access_token, "token", None))
        except Exception as exc:
            logger.debug("Entra credential probe failed: %s", exc)
            outcome["ok"] = False

    worker = threading.Thread(target=_mint_once, daemon=True)
    worker.start()
    worker.join(timeout=max(0.01, timeout_seconds))

    if worker.is_alive():
        # The daemon thread is abandoned; it cannot block interpreter exit.
        logger.debug("Entra token service probe timed out after %ss", timeout_seconds)
        return False
    return bool(outcome.get("ok"))
def describe_active_credential(config: Optional[EntraIdentityConfig] = None,
                               *,
                               scope: Optional[str] = None,
                               timeout_seconds: float = 10.0,
                               allow_install: bool = True,
                               **overrides: Any) -> Dict[str, Any]:
    """Return diagnostic info about the active credential chain.

    Best-effort: runs ``get_token()`` on a daemon thread (joined for at
    most ``timeout_seconds``) and reports what came back. Designed for
    ``hermes doctor`` and the wizard preflight — probe failures are
    reported in the returned dict rather than raised.

    Returned keys:

    * ``ok`` — True only when a non-empty token was minted (the same
      success criterion as :func:`has_azure_identity_credentials`).
    * ``error`` / ``hint`` — present on failure (``hint`` only for the
      not-installed and timeout cases).
    * ``scope`` — the scope that was probed.
    * ``tenant_id_env`` — value of ``AZURE_TENANT_ID`` when set.
    * ``env_sources`` — labels for credential sources whose environment
      variables are present (checked without minting).
    * ``expires_on`` — taken from the minted token on success.

    ``allow_install``: when True (default) and ``azure-identity`` is not
    importable, the adapter triggers the standard lazy-install path
    (subject to ``security.allow_lazy_installs``) before probing. The
    install failure is surfaced as the diagnostic error when it fails.
    Set False for hot CLI paths that should never trigger pip.

    ``azure-identity`` doesn't expose the winning inner credential as
    a public field, so we report a coarse picture (env vars present,
    token expiry) rather than the credential class name. Users wanting
    the precise class can run with ``AZURE_LOG_LEVEL=DEBUG``.
    """
    info: Dict[str, Any] = {"ok": False}
    if not has_azure_identity_installed():
        if not allow_install:
            info["error"] = "azure-identity not installed"
            info["hint"] = (
                "pip install azure-identity (or rely on lazy install at "
                "first use)"
            )
            return info
        try:
            _require_azure_identity()
        except ImportError as exc:
            info["error"] = str(exc) or "azure-identity not installed"
            info["hint"] = (
                "pip install azure-identity manually, or enable lazy "
                "installs (security.allow_lazy_installs: true in "
                "config.yaml)."
            )
            return info

    if config is None:
        effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
        config = EntraIdentityConfig(scope=effective_scope, **overrides)
    info["scope"] = config.scope

    def _env(name: str) -> str:
        # Whitespace-only values are treated as unset.
        return os.environ.get(name, "").strip()

    # Tenant / authority / service-principal config flow through the
    # standard ``AZURE_*`` env vars; surface them below.
    tenant_id = _env("AZURE_TENANT_ID")
    if tenant_id:
        info["tenant_id_env"] = tenant_id

    # Surface which env-var sources are present without minting yet.
    env_sources = []
    if _env("AZURE_FEDERATED_TOKEN_FILE"):
        env_sources.append("WorkloadIdentityCredential (AZURE_FEDERATED_TOKEN_FILE)")
    if _env("AZURE_CLIENT_ID") and _env("AZURE_CLIENT_SECRET") and tenant_id:
        env_sources.append("EnvironmentCredential (client secret)")
    if _env("IDENTITY_ENDPOINT") or _env("MSI_ENDPOINT"):
        env_sources.append("ManagedIdentityCredential (IDENTITY_ENDPOINT)")
    info["env_sources"] = env_sources

    # Now try minting.
    result: Dict[str, Any] = {}

    def _probe() -> None:
        try:
            credential = build_credential(config)
            result["token"] = credential.get_token(config.scope)
        except Exception as exc:
            # Some exceptions stringify to "", which would leave the
            # caller with a blank diagnostic; fall back to the class name.
            result["error"] = str(exc) or type(exc).__name__

    thread = threading.Thread(target=_probe, daemon=True)
    thread.start()
    thread.join(timeout=max(0.01, timeout_seconds))
    if thread.is_alive():
        info["error"] = f"Token probe timed out after {timeout_seconds:.0f}s"
        info["hint"] = (
            "DefaultAzureCredential can be slow when the token service is unreachable "
            "or when az login state is stale. Try `az login` or set "
            "AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET."
        )
        return info

    if "error" in result:
        info["error"] = result["error"]
        return info

    token = result.get("token")
    if token is None:
        info["error"] = "credential chain exhausted"
        return info
    if not getattr(token, "token", None):
        # Keep the success criterion identical to
        # has_azure_identity_credentials(): an object without a usable
        # token string is not a working credential.
        info["error"] = "credential chain returned an empty token"
        return info

    info["ok"] = True
    info["expires_on"] = getattr(token, "expires_on", None)
    return info
# ---------------------------------------------------------------------------
# Consumer-side helpers — split by purpose to prevent accidental token
# minting in logging / cache-key / dashboard paths.
# ---------------------------------------------------------------------------
def is_token_provider(value: Any) -> bool:
    """Tell a bearer-token callable apart from a plain string API key.

    Consumers call this at the point where they must choose between
    string-API-key handling and callable-bearer handling. The value is
    only inspected, never invoked, so no token is minted.
    """
    if isinstance(value, str):
        return False
    return callable(value)
def materialize_bearer_for_http(value: Any) -> str:
    """Resolve ``value`` to the credential string for a hand-built request.

    Meant only for call sites that assemble an ``Authorization`` header
    themselves instead of going through the OpenAI SDK (e.g.
    ``hermes_cli/azure_detect.py``). A token provider is invoked exactly
    once and its result returned; a non-empty string is returned as-is.

    **Anthropic SDK integration:** the Anthropic Python SDK does not
    accept a ``Callable[[], str]`` for ``auth_token``. Instead,
    :func:`build_bearer_http_client` returns an ``httpx.Client`` whose
    request event hook calls this function and rewrites the
    ``Authorization`` header per request — and that client is passed to
    the Anthropic SDK via ``http_client=...``. See
    :func:`agent.anthropic_adapter.build_anthropic_client` for the
    consumer.

    Raises ``ValueError`` when ``value`` is neither a callable token
    provider nor a non-empty string, or when the provider returns
    something other than a non-empty string. Exceptions raised by the
    provider itself are not caught here.
    """
    if not is_token_provider(value):
        # Static-key path: only a non-empty string is usable.
        if isinstance(value, str) and value:
            return value
        raise ValueError("no usable api_key / token provider")

    minted = value()
    if isinstance(minted, str) and minted:
        return minted
    raise ValueError("token provider returned empty value")
def build_bearer_http_client(token_provider: Callable[[], str], **httpx_kwargs: Any) -> Any:
    """Return an ``httpx.Client`` that mints a fresh Entra bearer JWT
    per outbound request.

    The Anthropic SDK (as of 0.86.0 at the time of writing) stores
    ``api_key`` / ``auth_token`` as static strings and computes the
    ``Authorization`` header at construction time. To get per-request
    token refresh (the Microsoft-recommended Foundry pattern for
    callable bearer providers), we install an httpx ``request`` event
    hook on a custom client and pass that client to the SDK via
    ``http_client=...``. The hook:

    1. Strips any pre-set ``Authorization`` / ``api-key`` /
       ``x-api-key`` headers the SDK may have added (avoids
       conflicting auth values).
    2. Calls :func:`materialize_bearer_for_http` to mint a fresh JWT
       (azure-identity caches internally — this is cheap when the
       cached token is still valid).
    3. Sets ``Authorization: Bearer <fresh-jwt>``.

    If minting fails for ANY reason (provider raised, or returned an
    empty value) the hook logs a WARNING and sends the request with no
    auth headers at all instead of raising.

    ``token_provider`` must be a zero-arg callable returning a string —
    typically the result of :func:`build_token_provider`.

    ``httpx_kwargs`` are forwarded verbatim to ``httpx.Client(...)`` so
    callers can attach a ``timeout``, ``transport``, ``proxy``, etc.

    Raises ``ValueError`` if ``token_provider`` is not a callable token
    provider, and ``ImportError`` if ``httpx`` is not installed (it is a
    transitive dependency of both ``openai`` and ``anthropic`` SDKs, so
    in practice always available when this helper is reached).
    """
    if not is_token_provider(token_provider):
        raise ValueError(
            "build_bearer_http_client requires a zero-arg callable "
            "token provider"
        )
    try:
        import httpx
    except ImportError as exc:  # pragma: no cover — httpx ships with openai/anthropic
        raise ImportError(
            "httpx is required for Entra ID bearer auth on Microsoft Foundry "
            "Anthropic-style endpoints. It is normally a transitive "
            "dependency of the openai/anthropic SDKs."
        ) from exc

    # Every header slot in which the SDK may have placed a static key or
    # placeholder; all of them are cleared on every request.
    stale_auth_headers = (
        "Authorization", "authorization",
        "Api-Key", "api-key",
        "X-Api-Key", "x-api-key",
    )

    def _inject_bearer(request: "httpx.Request") -> None:
        # Strip first: whichever branch runs below, nothing the SDK set
        # (including the placeholder sentinel
        # ``entra-id-bearer-via-http-hook`` from
        # ``_build_anthropic_client_with_bearer_hook``) may survive.
        for header_name in stale_auth_headers:
            request.headers.pop(header_name, None)

        try:
            token = materialize_bearer_for_http(token_provider)
        except Exception as exc:
            # Token provider failed (chain exhausted, token service
            # unreachable, az login expired, empty result, etc.). These
            # surface as whatever exception type the provider raises —
            # not only ``ValueError`` — so catch broadly here. The
            # request then hits Azure with NO Authorization rather than
            # with the placeholder: Azure returns a clean 401 "missing
            # auth" that is easier to diagnose than a 401 against the
            # sentinel string, and the sentinel never appears in
            # upstream access logs.
            #
            # Log at WARNING (not DEBUG) so the misconfiguration is
            # visible at default log levels.
            logger.warning(
                "Bearer hook: Entra ID token provider failed (%s) "
                "— stripping Authorization headers. Azure will respond 401. "
                "Run `hermes doctor` or `az login` to recover.",
                exc,
            )
            return

        request.headers["Authorization"] = f"Bearer {token}"

    return httpx.Client(
        event_hooks={"request": [_inject_bearer]},
        **httpx_kwargs,
    )
# Names exported by ``from <this module> import *``. The list is kept in
# ASCII sort order (uppercase names first).
__all__ = [
    "EntraIdentityConfig",
    "SCOPE_AI_AZURE_DEFAULT",
    "build_bearer_http_client",
    "build_credential",
    "build_token_provider",
    "describe_active_credential",
    "has_azure_identity_credentials",
    "has_azure_identity_installed",
    "is_token_provider",
    "materialize_bearer_for_http",
    "reset_credential_cache",
]

View file

@ -866,9 +866,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
# the fallback activation drops to 128K even when config says 204800.
if hasattr(agent, 'context_compressor') and agent.context_compressor:
from agent.model_metadata import get_model_context_length
# ``agent.api_key`` may be callable (Entra ID); the
# context-length resolver expects a string for live
# probes. Foundry typically resolves via config/static
# catalogs anyway, so coerce defensively.
_fb_ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else ""
fb_context_length = get_model_context_length(
agent.model, base_url=agent.base_url,
api_key=agent.api_key, provider=agent.provider,
api_key=_fb_ctx_api_key, provider=agent.provider,
config_context_length=getattr(agent, "_config_context_length", None),
custom_providers=getattr(agent, "_custom_providers", None),
)
@ -876,7 +881,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
model=agent.model,
context_length=fb_context_length,
base_url=agent.base_url,
api_key=getattr(agent, "api_key", ""),
api_key=getattr(agent, "api_key", ""), # callable preserved → call_llm
provider=agent.provider,
)

View file

@ -486,7 +486,7 @@ class ContextCompressor(ContextEngine):
model: str,
context_length: int,
base_url: str = "",
api_key: str = "",
api_key: Any = "",
provider: str = "",
api_mode: str = "",
) -> None:

View file

@ -103,7 +103,15 @@ def check_compression_model_feasibility(agent: Any) -> None:
return
aux_base_url = str(getattr(client, "base_url", ""))
aux_api_key = str(getattr(client, "api_key", ""))
# ``client.api_key`` may be a callable (Azure Foundry Entra ID
# bearer provider). The context-length resolver chain expects a
# string, but it only needs a key for live catalogue probes
# (provider model lists). For Entra clients the model-metadata
# chain still resolves via models.dev + hardcoded family
# fallbacks, which don't require auth — pass empty string rather
# than minting a bearer JWT just to look up a context length.
_raw_aux_key = getattr(client, "api_key", "")
aux_api_key = "" if (callable(_raw_aux_key) and not isinstance(_raw_aux_key, str)) else str(_raw_aux_key or "")
aux_context = get_model_context_length(
aux_model,

View file

@ -1807,7 +1807,11 @@ def run_conversation(
# that survives message/tool sanitization (#6843).
_credential_sanitized = False
_raw_key = getattr(agent, "api_key", None) or ""
if _raw_key:
# Entra ID bearer providers are callables — their
# minted JWTs are always ASCII, so no sanitization
# is needed (and ``_strip_non_ascii`` would crash
# on a callable input).
if _raw_key and isinstance(_raw_key, str):
_clean_key = _strip_non_ascii(_raw_key)
if _clean_key != _raw_key:
agent.api_key = _clean_key
@ -2080,15 +2084,26 @@ def run_conversation(
):
anthropic_auth_retry_attempted = True
from agent.anthropic_adapter import _is_oauth_token
from agent.azure_identity_adapter import is_token_provider
if agent._try_refresh_anthropic_client_credentials():
print(f"{agent.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
continue
# Credential refresh didn't help — show diagnostic info
key = agent._anthropic_api_key
auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
print(f"{agent.log_prefix}🔐 Anthropic 401 — authentication failed.")
print(f"{agent.log_prefix} Auth method: {auth_method}")
print(f"{agent.log_prefix} Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{agent.log_prefix} Token: (empty or short)")
if is_token_provider(key):
# Azure Foundry Entra ID — the bearer token is
# minted per-request by an httpx event hook on a
# custom http_client passed to the SDK. The 401
# means Azure rejected the JWT (RBAC role missing,
# az login expired, IMDS unreachable, etc.).
print(f"{agent.log_prefix} Auth method: Microsoft Entra ID (httpx event hook)")
print(f"{agent.log_prefix} Run `hermes doctor` for credential-chain diagnostics, or")
print(f"{agent.log_prefix} `az login` if your developer session expired.")
else:
auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
print(f"{agent.log_prefix} Auth method: {auth_method}")
print(f"{agent.log_prefix} Token prefix: {key[:12]}..." if isinstance(key, str) and len(key) > 12 else f"{agent.log_prefix} Token: (empty or short)")
print(f"{agent.log_prefix} Troubleshooting:")
from hermes_constants import display_hermes_home as _dhh_fn
_dhh = _dhh_fn()

View file

@ -862,13 +862,32 @@ class BatchRunner:
"last_updated": None
}
# Prepare configuration for workers
# Prepare configuration for workers.
#
# ``self.api_key`` may be a zero-arg callable (Azure Foundry Entra ID
# bearer provider returned by ``agent.azure_identity_adapter``). Such
# closures are not safely picklable across the multiprocessing.Pool
# boundary. Drop the callable here and let each worker rebuild its
# own provider via ``resolve_runtime_provider()``, which reads
# ``model.auth_mode`` from ``config.yaml`` and constructs a fresh
# token provider in the worker process (azure-identity caches
# in-process so each worker gets its own short-lived cache).
if callable(self.api_key) and not isinstance(self.api_key, str):
worker_api_key = None
print(
" Detected Entra ID bearer provider — workers will rebuild "
"credentials from config.yaml in each process.",
flush=True,
)
else:
worker_api_key = self.api_key
config = {
"distribution": self.distribution,
"model": self.model,
"max_iterations": self.max_iterations,
"base_url": self.base_url,
"api_key": self.api_key,
"api_key": worker_api_key,
"verbose": self.verbose,
"ephemeral_system_prompt": self.ephemeral_system_prompt,
"log_prefix_chars": self.log_prefix_chars,

View file

@ -30,6 +30,7 @@ model:
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
# "azure-foundry" - Microsoft Foundry / Azure OpenAI (API key or Entra ID)
# "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
#
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
@ -45,6 +46,14 @@ model:
# api_key: "your-key-here" # Uncomment to set here instead of .env
base_url: "https://openrouter.ai/api/v1"
# Azure Foundry keyless auth example:
# provider: "azure-foundry"
# base_url: "https://<resource>.openai.azure.com/openai/v1"
# auth_mode: "entra_id" # DefaultAzureCredential: az login, managed identity, workload identity, etc.
# default: "gpt-4o" # Deployment/model name
# entra:
# scope: "https://ai.azure.com/.default" # Optional; this is the default.
# ── Token limits — two settings, easy to confuse ──────────────────────────
#
# context_length: TOTAL context window (input + output tokens combined).

18
cli.py
View file

@ -4251,7 +4251,13 @@ class HermesCLI:
resolved_acp_command = runtime.get("command")
resolved_acp_args = list(runtime.get("args") or [])
resolved_credential_pool = runtime.get("credential_pool")
if not isinstance(api_key, str) or not api_key:
# A callable api_key is a bearer-token provider (Azure Foundry
# Entra ID — ``azure_identity_adapter.build_token_provider``).
# The OpenAI SDK accepts ``Callable[[], str]`` for ``api_key`` and
# invokes it before every request. Skip the string-only validation
# and placeholder substitution for callables.
_is_callable_provider = callable(api_key) and not isinstance(api_key, str)
if not _is_callable_provider and (not isinstance(api_key, str) or not api_key):
# Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often
# don't require authentication. When a base_url IS configured but
# no API key was found, use a placeholder so the OpenAI SDK
@ -5723,7 +5729,15 @@ class HermesCLI:
config_path = project_config_path
config_status = "(loaded)" if config_path.exists() else "(not found)"
api_key_display = '********' + self.api_key[-4:] if self.api_key and len(self.api_key) > 4 else 'Not set!'
# ``self.api_key`` may be a callable (Azure Foundry Entra ID bearer
# provider). Never invoke it; just identify the auth surface.
from agent.azure_identity_adapter import is_token_provider
if is_token_provider(self.api_key):
api_key_display = "Microsoft Entra ID"
elif isinstance(self.api_key, str) and len(self.api_key) > 12:
api_key_display = f"{self.api_key[:8]}...{self.api_key[-4:]}"
else:
api_key_display = "Not set!"
print()
title = "(^_^) Configuration"

View file

@ -5334,7 +5334,9 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]:
def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
"""Generic auth status dispatcher."""
target = provider_id or get_active_provider()
target = (provider_id or get_active_provider() or "").strip().lower()
if not target:
return {"logged_in": False}
if target == "spotify":
return get_spotify_auth_status()
if target == "nous":
@ -5351,6 +5353,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_minimax_oauth_auth_status()
if target == "copilot-acp":
return get_external_process_provider_status(target)
if target == "azure-foundry":
return _get_azure_foundry_auth_status()
# API-key providers
pconfig = PROVIDER_REGISTRY.get(target)
if pconfig and pconfig.auth_type == "api_key":
@ -5365,6 +5369,83 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return {"logged_in": False}
def _get_azure_foundry_auth_status() -> Dict[str, Any]:
    """Report Azure Foundry auth status from configuration alone.

    ``logged_in`` is a structural answer, in line with the other
    non-OAuth provider checks:

    * ``auth_mode == "entra_id"``: True when ``azure-identity`` is
      importable. No token is minted here; the result carries
      ``credential_probe == "not_run"`` and points the user at
      ``hermes doctor`` for the live check.
    * any other ``auth_mode`` (``"api_key"`` is the default): True when
      ``AZURE_FOUNDRY_API_KEY`` holds a usable value.

    The Entra credential chain is never invoked, which keeps CLI startup
    latency flat regardless of token-service / az login state. Config
    loading and Entra inspection failures are absorbed and reflected in
    the returned dict.
    """
    status: Dict[str, Any] = {"provider": "azure-foundry"}

    try:
        from hermes_cli.config import load_config, get_env_value
        loaded = load_config()
    except Exception:
        loaded = {}

    model_section = loaded.get("model") if isinstance(loaded, dict) else None
    if isinstance(model_section, dict):
        mode = str(model_section.get("auth_mode") or "api_key").strip().lower() or "api_key"
        endpoint = str(model_section.get("base_url") or "").strip()
    else:
        mode, endpoint = "api_key", ""
    status["auth_mode"] = mode
    status["base_url"] = endpoint

    if mode != "entra_id":
        # api_key mode (default). If the config import above failed,
        # ``get_env_value`` is unbound; the resulting error is caught
        # below and the process environment is used on its own.
        try:
            secret = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "")
        except Exception:
            secret = os.getenv("AZURE_FOUNDRY_API_KEY", "")
        status["logged_in"] = has_usable_secret(secret)
        return status

    try:
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            SCOPE_AI_AZURE_DEFAULT,
            has_azure_identity_installed,
        )
        sdk_present = has_azure_identity_installed()
        raw_entra = model_section.get("entra") if isinstance(model_section, dict) else None
        entra_section = raw_entra if isinstance(raw_entra, dict) else {}
        identity = EntraIdentityConfig.from_dict(
            entra_section,
            default_scope=SCOPE_AI_AZURE_DEFAULT,
        )
        status["azure_identity_installed"] = sdk_present
        status["scope"] = identity.scope
        status["credential_probe"] = "not_run"
        status["credential_verified"] = False
        status["logged_in"] = bool(sdk_present)
        status["hint"] = (
            (
                "azure-identity is installed; live credential validation "
                "is skipped here. Run `hermes doctor` to verify token acquisition."
            )
            if sdk_present
            else (
                "azure-identity not installed. Install with: "
                "pip install azure-identity (or rely on Hermes' "
                "lazy-install at first use)."
            )
        )
        return status
    except Exception as exc:
        status["logged_in"] = False
        status["error"] = f"azure-identity check failed: {exc}"
        return status
def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
"""Resolve API key and base URL for an API-key provider.

View file

@ -566,6 +566,54 @@ def _interactive_auth() -> None:
print()
except ImportError:
pass # boto3 or bedrock_adapter not available
# Show Azure Foundry Entra ID status
try:
from hermes_cli.config import load_config
_cfg = load_config()
_model_cfg = _cfg.get("model") if isinstance(_cfg, dict) else None
if isinstance(_model_cfg, dict):
_cfg_provider = str(_model_cfg.get("provider") or "").strip().lower()
_cfg_auth_mode = str(_model_cfg.get("auth_mode") or "").strip().lower()
if _cfg_provider == "azure-foundry" and _cfg_auth_mode == "entra_id":
from agent.azure_identity_adapter import (
EntraIdentityConfig,
SCOPE_AI_AZURE_DEFAULT,
describe_active_credential,
has_azure_identity_installed,
)
_base_url = str(_model_cfg.get("base_url") or "").strip()
_entra = _model_cfg.get("entra") or {}
if not isinstance(_entra, dict):
_entra = {}
_scope = (
str(_entra.get("scope") or "").strip()
or SCOPE_AI_AZURE_DEFAULT
)
print(f"azure-foundry (Microsoft Entra ID):")
print(f" Endpoint: {_base_url or '(not configured)'}")
print(f" Scope: {_scope}")
if not has_azure_identity_installed():
print(" Status: ⚠ azure-identity not installed "
"(pip install azure-identity)")
else:
_entra_cfg = EntraIdentityConfig(
scope=_scope,
)
_info = describe_active_credential(config=_entra_cfg, timeout_seconds=10.0)
_env_sources = _info.get("env_sources") or []
if _info.get("ok"):
_tag = ", ".join(_env_sources) if _env_sources else "default chain"
print(f" Status: ✓ token acquired ({_tag})")
else:
_err = _info.get("error") or "credential chain exhausted"
print(f" Status: ⚠ {_err}")
_hint = _info.get("hint")
if _hint:
print(f" Hint: {_hint}")
print()
except Exception:
pass
print()
# Main menu

View file

@ -1,6 +1,6 @@
"""Azure Foundry endpoint auto-detection.
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
Inspect a Microsoft Foundry / Azure OpenAI endpoint to determine:
- API transport (OpenAI-style ``chat_completions`` vs
Anthropic-style ``anthropic_messages``)
- Available models (best effort Azure does not expose a deployment
@ -19,6 +19,16 @@ rather than the user's *deployed* deployment names. In practice it is
still a useful hint the user picks a familiar model name and we look
up its context length from the catalog.
Authentication modes:
- ``api_key`` (default): the wizard passes an ``api_key`` string; the
probe sends both ``api-key:`` and ``Authorization: Bearer`` headers
so we hit any Azure deployment regardless of which header it expects.
- ``entra_id``: the wizard passes a ``token_provider`` callable from
:mod:`agent.azure_identity_adapter`. The probe mints exactly one
bearer JWT, sends **only** ``Authorization: Bearer <jwt>`` (never
``api-key:``), and never persists the token. This matches Microsoft's
documented contract for keyless inference.
The detector never crashes on errors (every HTTP call is wrapped in a
broad try/except). Callers get a :class:`DetectionResult` with whatever
information could be gathered, and fall back to manual entry for the
@ -31,7 +41,7 @@ import json
import logging
import re
from dataclasses import dataclass, field
from typing import Optional
from typing import Any, Callable, Optional
from urllib import request as urllib_request
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse
@ -79,15 +89,73 @@ class DetectionResult:
is_anthropic: bool = False
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
"""GET a URL with ``api-key`` + ``Authorization`` headers. Return
def _resolve_credential(api_key: Any,
token_provider: Optional[Callable[[], str]] = None,
) -> tuple[Optional[str], str]:
"""Coerce wizard inputs into a (token, mode) pair.
Returns ``(token_or_None, mode)`` where ``mode`` is:
- ``"entra_id"`` when a callable token provider was supplied the
returned token is a freshly minted bearer JWT, sent ONLY in
``Authorization: Bearer``.
- ``"api_key"`` when a string key was supplied the returned token
is the raw API key, sent in BOTH ``api-key:`` and
``Authorization: Bearer`` headers (preserves the original
broad-compat probe behaviour).
- ``("", "api_key")`` when neither yields a value.
Bearer minting failures degrade to ``("", "entra_id")`` so the caller
can still report "detection incomplete" rather than crashing.
"""
# Token-provider path (callable wins when both supplied).
if token_provider is not None and callable(token_provider):
try:
token = token_provider()
return (str(token) if token else None), "entra_id"
except Exception as exc:
logger.debug("azure_detect: token_provider failed: %s", exc)
return None, "entra_id"
if callable(api_key) and not isinstance(api_key, str):
try:
token = api_key()
return (str(token) if token else None), "entra_id"
except Exception as exc:
logger.debug("azure_detect: api_key callable failed: %s", exc)
return None, "entra_id"
# API-key path.
if isinstance(api_key, str) and api_key:
return api_key, "api_key"
return None, "api_key"
def _apply_auth_headers(req: urllib_request.Request,
token: Optional[str],
mode: str) -> None:
"""Attach the right auth headers to ``req`` based on credential mode."""
if not token:
return
if mode == "entra_id":
# Bearer-only: do NOT also set api-key, which would log a JWT in
# a header slot intended for static keys.
req.add_header("Authorization", f"Bearer {token}")
else:
# Legacy broad-compat behaviour: send both headers so we land on
# any Azure resource regardless of which it accepts.
req.add_header("api-key", token)
req.add_header("Authorization", f"Bearer {token}")
def _http_get_json(url: str,
api_key: Any,
timeout: float = 6.0,
*,
token_provider: Optional[Callable[[], str]] = None,
) -> tuple[int, Optional[dict]]:
"""GET a URL with the appropriate auth headers. Return
``(status_code, parsed_json_or_None)``. Never raises."""
token, mode = _resolve_credential(api_key, token_provider)
req = urllib_request.Request(url, method="GET")
# Azure OpenAI uses ``api-key``. Some Azure deployments (and
# Anthropic-style routes) use ``Authorization: Bearer``. Send both
# so we probe once per URL rather than twice.
req.add_header("api-key", api_key)
req.add_header("Authorization", f"Bearer {api_key}")
_apply_auth_headers(req, token, mode)
req.add_header("User-Agent", "hermes-agent/azure-detect")
try:
with urllib_request.urlopen(req, timeout=timeout) as resp:
@ -140,7 +208,11 @@ def _extract_model_ids(payload: dict) -> list[str]:
return ids
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
def _probe_openai_models(base_url: str,
api_key: Any,
*,
token_provider: Optional[Callable[[], str]] = None,
) -> tuple[bool, list[str]]:
"""Probe ``<base>/models`` for an OpenAI-shaped response.
Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
@ -156,7 +228,7 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
candidates.append(f"{base_url}/models?api-version={v}")
for url in candidates:
status, body = _http_get_json(url, api_key)
status, body = _http_get_json(url, api_key, token_provider=token_provider)
if status == 200 and body is not None:
ids = _extract_model_ids(body)
if ids:
@ -172,7 +244,11 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
return False, []
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
def _probe_anthropic_messages(base_url: str,
api_key: Any,
*,
token_provider: Optional[Callable[[], str]] = None,
) -> bool:
"""Send a zero-token request to ``<base>/v1/messages`` and check
whether the endpoint at least *recognises* the Anthropic Messages
shape (any 4xx that mentions ``messages`` or ``model``, or a 400
@ -187,8 +263,8 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
"messages": [{"role": "user", "content": "ping"}],
}).encode("utf-8")
req = urllib_request.Request(url, method="POST", data=payload)
req.add_header("api-key", api_key)
req.add_header("Authorization", f"Bearer {api_key}")
token, mode = _resolve_credential(api_key, token_provider)
_apply_auth_headers(req, token, mode)
req.add_header("anthropic-version", "2023-06-01")
req.add_header("content-type", "application/json")
req.add_header("User-Agent", "hermes-agent/azure-detect")
@ -218,13 +294,23 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
return False
def detect(base_url: str, api_key: str) -> DetectionResult:
def detect(base_url: str,
api_key: Any = "",
*,
token_provider: Optional[Callable[[], str]] = None,
) -> DetectionResult:
"""Inspect an Azure endpoint and describe its transport + models.
Call this from the wizard before asking the user to pick an API
mode manually. The caller should treat the returned
:class:`DetectionResult` as *advisory* if ``api_mode`` is None,
fall back to asking the user.
``api_key`` may be a string (legacy API-key auth sends both
``api-key:`` and ``Authorization: Bearer``) or a callable returning
a bearer JWT (Entra ID auth sends ONLY ``Authorization: Bearer``).
``token_provider`` is an alternative explicit name for the callable
form; if both are supplied the callable wins.
"""
result = DetectionResult()
@ -244,7 +330,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
# 2. Try the OpenAI-style /models probe. If this works, the
# endpoint definitely speaks OpenAI wire.
ok, models = _probe_openai_models(base_url, api_key)
ok, models = _probe_openai_models(base_url, api_key, token_provider=token_provider)
if ok:
result.models_probe_ok = True
result.models = models
@ -259,7 +345,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
# 3. Fallback: probe the Anthropic Messages shape. Slower and more
# intrusive than /models, so only run it when the OpenAI probe
# failed.
if _probe_anthropic_messages(base_url, api_key):
if _probe_anthropic_messages(base_url, api_key, token_provider=token_provider):
result.is_anthropic = True
result.api_mode = "anthropic_messages"
result.reason = "Endpoint accepts Anthropic Messages shape"
@ -273,11 +359,26 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
return result
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
def lookup_context_length(model: str,
base_url: str,
api_key: Any = "",
*,
token_provider: Optional[Callable[[], str]] = None,
) -> Optional[int]:
"""Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
that returns ``None`` when only the fallback default (128k) would
fire, so the wizard can distinguish "we actually know this" from
"we guessed."""
"we guessed.
For Entra-ID mode pass a callable as ``api_key`` (or via
``token_provider=``); the wrapped resolver expects a string, so we
mint one bearer JWT here for the single lookup. The resolver itself
only reads catalog metadata over HTTP no SDK client is built so
the minted token is consumed for at most one /models probe.
"""
model_id = str(model or "").strip()
if not model_id:
return None
try:
from agent.model_metadata import (
DEFAULT_FALLBACK_CONTEXT,
@ -286,8 +387,13 @@ def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[i
except Exception:
return None
# Resolve the credential once. For Entra mode this calls the token
# provider; for legacy api_key this is a no-op string pass-through.
token, mode = _resolve_credential(api_key, token_provider)
effective_key = token or ""
try:
n = get_model_context_length(model, base_url=base_url, api_key=api_key)
n = get_model_context_length(model_id, base_url=base_url, api_key=effective_key)
except Exception as exc:
logger.debug("azure_detect: context length lookup failed: %s", exc)
return None

View file

@ -1613,6 +1613,87 @@ def run_doctor(args):
f"bedrock:ListFoundationModels"],
)
def _probe_azure_entra() -> _ConnectivityResult:
    """Probe Azure Foundry Entra ID auth, parallel to ``_probe_bedrock``.

    The probe is skipped (a result with no rows and no issues) unless the
    active config has ``model.provider: azure-foundry`` AND
    ``model.auth_mode: entra_id`` — users on plain API-key Azure never
    trigger the token-service / CLI credential chain.

    The credential check itself is delegated to
    :func:`agent.azure_identity_adapter.describe_active_credential`,
    called with ``timeout_seconds=10.0`` so a slow token service can't pad
    the doctor run.

    Returns:
        A ``_ConnectivityResult`` built from the probe name, a list holding
        at most one ``(marker, label, detail)`` display tuple, and a list
        of remediation messages (empty on success or when skipped).
    """
    name = "Azure Foundry (Entra ID)"
    label = name.ljust(28)

    # Step 1: decide whether this probe applies at all.
    try:
        from hermes_cli.config import load_config

        cfg = load_config()
        model_cfg = cfg.get("model") if isinstance(cfg, dict) else {}
        if not isinstance(model_cfg, dict):
            return _ConnectivityResult(name, [], [])
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        auth_mode = str(model_cfg.get("auth_mode") or "").strip().lower()
        if cfg_provider != "azure-foundry" or auth_mode != "entra_id":
            return _ConnectivityResult(name, [], [])
    except Exception:
        # Best-effort: if the config cannot be read there is nothing to probe.
        return _ConnectivityResult(name, [], [])

    # Step 2: the adapter (and azure-identity behind it) must be importable.
    try:
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            SCOPE_AI_AZURE_DEFAULT,
            describe_active_credential,
            has_azure_identity_installed,
        )
    except Exception as exc:
        # NOTE(review): the status marker below is an empty string in the
        # reviewed source — confirm a glyph was not lost in transit.
        return _ConnectivityResult(
            name,
            [(color("", Colors.YELLOW), label,
              color(f"(adapter import failed: {exc})", Colors.DIM))],
            [f"Azure Foundry adapter import failed: {exc}"],
        )

    if not has_azure_identity_installed():
        return _ConnectivityResult(
            name,
            [(color("", Colors.YELLOW), label,
              color("(azure-identity not installed)", Colors.DIM))],
            [f"Install azure-identity: {sys.executable} -m pip install azure-identity"],
        )

    # Step 3: resolve the token scope; only ``model.entra.scope`` overrides
    # the adapter default.
    entra_cfg = model_cfg.get("entra") or {}
    if not isinstance(entra_cfg, dict):
        entra_cfg = {}
    scope = (
        str(entra_cfg.get("scope") or "").strip()
        or SCOPE_AI_AZURE_DEFAULT
    )

    # Step 4: ask the adapter to exercise the credential chain.
    config = EntraIdentityConfig(
        scope=scope,
    )
    info = describe_active_credential(config=config, timeout_seconds=10.0)
    if info.get("ok"):
        env_sources = info.get("env_sources") or []
        tag = ", ".join(env_sources) if env_sources else "default credential chain"
        return _ConnectivityResult(
            name,
            [(color("", Colors.GREEN), label,
              color(f"({tag}, scope={scope})", Colors.DIM))],
            [],
        )

    # Failure: surface the adapter's error/hint, or fall back to generic ones.
    err = info.get("error") or "credential chain exhausted"
    hint = info.get("hint") or (
        "Run `az login`, set AZURE_TENANT_ID/AZURE_CLIENT_ID/"
        "AZURE_CLIENT_SECRET, or attach a managed identity to this VM."
    )
    return _ConnectivityResult(
        name,
        [(color("", Colors.YELLOW), label,
          color(f"({err})", Colors.DIM))],
        [f"Azure Foundry Entra: {err}. {hint}"],
    )
# Build the probe submission list in display order
_probes.append(("OpenRouter API", _probe_openrouter))
_probes.append(("Anthropic API", _probe_anthropic))
@ -1630,6 +1711,7 @@ def run_doctor(args):
_probe_apikey_provider(p, e, u, b, s)))
_probes.append(("AWS Bedrock", _probe_bedrock))
_probes.append(("Azure Foundry (Entra ID)", _probe_azure_entra))
# Print a single status line so users see something happening, then
# fan out. ``\r`` clears it once the first real result line lands.

View file

@ -3535,11 +3535,27 @@ def _save_custom_provider(
def _model_flow_azure_foundry(config, current_model=""):
"""Azure Foundry provider: configure endpoint, API mode, API key, and model.
"""Azure Foundry provider: configure endpoint, auth mode, API mode, and model.
Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects
the transport and available models when possible:
Anthropic-style (``/v1/messages``) endpoints, and two authentication
modes:
* **API key** (default) uses ``AZURE_FOUNDRY_API_KEY`` from .env.
* **Microsoft Entra ID** keyless, RBAC-based auth via the
``azure-identity`` SDK (Managed Identity / Workload Identity / az
login / VS Code / azd / service principal env vars). Works on both
OpenAI-style and Anthropic-style endpoints Microsoft RBAC is
per-resource and the same ``Azure AI User`` role grants
both. For OpenAI-style the OpenAI SDK's native callable
``api_key=`` contract is used; for Anthropic-style an
``httpx.Client`` with a request event hook (built by
:func:`agent.azure_identity_adapter.build_bearer_http_client`)
mints a fresh JWT per request because the Anthropic SDK does not
accept a callable ``auth_token`` natively.
The wizard auto-detects the transport and available models when
possible:
* URLs ending in ``/anthropic`` Anthropic Messages API.
* Successful ``GET <base>/models`` probe OpenAI-style + populates
@ -3566,9 +3582,14 @@ def _model_flow_azure_foundry(config, current_model=""):
if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
current_base_url = str(model_cfg.get("base_url", "") or "")
current_api_mode = str(model_cfg.get("api_mode", "") or "")
current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
_cur_entra = model_cfg.get("entra") or {}
current_entra = _cur_entra if isinstance(_cur_entra, dict) else {}
else:
current_base_url = ""
current_api_mode = ""
current_auth_mode = "api_key"
current_entra = {}
current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
@ -3583,22 +3604,29 @@ def _model_flow_azure_foundry(config, current_model=""):
print()
if current_base_url:
print(f" Current endpoint: {current_base_url}")
print(f" Current endpoint: {current_base_url}")
if current_api_mode:
_lbl = (
"OpenAI-style"
if current_api_mode == "chat_completions"
else "Anthropic-style"
)
print(f" Current API mode: {_lbl}")
if current_api_key:
print(f" Current API key: {current_api_key[:8]}...")
print(f" Current API mode: {_lbl}")
if current_auth_mode == "entra_id":
print(f" Current auth mode: Microsoft Entra ID (keyless)")
elif current_api_key:
print(f" Current auth mode: API key ({current_api_key[:8]}...)")
print()
# ── Step 1: endpoint URL ─────────────────────────────────────────
try:
_placeholder = (
current_base_url
or "e.g. https://<resource>.openai.azure.com/openai/v1 "
"or https://<resource>.services.ai.azure.com/anthropic"
)
base_url = input(
f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
f"API endpoint URL [{_placeholder}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
@ -3612,25 +3640,125 @@ def _model_flow_azure_foundry(config, current_model=""):
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
return
# ── Step 2: API key ──────────────────────────────────────────────
# ── Step 2: authentication mode ──────────────────────────────────
print()
print("Authentication:")
print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)")
print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)")
print(" Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.")
print(" Requires the 'Azure AI User' role on the Foundry resource.")
try:
api_key = getpass.getpass(
f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
).strip()
_auth_default = "2" if current_auth_mode == "entra_id" else "1"
auth_choice = (
input(f"Authentication mode [1/2] ({_auth_default}): ").strip()
or _auth_default
)
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
use_entra = auth_choice == "2"
auth_mode_label = "entra_id" if use_entra else "api_key"
effective_key = api_key or current_api_key
if not effective_key:
print("No API key provided. Cancelled.")
return
# ── Step 3: credentials (key OR Entra preflight) ─────────────────
effective_key: str = ""
entra_overrides: dict = {}
token_provider = None # callable when entra
entra_scope = ""
# ── Step 3: auto-detect transport + models ───────────────────────
if use_entra:
try:
from agent.azure_identity_adapter import (
EntraIdentityConfig,
SCOPE_AI_AZURE_DEFAULT,
build_token_provider,
describe_active_credential,
has_azure_identity_installed,
)
except ImportError as exc:
print()
print(f"⚠ Could not import azure-identity adapter: {exc}")
print(" Falling back to API key auth.")
use_entra = False
auth_mode_label = "api_key"
if use_entra:
print()
if not has_azure_identity_installed():
print("◐ The 'azure-identity' package is not installed yet.")
print(
" Hermes will install it now (the preflight below "
"triggers the lazy-install). To skip lazy installs, "
"run: pip install azure-identity"
)
# Preserve only the optional scope override. Identity selection
# (tenant, user-assigned MI, workload identity, service principal)
# stays in Azure SDK env vars such as AZURE_CLIENT_ID.
_persisted_scope_override = str(current_entra.get("scope") or "").strip()
entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT
entra_overrides = {}
if _persisted_scope_override:
entra_overrides["scope"] = _persisted_scope_override
print()
print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...")
_config = EntraIdentityConfig(
scope=entra_scope,
)
info = describe_active_credential(config=_config, timeout_seconds=10.0)
if info.get("ok"):
env_sources = info.get("env_sources") or []
tag = ", ".join(env_sources) if env_sources else "default chain"
print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})")
else:
err = info.get("error") or "credential chain exhausted"
hint = info.get("hint") or (
"Run `az login`, attach a managed identity to this VM, or "
"set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET."
)
print(f"{err}")
print(f" Hint: {hint}")
try:
ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if ans and ans not in ("y", "yes"):
print("Cancelled.")
return
# Build the token provider for the detection probe (best-effort —
# if the credential chain failed above, this will silently return
# None inside azure_detect and the probe falls back to manual).
try:
token_provider = build_token_provider(config=_config)
except Exception as exc:
print(f"⚠ Could not build token provider for probing: {exc}")
token_provider = None
else:
print()
try:
api_key = getpass.getpass(
f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_key = api_key or current_api_key
if not effective_key:
print("No API key provided. Cancelled.")
return
# ── Step 4: auto-detect transport + models ───────────────────────
print()
print("◐ Probing endpoint to auto-detect transport and models...")
detection = azure_detect.detect(effective_url, effective_key)
detection = azure_detect.detect(
effective_url,
api_key=effective_key,
token_provider=token_provider,
)
discovered_models: list[str] = list(detection.models)
api_mode: str = detection.api_mode or ""
@ -3665,7 +3793,7 @@ def _model_flow_azure_foundry(config, current_model=""):
return
api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
# ── Step 4: model name ───────────────────────────────────────────
# ── Step 5: model name ───────────────────────────────────────────
print()
effective_model = ""
if discovered_models:
@ -3704,15 +3832,17 @@ def _model_flow_azure_foundry(config, current_model=""):
print("No model name provided. Cancelled.")
return
# ── Step 5: context-length lookup ────────────────────────────────
# ── Step 6: context-length lookup ────────────────────────────────
ctx_len = azure_detect.lookup_context_length(
effective_model,
effective_url,
effective_key,
api_key=effective_key,
token_provider=token_provider,
)
# ── Step 6: persist ──────────────────────────────────────────────
save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
# ── Step 7: persist ──────────────────────────────────────────────
if not use_entra:
save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
cfg = load_config()
model = cfg.get("model")
@ -3724,6 +3854,22 @@ def _model_flow_azure_foundry(config, current_model=""):
model["base_url"] = effective_url
model["api_mode"] = api_mode
model["default"] = effective_model
model["auth_mode"] = auth_mode_label
if use_entra:
# Persist only the non-default Entra scope so config.yaml stays tidy.
# Azure identity selection stays in standard AZURE_* env vars.
clean_entra: dict = {}
for key in ("scope",):
val = entra_overrides.get(key)
if val:
clean_entra[key] = val
if clean_entra:
model["entra"] = clean_entra
elif "entra" in model:
del model["entra"]
else:
if "entra" in model:
del model["entra"]
if ctx_len:
model["context_length"] = ctx_len
@ -3739,10 +3885,14 @@ def _model_flow_azure_foundry(config, current_model=""):
save_env_value("OPENAI_API_KEY", "")
mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
auth_label = (
"Microsoft Entra ID (keyless)" if use_entra else "API key"
)
print()
print("✓ Azure Foundry configured:")
print(f" Endpoint: {effective_url}")
print(f" API mode: {mode_label}")
print(f" Auth: {auth_label}")
print(f" Model: {effective_model}")
if ctx_len:
print(f" Context length: {ctx_len:,} tokens")

View file

@ -744,6 +744,15 @@ def _resolve_azure_foundry_runtime(
strips a trailing ``/v1`` for Anthropic-style endpoints because the
Anthropic SDK appends ``/v1/messages`` internally.
When ``model.auth_mode == "entra_id"`` and no explicit API key is
supplied, the returned ``api_key`` is a zero-arg callable produced by
:func:`agent.azure_identity_adapter.build_token_provider` rather than
a string. This applies to both OpenAI-style and Anthropic-style
endpoints. Downstream code that constructs an OpenAI SDK client passes
the callable through unchanged (the SDK accepts ``Callable[[], str]``
for ``api_key`` and calls it before every request); the Anthropic client
builder is expected to wrap it so each request gets a fresh bearer
header. Code paths that need a string (logging, manual HTTP probes,
header injection) must use the helpers in
``agent.azure_identity_adapter``.
Raises :class:`AuthError` when required values are missing.
"""
explicit_api_key = str(explicit_api_key or "").strip()
@ -752,9 +761,15 @@ def _resolve_azure_foundry_runtime(
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
cfg_base_url = ""
cfg_api_mode = "chat_completions"
cfg_auth_mode = "api_key"
cfg_entra: Dict[str, Any] = {}
if cfg_provider == "azure-foundry":
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
cfg_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
_entra = model_cfg.get("entra")
if isinstance(_entra, dict):
cfg_entra = _entra
# Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
# reasoning models as Responses-API-only. Calling /chat/completions
@ -780,6 +795,79 @@ def _resolve_azure_foundry_runtime(
"the AZURE_FOUNDRY_BASE_URL environment variable."
)
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
# we inherited from the configured base_url to avoid double-/v1 paths.
if cfg_api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
# ── Entra ID (Microsoft Foundry recommended path) ──────────────────
#
# OpenAI-style endpoints use the OpenAI SDK's native callable
# ``api_key=`` contract — the SDK mints a fresh JWT per request
# automatically.
#
# Anthropic-style endpoints (Claude on Foundry) take the callable
# too: :func:`agent.anthropic_adapter.build_anthropic_client`
# detects the callable and constructs an ``httpx.Client`` with a
# request event hook that injects a fresh ``Authorization: Bearer``
# header per request (the Anthropic SDK does not accept callables
# natively). From the runtime resolver's perspective both modes
# are identical — return the callable api_key and let the
# downstream SDK wrapper handle the contract difference.
if cfg_auth_mode == "entra_id":
if explicit_api_key:
# User passed --api-key on the CLI while config says entra_id —
# honour the explicit string (escape hatch for one-off testing).
api_key: Any = explicit_api_key
source = "explicit"
auth_mode = "api_key"
else:
try:
from agent.azure_identity_adapter import (
EntraIdentityConfig,
SCOPE_AI_AZURE_DEFAULT,
build_token_provider,
)
except Exception as exc:
raise AuthError(
"Azure Foundry Entra ID auth requires the 'azure-identity' "
"package. Install it with: pip install azure-identity "
f"(import failed: {exc})"
) from exc
scope = (
str(cfg_entra.get("scope") or "").strip()
or SCOPE_AI_AZURE_DEFAULT
)
try:
entra_config = EntraIdentityConfig(
scope=scope,
)
token_provider = build_token_provider(config=entra_config)
except ImportError as exc:
raise AuthError(str(exc)) from exc
api_key = token_provider
source = "entra_id"
auth_mode = "entra_id"
clean_entra = {}
if auth_mode == "entra_id":
configured_scope = str(cfg_entra.get("scope") or "").strip()
if configured_scope:
clean_entra["scope"] = configured_scope
return {
"provider": "azure-foundry",
"api_mode": cfg_api_mode,
"base_url": base_url,
"api_key": api_key,
"auth_mode": auth_mode,
"entra": clean_entra,
"source": source,
"requested_provider": requested_provider,
}
# ── Static API key (legacy / default) ──────────────────────────────
api_key = explicit_api_key
if not api_key:
try:
@ -792,20 +880,19 @@ def _resolve_azure_foundry_runtime(
if not api_key:
raise AuthError(
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
"~/.hermes/.env or run 'hermes model' to configure."
"~/.hermes/.env or run 'hermes model' to configure. To use "
"keyless Microsoft Entra ID auth instead, set "
"model.auth_mode: entra_id in config.yaml (or pick "
"'Microsoft Entra ID' in 'hermes model')."
)
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
# we inherited from the configured base_url to avoid double-/v1 paths.
if cfg_api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
return {
"provider": "azure-foundry",
"api_mode": cfg_api_mode,
"base_url": base_url,
"api_key": api_key,
"auth_mode": "api_key",
"source": source,
"requested_provider": requested_provider,
}
@ -1232,7 +1319,7 @@ def resolve_runtime_provider(
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
base_url = cfg_base_url or "https://api.anthropic.com"
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
# For Microsoft Foundry endpoints, use ANTHROPIC_API_KEY directly —
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
# would find the Claude Code OAuth token first (priority 3) and return

View file

@ -1288,9 +1288,15 @@ def _truncate_token(value: Optional[str], visible: int = 6) -> str:
OAuth access token. JWT prefixes (the part before the first dot) are
stripped first when present so the visible suffix is always part of
the signing region rather than a meaningless header chunk.
Returns the Entra-ID placeholder when handed a callable (Azure Foundry
bearer provider) the callable is NEVER invoked here.
"""
if not value:
return ""
if callable(value) and not isinstance(value, str):
# Entra ID bearer provider — never reveal a minted token in the UI.
return "<entra-id-bearer>"
s = str(value)
if "." in s and s.count(".") >= 2:
# Looks like a JWT — show the trailing piece of the signature only.

View file

@ -1,4 +1,4 @@
"""Azure AI Foundry provider profile.
"""Microsoft Foundry provider profile.
Azure Foundry exposes an OpenAI-compatible endpoint; users supply their own
base URL at setup since endpoints are per-resource.
@ -11,7 +11,7 @@ azure_foundry = ProviderProfile(
name="azure-foundry",
aliases=("azure", "azure-ai-foundry", "azure-ai"),
display_name="Azure Foundry",
description="Azure AI Foundry — OpenAI-compatible endpoint (user-supplied base URL)",
description="Microsoft Foundry - OpenAI-compatible endpoint (user-supplied base URL)",
signup_url="https://ai.azure.com/",
env_vars=("AZURE_FOUNDRY_API_KEY", "AZURE_FOUNDRY_BASE_URL"),
base_url="", # per-resource; user provides at setup

View file

@ -1,5 +1,5 @@
name: azure-foundry-provider
kind: model-provider
version: 1.0.0
description: Azure AI Foundry
description: Microsoft Foundry
author: Nous Research

View file

@ -125,6 +125,7 @@ acp = ["agent-client-protocol==0.9.0"]
# 4. Run `uv lock` to regenerate transitives.
# 5. Optionally re-add to [all] only after a few days of clean operation.
bedrock = ["boto3==1.42.89"]
azure-identity = ["azure-identity==1.25.3"]
termux = [
# Baseline Android / Termux path for reliable fresh installs.
"python-telegram-bot[webhooks]==22.6",

View file

@ -1428,7 +1428,11 @@ class AIAgent:
prefix = f"HTTP {status_code}: " if status_code else ""
return f"{prefix}{raw[:500]}"
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
def _mask_api_key_for_logs(self, key: Any) -> Optional[str]:
# Azure Foundry Entra ID bearer providers are callables — never
# invoke them in log paths; identify the auth surface instead.
if callable(key) and not isinstance(key, str):
return "<entra-id-bearer>"
if not key:
return None
if len(key) <= 12:

View file

@ -0,0 +1,87 @@
"""Regression tests for ACP adapter detection under Azure Foundry Entra ID.
The ACP adapter's ``detect_provider`` previously gated on
``isinstance(api_key, str)`` and returned ``None`` for any runtime that
returned a callable ``api_key`` — i.e. Azure Foundry with
``auth_mode=entra_id``. Downstream, ACP would default to
``"openrouter"`` and reject the legitimate provider in its auth handshake.
This test pins the callable-aware fix so it never regresses.
"""
from __future__ import annotations
from unittest.mock import patch
class TestDetectProviderEntra:
    """Pin ``detect_provider`` for string, callable, and missing credentials."""

    # Dotted path of the resolver that ``detect_provider`` imports lazily.
    _RESOLVER = "hermes_cli.runtime_provider.resolve_runtime_provider"

    @classmethod
    def _resolver_returning(cls, runtime):
        """Return a patcher that makes the runtime resolver yield ``runtime``."""

        def _fake_runtime(**_kwargs):
            return runtime

        return patch(cls._RESOLVER, side_effect=_fake_runtime)

    def test_callable_api_key_is_a_valid_credential(self):
        """A runtime whose ``api_key`` is a callable (Entra bearer token
        provider) counts as a configured provider rather than ``None``."""
        from acp_adapter import auth as _acp_auth

        entra_runtime = {
            "provider": "azure-foundry",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_key": lambda: "jwt-fresh",
        }
        with self._resolver_returning(entra_runtime):
            assert _acp_auth.detect_provider() == "azure-foundry"
            assert _acp_auth.has_provider() is True

    def test_string_api_key_still_works(self):
        """A plain non-empty string key keeps resolving to its provider."""
        from acp_adapter import auth as _acp_auth

        static_runtime = {
            "provider": "openrouter",
            "api_key": "sk-or-static-key",
        }
        with self._resolver_returning(static_runtime):
            assert _acp_auth.detect_provider() == "openrouter"

    def test_empty_string_api_key_returns_none(self):
        """An empty string key is not a credential."""
        from acp_adapter import auth as _acp_auth

        keyless_runtime = {"provider": "openrouter", "api_key": ""}
        with self._resolver_returning(keyless_runtime):
            assert _acp_auth.detect_provider() is None

    def test_missing_provider_returns_none(self):
        """A callable api_key with an empty provider still yields ``None`` —
        no provider name is synthesized from the credential shape."""
        from acp_adapter import auth as _acp_auth

        providerless_runtime = {"api_key": lambda: "jwt-fresh", "provider": ""}
        with self._resolver_returning(providerless_runtime):
            assert _acp_auth.detect_provider() is None

    def test_resolver_exception_returns_none(self):
        """A resolver that raises is reported as "no provider"."""
        from acp_adapter import auth as _acp_auth

        failing_resolver = patch(
            self._RESOLVER,
            side_effect=RuntimeError("simulated"),
        )
        with failing_resolver:
            assert _acp_auth.detect_provider() is None

View file

@ -9,6 +9,7 @@ import pytest
from agent.prompt_caching import apply_anthropic_cache_control
from agent.anthropic_adapter import (
_is_azure_anthropic_endpoint,
_is_oauth_token,
_refresh_oauth_token,
_to_plain_data,
@ -121,6 +122,20 @@ class TestBuildAnthropicClient:
betas = kwargs["default_headers"]["anthropic-beta"]
assert "context-1m-2025-08-07" in betas
def test_azure_anthropic_endpoint_detection_is_host_and_path_scoped(self):
assert _is_azure_anthropic_endpoint(
"https://example.services.ai.azure.com/models/anthropic"
) is True
assert _is_azure_anthropic_endpoint(
"https://example.services.ai.azure.us/anthropic"
) is True
assert _is_azure_anthropic_endpoint(
"https://example.openai.azure.com/openai/v1"
) is False
assert _is_azure_anthropic_endpoint(
"https://management.azure.com/anthropic"
) is False
def test_bedrock_client_keeps_context_1m_beta(self):
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
mock_sdk.AnthropicBedrock = MagicMock()

View file

@ -0,0 +1,350 @@
"""Tests for auxiliary client routing of the ``azure-foundry`` provider.
Covers the dedicated branch in ``agent.auxiliary_client.resolve_provider_client``
that delegates to :func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime`
instead of falling into the generic ``resolve_api_key_provider_credentials``
path (which only knows about ``AZURE_FOUNDRY_API_KEY`` and would 401 for
Entra ID users and miss ``model.base_url`` overrides for api-key users
with non-standard Foundry-projects endpoints).
Pinned scenarios:
* ``auth_mode: api_key`` — plain OpenAI client with the static string
key for ``chat_completions``.
* ``auth_mode: entra_id`` + ``chat_completions`` — plain OpenAI
client with a callable ``api_key`` (the bearer-token provider);
confirms the callable survives the auxiliary path end-to-end.
* ``auth_mode: entra_id`` + GPT-5.x model — CodexAuxiliaryClient
wrapping the OpenAI client (api_mode auto-upgrades to
codex_responses).
* Anthropic-style + entra_id — rejected at the runtime resolver,
so the aux path returns ``(None, None)``.
* Failure path — when no model is configured, returns ``(None, None)``
cleanly so the auto chain falls through.
"""
from __future__ import annotations
import sys
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture(autouse=True)
def _reset_credential_cache():
    """Call the adapter's ``reset_credential_cache`` around every test.

    Autouse, so each test in this module runs between two resets: one
    before the test body and one after it.
    """
    from agent.azure_identity_adapter import (
        reset_credential_cache as _clear_cache,
    )

    _clear_cache()
    yield
    _clear_cache()
@pytest.fixture
def fake_azure_identity(monkeypatch):
    """Install a stand-in for ``azure.identity`` for the duration of a test.

    The stub is returned by the adapter's ``_require_azure_identity`` and
    registered in ``sys.modules`` under ``"azure.identity"``, which keeps CI
    hermetic when the SDK is not installed.

    Returns:
        A dict whose ``"scope"`` entry holds the scope most recently passed
        to the stub's ``get_bearer_token_provider`` (``None`` until then).
    """
    from agent import azure_identity_adapter as _adapter

    recorded = {"scope": None}

    def _default_credential(**kw):
        # Mimics a credential object: remembers its kwargs and hands out a
        # fixed token that expires far in the future.
        def _get_token(scope):
            return SimpleNamespace(token="fake", expires_on=9999999999)

        return SimpleNamespace(kwargs=kw, get_token=_get_token)

    def _bearer_token_provider(credential, scope):
        # Record the requested scope, then return a zero-arg token callable.
        recorded["scope"] = scope
        return lambda: f"jwt-for-{scope}"

    stub_module = SimpleNamespace(
        DefaultAzureCredential=_default_credential,
        get_bearer_token_provider=_bearer_token_provider,
    )
    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: stub_module)
    monkeypatch.setitem(sys.modules, "azure.identity", stub_module)
    return recorded
@pytest.fixture
def patch_load_config(monkeypatch):
    """Return a setter for the ``model`` config seen by ``_try_azure_foundry``.

    Calling the returned function with a dict replaces
    ``hermes_cli.config.load_config`` with a stub that returns
    ``{"model": <that dict>}``.
    """

    def _apply(model_cfg):
        def _stub_load_config():
            return {"model": model_cfg}

        monkeypatch.setattr("hermes_cli.config.load_config", _stub_load_config)

    return _apply
# ---------------------------------------------------------------------------
# auth_mode: api_key (default) — regression for the legacy path
# ---------------------------------------------------------------------------
class TestAuxAzureFoundryApiKey:
    """Regression coverage for the static API-key path of ``_try_azure_foundry``."""

    _STATIC_KEY = "sk-azure-static-key"

    @staticmethod
    def _model_cfg(default_model=None):
        """Build an azure-foundry model config, optionally with a default model."""
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        }
        if default_model is not None:
            cfg["default"] = default_model
        return cfg

    def test_chat_completions_returns_plain_openai_client(self, monkeypatch, patch_load_config):
        """chat_completions + static key yields a plain ``OpenAI`` client."""
        from agent.auxiliary_client import _try_azure_foundry
        from openai import OpenAI as _OpenAI

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", self._STATIC_KEY)
        patch_load_config(self._model_cfg("gpt-4o"))

        client, resolved = _try_azure_foundry(model="gpt-4o")

        assert client is not None
        assert resolved == "gpt-4o"
        assert isinstance(client, _OpenAI)
        assert client.api_key == "sk-azure-static-key"

    def test_codex_responses_wraps_in_codex_aux_client(self, monkeypatch, patch_load_config):
        """A GPT-5.x model is expected to come back as a ``CodexAuxiliaryClient``."""
        from agent.auxiliary_client import _try_azure_foundry, CodexAuxiliaryClient

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", self._STATIC_KEY)
        patch_load_config(self._model_cfg("gpt-5.4-mini"))

        # GPT-5.x → runtime auto-upgrades to codex_responses
        client, resolved = _try_azure_foundry(model="gpt-5.4-mini")

        assert resolved == "gpt-5.4-mini"
        assert isinstance(client, CodexAuxiliaryClient)
        assert client.api_key == "sk-azure-static-key"

    def test_no_key_returns_none(self, monkeypatch, patch_load_config):
        """Without ``AZURE_FOUNDRY_API_KEY`` the helper returns ``(None, None)``."""
        from agent.auxiliary_client import _try_azure_foundry

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        patch_load_config(self._model_cfg("gpt-4o"))

        client, resolved = _try_azure_foundry(model="gpt-4o")

        assert client is None
        assert resolved is None

    def test_no_model_returns_none(self, monkeypatch, patch_load_config):
        """Azure has no fallback aux model — fail soft so the auto chain
        can try other providers."""
        from agent.auxiliary_client import _try_azure_foundry

        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", self._STATIC_KEY)
        # No default model in the config and none passed to the helper.
        patch_load_config(self._model_cfg())

        client, resolved = _try_azure_foundry()

        assert client is None
        assert resolved is None
# ---------------------------------------------------------------------------
# auth_mode: entra_id — callable api_key survives end-to-end
# ---------------------------------------------------------------------------
class TestAuxAzureFoundryEntra:
    """``auth_mode: entra_id`` — the callable ``api_key`` must travel
    unchanged from the runtime resolver into the SDK constructors."""

    @staticmethod
    def _entra_model_config(base_url, api_mode, default_model):
        """Build a fresh azure-foundry ``entra_id`` model config for one test."""
        return {
            "provider": "azure-foundry",
            "base_url": base_url,
            "api_mode": api_mode,
            "auth_mode": "entra_id",
            "default": default_model,
        }

    @staticmethod
    def _recording_openai(record):
        """Return an ``OpenAI`` stand-in that passes its constructor kwargs
        to ``record`` and mirrors the fields downstream callers read."""

        class _FakeOpenAI:
            def __init__(self, **kwargs):
                record(kwargs)
                self.api_key = kwargs.get("api_key", "")
                self.base_url = kwargs.get("base_url", "")

        return _FakeOpenAI

    def test_callable_api_key_reaches_openai_constructor(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """The token provider callable must arrive at ``OpenAI(api_key=...)``
        intact — never stringified to ``"no-key-required"`` or to the
        SDK-internal empty-string representation before it is handed off.

        The assertion targets the public SDK contract (the constructor
        receives the callable) rather than ``client.api_key``, because
        OpenAI 2.24.0 stores callable api_keys in a private attribute and
        exposes ``client.api_key`` as ``""``. The SDK still calls the
        callable per request to mint ``Authorization: Bearer <token>``;
        that is the documented Microsoft/OpenAI contract relied on here.
        """
        from agent import auxiliary_client as _aux

        ctor_kwargs = {}
        monkeypatch.setattr(_aux, "OpenAI", self._recording_openai(ctor_kwargs.update))
        endpoint = "https://r.openai.azure.com/openai/v1"
        patch_load_config(self._entra_model_config(endpoint, "chat_completions", "gpt-4o"))

        aux_client, model_name = _aux._try_azure_foundry(model="gpt-4o")

        assert aux_client is not None
        assert model_name == "gpt-4o"
        # Public-contract check: the SDK constructor saw a real callable,
        # as Microsoft's Foundry sample requires.
        token_provider = ctor_kwargs["api_key"]
        assert callable(token_provider)
        assert not isinstance(token_provider, str)
        assert token_provider().startswith("jwt-for-")
        # The base URL is forwarded verbatim (no /responses suffix
        # stripping in this path — that is a separate concern handled by
        # the runtime resolver only when the user re-saves config).
        assert ctor_kwargs["base_url"] == endpoint

    def test_codex_responses_with_entra_wraps_correctly(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """GPT-5.x deployment on Entra ID — auto-upgraded to
        codex_responses, wrapped in ``CodexAuxiliaryClient``, with the
        callable api_key handed to the underlying OpenAI SDK."""
        from agent import auxiliary_client as _aux

        ctor_kwargs = {}
        monkeypatch.setattr(_aux, "OpenAI", self._recording_openai(ctor_kwargs.update))
        patch_load_config(
            self._entra_model_config(
                "https://r.openai.azure.com/openai/v1",
                "chat_completions",
                "gpt-5.4-mini",
            )
        )

        aux_client, model_name = _aux._try_azure_foundry(model="gpt-5.4-mini")

        assert model_name == "gpt-5.4-mini"
        assert isinstance(aux_client, _aux.CodexAuxiliaryClient)
        # Check the SDK constructor record rather than the wrapper
        # attribute (which mirrors the SDK's empty-string representation).
        token_provider = ctor_kwargs["api_key"]
        assert callable(token_provider)
        assert token_provider().startswith("jwt-for-")

    def test_entra_anthropic_messages_uses_bearer_hook(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """Entra ID + anthropic_messages: the runtime returns a callable
        api_key; ``_maybe_wrap_anthropic`` → ``build_anthropic_client``
        detects the callable and installs the bearer-injecting httpx
        event hook on a custom ``httpx.Client`` passed to the Anthropic
        SDK via ``http_client=``."""
        from agent import auxiliary_client as _aux
        from agent import anthropic_adapter as _anthropic

        seen = {}

        def _record_openai(kwargs):
            seen["openai"] = kwargs

        class _FakeAnthropicSDK:
            class Anthropic:
                def __init__(self, **kwargs):
                    seen["anthropic"] = kwargs

        monkeypatch.setattr(_aux, "OpenAI", self._recording_openai(_record_openai))
        monkeypatch.setattr(_anthropic, "_get_anthropic_sdk", lambda: _FakeAnthropicSDK)
        patch_load_config(
            self._entra_model_config(
                "https://r.services.ai.azure.com/anthropic",
                "anthropic_messages",
                "claude-sonnet-4-5",
            )
        )

        aux_client, model_name = _aux._try_azure_foundry(model="claude-sonnet-4-5")

        assert aux_client is not None
        assert model_name == "claude-sonnet-4-5"
        # The Anthropic SDK constructor must have received a custom
        # http_client (the bearer-injecting hook) and a placeholder
        # auth_token.
        anthropic_kwargs = seen.get("anthropic") or {}
        assert "http_client" in anthropic_kwargs, (
            "build_anthropic_client must pass a custom http_client when "
            "given a callable api_key, otherwise the SDK cannot mint "
            "fresh tokens per request"
        )
        assert anthropic_kwargs.get("auth_token") == "entra-id-bearer-via-http-hook"
        # The http_client must actually carry a request event hook.
        installed_hooks = getattr(anthropic_kwargs["http_client"], "event_hooks", {})
        assert "request" in installed_hooks
        assert len(installed_hooks["request"]) >= 1
# ---------------------------------------------------------------------------
# resolve_provider_client → azure-foundry dispatch
# ---------------------------------------------------------------------------
class TestResolveProviderClientAzureFoundry:
    """``resolve_provider_client`` dispatch for the ``azure-foundry`` provider."""

    def test_dispatches_to_azure_branch_not_generic_api_key_path(
        self, monkeypatch, fake_azure_identity, patch_load_config,
    ):
        """End-to-end: the public ``resolve_provider_client`` entry point
        must take the dedicated azure-foundry branch, NOT the generic
        api-key registry path that would call
        ``resolve_api_key_provider_credentials`` and return None for
        Entra users."""
        from agent import auxiliary_client as _aux

        ctor_kwargs = {}

        class _FakeOpenAI:
            def __init__(self, **kwargs):
                ctor_kwargs.update(kwargs)
                self.api_key = kwargs.get("api_key", "")
                self.base_url = kwargs.get("base_url", "")

        monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI)
        entra_config = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "default": "gpt-4o",
        }
        patch_load_config(entra_config)

        aux_client, model_name = _aux.resolve_provider_client("azure-foundry", "gpt-4o")

        assert aux_client is not None
        assert model_name == "gpt-4o"
        # The callable survived resolve_provider_client →
        # _try_azure_foundry → OpenAI(api_key=...).
        assert callable(ctor_kwargs["api_key"])

    def test_warns_and_returns_none_on_failure(
        self, monkeypatch, patch_load_config, caplog,
    ):
        """When azure-foundry is requested but cannot be resolved (e.g.
        no model + no key), the result is ``(None, None)`` and a warning
        pointing at ``hermes doctor`` is logged."""
        import logging
        from agent.auxiliary_client import resolve_provider_client

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        # No "default" entry → the resolver has no model → it bails.
        unresolvable_config = {
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        }
        patch_load_config(unresolvable_config)

        with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
            aux_client, model_name = resolve_provider_client("azure-foundry")

        assert aux_client is None
        assert model_name is None
        doctor_hints = [
            rec
            for rec in caplog.records
            if "azure-foundry" in rec.message and "hermes doctor" in rec.message
        ]
        assert doctor_hints

View file

@ -0,0 +1,662 @@
"""Tests for the Microsoft Entra ID adapter (agent/azure_identity_adapter.py).
Covers:
- Scope resolution per Azure host shape
- Display masking for callable + string + None inputs
- Cache-fingerprint stability under callable refresh
- is_token_provider truthiness on callables vs strings
- EntraIdentityConfig serialization round-trip
- Token provider construction with mocked azure-identity
- Credential cache reuse + reset
- has_azure_identity_credentials timeout / failure paths
- describe_active_credential structural reporting
- Lazy-install error path when azure-identity absent + lazy installs
disabled
We mock azure.identity at the import boundary rather than hitting any
real Azure endpoint. Tests must remain hermetic per AGENTS.md.
"""
from __future__ import annotations
import sys
from collections.abc import Callable
from types import SimpleNamespace
from typing import cast
from unittest.mock import MagicMock, patch
import pytest
# Reset the adapter's credential cache before and after every test —
# the cache persists across tests otherwise, polluting assertions
# about cache invalidation.
@pytest.fixture(autouse=True)
def _reset_adapter_cache():
    """Clear the adapter's credential cache on both sides of every test."""
    from agent.azure_identity_adapter import reset_credential_cache as _clear_cache

    _clear_cache()  # start from an empty cache
    yield
    _clear_cache()  # leave nothing behind for the next test
# ---------------------------------------------------------------------------
# Scope constant
# ---------------------------------------------------------------------------
class TestEntraScopeConstant:
    """Pin the Microsoft-documented Foundry inference scope.

    Microsoft's official samples for both ``*.openai.azure.com`` and
    ``*.services.ai.azure.com`` use ``https://ai.azure.com/.default``.
    The older ``cognitiveservices.azure.com/.default`` is the
    control-plane scope and is rejected for inference by newer
    Azure OpenAI / Foundry resources.

    Users with sovereign-cloud or unusual-tenant requirements pass the
    scope explicitly via ``model.entra.scope`` in ``config.yaml``.

    Refs:
    * https://learn.microsoft.com/azure/ai-foundry/openai/how-to/managed-identity
    * https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
    """

    def test_default_scope_matches_microsoft_documentation(self):
        """The exported default scope is exactly the documented value."""
        from agent import azure_identity_adapter as adapter

        documented_scope = "https://ai.azure.com/.default"
        assert adapter.SCOPE_AI_AZURE_DEFAULT == documented_scope
# ---------------------------------------------------------------------------
# Cache fingerprint + http-bearer helpers
# ---------------------------------------------------------------------------
class TestMaterializeBearerForHttp:
    """The only helper that mints a real bearer JWT — it must call the
    callable exactly once and never fall through to display masking."""

    def test_callable_is_invoked_and_returns_token(self):
        """A callable provider is invoked once and its token returned."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        calls = []

        def provider():
            calls.append("minted")
            return "fresh-jwt"

        minted = materialize_bearer_for_http(provider)

        assert minted == "fresh-jwt"
        assert len(calls) == 1

    def test_string_passes_through(self):
        """A plain string key is returned unchanged."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        static_key = "plain-key"
        assert materialize_bearer_for_http(static_key) == static_key

    def test_callable_returning_empty_raises(self):
        """A provider that yields an empty token is rejected."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        def empty_provider():
            return ""

        with pytest.raises(ValueError):
            materialize_bearer_for_http(empty_provider)

    def test_empty_string_raises(self):
        """Both the empty string and ``None`` are rejected."""
        from agent.azure_identity_adapter import materialize_bearer_for_http

        for missing in ("", None):
            with pytest.raises(ValueError):
                materialize_bearer_for_http(missing)
# ---------------------------------------------------------------------------
# build_bearer_http_client — the Anthropic-on-Foundry bridge
# ---------------------------------------------------------------------------
class TestBuildBearerHttpClient:
    """``build_bearer_http_client`` returns an ``httpx.Client`` whose
    request event hook mints a fresh JWT per outbound request. This is
    how Entra ID auth reaches the Anthropic SDK (which does not accept a
    callable ``auth_token``)."""

    def test_returns_httpx_client_with_request_hook(self):
        """The result is an ``httpx.Client`` with at least one request hook."""
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        http_client = build_bearer_http_client(lambda: "jwt")
        try:
            assert isinstance(http_client, httpx.Client)
            request_hooks = http_client.event_hooks.get("request", [])
            assert len(request_hooks) >= 1
        finally:
            http_client.close()

    def test_hook_overrides_authorization_header(self):
        """The hook replaces stale auth headers with a freshly minted bearer."""
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        issued = []

        def provider():
            token = f"jwt-{len(issued) + 1}"
            issued.append(token)
            return token

        http_client = build_bearer_http_client(provider)
        try:
            inject_bearer = http_client.event_hooks["request"][0]
            # Start from a request carrying conflicting pre-set auth
            # headers; the hook must strip them and install the bearer.
            conflicting_headers = {
                "Authorization": "Bearer stale-token",
                "api-key": "static-key",
                "x-api-key": "static-key",
            }
            first = httpx.Request(
                "POST",
                "https://example.com/v1/messages",
                headers=conflicting_headers,
                json={"hello": "world"},
            )
            inject_bearer(first)
            assert first.headers["Authorization"] == "Bearer jwt-1"
            # Static-key headers must be gone — sending both auth values
            # would be ambiguous on Azure.
            for static_header in ("api-key", "x-api-key"):
                assert static_header not in first.headers

            # A second request gets its own freshly minted token.
            second = httpx.Request("GET", "https://example.com/v1/models")
            inject_bearer(second)
            assert second.headers["Authorization"] == "Bearer jwt-2"
            assert len(issued) == 2
        finally:
            http_client.close()

    def test_hook_strips_auth_headers_and_warns_when_token_provider_fails(self, caplog):
        """When the token provider fails (chain exhausted, IMDS down, az
        login expired), the hook must:

        1. Log at WARNING level so the misconfiguration is visible at the
           default log level (not buried at DEBUG).
        2. Strip any pre-set Authorization headers — including the
           placeholder ``entra-id-bearer-via-http-hook`` sentinel that
           :func:`_build_anthropic_client_with_bearer_hook` sets on the
           Anthropic SDK constructor. This produces a clean "missing
           auth" 401 from Azure rather than a sentinel-bearing 401 that
           is harder to diagnose, AND avoids leaking the sentinel string
           into upstream access logs.
        """
        import logging
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        def bad_provider():
            # Empty token → materialize_bearer_for_http raises.
            return ""

        http_client = build_bearer_http_client(bad_provider)
        try:
            inject_bearer = http_client.event_hooks["request"][0]
            placeholder_headers = {
                "Authorization": "Bearer entra-id-bearer-via-http-hook",
                "api-key": "leaked-placeholder",
            }
            request = httpx.Request(
                "POST",
                "https://example.com/v1/messages",
                headers=placeholder_headers,
            )
            with caplog.at_level(logging.WARNING, logger="agent.azure_identity_adapter"):
                inject_bearer(request)  # Must not raise.

            # Pre-set auth headers stripped — no sentinel reaches Azure.
            for auth_header in ("Authorization", "api-key"):
                assert auth_header not in request.headers
            # A WARNING was logged so the user sees the misconfiguration.
            provider_warnings = [
                rec
                for rec in caplog.records
                if rec.levelno == logging.WARNING
                and "Entra ID token provider" in rec.message
            ]
            assert provider_warnings
        finally:
            http_client.close()

    def test_rejects_non_callable_provider(self):
        """A string or ``None`` in place of the provider raises ``ValueError``."""
        from agent.azure_identity_adapter import build_bearer_http_client

        for bogus in ("plain-string-not-callable", None):
            with pytest.raises(ValueError):
                build_bearer_http_client(cast(Callable[[], str], bogus))

    def test_forwards_httpx_kwargs(self):
        """Extra keyword arguments such as ``timeout`` are accepted."""
        import httpx
        from agent.azure_identity_adapter import build_bearer_http_client

        custom_timeout = httpx.Timeout(60.0, connect=5.0)
        http_client = build_bearer_http_client(lambda: "jwt", timeout=custom_timeout)
        try:
            # Sanity check only: the kwarg was accepted without TypeError.
            assert http_client is not None
        finally:
            http_client.close()
class TestIsTokenProvider:
    """``is_token_provider`` separates callables from static string keys."""

    def test_callable_is_token_provider(self):
        """A zero-argument callable counts as a token provider."""
        from agent.azure_identity_adapter import is_token_provider

        verdict = is_token_provider(lambda: "x")
        assert verdict is True

    def test_string_is_not_token_provider(self):
        """Strings — including the empty string — are never classified
        as token providers."""
        from agent.azure_identity_adapter import is_token_provider

        for static_value in ("static-key", ""):
            assert is_token_provider(static_value) is False
# ---------------------------------------------------------------------------
# EntraIdentityConfig
# ---------------------------------------------------------------------------
class TestEntraIdentityConfig:
    """The serializable config that crosses multiprocessing boundaries —
    must round-trip through dict cleanly and never lose fields."""

    def test_to_dict_round_trip(self):
        """``from_dict(cfg.to_dict())`` reproduces an equal config."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig(
            scope="https://ai.azure.com/.default",
            exclude_interactive_browser=False,
        )
        rebuilt = EntraIdentityConfig.from_dict(cfg.to_dict())
        assert rebuilt == cfg

    def test_from_dict_handles_empty_strings(self):
        """An empty ``scope`` value falls back to a ``/.default`` scope."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig.from_dict({
            "scope": "",
            "client_id": None,
        })
        # Empty scope falls back to the default.
        assert cfg.scope.endswith("/.default")

    def test_from_dict_ignores_legacy_identity_keys(self):
        """Old config.yaml that still has model.entra.client_id /
        tenant_id / authority should not crash ``from_dict`` — those
        values are now read from AZURE_* env vars by azure-identity
        directly."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig.from_dict({
            "tenant_id": "legacy-tenant",
            "authority": "https://login.partner.microsoftonline.cn",
            "client_id": "user-mi-client",
        })
        # Legacy keys are silently ignored — no crash, and no surprise
        # field appears on the config object.
        for legacy_field in ("client_id", "tenant_id", "authority"):
            assert not hasattr(cfg, legacy_field)

    def test_constructor_normalizes_empty_scope(self):
        """Constructing with ``scope=""`` also yields a ``/.default`` scope."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig(scope="")
        assert cfg.scope.endswith("/.default")

    def test_from_dict_default_scope_override(self):
        """``default_scope`` replaces the built-in fallback for empty scopes."""
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig.from_dict(
            {"scope": ""},
            default_scope="https://custom.example/.default",
        )
        assert cfg.scope == "https://custom.example/.default"

    def test_dataclass_is_frozen(self):
        """Assigning to a field must fail, and the field must stay unchanged.

        Frozen dataclasses are hashable / safe to pass through caches.
        """
        from agent.azure_identity_adapter import EntraIdentityConfig

        cfg = EntraIdentityConfig()
        original_scope = cfg.scope
        # A frozen dataclass raises dataclasses.FrozenInstanceError, a
        # subclass of AttributeError. Catching bare ``Exception`` here
        # would let an unrelated failure pass as "frozen".
        with pytest.raises(AttributeError):
            setattr(cfg, "scope", "mutated")
        assert cfg.scope == original_scope
# ---------------------------------------------------------------------------
# Credential / token provider construction
# ---------------------------------------------------------------------------
class _FakeAzureIdentity:
"""Stand-in for the ``azure.identity`` module.
Captures kwargs passed to ``DefaultAzureCredential`` so tests can
assert how config flows into the SDK.
"""
def __init__(self):
self.last_credential_kwargs = None
self.last_scope = None
self.credential_count = 0
def DefaultAzureCredential(self, **kwargs): # noqa: N802 — match SDK
self.last_credential_kwargs = kwargs
self.credential_count += 1
return SimpleNamespace(
get_token=lambda scope: SimpleNamespace(token="fake-jwt", expires_on=9999999999),
kwargs=kwargs,
)
def get_bearer_token_provider(self, credential, scope):
self.last_scope = scope
# Return a callable that mints a token when invoked.
return lambda: f"jwt-for-{scope}"
@pytest.fixture
def fake_azure_identity(monkeypatch):
    """Install a fake ``azure.identity`` into ``sys.modules`` and stub the
    adapter's ``_require_azure_identity`` so every test uses the fake."""
    from agent import azure_identity_adapter as _adapter

    recorder = _FakeAzureIdentity()
    identity_module = SimpleNamespace(
        DefaultAzureCredential=recorder.DefaultAzureCredential,
        get_bearer_token_provider=recorder.get_bearer_token_provider,
    )
    azure_package = SimpleNamespace(identity=identity_module)
    monkeypatch.setitem(sys.modules, "azure", azure_package)
    monkeypatch.setitem(sys.modules, "azure.identity", identity_module)
    # The adapter's ``_require_azure_identity`` performs its own import,
    # so it is patched as well to keep tests away from the real package's
    # singleton state.
    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: identity_module)
    return recorder
class TestBuildCredential:
    """How ``build_credential`` maps config to SDK kwargs and caches results."""

    def test_default_kwargs_are_minimal(self, fake_azure_identity):
        """SDK default for ``exclude_interactive_browser_credential`` is
        True; it is only passed when the user opts IN to interactive
        browser auth. Tenant / authority / service principal config flow
        through the standard ``AZURE_*`` env vars (read by azure-identity
        directly), not Hermes config kwargs."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        credential = build_credential(EntraIdentityConfig())

        # A default config must produce empty kwargs — the SDK applies
        # its own defaults plus env-var-driven settings.
        assert fake_azure_identity.last_credential_kwargs == {}
        assert credential is not None

    def test_interactive_browser_opt_in(self, fake_azure_identity):
        """When the user explicitly sets
        ``exclude_interactive_browser=False``, the SDK kwarg is set to
        False. Without the opt-in the kwarg is not passed at all (SDK
        default is True / browser excluded)."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        opted_in = EntraIdentityConfig(exclude_interactive_browser=False)
        build_credential(opted_in)

        sdk_kwargs = fake_azure_identity.last_credential_kwargs
        assert sdk_kwargs["exclude_interactive_browser_credential"] is False

    def test_credential_is_cached_per_config(self, fake_azure_identity):
        """Building twice from one config reuses a single credential."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        cfg = EntraIdentityConfig(scope="s1")
        first, second = build_credential(cfg), build_credential(cfg)

        assert first is second
        assert fake_azure_identity.credential_count == 1

    def test_distinct_configs_get_distinct_credentials(self, fake_azure_identity):
        """Configs with different scopes do not share a credential."""
        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential

        for_s1 = build_credential(EntraIdentityConfig(scope="s1"))
        for_s2 = build_credential(EntraIdentityConfig(scope="s2"))

        assert for_s1 is not for_s2
        assert fake_azure_identity.credential_count == 2

    def test_reset_cache_invalidates(self, fake_azure_identity):
        """After ``reset_credential_cache`` the same config builds anew."""
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            build_credential,
            reset_credential_cache,
        )

        cfg = EntraIdentityConfig(scope="x")
        before_reset = build_credential(cfg)
        reset_credential_cache()
        after_reset = build_credential(cfg)

        assert before_reset is not after_reset
class TestBuildTokenProvider:
    """Scope selection rules of ``build_token_provider``."""

    def test_returns_callable_for_scope(self, fake_azure_identity):
        """An explicit scope is forwarded and the result is callable."""
        from agent.azure_identity_adapter import build_token_provider

        wanted_scope = "https://ai.azure.com/.default"
        token_provider = build_token_provider(scope=wanted_scope)

        assert callable(token_provider)
        assert token_provider() == "jwt-for-https://ai.azure.com/.default"
        assert fake_azure_identity.last_scope == wanted_scope

    def test_falls_back_to_default_scope_when_unspecified(self, fake_azure_identity):
        """When neither ``scope`` nor ``config`` is provided,
        ``build_token_provider`` uses ``SCOPE_AI_AZURE_DEFAULT`` —
        Microsoft's documented Foundry inference scope. ``base_url`` is
        accepted for back-compat but ignored."""
        from agent.azure_identity_adapter import (
            SCOPE_AI_AZURE_DEFAULT,
            build_token_provider,
        )

        build_token_provider(base_url="https://r.openai.azure.com/openai/v1")

        assert fake_azure_identity.last_scope == SCOPE_AI_AZURE_DEFAULT

    def test_explicit_scope_wins_over_base_url(self, fake_azure_identity):
        """A ``scope`` argument is used even when ``base_url`` is also given."""
        from agent.azure_identity_adapter import build_token_provider

        override_scope = "https://override.example/.default"
        build_token_provider(
            scope=override_scope,
            base_url="https://r.openai.azure.com/openai/v1",
        )

        assert fake_azure_identity.last_scope == override_scope

    def test_config_object_wins_over_kwargs(self, fake_azure_identity):
        """The scope on a ``config`` object beats the ``scope`` kwarg."""
        from agent.azure_identity_adapter import (
            EntraIdentityConfig,
            build_token_provider,
        )

        explicit_config = EntraIdentityConfig(scope="cfg-scope")
        build_token_provider(scope="ignored", config=explicit_config)

        assert fake_azure_identity.last_scope == "cfg-scope"
        assert fake_azure_identity.last_credential_kwargs == {}
# ---------------------------------------------------------------------------
# Lazy-install / missing-package surface
# ---------------------------------------------------------------------------
class TestRequireAzureIdentityMissing:
    """Error surface when ``azure-identity`` cannot be imported or installed."""

    def test_clear_error_when_lazy_install_disabled(self, monkeypatch):
        """When azure-identity isn't importable AND lazy installs are
        off, the adapter must raise ImportError with an actionable
        message, not propagate FeatureUnavailable."""
        # ``builtins`` is the documented handle on the real import
        # function; ``__builtins__`` is a CPython implementation detail
        # that is a dict or a module depending on how the file is loaded.
        import builtins

        from agent import azure_identity_adapter as _adapter
        from tools.lazy_deps import FeatureUnavailable

        real_import = builtins.__import__

        def _fake_import(name, *args, **kwargs):
            # Fail only azure.identity (and its submodules); every other
            # import goes to the real machinery.
            if name == "azure.identity" or name.startswith("azure.identity."):
                raise ImportError("simulated missing azure-identity")
            return real_import(name, *args, **kwargs)

        def _fake_ensure(*args, **kwargs):
            # Simulate lazy installs being disabled.
            raise FeatureUnavailable(
                "provider.azure_identity",
                ("azure-identity==1.25.3",),
                "lazy installs disabled (test simulation)",
            )

        # The adapter calls ``ensure`` from ``tools.lazy_deps``; intercept
        # it by patching the actual symbol path.
        monkeypatch.setattr("tools.lazy_deps.ensure", _fake_ensure)
        # Force the import path to fail.
        monkeypatch.setattr(builtins, "__import__", _fake_import)

        with pytest.raises(ImportError) as exc_info:
            _adapter._require_azure_identity()

        msg = str(exc_info.value)
        assert "azure-identity" in msg
        # Case-insensitive: matches both "Foundry" and "foundry".
        assert "foundry" in msg.lower()
# ---------------------------------------------------------------------------
# has_azure_identity_credentials probe (timeout-bounded)
# ---------------------------------------------------------------------------
class TestHasAzureIdentityCredentials:
    """Outcomes of the timeout-bounded ``has_azure_identity_credentials`` probe."""

    def test_returns_false_when_package_missing_and_install_disabled(self, monkeypatch):
        """No package and ``allow_install=False`` → the probe reports False."""
        from agent import azure_identity_adapter as _adapter

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)

        outcome = _adapter.has_azure_identity_credentials(
            "https://x/.default", allow_install=False,
        )
        assert outcome is False

    def test_lazy_install_triggered_when_package_missing(self, monkeypatch):
        """With allow_install=True (default), the probe must trigger the
        lazy-install path before bailing — otherwise the wizard's
        ``preflight`` would silently fail for fresh installs that haven't
        run ``pip install azure-identity`` yet."""
        from agent import azure_identity_adapter as _adapter

        install_calls = []

        def _token_getter(token):
            return lambda scope: SimpleNamespace(token=token, expires_on=0)

        def _fake_install():
            install_calls.append(True)
            # After install, pretend the package is now importable.
            monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
            return SimpleNamespace(
                DefaultAzureCredential=lambda **kw: SimpleNamespace(
                    kwargs=kw,
                    get_token=_token_getter("post-install-jwt"),
                ),
                get_bearer_token_provider=lambda c, s: lambda: "x",
            )

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
        monkeypatch.setattr(_adapter, "_require_azure_identity", _fake_install)
        # Supply a credential factory so the probe can proceed after install.
        monkeypatch.setattr(
            _adapter,
            "build_credential",
            lambda config: SimpleNamespace(get_token=_token_getter("probe-jwt")),
        )

        outcome = _adapter.has_azure_identity_credentials(
            "https://x/.default", timeout_seconds=0.5,
        )

        assert install_calls, (
            "has_azure_identity_credentials must trigger lazy install "
            "before bailing"
        )
        assert outcome is True

    def test_returns_true_on_successful_token_mint(self, fake_azure_identity):
        """With the fake SDK minting a token, the probe reports True."""
        from agent.azure_identity_adapter import has_azure_identity_credentials

        outcome = has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5)
        assert outcome is True

    def test_returns_false_when_get_token_raises(self, monkeypatch):
        """An exception from ``get_token`` is reported as False."""
        from agent import azure_identity_adapter as _adapter

        class _ExhaustedCredential:
            def get_token(self, scope):
                raise RuntimeError("simulated chain exhaustion")

        monkeypatch.setattr(_adapter, "build_credential", lambda _config: _ExhaustedCredential())
        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)

        outcome = _adapter.has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5)
        assert outcome is False

    def test_returns_false_on_timeout(self, monkeypatch):
        """Slow IMDS / network must time out, not hang the caller."""
        import threading
        from agent import azure_identity_adapter as _adapter

        unblock = threading.Event()

        class _StalledCredential:
            def get_token(self, scope):
                # Blocks far longer than the probe timeout; the adapter
                # must give up via its bounded probe instead of waiting.
                unblock.wait(timeout=10)
                return SimpleNamespace(token="never-returned", expires_on=0)

        monkeypatch.setattr(_adapter, "build_credential", lambda _config: _StalledCredential())
        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
        try:
            outcome = _adapter.has_azure_identity_credentials(
                "https://x/.default", timeout_seconds=0.1
            )
            assert outcome is False
        finally:
            # Release the stalled get_token call so it does not linger.
            unblock.set()
# ---------------------------------------------------------------------------
# describe_active_credential — used by hermes doctor + hermes auth
# ---------------------------------------------------------------------------
class TestDescribeActiveCredential:
    """Structure of the report returned by ``describe_active_credential``."""

    def test_reports_not_installed(self, monkeypatch):
        """Missing package with installs disallowed → error plus a pip hint."""
        from agent import azure_identity_adapter as _adapter

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)

        report = _adapter.describe_active_credential(
            scope="https://x/.default", allow_install=False,
        )

        assert report["ok"] is False
        assert "not installed" in report["error"].lower()
        assert "pip install" in report["hint"].lower()

    def test_reports_install_failure(self, monkeypatch):
        """When lazy install is allowed but fails (e.g. lazy installs
        disabled), the diagnostic surfaces the failure as the error."""
        from agent import azure_identity_adapter as _adapter

        def _fail_install():
            raise ImportError("simulated: lazy installs disabled")

        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
        monkeypatch.setattr(_adapter, "_require_azure_identity", _fail_install)

        report = _adapter.describe_active_credential(
            scope="https://x/.default", allow_install=True,
        )

        assert report["ok"] is False
        assert "lazy installs disabled" in report["error"]
        assert "lazy" in report["hint"].lower()

    def test_reports_env_sources_for_managed_identity(self, fake_azure_identity, monkeypatch):
        """``IDENTITY_ENDPOINT`` in the environment is reported as a
        ManagedIdentity source."""
        from agent.azure_identity_adapter import describe_active_credential

        monkeypatch.setenv("IDENTITY_ENDPOINT", "http://169.254.169.254")

        report = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)

        assert report["ok"] is True
        env_sources = report.get("env_sources") or []
        assert any("ManagedIdentity" in source for source in env_sources)

    def test_reports_env_sources_for_workload_identity(self, fake_azure_identity, monkeypatch):
        """``AZURE_FEDERATED_TOKEN_FILE`` is reported as a WorkloadIdentity source."""
        from agent.azure_identity_adapter import describe_active_credential

        monkeypatch.setenv("AZURE_FEDERATED_TOKEN_FILE", "/var/secrets/azure/federated-token")

        report = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)

        env_sources = report.get("env_sources") or []
        assert any("WorkloadIdentity" in source for source in env_sources)

    def test_reports_env_sources_for_service_principal(self, fake_azure_identity, monkeypatch):
        """Tenant, client id and client secret env vars together are
        reported as an EnvironmentCredential source."""
        from agent.azure_identity_adapter import describe_active_credential

        service_principal_env = {
            "AZURE_TENANT_ID": "t",
            "AZURE_CLIENT_ID": "c",
            "AZURE_CLIENT_SECRET": "s",
        }
        for var_name, var_value in service_principal_env.items():
            monkeypatch.setenv(var_name, var_value)

        report = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)

        env_sources = report.get("env_sources") or []
        assert any("EnvironmentCredential" in source for source in env_sources)

    def test_reports_error_on_chain_failure(self, monkeypatch):
        """A ``get_token`` failure surfaces its message in ``error``."""
        from agent import azure_identity_adapter as _adapter

        class _FailingCredential:
            def get_token(self, scope):
                raise RuntimeError("auth failed")

        monkeypatch.setattr(_adapter, "build_credential", lambda _config: _FailingCredential())
        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)

        report = _adapter.describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)

        assert report["ok"] is False
        assert "auth failed" in report.get("error", "")

View file

@ -1,7 +1,7 @@
"""Tests for the 1M-context beta header on AWS Bedrock Claude models.
Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS
Bedrock (and Azure AI Foundry) that window is still gated behind the
Bedrock (and Microsoft Foundry) that window is still gated behind the
``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock
caps these models at 200K even though ``model_metadata.py`` advertises 1M.
@ -61,4 +61,3 @@ class TestBedrockContext1MBeta:
# Other common betas still present — no regression.
assert "interleaved-thinking-2025-05-14" in beta_header
assert "fine-grained-tool-streaming-2025-05-14" in beta_header

View file

@ -102,7 +102,7 @@ def test_detect_anthropic_path_wins_without_http():
def test_detect_openai_models_probe_success():
"""/models probe returning a model list → chat_completions."""
def _fake_get(url, api_key, timeout=6.0):
def _fake_get(url, api_key, timeout=6.0, **kwargs):
assert "key-abc" == api_key
return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))
@ -118,7 +118,7 @@ def test_detect_openai_models_probe_success():
def test_detect_openai_models_probe_empty_list_still_counts():
"""Endpoint returned OpenAI shape but no models → still chat_completions."""
def _fake_get(url, api_key, timeout=6.0):
def _fake_get(url, api_key, timeout=6.0, **kwargs):
return 200, {"object": "list", "data": []}
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
@ -132,7 +132,7 @@ def test_detect_openai_models_probe_empty_list_still_counts():
def test_detect_falls_back_to_anthropic_probe():
"""/models fails but Anthropic Messages probe succeeds."""
def _fake_get(url, api_key, timeout=6.0):
def _fake_get(url, api_key, timeout=6.0, **kwargs):
return 401, None # /models forbidden
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \
@ -164,7 +164,7 @@ def test_probe_openai_models_tries_multiple_api_versions():
"""First call (no api-version) fails, api-version fallback succeeds."""
calls = []
def _fake_get(url, api_key, timeout=6.0):
def _fake_get(url, api_key, timeout=6.0, **kwargs):
calls.append(url)
if "api-version" not in url:
return 404, None

View file

@ -0,0 +1,404 @@
"""Tests for Azure Foundry Entra ID runtime resolution.
Covers the contract introduced by the Microsoft Entra ID auth change for
``azure-foundry``:
* ``_resolve_azure_foundry_runtime`` returns a callable ``api_key`` for
``model.auth_mode = entra_id`` on OpenAI-style endpoints.
* Anthropic-style endpoints with ``auth_mode = entra_id`` return the same
callable runtime credential as OpenAI-style endpoints.
* The legacy ``api_key`` path is unchanged when ``auth_mode`` is absent
or set to ``api_key``.
* Explicit ``--api-key`` overrides at runtime still work in entra mode
(escape hatch for one-off testing).
* ``model.entra.scope`` propagates to the token-provider config; Azure
identity selection stays in standard AZURE_* env vars.
* ``_get_azure_foundry_auth_status`` is structural — it never mints a
token (verified by checking the credential cache untouched).
* ``has_usable_secret`` for ``AZURE_FOUNDRY_API_KEY`` is irrelevant
when ``auth_mode == entra_id``.
"""
from __future__ import annotations
import sys
from types import SimpleNamespace
from typing import cast
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture(autouse=True)
def _reset_credential_cache():
    """Reset the adapter's credential cache before and after every test."""
    from agent.azure_identity_adapter import reset_credential_cache as _clear_cache

    _clear_cache()  # start each test from a clean cache
    yield
    _clear_cache()  # do not leak this test's state into the next one
@pytest.fixture
def fake_azure_identity(monkeypatch):
    """Install a stand-in ``azure.identity`` module and record how it is used.

    Keeps the Azure SDK out of these tests so they run in CI without the
    package installed. The returned dict is updated on every
    ``get_bearer_token_provider`` call:

    * ``scope`` -- the scope the provider was requested for.
    * ``kwargs`` -- keyword arguments the credential was constructed with.
    * ``credential_count`` -- number of ``get_bearer_token_provider`` calls.
    """
    from agent import azure_identity_adapter as _adapter

    last = {"scope": None, "kwargs": None, "credential_count": 0}

    def _fake_default_credential(**kw):
        # Remember the constructor kwargs so tests can assert on them later.
        return SimpleNamespace(
            kwargs=kw,
            get_token=lambda scope: SimpleNamespace(token="fake", expires_on=9999999999),
        )

    def _fake_get_bearer_token_provider(credential, scope):
        last["scope"] = scope
        last["kwargs"] = credential.kwargs
        last["credential_count"] = cast(int, last["credential_count"]) + 1
        # The "token provider" is a zero-argument callable, like the real SDK's.
        return lambda: f"jwt-for-{scope}"

    fake_module = SimpleNamespace(
        DefaultAzureCredential=_fake_default_credential,
        get_bearer_token_provider=_fake_get_bearer_token_provider,
    )
    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module)
    monkeypatch.setitem(sys.modules, "azure.identity", fake_module)
    return last
# ---------------------------------------------------------------------------
# _resolve_azure_foundry_runtime: entra_id branch
# ---------------------------------------------------------------------------
class TestResolveAzureFoundryRuntimeEntra:
    """Exercise ``_resolve_azure_foundry_runtime`` with ``auth_mode = entra_id``."""

    @staticmethod
    def _resolve(model_cfg, **extra):
        """Run the resolver for ``azure-foundry`` against *model_cfg*.

        The import stays inside the helper so it still happens at call time.
        """
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

        return _resolve_azure_foundry_runtime(
            requested_provider="azure-foundry",
            model_cfg=model_cfg,
            **extra,
        )

    def test_returns_callable_api_key_for_entra(self, fake_azure_identity):
        """Entra mode yields a callable ``api_key`` with ``source == "entra_id"``."""
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://my-resource.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "default": "gpt-4o",  # stays on chat_completions (no codex auto-upgrade)
        }
        runtime = self._resolve(cfg)

        assert runtime["provider"] == "azure-foundry"
        assert runtime["auth_mode"] == "entra_id"
        assert runtime["api_mode"] == "chat_completions"
        assert callable(runtime["api_key"])
        assert runtime["source"] == "entra_id"

    def test_entra_inherits_codex_responses_for_gpt5_family(self, fake_azure_identity):
        """GPT-5.x / o-series / codex models on Azure are Responses-API-only.

        The runtime auto-upgrades ``api_mode`` regardless of auth mode -- the
        same behaviour as the static-key path (see
        ``hermes_cli/models.py::azure_foundry_model_api_mode``).
        """
        cfg = {
            "provider": "azure-foundry",
            "base_url": "https://my-resource.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "default": "gpt-5.4",
        }
        runtime = self._resolve(cfg)

        # GPT-5.x is upgraded to codex_responses -- the Entra path inherits that.
        assert runtime["api_mode"] == "codex_responses"
        assert callable(runtime["api_key"])
        assert runtime["auth_mode"] == "entra_id"

    def test_entra_propagates_scope_only(self, fake_azure_identity):
        """Only ``model.entra.scope`` reaches the Azure SDK.

        Identity selection (client ID, tenant, authority, service principal
        secret, federated token file) is left to the standard ``AZURE_*`` env
        vars read by azure-identity directly. Legacy ``model.entra.client_id``
        / ``tenant_id`` / ``authority`` keys in config.yaml are silently
        ignored, so none of them may show up as credential kwargs.
        """
        entra_section = {
            "scope": "https://custom.example/.default",
            "client_id": "client-uuid",
            # Legacy keys must not crash -- they are accepted in from_dict
            # but never propagated to the SDK.
            "tenant_id": "legacy-tenant",
            "authority": "https://login.microsoftonline.us",
        }
        self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://my-resource.services.ai.azure.com/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "entra": entra_section,
        })

        assert fake_azure_identity["scope"] == "https://custom.example/.default"
        credential_kwargs = fake_azure_identity["kwargs"]
        assert "managed_identity_client_id" not in credential_kwargs
        assert "workload_identity_client_id" not in credential_kwargs
        assert "interactive_browser_tenant_id" not in credential_kwargs
        assert "authority" not in credential_kwargs

    def test_entra_default_scope_when_unset(self, fake_azure_identity):
        """Without ``model.entra.scope`` the default inference scope is used.

        The expected value is ``SCOPE_AI_AZURE_DEFAULT``
        (``https://ai.azure.com/.default`` per the original test notes),
        whether the endpoint is ``*.openai.azure.com`` or
        ``*.services.ai.azure.com``. Those notes also record that the
        ``cognitiveservices.azure.com`` scope is the control-plane audience
        and is rejected for inference by newer resources.
        """
        from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT

        self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
        })

        assert fake_azure_identity["scope"] == SCOPE_AI_AZURE_DEFAULT

    def test_entra_scope_override_wins(self, fake_azure_identity):
        """Users on sovereign clouds / unusual tenants can override the scope
        through ``model.entra.scope``."""
        custom_scope = "https://cognitiveservices.azure.com/.default"
        self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "entra": {"scope": custom_scope},
        })

        assert fake_azure_identity["scope"] == custom_scope

    def test_entra_with_anthropic_messages_is_supported(self, fake_azure_identity):
        """Entra ID works for Anthropic-style Azure Foundry endpoints too.

        The runtime returns a callable ``api_key``. Per the original test
        notes, :func:`agent.anthropic_adapter.build_anthropic_client` detects
        the callable downstream and installs an httpx event hook that mints a
        fresh bearer JWT per request (the Anthropic SDK does not accept a
        callable ``auth_token`` natively).
        """
        runtime = self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.services.ai.azure.com/anthropic",
            "api_mode": "anthropic_messages",
            "auth_mode": "entra_id",
            "default": "claude-sonnet-4-5",
        })

        assert runtime["provider"] == "azure-foundry"
        assert runtime["auth_mode"] == "entra_id"
        assert runtime["api_mode"] == "anthropic_messages"
        # A real callable, never a string masquerading as one.
        token_source = runtime["api_key"]
        assert callable(token_source)
        assert not isinstance(token_source, str)

    def test_entra_with_explicit_api_key_uses_string_escape_hatch(self, fake_azure_identity):
        """An explicit API key overrides the Entra path.

        Passing ``--api-key`` on the CLI lets a user debug a single request
        with a static key without editing config.yaml.
        """
        runtime = self._resolve(
            {
                "provider": "azure-foundry",
                "base_url": "https://r.openai.azure.com/openai/v1",
                "api_mode": "chat_completions",
                "auth_mode": "entra_id",
            },
            explicit_api_key="explicit-string-key",
        )

        assert runtime["api_key"] == "explicit-string-key"
        assert runtime["auth_mode"] == "api_key"
        assert runtime["source"] == "explicit"

    def test_entra_runtime_dict_keeps_only_scope_override(self, fake_azure_identity):
        """The runtime's ``entra`` entry carries the scope override and
        nothing else from the config's ``entra`` section."""
        runtime = self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
            "entra": {
                "scope": "https://custom.example/.default",
                "client_id": "legacy-client",
            },
        })

        assert runtime["entra"] == {"scope": "https://custom.example/.default"}
# ---------------------------------------------------------------------------
# _resolve_azure_foundry_runtime: legacy api_key branch (regression)
# ---------------------------------------------------------------------------
class TestResolveAzureFoundryRuntimeApiKey:
    """Regression coverage for the static ``AZURE_FOUNDRY_API_KEY`` path."""

    @staticmethod
    def _resolve(model_cfg):
        """Run the resolver for ``azure-foundry`` against *model_cfg*."""
        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime

        return _resolve_azure_foundry_runtime(
            requested_provider="azure-foundry",
            model_cfg=model_cfg,
        )

    def test_default_auth_mode_uses_static_key(self, monkeypatch):
        """With no ``auth_mode`` configured the env-var key is used."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")

        runtime = self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
        })

        assert runtime["api_key"] == "sk-azure-static-key"
        assert runtime["auth_mode"] == "api_key"
        assert "entra" not in runtime  # only present in entra mode

    def test_explicit_auth_mode_api_key(self, monkeypatch):
        """``auth_mode = api_key`` behaves the same as leaving it unset."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-static")

        runtime = self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_mode": "chat_completions",
            "auth_mode": "api_key",
        })

        assert runtime["api_key"] == "sk-static"
        assert runtime["auth_mode"] == "api_key"

    def test_anthropic_messages_strips_v1_suffix(self, monkeypatch):
        """A trailing ``/v1`` is removed from Anthropic-style base URLs."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "k")

        runtime = self._resolve({
            "provider": "azure-foundry",
            "base_url": "https://r.services.ai.azure.com/anthropic/v1",
            "api_mode": "anthropic_messages",
        })

        assert runtime["base_url"] == "https://r.services.ai.azure.com/anthropic"

    def test_missing_api_key_raises_with_entra_hint(self, monkeypatch):
        """A missing key raises ``AuthError`` naming the env var and ``entra_id``."""
        from hermes_cli.auth import AuthError

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)

        with pytest.raises(AuthError) as exc_info:
            self._resolve({
                "provider": "azure-foundry",
                "base_url": "https://r.openai.azure.com/openai/v1",
                "api_mode": "chat_completions",
            })

        message = str(exc_info.value)
        assert "AZURE_FOUNDRY_API_KEY" in message
        # Surface the Entra alternative so users discover the keyless path.
        assert "entra_id" in message
# ---------------------------------------------------------------------------
# _get_azure_foundry_auth_status (auth.py) — never mints a token
# ---------------------------------------------------------------------------
class TestAzureFoundryAuthStatus:
    """``hermes_cli.auth._get_azure_foundry_auth_status`` for both auth modes."""

    @staticmethod
    def _use_model_config(monkeypatch, model):
        """Make ``hermes_cli.config.load_config`` return ``{"model": model}``.

        A fresh copy is built on every call so the code under test cannot
        leak mutations from one ``load_config()`` call into the next.
        """
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"model": dict(model)},
        )

    def test_entra_status_does_not_mint_token(self, monkeypatch):
        """Structural check: ``logged_in=True`` comes from "package importable
        plus config", never from loading the SDK to mint a token."""
        from hermes_cli import auth as _auth

        self._use_model_config(monkeypatch, {
            "provider": "azure-foundry",
            "auth_mode": "entra_id",
            "base_url": "https://r.openai.azure.com/openai/v1",
        })
        monkeypatch.setattr(
            "agent.azure_identity_adapter.has_azure_identity_installed",
            lambda: True,
        )

        # Tripwire: relying on "the SDK is not installed, so minting would
        # raise" stops protecting us as soon as azure-identity is present in
        # the environment. Fail loudly if the status path asks for the SDK.
        def _sdk_must_not_be_loaded(*args, **kwargs):
            raise AssertionError(
                "auth status must be structural: it must not load "
                "azure-identity or mint a token"
            )

        monkeypatch.setattr(
            "agent.azure_identity_adapter._require_azure_identity",
            _sdk_must_not_be_loaded,
        )

        info = _auth._get_azure_foundry_auth_status()

        assert info["logged_in"] is True
        assert info["auth_mode"] == "entra_id"
        assert info["azure_identity_installed"] is True
        assert info["scope"].endswith("/.default")

    def test_entra_status_reports_missing_package(self, monkeypatch):
        """Without azure-identity the status is logged-out and the hint names
        the missing package."""
        from hermes_cli import auth as _auth

        self._use_model_config(monkeypatch, {
            "provider": "azure-foundry",
            "auth_mode": "entra_id",
            "base_url": "https://r.openai.azure.com/openai/v1",
        })
        monkeypatch.setattr(
            "agent.azure_identity_adapter.has_azure_identity_installed",
            lambda: False,
        )

        info = _auth._get_azure_foundry_auth_status()

        assert info["logged_in"] is False
        assert info["azure_identity_installed"] is False
        assert "azure-identity" in info["hint"]

    def test_api_key_status_uses_env_var(self, monkeypatch):
        """In ``api_key`` mode a set ``AZURE_FOUNDRY_API_KEY`` means logged in."""
        from hermes_cli import auth as _auth

        self._use_model_config(monkeypatch, {
            "provider": "azure-foundry",
            "auth_mode": "api_key",
            "base_url": "https://r.openai.azure.com/openai/v1",
        })
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-real-key-xxx")

        info = _auth._get_azure_foundry_auth_status()

        assert info["auth_mode"] == "api_key"
        assert info["logged_in"] is True

    def test_api_key_status_false_when_missing(self, monkeypatch):
        """In ``api_key`` mode an unset ``AZURE_FOUNDRY_API_KEY`` means logged out."""
        from hermes_cli import auth as _auth

        self._use_model_config(monkeypatch, {
            "provider": "azure-foundry",
            "auth_mode": "api_key",
        })
        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)

        info = _auth._get_azure_foundry_auth_status()

        assert info["logged_in"] is False

View file

@ -0,0 +1,375 @@
"""Tests that callable api_key (Entra ID bearer provider) flows through
the agent stack without coercion.
The OpenAI Python SDK accepts ``api_key: str | None | Callable[[], str]``,
and ``azure-identity``'s ``get_bearer_token_provider`` returns a callable.
Hermes preserves the callable end-to-end so the SDK refreshes tokens
transparently. This file pins the contract at the high-risk seams the
rubber-duck audit identified.
Covered:
* ``_create_openai_client`` passes a callable ``api_key`` straight
through to ``openai.OpenAI(...)``.
* ``_normalize_main_runtime`` preserves the callable so auxiliary
clients inherit Entra auth.
* ``_truncate_token`` (dashboard preview) renders ``"<entra-id-bearer>"``
instead of ``"<function ...>"`` and never invokes the callable.
* ``run_agent.py`` masked-banner path renders the Entra placeholder
and never tries to slice/len the callable.
* Serialization scrub: dumping a runtime dict via ``json.dumps`` with
a callable api_key raises (default behaviour) — this guards against
silently leaking ``"<function ...>"`` strings into event logs.
* ``batch_runner`` strips the callable from the worker config dict
so multiprocessing.Pool can pickle the rest.
"""
from __future__ import annotations
import json
from types import SimpleNamespace
from typing import cast
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# OpenAI SDK construction preserves the callable
# ---------------------------------------------------------------------------
class TestCreateOpenAIClientCallable:
    """``AIAgent._create_openai_client`` must hand a callable ``api_key`` to
    ``openai.OpenAI(...)`` exactly as received."""

    def test_callable_api_key_passed_to_openai_constructor(self, monkeypatch):
        """Build the smallest usable ``AIAgent`` and check that the OpenAI
        constructor receives the very same callable object."""
        seen = {}

        def fake_openai(**kwargs):
            seen["kwargs"] = kwargs
            return MagicMock(api_key=kwargs.get("api_key"))

        # Patch the module-level OpenAI proxy used by ``_create_openai_client``.
        monkeypatch.setattr("run_agent.OpenAI", fake_openai)

        from run_agent import AIAgent

        endpoint = "https://r.openai.azure.com/openai/v1"

        # ``__new__`` skips ``__init__`` so we avoid the full construction
        # cost; set only what _create_openai_client / _client_log_context read.
        agent = AIAgent.__new__(AIAgent)
        agent.provider = "azure-foundry"
        agent.model = "gpt-4o"
        agent.base_url = endpoint
        agent._client_kwargs = {}

        def token_provider():
            return "fresh-jwt"

        client = agent._create_openai_client(
            {"api_key": token_provider, "base_url": endpoint},
            reason="test",
            shared=False,
        )

        # The OpenAI constructor must receive the *callable*, not a string.
        forwarded = seen["kwargs"]["api_key"]
        assert callable(forwarded)
        assert not isinstance(forwarded, str)
        assert forwarded is token_provider, (
            "_create_openai_client must not wrap or coerce the callable"
        )
        assert client is not None
# ---------------------------------------------------------------------------
# Auxiliary runtime preserves the callable
# ---------------------------------------------------------------------------
class TestNormalizeMainRuntimePreservesCallable:
    """``_normalize_main_runtime`` must keep a callable ``api_key`` on the
    runtime dict so compression / vision / embedding / title-gen clients
    inherit Entra ID auth from the main agent."""

    def test_callable_api_key_survives_normalization(self):
        """The callable and ``auth_mode`` both come back untouched."""
        from agent.auxiliary_client import _normalize_main_runtime

        def provider():
            return "jwt"

        raw_runtime = {
            "provider": "azure-foundry",
            "model": "gpt-4o",
            "base_url": "https://r.openai.azure.com/openai/v1",
            "api_key": provider,
            "api_mode": "chat_completions",
            "auth_mode": "entra_id",
        }
        normalized = _normalize_main_runtime(raw_runtime)

        assert normalized["api_key"] is provider
        assert normalized["auth_mode"] == "entra_id"

    def test_string_api_key_still_works(self):
        """A plain string key is passed through as before."""
        from agent.auxiliary_client import _normalize_main_runtime

        raw_runtime = {"provider": "azure-foundry", "api_key": "sk-static"}
        normalized = _normalize_main_runtime(raw_runtime)

        assert normalized["api_key"] == "sk-static"

    def test_normalization_drops_empty_string_but_preserves_callable(self):
        """Empty-string fields are dropped; a callable is kept even if it
        would mint an empty token, because normalization never invokes it."""
        from agent.auxiliary_client import _normalize_main_runtime

        def provider():
            return ""

        raw_runtime = {
            "provider": "azure-foundry",
            "api_key": provider,
            "model": "",
        }
        normalized = _normalize_main_runtime(raw_runtime)

        assert normalized["api_key"] is provider
        assert "model" not in normalized

    def test_unknown_field_dropped(self):
        """Keys outside the allowlist are removed; ``auth_mode`` is allowed."""
        from agent.auxiliary_client import _normalize_main_runtime, _MAIN_RUNTIME_FIELDS

        raw_runtime = {
            "provider": "azure-foundry",
            "api_key": "k",
            "secret_field_we_dont_want": "leak",
        }
        normalized = _normalize_main_runtime(raw_runtime)

        assert "secret_field_we_dont_want" not in normalized
        # auth_mode IS in the field allowlist (rubber-duck blocker fix).
        assert "auth_mode" in _MAIN_RUNTIME_FIELDS
# ---------------------------------------------------------------------------
# Display surfaces never invoke the callable
# ---------------------------------------------------------------------------
class TestTruncateTokenCallable:
    """``hermes_cli.web_server._truncate_token`` for callables, JWTs and
    empty values."""

    def test_callable_returns_placeholder(self):
        """The dashboard preview must render the Entra placeholder, NOT
        ``"<function ...>"``, and must not call the provider."""
        from hermes_cli.web_server import _truncate_token

        calls = []

        def provider():
            calls.append("invoked")
            return "should-not-appear-in-ui"

        # ``cast`` only satisfies the type checker; the callable is passed as-is.
        rendered = _truncate_token(cast(str | None, provider))

        assert rendered == "<entra-id-bearer>"
        assert len(calls) == 0

    def test_string_jwt_still_truncated_to_signature_tail(self):
        """JWT shape header.payload.signature → only the signature tail shows."""
        from hermes_cli.web_server import _truncate_token

        preview = _truncate_token("aaaa.bbbb.cccccccsig", visible=4)
        assert preview == "…csig"

    def test_empty_returns_empty(self):
        """``None`` and ``""`` both render as the empty string."""
        from hermes_cli.web_server import _truncate_token

        for empty_value in (None, ""):
            assert _truncate_token(empty_value) == ""
# ---------------------------------------------------------------------------
# Serialization scrub — runtime dicts with callables must NOT silently
# JSON-encode as ``"<function ...>"`` (would leak garbage into events).
# ---------------------------------------------------------------------------
class TestRuntimeDictSerializationGuard:
    """Pin ``json.dumps`` failing loudly on a callable ``api_key``."""

    def test_json_dumps_default_str_does_not_silently_stringify_callable(self):
        """A runtime dict holding a callable ``api_key`` must not serialize.

        Plain ``json.dumps`` has to raise ``TypeError`` rather than emit
        ``"<function provider at 0x...>"``. Pinning the loud failure here
        means a future ``json.dumps(..., default=str)`` over a runtime dict
        shows up as a regression against this test.
        """
        def provider():
            return "jwt"

        runtime = dict(
            provider="azure-foundry",
            api_key=provider,
            auth_mode="entra_id",
        )

        with pytest.raises(TypeError):
            json.dumps(runtime)
# ---------------------------------------------------------------------------
# batch_runner strips callables from the worker config dict
# ---------------------------------------------------------------------------
class TestBatchRunnerCallableHandling:
    """Contract guards for how ``batch_runner`` treats a callable ``api_key``."""

    def test_callable_api_key_stripped_from_worker_config(self):
        """A callable ``api_key`` must become ``None`` in the worker config.

        ``multiprocessing.Pool`` has to pickle the worker config dict, so a
        callable token provider cannot travel with it. Running BatchRunner
        end-to-end would spawn subprocesses, so this test re-executes the
        predicate that the companion source-pinning test asserts is present
        in ``batch_runner.py``.
        """
        def worker_api_key_for(api_key):
            # Same gate as production: callables (that are not str) -> None.
            return None if (callable(api_key) and not isinstance(api_key, str)) else api_key

        def provider():
            return "jwt"

        assert worker_api_key_for(provider) is None, (
            "BatchRunner must replace callable api_key with None so "
            "multiprocessing.Pool can pickle the worker config"
        )
        # And a string passes through unchanged.
        assert worker_api_key_for("sk-static") == "sk-static"

    def test_batch_runner_source_uses_the_correct_predicate(self):
        """Pin the predicate string in ``batch_runner.py``.

        Reading the source rather than importing avoids spinning up the full
        BatchRunner. The file is decoded as UTF-8 explicitly so the result
        does not depend on the platform's locale encoding.
        """
        from pathlib import Path

        batch_runner_path = Path(__file__).resolve().parents[2] / "batch_runner.py"
        src = batch_runner_path.read_text(encoding="utf-8")

        assert "callable(self.api_key) and not isinstance(self.api_key, str)" in src, (
            "BatchRunner.api_key callable check changed — update test or "
            "verify the new predicate still routes Entra token providers "
            "to the worker-rebuild path."
        )
# ---------------------------------------------------------------------------
# Inline masked-banner / display sites (callable-aware)
# ---------------------------------------------------------------------------
class TestCliEnsureRuntimeCredentialsCallable:
    """Regression guard for ``cli.py:_ensure_runtime_credentials``.

    That function previously treated a callable ``api_key`` as "not a
    string" and overwrote it with the ``"no-key-required"`` placeholder,
    which was then sent as ``Authorization: Bearer no-key-required`` and
    rejected by Azure with a 401. It was gated by
    ``not isinstance(api_key, str)`` rather than the ``callable(...)`` check
    used elsewhere.

    The test checks the source pattern instead of building a real
    ``HermesCLI`` instance: the predicate is the load-bearing fix and does
    not depend on the surrounding orchestration code.
    """

    def test_callable_predicate_present_in_cli_runtime_validation(self):
        """``cli.py`` must contain the ``_is_callable_provider`` guard."""
        from pathlib import Path

        cli_path = Path(__file__).resolve().parents[2] / "cli.py"
        # Decode explicitly: the default is the locale encoding, which can
        # fail or mis-decode non-ASCII source on non-UTF-8 platforms.
        src = cli_path.read_text(encoding="utf-8")

        # The fix introduces ``_is_callable_provider`` which gates the
        # string-only check so callable token providers survive.
        assert "_is_callable_provider = callable(api_key)" in src, (
            "cli.py:_ensure_runtime_credentials must preserve a callable "
            "api_key (Entra ID bearer provider). Without the guard, the "
            "callable is stringified to 'no-key-required' and Azure 401s."
        )
class TestInlinedDisplayMasks:
    """Source-level guards for the masked-credential display sites.

    The display sites are inlined per-site (no shared helper). Each one
    short-circuits on callables and prints a static label, then falls
    through to its own context-appropriate string mask. These tests read the
    relevant source files and assert that the guard text is present.
    """

    @staticmethod
    def _read_repo_source(*relative_parts):
        """Return the text of a file addressed relative to the repo root.

        The root is three directory levels above this test file. The file is
        decoded as UTF-8 explicitly: the assertions below look for literals
        containing non-ASCII characters (for example the key emoji), and
        ``Path.read_text()`` would otherwise use the locale encoding.
        """
        from pathlib import Path

        repo_root = Path(__file__).resolve().parents[2]
        return repo_root.joinpath(*relative_parts).read_text(encoding="utf-8")

    def test_run_agent_banner_uses_is_token_provider_guard(self):
        """Both banner sites in ``agent/agent_init.py`` must be guarded.

        The OpenAI and Anthropic client init paths each print a masked
        banner; both need an ``is_token_provider`` guard so a callable Entra
        ID provider doesn't crash ``len(api_key)``.
        """
        src = self._read_repo_source("agent", "agent_init.py")

        assert src.count("is_token_provider(") >= 2, (
            "agent/agent_init.py must guard BOTH masked-banner paths "
            "(chat_completions and anthropic_messages) with "
            "is_token_provider()."
        )
        assert src.count('"🔑 Using credentials: Microsoft Entra ID"') >= 2, (
            "agent/agent_init.py banner blocks should print a static "
            "'Microsoft Entra ID' label for callable api_keys — no "
            "placeholder plumbing, no describe-mask fallback."
        )

    def test_cli_show_config_handles_callable(self):
        """``cli.HermesCLI.show_config`` must not slice a callable key.

        It previously did ``self.api_key[-4:]`` / ``len(self.api_key)``,
        which crashes on callable Entra ID providers. The source must now
        contain the ``is_token_provider`` guard and the static label.
        """
        src = self._read_repo_source("cli.py")

        assert "is_token_provider(self.api_key)" in src, (
            "cli.HermesCLI.show_config must guard self.api_key via "
            "is_token_provider so callable Entra ID providers don't "
            "crash /config."
        )
        assert '"Microsoft Entra ID"' in src, (
            "cli.HermesCLI.show_config must print the static "
            "'Microsoft Entra ID' label (matching run_agent banners) "
            "instead of attempting to slice the callable."
        )

    def test_mask_api_key_for_logs_handles_callable(self):
        """``run_agent._mask_api_key_for_logs`` must short-circuit callables.

        The helper is called from the request-dump JSON path. It must accept
        a callable directly and return the placeholder rather than crashing
        on ``len(callable)``.
        """
        src = self._read_repo_source("run_agent.py")

        has_callable_guard = "if callable(key) and not isinstance(key, str):" in src
        has_placeholder = '"<entra-id-bearer>"' in src
        assert has_callable_guard and has_placeholder, (
            "run_agent._mask_api_key_for_logs must short-circuit for "
            "callable api_keys to avoid len(callable) crashes in "
            "request-dump paths."
        )

    def test_anthropic_401_diagnostic_handles_callable(self):
        """The Anthropic 401 diagnostic must have an Entra ID branch.

        The diagnostic lives in ``agent/conversation_loop.py``. It used to do
        ``key[:12]`` on ``self._anthropic_api_key``; in Entra ID +
        Anthropic-style mode that value is a callable and slicing crashes.
        """
        src = self._read_repo_source("agent", "conversation_loop.py")

        assert "Microsoft Entra ID (httpx event hook)" in src, (
            "agent/conversation_loop.py Anthropic 401 diagnostic must "
            "surface a Microsoft Entra ID branch before slicing the "
            "key prefix."
        )

View file

@ -81,6 +81,11 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
"provider.anthropic": ("anthropic==0.87.0",), # CVE-2026-34450, CVE-2026-34452
# AWS Bedrock provider
"provider.bedrock": ("boto3==1.42.89",),
# Microsoft Foundry — Entra ID auth (managed identity, workload identity,
# service principal, az login, VS Code, azd, PowerShell). Only loaded
# when model.auth_mode=entra_id is selected; key-based azure-foundry
# users never pay this import.
"provider.azure_identity": ("azure-identity==1.25.3",),
# ─── Web search backends ───────────────────────────────────────────────
"search.exa": ("exa-py==2.10.2",),

View file

@ -1087,7 +1087,16 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
current_provider = str(runtime.get("provider", "") or "")
current_model = _resolve_model()
current_base_url = str(runtime.get("base_url", "") or "")
current_api_key = str(runtime.get("api_key", "") or "")
# Preserve a callable api_key (Azure Foundry Entra ID bearer
# provider) unchanged — ``str(...)`` would produce
# ``"<function ...>"`` and poison downstream switch_model
# validation. Match the agent-present branch's behavior at the
# top of this block.
_runtime_key = runtime.get("api_key", "")
if callable(_runtime_key) and not isinstance(_runtime_key, str):
current_api_key = _runtime_key
else:
current_api_key = str(_runtime_key or "")
# Load user-defined providers so switch_model can resolve named custom
# endpoints (e.g. "ollama-launch") and validate against saved model lists.

61
uv.lock generated
View file

@ -500,6 +500,35 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" },
]
[[package]]
name = "azure-core"
version = "1.41.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "requests" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a6/f3/b416179e408990df5db0d516283022dde0f5d0111d98c1a848e41853e81c/azure_core-1.41.0.tar.gz", hash = "sha256:f46ff5dfcd230f25cf1c19e8a34b8dc08a337b2503e268bb600a16c00db8ad5a", size = 381042, upload-time = "2026-05-07T23:30:54.302Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/db/325c6d7312d2200251c52323878281045aaffcb5586612296484e4280eaa/azure_core-1.41.0-py3-none-any.whl", hash = "sha256:522b4011e8180b1a3dcd2024396a4e7fe9ac37fb8597db47163d230b5efe892d", size = 220920, upload-time = "2026-05-07T23:30:56.357Z" },
]
[[package]]
name = "azure-identity"
version = "1.25.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "azure-core" },
{ name = "cryptography" },
{ name = "msal" },
{ name = "msal-extensions" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", size = 192138, upload-time = "2026-03-13T01:12:22.951Z" },
]
[[package]]
name = "base58"
version = "2.1.1"
@ -1618,6 +1647,9 @@ all = [
anthropic = [
{ name = "anthropic" },
]
azure-identity = [
{ name = "azure-identity" },
]
bedrock = [
{ name = "boto3" },
]
@ -1767,6 +1799,7 @@ requires-dist = [
{ name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" },
{ name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" },
{ name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" },
{ name = "azure-identity", marker = "extra == 'azure-identity'", specifier = "==1.25.3" },
{ name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" },
{ name = "brotlicffi", marker = "extra == 'messaging'", specifier = "==1.2.0.1" },
{ name = "croniter", specifier = "==6.0.0" },
@ -1855,7 +1888,7 @@ requires-dist = [
{ name = "vercel", marker = "extra == 'vercel'", specifier = "==0.5.7" },
{ name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" },
]
provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "azure-identity", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
[[package]]
name = "hf-xet"
@ -2421,6 +2454,32 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
]
[[package]]
name = "msal"
version = "1.36.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "pyjwt", extra = ["crypto"] },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/de/cb/b02b0f748ac668922364ccb3c3bff5b71628a05f5adfec2ba2a5c3031483/msal-1.36.0.tar.gz", hash = "sha256:3f6a4af2b036b476a4215111c4297b4e6e236ed186cd804faefba23e4990978b", size = 174217, upload-time = "2026-04-09T10:20:33.525Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/d3/414d1f0a5f6f4fe5313c2b002c54e78a3332970feb3f5fed14237aa17064/msal-1.36.0-py3-none-any.whl", hash = "sha256:36ecac30e2ff4322d956029aabce3c82301c29f0acb1ad89b94edcabb0e58ec4", size = 121547, upload-time = "2026-04-09T10:20:32.336Z" },
]
[[package]]
name = "msal-extensions"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "msal" },
]
sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" },
]
[[package]]
name = "msgpack"
version = "1.1.2"

View file

@ -1,23 +1,23 @@
---
sidebar_position: 15
title: "Azure AI Foundry"
description: "Use Hermes Agent with Azure AI Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models"
title: "Microsoft Foundry"
description: "Use Hermes Agent with Microsoft Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models"
---
# Azure AI Foundry
# Microsoft Foundry
Hermes Agent supports Azure AI Foundry (and Azure OpenAI) as a first-class provider. A single Azure resource can host models with two different wire formats:
Hermes Agent's `azure-foundry` provider supports Microsoft Foundry (formerly Azure AI Foundry) and Azure OpenAI. A single Foundry resource can host models with two different wire formats:
- **OpenAI-style** — `POST /v1/chat/completions` on endpoints like `https://<resource>.openai.azure.com/openai/v1`. Used for GPT-4.x, GPT-5.x, Llama, Mistral, and most open-weight models.
- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://<resource>.services.ai.azure.com/anthropic`. Used when Azure Foundry serves Claude models via the Anthropic Messages API format.
- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://<resource>.services.ai.azure.com/anthropic`. Used when Microsoft Foundry serves Claude models via the Anthropic Messages API format.
The setup wizard probes your endpoint and auto-detects which transport it uses, which deployments are available, and each model's context length.
## Prerequisites
- An Azure AI Foundry or Azure OpenAI resource with at least one deployment
- An API key for that resource (available in the Azure Portal under "Keys and Endpoint")
- A Microsoft Foundry or Azure OpenAI resource with at least one deployment
- The deployment's endpoint URL
- **Either** an API key (from the Azure Portal under "Keys and Endpoint") **or** the **Azure AI User** RBAC role on the Foundry resource if you plan to use Microsoft Entra ID (the keyless path Microsoft recommends). Some tenants may show the role as **Foundry User** during Microsoft's rename rollout.
## Quick Start
@ -25,20 +25,172 @@ The setup wizard probes your endpoint and auto-detects which transport it uses,
hermes model
# → Select "Azure Foundry"
# → Enter your endpoint URL
# → Enter your API key
# → Choose Authentication:
# 1. API key
# 2. Microsoft Entra ID (managed identity / workload identity / az login)
# → (Entra) Hermes probes DefaultAzureCredential; on success it never asks for a key
# → (API key) Enter your API key
# Hermes probes the endpoint and auto-detects transport + models
# → Pick a model from the list (or type a deployment name manually)
```
The wizard will:
1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Azure Foundry Claude routes.
1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Microsoft Foundry Claude routes.
2. **Probe `GET <base>/models`** — if the endpoint returns an OpenAI-shaped model list, Hermes switches to `chat_completions` and prefills a picker with the returned deployment IDs.
3. **Probe Anthropic Messages shape** — fallback for endpoints that do not expose `/models` but do accept the Anthropic Messages format.
4. **Fall back to manual entry** — private/gated endpoints that reject every probe still work; you pick the API mode and type a deployment name by hand.
Context length for the chosen model is resolved via Hermes' standard metadata chain (`models.dev`, provider metadata, and hardcoded family fallbacks) and stored in `config.yaml` so the model can size its own context window correctly.
## Microsoft Entra ID (keyless, RBAC) — recommended
Microsoft recommends [keyless authentication with Microsoft Entra ID](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) for production Foundry workloads. Hermes supports Entra ID for **both** API surfaces:
- **OpenAI-style** (`api_mode: chat_completions` / `codex_responses`) — GPT-4/5, Llama, Mistral, DeepSeek, etc.
- **Anthropic-style** (`api_mode: anthropic_messages`) — Claude models on Microsoft Foundry.
Foundry's RBAC is per-resource (`Azure AI User` grants both surfaces; some tenants may display `Foundry User`) and Microsoft documents the same inference scope (`https://ai.azure.com/.default`) for both. Under the hood:
- OpenAI-style uses the OpenAI Python SDK's native callable `api_key=` contract — the SDK mints a fresh JWT per request automatically.
- Anthropic-style uses an `httpx.Client` with a request event hook installed by `agent.azure_identity_adapter.build_bearer_http_client`, because the Anthropic SDK does not accept callable `auth_token` natively. The hook rewrites `Authorization: Bearer <fresh-jwt>` per outbound request. Same Microsoft RBAC, same Foundry scope — the SDK contract is the only difference.
### Why use Entra ID?
- No long-lived API keys to rotate or revoke.
- RBAC-driven access — grant or remove `Azure AI User` on the Foundry resource, no config rewrite needed.
- Access and audit logs are segmented by assignee instead of all callers sharing one static key.
- Single auth surface for Azure VMs, AKS pods, App Service, Functions, Container Apps, and Foundry Agent Service via managed identity.
- Workload identity and service-principal flows for CI/CD pipelines.
### One-time setup (Azure side)
1. In the Azure Portal, open your Foundry resource → **Access control (IAM)** → **Add → Add role assignment**.
2. Pick the **Azure AI User** role (or **Foundry User** if your tenant has the renamed role).
3. Assign it to:
- **Your user account** for local development with `az login`.
- **A managed identity or workload identity** for Azure-hosted compute (recommended for production).
- **A Foundry Agent Service hosted agent's agent identity** when Hermes runs inside a hosted agent.
- **A service principal** for CI/CD pipelines when workload identity is not available.
4. Wait ~5 minutes for the role to propagate.
Azure CLI equivalent:
```bash
az role assignment create \
--assignee <principal-or-agent-identity-client-id> \
--role "Azure AI User" \
--scope <foundry-resource-id>
```
### One-time setup (Hermes side)
```bash
hermes model
# → Select "Azure Foundry"
# → Enter your endpoint URL
# → Authentication: 2 (Microsoft Entra ID)
# → (optional) user-assigned managed identity client ID
# → (optional) Azure tenant ID
# → Hermes probes DefaultAzureCredential() and reports which inner
# credential succeeded (e.g. AzureCliCredential, ManagedIdentityCredential)
```
The wizard runs a bounded preflight probe (10 s timeout). On failure it offers to "save anyway, validate later" — useful when configuring on a machine that doesn't yet have credentials but will at runtime (e.g. preparing config for a managed-identity deployment).
`azure-identity` is installed automatically on first use via Hermes' lazy-install path. To pre-install:
```bash
pip install azure-identity
```
### Configuration written to `config.yaml`
```yaml
model:
provider: azure-foundry
base_url: https://my-resource.openai.azure.com/openai/v1
api_mode: chat_completions
auth_mode: entra_id
default: gpt-4o
context_length: 128000
entra:
scope: https://ai.azure.com/.default # only when overriding the default
```
Hermes only manages one Entra-specific knob in `config.yaml`:
- **`scope`** — the OAuth resource scope. Defaults to Microsoft's documented inference scope (`https://ai.azure.com/.default`). Override only if your resource was provisioned against a non-standard audience.
Everything else (tenant, service principal secret, federated token file, sovereign cloud authority, broker preferences) is read by `azure-identity` directly from the standard `AZURE_*` environment variables — see the [credential resolution order](#credential-resolution-order) below. Set those in `~/.hermes/.env` or your deployment environment, exactly as Microsoft's SDK reference describes.
Hermes itself writes no secrets to `~/.hermes/.env` for Entra mode; the only `AZURE_*` values there are ones you add yourself. `azure-identity` caches tokens in-process (and, where available, in your OS keychain / `~/.IdentityService`).
### Credential resolution order
`azure-identity`'s `DefaultAzureCredential` walks this chain on each token request, stopping at the first credential that returns a token:
1. **Environment credential** — `AZURE_TENANT_ID` + `AZURE_CLIENT_ID` + `AZURE_CLIENT_SECRET` (or `AZURE_CLIENT_CERTIFICATE_PATH` in place of the secret).
2. **Workload Identity** — `AZURE_FEDERATED_TOKEN_FILE` (AKS federated tokens / OIDC).
3. **Managed Identity** — IMDS endpoint (`169.254.169.254`) for virtual machines; `IDENTITY_ENDPOINT` for App Service / Functions / Container Apps. Foundry Agent Service hosted agents use the hosted agent's agent identity.
4. **Visual Studio Code** — Azure account extension.
5. **Azure CLI** — `az login` session.
6. **Azure Developer CLI** — `azd auth login`.
7. **Azure PowerShell** — `Connect-AzAccount`.
8. **Broker** (Windows / WSL only) — Web Account Manager.
Interactive browser credential is excluded by default for unattended Hermes runs; use Azure CLI, Azure Developer CLI, managed identity, workload identity, or service principal credentials instead.
### Deployment patterns
**Local development:**
```bash
az login
hermes model # pick Azure Foundry → Entra ID
hermes # uses your az login token
```
**Azure VM / Functions / App Service / Container Apps (system-assigned managed identity):**
1. Enable system-assigned identity on the compute resource.
2. Grant the identity `Azure AI User` (or `Foundry User`) on the Foundry resource.
3. Set `model.auth_mode: entra_id` in config.yaml — no env vars needed.
**Azure VM / Functions / App Service / Container Apps (user-assigned managed identity):**
- Set `AZURE_CLIENT_ID` to the user-assigned identity's client ID so `DefaultAzureCredential` picks the right one.
**Foundry Agent Service hosted agent:**
- Create the hosted agent and grant that agent's identity `Azure AI User` (or `Foundry User`) on the Foundry resource. Hermes uses `ManagedIdentityCredential` from inside the hosted agent; role assignment belongs on the agent identity, not just the parent project or your user.
**AKS Workload Identity (replaces AAD Pod Identity):**
- Annotate the pod's service account with the workload identity client ID.
- The pod's federated token file is auto-detected via `AZURE_FEDERATED_TOKEN_FILE`.
- `model.auth_mode: entra_id` works without further config changes.
**Service principal in CI:**
- Set `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET` in the runner env.
**Sovereign clouds (Government, China):**
- Export `AZURE_AUTHORITY_HOST` (e.g. `https://login.microsoftonline.us` for Azure Government, `https://login.partner.microsoftonline.cn` for Azure China). `azure-identity` reads it directly.
### Health checks
`hermes doctor` runs a 10 s probe against `DefaultAzureCredential` when `model.auth_mode` is set to `entra_id`, and reports which inner credential won along with the credential sources it detected (env vars present, managed identity endpoint reachable, etc.).
`hermes auth` shows a structured status block:
```
azure-foundry (Microsoft Entra ID):
Endpoint: https://my-resource.openai.azure.com/openai/v1
Scope: https://ai.azure.com/.default
Status: configured; live token probe is skipped here
```
### Limitations
- **Anthropic-style endpoints use an httpx event hook.** The Anthropic Python SDK does not accept a callable `auth_token` natively (≤ 0.86.0). Hermes installs a request event hook on a custom `httpx.Client` that mints a fresh JWT per outbound request and rewrites `Authorization: Bearer <jwt>`. This is functionally equivalent to the OpenAI SDK's native `Callable[[], str]` contract but adds one indirection layer. If the Anthropic SDK adds first-class callable-auth support in a future release, Hermes will switch to it transparently.
- **Batch jobs and `multiprocessing.Pool`.** The Entra token provider is a closure that cannot be pickled across process boundaries. `batch_runner.py` automatically drops the callable from the worker config and lets each worker process rebuild its own provider from `config.yaml` — no user action required, but each worker pays one chain walk at startup.
- **No bearer JWT persistence in `auth.json`.** Hermes does not duplicate `azure-identity`'s internal token cache; cold starts walk the credential chain on first inference.
## Configuration (written to `config.yaml`)
After running the wizard you'll see something like this:
@ -72,11 +224,11 @@ model:
Important behaviour:
- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Azure Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`.
- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Microsoft Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`.
- **`max_completion_tokens` is used automatically.** Azure OpenAI (like direct OpenAI) requires `max_completion_tokens` for gpt-4o, o-series, and gpt-5.x models. Hermes sends the right parameter based on the endpoint.
- **Pre-v1 endpoints that require `api-version`.** If you have a legacy base URL like `https://<resource>.openai.azure.com/openai?api-version=2025-04-01-preview`, Hermes extracts the query string and forwards it via `default_query` on every request (the OpenAI SDK otherwise drops it when joining paths).
## Anthropic-style endpoints (Claude via Azure Foundry)
## Anthropic-style endpoints (Claude via Microsoft Foundry)
For Claude deployments, use the Anthropic-style route:
@ -96,7 +248,7 @@ Important behaviour:
## Alternative: `provider: anthropic` + Azure base URL
If you already have `provider: anthropic` configured and just want to point it at Azure AI Foundry for Claude, you can skip the `azure-foundry` provider entirely:
If you already have `provider: anthropic` configured and just want to point it at Microsoft Foundry for Claude, you can skip the `azure-foundry` provider entirely:
```yaml
model:
@ -117,7 +269,7 @@ Azure does **not** expose a pure-API-key endpoint to list your *deployed* model
What Hermes can do:
- Azure OpenAI v1 endpoints (`<resource>.openai.azure.com/openai/v1`) expose `GET /models` with the resource's **available** model catalog. Hermes uses this list to prefill the model picker.
- Azure Foundry `/anthropic` routes: detected via URL path, model name entered manually.
- Microsoft Foundry `/anthropic` routes: detected via URL path, model name entered manually.
- Private / firewalled endpoints: manual entry with a friendly "couldn't probe" message.
You can always type a deployment name directly — Hermes does not validate against the returned list.
@ -126,9 +278,18 @@ You can always type a deployment name directly — Hermes does not validate agai
| Variable | Purpose |
|----------|---------|
| `AZURE_FOUNDRY_API_KEY` | Primary API key for Azure AI Foundry / Azure OpenAI |
| `AZURE_FOUNDRY_API_KEY` | Primary API key for Microsoft Foundry / Azure OpenAI (api_key mode) |
| `AZURE_FOUNDRY_BASE_URL` | Endpoint URL (set via `hermes model`; env var is used as a fallback) |
| `AZURE_ANTHROPIC_KEY` | Used by `provider: anthropic` + Azure base URL (alternative to `ANTHROPIC_API_KEY`) |
| `AZURE_TENANT_ID` | Entra ID tenant for service-principal flows |
| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) |
| `AZURE_CLIENT_SECRET` | Service principal secret |
| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal cert (alternative to secret) |
| `AZURE_FEDERATED_TOKEN_FILE` | Workload Identity federated token path (AKS) |
| `AZURE_AUTHORITY_HOST` | Sovereign cloud authority host override |
| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead |
The Azure SDK reads the `AZURE_*` env vars directly. Hermes never inspects them other than to report which sources are present in `hermes doctor` output.
## Troubleshooting
@ -150,8 +311,21 @@ model:
api_mode: anthropic_messages # or chat_completions
```
**Entra ID: "credential chain exhausted" or 401 Unauthorized after switching to `auth_mode: entra_id`.**
- Run `az login` to refresh your developer session (the cached token may have expired).
- Verify the `Azure AI User` (or `Foundry User`) role assignment took effect: `az role assignment list --assignee <user-or-identity-id>` should list it on your Foundry resource. Role propagation can take up to 5 minutes.
- For user-assigned managed identities, double-check `AZURE_CLIENT_ID` matches the identity attached to the compute resource.
- Run `hermes doctor` — the Azure Entra probe reports whether token acquisition succeeded and includes a remediation hint.
**Entra ID: wizard preflight hangs or times out.**
The 10 s preflight is a soft check. Choose "Save anyway and validate later" and run `hermes doctor` after deploying to the target environment. Common causes include an unreachable token service or stale local login state — prefer workload identity in CI, set `AZURE_TENANT_ID`+`AZURE_CLIENT_ID`+`AZURE_CLIENT_SECRET` when using a service principal, or run `az login` for local development.
**401 on Anthropic-style endpoint with Entra ID.**
Verify the same `Azure AI User` (or `Foundry User`) role is assigned on the Foundry resource (it covers both `/openai/v1` and `/anthropic` paths). If the OpenAI-style probe works during the wizard but `claude-*` requests fail at runtime, the most common cause is a stale `model.entra.scope` left over from an earlier wizard run — delete the `entra.scope` line from `config.yaml` so the runtime falls back to the default `https://ai.azure.com/.default` scope.
## Related
- [Environment variables](/docs/reference/environment-variables)
- [Configuration](/docs/user-guide/configuration)
- [AWS Bedrock](/docs/guides/aws-bedrock) — the other major cloud provider integration
- [Microsoft: Configure Entra ID for Foundry](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) — upstream documentation for the keyless path

View file

@ -50,9 +50,16 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `XIAOMI_BASE_URL` | Override Xiaomi MiMo base URL (default: `https://api.xiaomimimo.com/v1`) |
| `TOKENHUB_API_KEY` | Tencent TokenHub API key ([tokenhub.tencentmaas.com](https://tokenhub.tencentmaas.com)) |
| `TOKENHUB_BASE_URL` | Override Tencent TokenHub base URL (default: `https://tokenhub.tencentmaas.com/v1`) |
| `AZURE_FOUNDRY_API_KEY` | Azure AI Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)) |
| `AZURE_FOUNDRY_BASE_URL` | Azure AI Foundry endpoint URL (e.g. `https://<resource>.openai.azure.com/openai/v1` for OpenAI-style, or `https://<resource>.services.ai.azure.com/anthropic` for Anthropic-style) |
| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at an Azure Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) |
| `AZURE_FOUNDRY_API_KEY` | Microsoft Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)). Not needed when `model.auth_mode: entra_id` |
| `AZURE_FOUNDRY_BASE_URL` | Microsoft Foundry endpoint URL (e.g. `https://<resource>.openai.azure.com/openai/v1` for OpenAI-style, or `https://<resource>.services.ai.azure.com/anthropic` for Anthropic-style) |
| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at a Microsoft Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) |
| `AZURE_TENANT_ID` | Entra ID tenant ID (service-principal flows; honored by `azure-identity` when `model.auth_mode: entra_id`) |
| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) |
| `AZURE_CLIENT_SECRET` | Service principal secret used by `EnvironmentCredential` |
| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal certificate (alternative to `AZURE_CLIENT_SECRET`) |
| `AZURE_FEDERATED_TOKEN_FILE` | Federated token file path for AKS Workload Identity / OIDC flows |
| `AZURE_AUTHORITY_HOST` | Sovereign-cloud authority override (e.g. `https://login.microsoftonline.us` for Azure Government). See the "Sovereign clouds" entry under [Deployment patterns in the Azure Foundry guide](/docs/guides/azure-foundry#deployment-patterns) |
| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead and do not set these |
| `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) |
| `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) |
| `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) |

View file

@ -81,7 +81,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| Kimi / Moonshot (China) | `kimi-coding-cn` | `KIMI_CN_API_KEY` |
| StepFun | `stepfun` | `STEPFUN_API_KEY` |
| Tencent TokenHub | `tencent-tokenhub` | `TOKENHUB_API_KEY` |
| Azure AI Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
| Microsoft Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
| LM Studio (local) | `lmstudio` | `LM_API_KEY` (or none for local) + `LM_BASE_URL` |
| Hugging Face | `huggingface` | `HF_TOKEN` |
| Custom endpoint | `custom` | `base_url` + `key_env` (see below) |