diff --git a/acp_adapter/auth.py b/acp_adapter/auth.py index 7b2556fd062..b04a7b7b408 100644 --- a/acp_adapter/auth.py +++ b/acp_adapter/auth.py @@ -9,13 +9,24 @@ TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup" def detect_provider() -> Optional[str]: - """Resolve the active Hermes runtime provider, or None if unavailable.""" + """Resolve the active Hermes runtime provider, or None if unavailable. + + Treats a ``Callable`` ``api_key`` (Azure Foundry Entra ID bearer + token provider — see :mod:`agent.azure_identity_adapter`) as a valid + credential. Without this, ACP sessions for Entra-configured Foundry + deployments silently default to ``"openrouter"`` and the ACP auth + handshake rejects the legitimate provider. + """ try: from hermes_cli.runtime_provider import resolve_runtime_provider runtime = resolve_runtime_provider() api_key = runtime.get("api_key") provider = runtime.get("provider") - if isinstance(api_key, str) and api_key.strip() and isinstance(provider, str) and provider.strip(): + if not isinstance(provider, str) or not provider.strip(): + return None + is_string_key = isinstance(api_key, str) and api_key.strip() + is_callable_provider = callable(api_key) and not isinstance(api_key, str) + if is_string_key or is_callable_provider: return provider.strip().lower() except Exception: return None diff --git a/agent/agent_init.py b/agent/agent_init.py index df8fe229e7b..71b04e3e540 100644 --- a/agent/agent_init.py +++ b/agent/agent_init.py @@ -560,7 +560,16 @@ def init_agent( agent._client_kwargs = {} if not agent.quiet_mode: print(f"🤖 AI Agent initialized with model: {agent.model} (Anthropic native)") - if effective_key and len(effective_key) > 12: + # ``effective_key`` may be a callable Entra ID bearer + # provider for Azure Foundry anthropic_messages mode. + # The Anthropic adapter installs an httpx event hook + # that mints a fresh JWT per request — we never + # invoke or inspect the callable in the banner. + from agent.azure_identity_adapter import is_token_provider + + if is_token_provider(effective_key): + print("🔑 Using credentials: Microsoft Entra ID") + elif isinstance(effective_key, str) and len(effective_key) > 12: print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}") elif agent.api_mode == "bedrock_converse": # AWS Bedrock — uses boto3 directly, no OpenAI client needed. @@ -764,12 +773,19 @@ def init_agent( print(f"🤖 AI Agent initialized with model: {agent.model}") if base_url: print(f"🔗 Using custom base URL: {base_url}") - # Always show API key info (masked) for debugging auth issues + # ``api_key`` may be a callable Entra ID bearer + # provider (Azure Foundry). The OpenAI SDK mints a + # fresh JWT per request internally — the banner + # never invokes or inspects the callable. + from agent.azure_identity_adapter import is_token_provider + key_used = client_kwargs.get("api_key", "none") - if key_used and key_used != "dummy-key" and len(key_used) > 12: + if is_token_provider(key_used): + print("🔑 Using credentials: Microsoft Entra ID") + elif isinstance(key_used, str) and key_used and key_used != "dummy-key" and len(key_used) > 12: print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}") else: - print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')") + print("⚠️ Warning: API key appears invalid or missing") except Exception as e: raise RuntimeError(f"Failed to initialize OpenAI client: {e}") @@ -1395,7 +1411,12 @@ def init_agent( _ra().logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override) if agent._ollama_num_ctx is None and agent.base_url and is_local_endpoint(agent.base_url): try: - _detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=agent.api_key or "") + # ``agent.api_key`` may be a callable (Entra token provider). + # Ollama detection makes a manual HTTP request and expects a + # string — Azure Foundry isn't a local endpoint so this branch + # never fires for Entra, but guard defensively. + _key_for_ollama = agent.api_key if isinstance(agent.api_key, str) else "" + _detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=_key_for_ollama or "") if _detected and _detected > 0: agent._ollama_num_ctx = _detected except Exception as exc: diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index 61551a65dc9..8e5b81ce27a 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1390,10 +1390,16 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo _sm_custom_providers = get_compatible_custom_providers(_sm_cfg) except Exception: _sm_custom_providers = None + # ``agent.api_key`` may be a callable (Azure Foundry Entra ID + # token provider). ``get_model_context_length`` expects a + # string for its live-probe paths; for Foundry the context + # length normally resolves via config or static catalogs and + # never hits a probe, but coerce to empty string defensively. + _ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else "" new_context_length = get_model_context_length( agent.model, base_url=agent.base_url, - api_key=agent.api_key, + api_key=_ctx_api_key, provider=agent.provider, config_context_length=getattr(agent, "_config_context_length", None), custom_providers=_sm_custom_providers, @@ -1402,7 +1408,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo model=agent.model, context_length=new_context_length, base_url=agent.base_url, - api_key=getattr(agent, "api_key", ""), + api_key=agent.api_key, # context_compressor forwards to call_llm; callable preserved provider=agent.provider, api_mode=agent.api_mode, ) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index de9b7dd586c..c94d664a434 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -17,6 +17,7 @@ import os import platform import subprocess from pathlib import Path +from urllib.parse import urlparse from hermes_constants import get_hermes_home from typing import Any, Dict, List, Optional, Tuple @@ -364,7 +365,7 @@ def _normalize_base_url_text(base_url) -> str: def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool: """Return True for non-Anthropic endpoints using the Anthropic Messages API. - Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate + Third-party proxies (Microsoft Foundry, AWS Bedrock, self-hosted) authenticate with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth detection should be skipped for these endpoints. """ @@ -508,6 +509,29 @@ def _is_minimax_anthropic_endpoint(base_url: str | None) -> bool: ) +def _is_azure_anthropic_endpoint(base_url: str | None) -> bool: + """Return True for Azure-hosted Anthropic Messages endpoints. + + Covers both the modern Foundry host family (``*.services.ai.azure.*``) + and the legacy Azure OpenAI host family (``*.openai.azure.*``) when + serving Anthropic's ``/anthropic`` route. Used to opt-in those hosts + to the ``api-version`` query-param plumbing required by Azure. + + Intentionally avoids a finite allow-list of TLD suffixes so it works + across sovereign / private Azure clouds. + """ + normalized = _normalize_base_url_text(base_url) + if not normalized: + return False + parsed = urlparse(normalized) + host = (parsed.hostname or "").lower().rstrip(".") + path = (parsed.path or "").lower() + host_padded = f".{host}." + is_foundry_host = ".services.ai.azure." in host_padded + is_legacy_azoai_host = ".openai.azure." in host_padded + return (is_foundry_host or is_legacy_azoai_host) and "/anthropic" in path + + def _common_betas_for_base_url( base_url: str | None, *, @@ -523,7 +547,7 @@ def _common_betas_for_base_url( The ``context-1m-2025-08-07`` beta is not sent to native Anthropic by default because some subscriptions reject it. Add it only for endpoint - families that still require it for 1M context, currently Azure AI Foundry. + families that still require it for 1M context, currently Microsoft Foundry. Bedrock uses its own client helper below and opts in explicitly. ``drop_context_1m_beta=True`` strips the 1M-context beta from any path that @@ -540,8 +564,81 @@ def _common_betas_for_base_url( return betas +def _build_anthropic_client_with_bearer_hook( + token_provider, + base_url: str = None, + timeout: float = None, + *, + drop_context_1m_beta: bool = False, +): + """Anthropic-on-Foundry Entra ID variant of :func:`build_anthropic_client`. + + Anthropic SDK 0.86.0 stores ``api_key`` / ``auth_token`` as static + strings; there is no callable-token contract. To get per-request + bearer refresh (Microsoft's documented Foundry pattern), we hand + the SDK a custom ``httpx.Client`` whose request event hook mints a + fresh JWT from the Entra credential chain and rewrites + ``Authorization: Bearer `` on every outbound request. The SDK + ignores its own auth logic when ``http_client`` is provided (the + hook strips any pre-set Authorization). + + The placeholder ``auth_token`` is required because the SDK raises + ``AnthropicError`` at construction if neither ``api_key`` nor + ``auth_token`` is set — but the hook overrides it per-request so + the placeholder value never reaches Azure. + """ + _anthropic_sdk = _get_anthropic_sdk() + if _anthropic_sdk is None: + raise ImportError( + "The 'anthropic' package is required for Azure Foundry Anthropic-style " + "endpoints with Entra ID auth. Install with: pip install 'anthropic>=0.39.0'" + ) + + normalize_proxy_env_vars() + + from httpx import Timeout + from agent.azure_identity_adapter import build_bearer_http_client + + _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0 + timeout_obj = Timeout(timeout=float(_read_timeout), connect=10.0) + + # Strip any trailing /v1 — the Anthropic SDK appends /v1/messages. + normalized_base_url = _normalize_base_url_text(base_url) + if normalized_base_url: + import re as _re + normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/")) + + http_client = build_bearer_http_client(token_provider, timeout=timeout_obj) + + kwargs = { + "timeout": timeout_obj, + "http_client": http_client, + # The SDK requires *something* for api_key/auth_token. Our + # event hook overrides Authorization per request so this value + # is never sent. The sentinel string makes accidental leaks + # diagnosable in logs. + "auth_token": "entra-id-bearer-via-http-hook", + } + + if normalized_base_url: + if _is_azure_anthropic_endpoint(normalized_base_url) and "api-version" not in normalized_base_url: + kwargs["base_url"] = normalized_base_url + kwargs["default_query"] = {"api-version": "2025-04-15"} + else: + kwargs["base_url"] = normalized_base_url + + common_betas = _common_betas_for_base_url( + normalized_base_url, + drop_context_1m_beta=drop_context_1m_beta, + ) + if common_betas: + kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)} + + return _anthropic_sdk.Anthropic(**kwargs) + + def build_anthropic_client( - api_key: str, + api_key, base_url: str = None, timeout: float = None, *, @@ -549,6 +646,17 @@ def build_anthropic_client( ): """Create an Anthropic client, auto-detecting setup-tokens vs API keys. + ``api_key`` accepts either: + + * a static ``str`` — the historical contract for all key-based and + OAuth flows. + * a ``Callable[[], str]`` — an Entra ID bearer token provider from + :mod:`agent.azure_identity_adapter`. The Anthropic SDK itself + requires a static string, so when given a callable we construct + a custom ``httpx.Client`` with a request event hook that mints a + fresh JWT per outbound request and rewrites the ``Authorization`` + header. The SDK never sees the callable directly. + If *timeout* is provided it overrides the default 900s read timeout. The connect timeout stays at 10s. Callers pass this from the per-provider / per-model ``request_timeout_seconds`` config so Anthropic-native and @@ -570,6 +678,14 @@ def build_anthropic_client( "Install it with: pip install 'anthropic>=0.39.0'" ) + # Callable api_key → Entra ID bearer provider path. Delegated to a + # helper so the existing static-key code below stays unchanged. + if callable(api_key) and not isinstance(api_key, str): + return _build_anthropic_client_with_bearer_hook( + api_key, base_url, timeout, + drop_context_1m_beta=drop_context_1m_beta, + ) + normalize_proxy_env_vars() from httpx import Timeout @@ -584,8 +700,7 @@ def build_anthropic_client( # Pass it via default_query so the SDK appends it to every request URL # without corrupting the base_url (appending it directly produces # malformed paths like /anthropic?api-version=.../v1/messages). - _is_azure_endpoint = "azure.com" in normalized_base_url.lower() - if _is_azure_endpoint and "api-version" not in normalized_base_url: + if _is_azure_anthropic_endpoint(normalized_base_url) and "api-version" not in normalized_base_url: kwargs["base_url"] = normalized_base_url.rstrip("/") kwargs["default_query"] = {"api-version": "2025-04-15"} else: @@ -615,7 +730,7 @@ def build_anthropic_client( if common_betas: kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)} elif _is_third_party_anthropic_endpoint(base_url): - # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their + # Third-party proxies (Microsoft Foundry, AWS Bedrock, etc.) use their # own API keys with x-api-key auth. Skip OAuth detection — their keys # don't follow Anthropic's sk-ant-* prefix convention and would be # misclassified as OAuth tokens. @@ -1757,7 +1872,7 @@ def convert_messages_to_anthropic( # causing HTTP 400 "Invalid signature in thinking block". # # Signatures are Anthropic-proprietary. Third-party endpoints - # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate + # (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate # them and will reject them outright. When targeting a third-party # endpoint, strip ALL thinking/redacted_thinking blocks from every # assistant message — the third-party will generate its own @@ -2103,5 +2218,3 @@ def build_anthropic_kwargs( kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)} return kwargs - - diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5d44fe10869..807ed076875 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1902,6 +1902,120 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]: return CodexAuxiliaryClient(real_client, model), model +def _try_azure_foundry( + *, + model: Optional[str] = None, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, + api_mode: Optional[str] = None, +) -> Tuple[Optional[Any], Optional[str]]: + """Resolve an Azure Foundry auxiliary client via the runtime resolver. + + Mirrors the ``_try_anthropic`` / ``_try_nous`` shape but delegates to + :func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime` — + the same resolver the main agent uses — so: + + * ``auth_mode: api_key`` (default) gets the static + ``AZURE_FOUNDRY_API_KEY`` string. + * ``auth_mode: entra_id`` gets a callable bearer-token provider + (``Callable[[], str]`` from + :mod:`agent.azure_identity_adapter`). + * Per-model ``api_mode`` auto-routing for GPT-5.x / o-series / + codex models works. + * ``model.entra.{tenant_id,client_id,authority,scope}`` config + fields propagate. + * Non-default ``model.base_url`` overrides are honored. + + The OpenAI SDK accepts both shapes for ``api_key`` so the caller + can forward the result without coercion. + + Returns ``(client, model)`` or ``(None, None)`` on failure. + """ + try: + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + from hermes_cli.auth import AuthError + from hermes_cli.config import load_config + except ImportError: + return None, None + + try: + cfg = load_config() + model_cfg = cfg.get("model") if isinstance(cfg, dict) else {} + if not isinstance(model_cfg, dict): + model_cfg = {} + except Exception: + model_cfg = {} + + try: + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg=model_cfg, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + target_model=model, + ) + except AuthError as exc: + logger.debug("Auxiliary azure-foundry: %s", exc) + return None, None + except Exception as exc: + logger.debug("Auxiliary azure-foundry runtime error: %s", exc) + return None, None + + api_key = runtime.get("api_key") + base_url = str(runtime.get("base_url", "") or "") + runtime_api_mode = api_mode or runtime.get("api_mode") or "chat_completions" + + # Empty-string check on api_key here would be wrong for callable + # token providers (callables are truthy and non-empty by definition). + # Bail only when api_key is None / empty string. + _has_key = bool(api_key) if not callable(api_key) else True + if not _has_key or not base_url: + return None, None + + final_model = _normalize_resolved_model( + model or str(model_cfg.get("default") or ""), + "azure-foundry", + ) + if not final_model: + # No fallback aux model for Azure — the user must have a + # deployment name. Surface that as "no client" so the auto + # chain falls through to the next provider rather than 404ing. + logger.debug( + "Auxiliary azure-foundry: no model resolved (model=%r, default=%r)", + model, model_cfg.get("default"), + ) + return None, None + + # Azure pre-v1 endpoints sometimes carry api-version query params + # in the base URL; the OpenAI SDK drops them when joining paths, + # so lift them out and pass via default_query. + extra: Dict[str, Any] = {} + _clean_base, _dq = _extract_url_query_params(base_url) + if _dq: + extra["default_query"] = _dq + + client = OpenAI(api_key=api_key, base_url=_clean_base, **extra) + + if runtime_api_mode == "codex_responses": + # GPT-5.x / o-series / codex models on Azure Foundry are + # Responses-API-only — wrap so chat.completions.create() is + # translated to /responses behind the scenes. + return CodexAuxiliaryClient(client, final_model), final_model + + if runtime_api_mode == "anthropic_messages": + # Forward ``api_key`` verbatim — for static keys it's a string, + # for Entra ID it's a callable. ``_maybe_wrap_anthropic`` → + # ``build_anthropic_client`` detects the callable and installs + # the bearer-injecting httpx hook. + return _maybe_wrap_anthropic( + client, final_model, api_key, + base_url, runtime_api_mode, + ), final_model + + # chat_completions — return the plain OpenAI client. + return client, final_model + + def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]: try: from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token @@ -1957,20 +2071,31 @@ _AUTO_PROVIDER_LABELS = { "_resolve_api_key_provider": "api-key", } -_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode") +_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode") -def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]: - """Return a sanitized copy of a live main-runtime override.""" +def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, Any]: + """Return a sanitized copy of a live main-runtime override. + + Most fields are stripped strings. ``api_key`` may legitimately be a + zero-arg callable (Azure Foundry Entra ID token provider) — preserve + those as-is so auxiliary clients inherit the same authentication + surface as the main agent. The OpenAI SDK accepts ``Callable[[], str]`` + for ``api_key`` and calls it before every request. + """ if not isinstance(main_runtime, dict): return {} - normalized: Dict[str, str] = {} + normalized: Dict[str, Any] = {} for field in _MAIN_RUNTIME_FIELDS: value = main_runtime.get(field) + # Preserve a callable api_key (Entra ID bearer provider) unchanged. + if field == "api_key" and callable(value) and not isinstance(value, str): + normalized[field] = value + continue if isinstance(value, str) and value.strip(): normalized[field] = value.strip() provider = normalized.get("provider") - if provider: + if isinstance(provider, str): normalized["provider"] = provider.lower() return normalized @@ -2762,10 +2887,10 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins runtime = _normalize_main_runtime(main_runtime) runtime_provider = runtime.get("provider", "") - runtime_model = runtime.get("model", "") - runtime_base_url = runtime.get("base_url", "") + runtime_model = str(runtime.get("model") or "") + runtime_base_url = str(runtime.get("base_url") or "") runtime_api_key = runtime.get("api_key", "") - runtime_api_mode = runtime.get("api_mode", "") + runtime_api_mode = str(runtime.get("api_mode") or "") # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named # provider (not 'custom'). This catches the common "env poisoning" @@ -2793,8 +2918,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option # on aggregators (OpenRouter, Nous) who previously got routed to a # cheap provider-side default. Explicit per-task overrides set via # config.yaml (auxiliary..provider) still win over this. - main_provider = runtime_provider or _read_main_provider() - main_model = runtime_model or _read_main_model() + main_provider = str(runtime_provider or _read_main_provider() or "") + main_model = str(runtime_model or _read_main_model() or "") if (main_provider and main_model and main_provider not in {"auto", ""}): resolved_provider = main_provider @@ -3188,7 +3313,11 @@ def resolve_provider_client( if client is not None: final_model = _normalize_resolved_model(model or default, provider) _cbase = str(getattr(client, "base_url", "") or "") - _ckey = str(getattr(client, "api_key", "") or "") + # ``client.api_key`` may be a callable (Azure Foundry Entra + # bearer provider). Pass empty string for the wrapper-detection + # path — wrapping decisions are based on base_url + api_mode. + _raw_ckey = getattr(client, "api_key", "") + _ckey = "" if (callable(_raw_ckey) and not isinstance(_raw_ckey, str)) else str(_raw_ckey or "") client = _wrap_if_needed(client, final_model, _cbase, _ckey) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) @@ -3300,6 +3429,40 @@ def resolve_provider_client( except ImportError: pass + # ── Azure Foundry (delegates to runtime resolver for auth_mode-aware routing) ─ + # + # The generic PROVIDER_REGISTRY path below uses + # ``resolve_api_key_provider_credentials`` which only knows about the + # static ``AZURE_FOUNDRY_API_KEY`` env var. That misses two important + # cases for the ``azure-foundry`` provider: + # + # 1. ``model.auth_mode: entra_id`` — no static key exists; we need + # a callable bearer-token provider from ``azure_identity_adapter``. + # 2. Non-default ``model.base_url`` (Foundry projects path) — the + # env-var-only resolver doesn't apply config-yaml-driven URL + # overrides. + # + # Delegate to the same runtime resolver the main agent uses so + # auxiliary tasks (title generation, compression, vision, embedding, + # session search) inherit the user's full Azure config. + if provider == "azure-foundry": + client, default_model = _try_azure_foundry( + model=model, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + api_mode=api_mode, + ) + if client is None: + logger.warning( + "resolve_provider_client: azure-foundry requested but " + "runtime resolution failed (run: hermes doctor for " + "diagnostics)" + ) + return None, None + final_model = _normalize_resolved_model(model or default_model, provider) + return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode + else (client, final_model)) + # ── API-key providers from PROVIDER_REGISTRY ───────────────────── try: from hermes_cli.auth import ( diff --git a/agent/azure_identity_adapter.py b/agent/azure_identity_adapter.py new file mode 100644 index 00000000000..9506715019d --- /dev/null +++ b/agent/azure_identity_adapter.py @@ -0,0 +1,555 @@ +"""Microsoft Entra ID adapter for Microsoft Foundry. + +Provides keyless authentication for Microsoft Foundry deployments using the +`azure-identity` SDK's `DefaultAzureCredential` chain (env service principal +→ workload identity → managed identity → VS Code → Azure CLI → azd → +PowerShell → broker). + +Architecture mirrors `agent/bedrock_adapter.py`: + +* Lazy import. `azure-identity` is only loaded when ``model.auth_mode = + entra_id`` is selected. Users who stick with `AZURE_FOUNDRY_API_KEY` + never pay the import cost. +* SDK-callable contract. The public entry point ``build_token_provider`` + returns a zero-arg callable produced by ``get_bearer_token_provider`` — + this is exactly the value Microsoft's documented sample plugs into + ``OpenAI(api_key=token_provider, base_url=...)``. The OpenAI SDK calls + it before every request, so token refresh is transparent. +* Three explicit consumer-side helpers (display / cache / http-bearer) + rather than one generic "materialize" function — splitting them by + purpose prevents accidental token-minting in logging paths or token + leakage into cache keys / dashboard JSON. +* No persisted JWT. ``azure-identity`` caches in-process and (where + available) in the OS keychain or ``~/.IdentityService``. Hermes does + not duplicate that storage in ``auth.json``. + +Reference: https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id + +Requires: ``azure-identity`` (optional dependency — only needed when +``model.auth_mode = entra_id``). +""" + +from __future__ import annotations + +import functools +import logging +import os +import threading +from dataclasses import dataclass +from typing import Any, Callable, Dict, Optional + +logger = logging.getLogger(__name__) + +# Microsoft-documented scope for Foundry inference auth. Both the new +# Foundry portal and the legacy Azure OpenAI managed-identity docs use +# this scope for ALL Foundry endpoint shapes (*.openai.azure.com, +# *.services.ai.azure.com, *.ai.azure.com). The older control-plane +# scope ``https://cognitiveservices.azure.com/.default`` is for ARM +# resource management and is rejected for inference by newer +# resources — users with that requirement override via +# ``model.entra.scope`` in config.yaml. +SCOPE_AI_AZURE_DEFAULT = "https://ai.azure.com/.default" + +# --------------------------------------------------------------------------- +# Lazy SDK import — only loaded when the Entra path is actually used. +# --------------------------------------------------------------------------- + +_AZURE_IDENTITY_FEATURE = "provider.azure_identity" + + +def has_azure_identity_installed() -> bool: + """Return True if `azure-identity` can be imported right now. + + Cheap check — does not walk the credential chain. + """ + try: + import azure.identity # noqa: F401 + return True + except Exception: + return False + + +def _require_azure_identity(): + """Import ``azure.identity``, lazy-installing it if allowed. + + Raises ``ImportError`` with a clear actionable message when the + package is missing and lazy installs are disabled. + """ + try: + import azure.identity as _ai + return _ai + except ImportError: + try: + from tools.lazy_deps import ensure, FeatureUnavailable + except ImportError as exc: + raise ImportError( + "The 'azure-identity' package is required for Azure AI " + "Foundry Entra ID authentication. Install it with: " + "pip install azure-identity" + ) from exc + + try: + ensure(_AZURE_IDENTITY_FEATURE, prompt=False) + except FeatureUnavailable as exc: + raise ImportError( + "The 'azure-identity' package is required for Azure AI " + "Foundry Entra ID authentication. " + str(exc) + ) from exc + + # Retry import after lazy install. + import azure.identity as _ai # noqa: WPS440 + return _ai + + +def reset_credential_cache() -> None: + """Clear the cached ``DefaultAzureCredential``. Used by tests and + profile switches. + + Defensive against tests that ``monkeypatch.setattr`` over + ``build_credential`` with a plain (non-lru-cached) function — those + won't expose ``cache_clear()`` until pytest reverts the patch. + """ + cache_clear = getattr(build_credential, "cache_clear", None) + if callable(cache_clear): + cache_clear() + + +# --------------------------------------------------------------------------- +# Token-provider construction +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class EntraIdentityConfig: + """Serializable Entra ID config. + + Captures the Hermes-managed Entra knobs we need outside Azure SDK + environment configuration. Everything else + (tenant ID, service principal secret, federated token file, sovereign + cloud authority, etc.) flows through azure-identity's standard + ``AZURE_*`` env vars — see the Bedrock pattern in + ``hermes_cli/runtime_provider.py:1310-1377`` for the analogous + "let the SDK read env" approach. + + ``scope`` is Microsoft's documented Foundry inference audience. Almost + everyone uses the default; sovereign-cloud / non-standard tenants can + override via ``model.entra.scope``. Identity selection (user-assigned + managed identity, workload identity, service principal, tenant, authority) + stays in the standard Azure SDK env vars such as ``AZURE_CLIENT_ID``. + + ``exclude_interactive_browser`` is kept as an internal constructor knob + so probes stay non-interactive by default. It is not written by the setup + wizard. + + The dataclass is frozen so it's hashable for ``functools.lru_cache`` + keying, and serializable across multiprocessing boundaries (workers + rebuild the credential inside their own process). + """ + + scope: str = SCOPE_AI_AZURE_DEFAULT + exclude_interactive_browser: bool = True + + def __post_init__(self) -> None: + scope = str(self.scope or "").strip() or SCOPE_AI_AZURE_DEFAULT + object.__setattr__(self, "scope", scope) + + def to_dict(self) -> Dict[str, Any]: + return { + "scope": self.scope, + "exclude_interactive_browser": self.exclude_interactive_browser, + } + + @classmethod + def from_dict(cls, data: Optional[Dict[str, Any]], + *, default_scope: Optional[str] = None) -> "EntraIdentityConfig": + data = data or {} + scope = str(data.get("scope") or "").strip() or default_scope or SCOPE_AI_AZURE_DEFAULT + exclude_browser = bool(data.get("exclude_interactive_browser", True)) + return cls( + scope=scope, + exclude_interactive_browser=exclude_browser, + ) + + +def _build_default_credential(config: EntraIdentityConfig) -> Any: + """Construct a ``DefaultAzureCredential`` for ``config``. + + Only Hermes-selected knobs are passed as kwargs. Everything else + (tenant, service principal secret, federated token file, sovereign + cloud authority, etc.) is read by ``azure-identity`` from the + standard ``AZURE_*`` environment variables — see Microsoft's + documented credential resolution chain. Users configure those in + ``~/.hermes/.env`` or the deployment environment. + """ + ai = _require_azure_identity() + kwargs: Dict[str, Any] = {} + # SDK default is True (browser excluded); only pass when the user + # explicitly opts in to interactive browser auth. + if not config.exclude_interactive_browser: + kwargs["exclude_interactive_browser_credential"] = False + return ai.DefaultAzureCredential(**kwargs) + + +@functools.lru_cache(maxsize=1) +def build_credential(config: EntraIdentityConfig) -> Any: + """Return the cached ``DefaultAzureCredential`` for ``config``. + + Hermes processes use exactly one Entra config at a time (the + ``model.entra.*`` block in config.yaml drives every aux task, + subagent, and credential probe in the session). ``maxsize=1`` is + intentional: it reflects the actual usage pattern and keeps the + cache trivially small. + + ``EntraIdentityConfig`` is a frozen dataclass, so it's hashable and + safe as an LRU-cache key. ``functools.lru_cache`` is thread-safe in + CPython. + + If two distinct configs are ever passed (tests do this; production + rarely), the LRU eviction handles it correctly — each call still + returns a credential matching its config; only one is cached at a + time. Use :func:`reset_credential_cache` to clear (e.g. in tests). + """ + return _build_default_credential(config) + + +def build_token_provider(scope: Optional[str] = None, + *, + config: Optional[EntraIdentityConfig] = None, + base_url: Optional[str] = None, + exclude_interactive_browser: bool = True, + ) -> Callable[[], str]: + """Return a zero-arg callable that mints a fresh Entra bearer JWT. + + The returned callable is exactly what Microsoft's documented Foundry + sample expects:: + + from openai import OpenAI + client = OpenAI( + base_url="https://my-resource.openai.azure.com/openai/v1/", + api_key=build_token_provider(), + ) + + Scope resolution order: + 1. ``config.scope`` when a config object is supplied + 2. explicit ``scope`` kwarg + 3. ``SCOPE_AI_AZURE_DEFAULT`` (Microsoft's documented Foundry scope) + + ``base_url`` is unused today and kept for back-compat. Tenant / + service-principal / sovereign-cloud configuration flows through + ``azure-identity``'s standard ``AZURE_*`` environment variables — + see :func:`_build_default_credential` for the rationale. + + NOT serializable across process boundaries. For multiprocessing + workers, serialize the ``EntraIdentityConfig`` and rebuild the + provider inside the worker. + """ + ai = _require_azure_identity() + if config is None: + config = EntraIdentityConfig( + scope=scope or SCOPE_AI_AZURE_DEFAULT, + exclude_interactive_browser=exclude_interactive_browser, + ) + credential = build_credential(config) + return ai.get_bearer_token_provider(credential, config.scope) + + +# --------------------------------------------------------------------------- +# Credential probing +# --------------------------------------------------------------------------- + + +def has_azure_identity_credentials(scope: Optional[str] = None, + *, + config: Optional[EntraIdentityConfig] = None, + timeout_seconds: float = 10.0, + allow_install: bool = True, + **overrides: Any) -> bool: + """Best-effort probe: can `DefaultAzureCredential` mint a token now? + + Runs ``credential.get_token(scope)`` under a thread-based timeout so + a slow token service can't hang the caller. Returns False on any + error — never raises. Use for ``hermes doctor`` / + ``hermes auth status`` / wizard preflight. + + ``allow_install``: when True (default) and ``azure-identity`` is not + importable, the adapter triggers the standard lazy-install path + (subject to ``security.allow_lazy_installs``) before probing. Set + False to make this strictly an "is installed?" check — used on hot + paths like CLI startup where we never want pip to run. + + NOT used by ``is_provider_configured()`` — that path is structural + only (no token mint), so CLI startup doesn't pay this latency. + """ + if not has_azure_identity_installed(): + if not allow_install: + return False + try: + _require_azure_identity() + except ImportError as exc: + logger.debug("azure-identity lazy install unavailable: %s", exc) + return False + if config is None: + effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT + config = EntraIdentityConfig(scope=effective_scope, **overrides) + + result = {"ok": False} + + def _probe() -> None: + try: + credential = build_credential(config) + tok = credential.get_token(config.scope) + result["ok"] = bool(getattr(tok, "token", None)) + except Exception as exc: + logger.debug("Entra credential probe failed: %s", exc) + result["ok"] = False + + thread = threading.Thread(target=_probe, daemon=True) + thread.start() + thread.join(timeout=max(0.01, timeout_seconds)) + if thread.is_alive(): + logger.debug("Entra token service probe timed out after %ss", timeout_seconds) + return False + return bool(result.get("ok")) + + +def describe_active_credential(config: Optional[EntraIdentityConfig] = None, + *, + scope: Optional[str] = None, + timeout_seconds: float = 10.0, + allow_install: bool = True, + **overrides: Any) -> Dict[str, Any]: + """Return diagnostic info about the active credential chain. + + Best-effort: runs ``get_token()`` and inspects what came back. + Designed for ``hermes doctor`` and the wizard preflight — never + raises, returns ``{"ok": False, "error": ...}`` on failure. + + ``allow_install``: when True (default) and ``azure-identity`` is not + importable, the adapter triggers the standard lazy-install path + (subject to ``security.allow_lazy_installs``) before probing. The + install failure is surfaced as the diagnostic error when it fails. + Set False for hot CLI paths that should never trigger pip. + + ``azure-identity`` doesn't expose the winning inner credential as + a public field, so we report a coarse picture (env vars present, + token expiry, claims-derived tenant) rather than the credential + class name. Users wanting the precise class can run with + ``AZURE_LOG_LEVEL=DEBUG``. + """ + info: Dict[str, Any] = {"ok": False} + if not has_azure_identity_installed(): + if not allow_install: + info["error"] = "azure-identity not installed" + info["hint"] = ( + "pip install azure-identity (or rely on lazy install at " + "first use)" + ) + return info + try: + _require_azure_identity() + except ImportError as exc: + info["error"] = str(exc) or "azure-identity not installed" + info["hint"] = ( + "pip install azure-identity manually, or enable lazy " + "installs (security.allow_lazy_installs: true in " + "config.yaml)." + ) + return info + + if config is None: + effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT + config = EntraIdentityConfig(scope=effective_scope, **overrides) + + info["scope"] = config.scope + # Tenant / authority / service-principal config flow through the + # standard ``AZURE_*`` env vars; surface them below. + if os.environ.get("AZURE_TENANT_ID", "").strip(): + info["tenant_id_env"] = os.environ["AZURE_TENANT_ID"].strip() + + # Surface which env-var sources are present without minting yet. + env_sources = [] + if os.environ.get("AZURE_FEDERATED_TOKEN_FILE", "").strip(): + env_sources.append("WorkloadIdentityCredential (AZURE_FEDERATED_TOKEN_FILE)") + if (os.environ.get("AZURE_CLIENT_ID", "").strip() + and os.environ.get("AZURE_CLIENT_SECRET", "").strip() + and os.environ.get("AZURE_TENANT_ID", "").strip()): + env_sources.append("EnvironmentCredential (client secret)") + if os.environ.get("IDENTITY_ENDPOINT", "").strip() or os.environ.get("MSI_ENDPOINT", "").strip(): + env_sources.append("ManagedIdentityCredential (IDENTITY_ENDPOINT)") + info["env_sources"] = env_sources + + # Now try minting. + result: Dict[str, Any] = {} + + def _probe() -> None: + try: + credential = build_credential(config) + tok = credential.get_token(config.scope) + result["token"] = tok + except Exception as exc: + result["error"] = str(exc) + + thread = threading.Thread(target=_probe, daemon=True) + thread.start() + thread.join(timeout=max(0.01, timeout_seconds)) + if thread.is_alive(): + info["error"] = f"Token probe timed out after {timeout_seconds:.0f}s" + info["hint"] = ( + "DefaultAzureCredential can be slow when the token service is unreachable " + "or when az login state is stale. Try `az login` or set " + "AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET." + ) + return info + + if "error" in result: + info["error"] = result["error"] + return info + + token = result.get("token") + if token is None: + info["error"] = "credential chain exhausted" + return info + + info["ok"] = True + info["expires_on"] = getattr(token, "expires_on", None) + return info + + +# --------------------------------------------------------------------------- +# Consumer-side helpers — split by purpose to prevent accidental token +# minting in logging / cache-key / dashboard paths. +# --------------------------------------------------------------------------- + + +def is_token_provider(value: Any) -> bool: + """Return True when ``value`` is a callable Entra token provider. + + Used at the seams where a consumer must decide between + string-API-key semantics and bearer-callable semantics. + """ + return callable(value) and not isinstance(value, str) + + +def materialize_bearer_for_http(value: Any) -> str: + """Return a fresh Bearer JWT for a manual HTTP request. + + Only call this at sites that must construct an ``Authorization`` + header outside the OpenAI SDK (e.g. ``hermes_cli/azure_detect.py``). + Calls the callable exactly once and returns the resulting token. + + **Anthropic SDK integration:** the Anthropic Python SDK does not + accept a ``Callable[[], str]`` for ``auth_token``. Instead, + :func:`build_bearer_http_client` returns an ``httpx.Client`` whose + request event hook calls this function and rewrites the + ``Authorization`` header per request — and that client is passed to + the Anthropic SDK via ``http_client=...``. See + :func:`agent.anthropic_adapter.build_anthropic_client` for the + consumer. + + Raises ``ValueError`` if ``value`` is not a callable token provider + or non-empty string. + """ + if is_token_provider(value): + token = value() + if not isinstance(token, str) or not token: + raise ValueError("token provider returned empty value") + return token + if isinstance(value, str) and value: + return value + raise ValueError("no usable api_key / token provider") + + +def build_bearer_http_client(token_provider: Callable[[], str], **httpx_kwargs: Any) -> Any: + """Return an ``httpx.Client`` that mints a fresh Entra bearer JWT + per outbound request. + + The Anthropic SDK (≤ 0.86.0 at the time of writing) stores + ``api_key`` / ``auth_token`` as static strings and computes the + ``Authorization`` header at construction time. To get per-request + token refresh (the Microsoft-recommended Foundry pattern for + callable bearer providers), we install an httpx ``request`` event + hook on a custom client and pass that client to the SDK via + ``http_client=...``. The hook: + + 1. Calls :func:`materialize_bearer_for_http` to mint a fresh JWT + (azure-identity caches internally — this is cheap when the + cached token is still valid). + 2. Strips any pre-set ``Authorization`` / ``api-key`` / + ``x-api-key`` headers the SDK may have added (avoids + conflicting auth values). + 3. Sets ``Authorization: Bearer ``. + + ``token_provider`` must be a zero-arg callable returning a string — + typically the result of :func:`build_token_provider`. + + ``httpx_kwargs`` are forwarded verbatim to ``httpx.Client(...)`` so + callers can attach a ``timeout``, ``transport``, ``proxy``, etc. + + Raises ``ImportError`` if ``httpx`` is not installed (it is a + transitive dependency of both ``openai`` and ``anthropic`` SDKs, so + in practice always available when this helper is reached). + """ + if not is_token_provider(token_provider): + raise ValueError( + "build_bearer_http_client requires a zero-arg callable " + "token provider" + ) + + try: + import httpx + except ImportError as exc: # pragma: no cover — httpx ships with openai/anthropic + raise ImportError( + "httpx is required for Entra ID bearer auth on Microsoft Foundry " + "Anthropic-style endpoints. It is normally a transitive " + "dependency of the openai/anthropic SDKs." + ) from exc + + def _inject_bearer(request: "httpx.Request") -> None: + try: + token = materialize_bearer_for_http(token_provider) + except ValueError as exc: + # Token provider failed (chain exhausted, token service unreachable, + # az login expired, etc.). Strip any auth headers the SDK + # may have set — including our own placeholder sentinel + # ``entra-id-bearer-via-http-hook`` from + # ``_build_anthropic_client_with_bearer_hook`` — so the + # outbound request hits Azure with NO Authorization rather + # than with the placeholder. Azure returns a clean 401 + # "missing auth" that is easier to diagnose than a 401 + # against the sentinel string, and the sentinel never + # appears in upstream access logs. + # + # Log at WARNING (not DEBUG) so the misconfiguration is + # visible at default log levels. + logger.warning( + "Bearer hook: Entra ID token provider returned empty (%s) " + "— stripping Authorization headers. Azure will respond 401. " + "Run `hermes doctor` or `az login` to recover.", + exc, + ) + for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"): + request.headers.pop(header_name, None) + return + for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"): + request.headers.pop(header_name, None) + request.headers["Authorization"] = f"Bearer {token}" + + return httpx.Client( + event_hooks={"request": [_inject_bearer]}, + **httpx_kwargs, + ) + + +__all__ = [ + "EntraIdentityConfig", + "SCOPE_AI_AZURE_DEFAULT", + "build_bearer_http_client", + "build_credential", + "build_token_provider", + "describe_active_credential", + "has_azure_identity_credentials", + "has_azure_identity_installed", + "is_token_provider", + "materialize_bearer_for_http", + "reset_credential_cache", +] diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index ee5b957bf2f..350a54e4066 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -866,9 +866,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool # the fallback activation drops to 128K even when config says 204800. if hasattr(agent, 'context_compressor') and agent.context_compressor: from agent.model_metadata import get_model_context_length + # ``agent.api_key`` may be callable (Entra ID); the + # context-length resolver expects a string for live + # probes. Foundry typically resolves via config/static + # catalogs anyway, so coerce defensively. + _fb_ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else "" fb_context_length = get_model_context_length( agent.model, base_url=agent.base_url, - api_key=agent.api_key, provider=agent.provider, + api_key=_fb_ctx_api_key, provider=agent.provider, config_context_length=getattr(agent, "_config_context_length", None), custom_providers=getattr(agent, "_custom_providers", None), ) @@ -876,7 +881,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool model=agent.model, context_length=fb_context_length, base_url=agent.base_url, - api_key=getattr(agent, "api_key", ""), + api_key=getattr(agent, "api_key", ""), # callable preserved → call_llm provider=agent.provider, ) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 8eadcf26ef8..41983fabba9 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -486,7 +486,7 @@ class ContextCompressor(ContextEngine): model: str, context_length: int, base_url: str = "", - api_key: str = "", + api_key: Any = "", provider: str = "", api_mode: str = "", ) -> None: diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py index bc70623997d..e9aa6c8f688 100644 --- a/agent/conversation_compression.py +++ b/agent/conversation_compression.py @@ -103,7 +103,15 @@ def check_compression_model_feasibility(agent: Any) -> None: return aux_base_url = str(getattr(client, "base_url", "")) - aux_api_key = str(getattr(client, "api_key", "")) + # ``client.api_key`` may be a callable (Azure Foundry Entra ID + # bearer provider). The context-length resolver chain expects a + # string, but it only needs a key for live catalogue probes + # (provider model lists). For Entra clients the model-metadata + # chain still resolves via models.dev + hardcoded family + # fallbacks, which don't require auth — pass empty string rather + # than minting a bearer JWT just to look up a context length. + _raw_aux_key = getattr(client, "api_key", "") + aux_api_key = "" if (callable(_raw_aux_key) and not isinstance(_raw_aux_key, str)) else str(_raw_aux_key or "") aux_context = get_model_context_length( aux_model, diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index d3d47a5a101..98f65e1f7f7 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -1807,7 +1807,11 @@ def run_conversation( # that survives message/tool sanitization (#6843). _credential_sanitized = False _raw_key = getattr(agent, "api_key", None) or "" - if _raw_key: + # Entra ID bearer providers are callables — their + # minted JWTs are always ASCII, so no sanitization + # is needed (and ``_strip_non_ascii`` would crash + # on a callable input). + if _raw_key and isinstance(_raw_key, str): _clean_key = _strip_non_ascii(_raw_key) if _clean_key != _raw_key: agent.api_key = _clean_key @@ -2080,15 +2084,26 @@ def run_conversation( ): anthropic_auth_retry_attempted = True from agent.anthropic_adapter import _is_oauth_token + from agent.azure_identity_adapter import is_token_provider if agent._try_refresh_anthropic_client_credentials(): print(f"{agent.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...") continue # Credential refresh didn't help — show diagnostic info key = agent._anthropic_api_key - auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)" print(f"{agent.log_prefix}🔐 Anthropic 401 — authentication failed.") - print(f"{agent.log_prefix} Auth method: {auth_method}") - print(f"{agent.log_prefix} Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{agent.log_prefix} Token: (empty or short)") + if is_token_provider(key): + # Azure Foundry Entra ID — the bearer token is + # minted per-request by an httpx event hook on a + # custom http_client passed to the SDK. The 401 + # means Azure rejected the JWT (RBAC role missing, + # az login expired, IMDS unreachable, etc.). + print(f"{agent.log_prefix} Auth method: Microsoft Entra ID (httpx event hook)") + print(f"{agent.log_prefix} Run `hermes doctor` for credential-chain diagnostics, or") + print(f"{agent.log_prefix} `az login` if your developer session expired.") + else: + auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)" + print(f"{agent.log_prefix} Auth method: {auth_method}") + print(f"{agent.log_prefix} Token prefix: {key[:12]}..." if isinstance(key, str) and len(key) > 12 else f"{agent.log_prefix} Token: (empty or short)") print(f"{agent.log_prefix} Troubleshooting:") from hermes_constants import display_hermes_home as _dhh_fn _dhh = _dhh_fn() diff --git a/batch_runner.py b/batch_runner.py index a67037171bf..28936198955 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -862,13 +862,32 @@ class BatchRunner: "last_updated": None } - # Prepare configuration for workers + # Prepare configuration for workers. + # + # ``self.api_key`` may be a zero-arg callable (Azure Foundry Entra ID + # bearer provider returned by ``agent.azure_identity_adapter``). Such + # closures are not safely picklable across the multiprocessing.Pool + # boundary. Drop the callable here and let each worker rebuild its + # own provider via ``resolve_runtime_provider()``, which reads + # ``model.auth_mode`` from ``config.yaml`` and constructs a fresh + # token provider in the worker process (azure-identity caches + # in-process so each worker gets its own short-lived cache). + if callable(self.api_key) and not isinstance(self.api_key, str): + worker_api_key = None + print( + "ℹ️ Detected Entra ID bearer provider — workers will rebuild " + "credentials from config.yaml in each process.", + flush=True, + ) + else: + worker_api_key = self.api_key + config = { "distribution": self.distribution, "model": self.model, "max_iterations": self.max_iterations, "base_url": self.base_url, - "api_key": self.api_key, + "api_key": worker_api_key, "verbose": self.verbose, "ephemeral_system_prompt": self.ephemeral_system_prompt, "log_prefix_chars": self.log_prefix_chars, diff --git a/cli-config.yaml.example b/cli-config.yaml.example index f5fb7156380..68c716daab0 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -30,6 +30,7 @@ model: # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings) # "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY) # "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY) + # "azure-foundry" - Microsoft Foundry / Azure OpenAI (API key or Entra ID) # "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1) # # Local servers (LM Studio, Ollama, vLLM, llama.cpp): @@ -45,6 +46,14 @@ model: # api_key: "your-key-here" # Uncomment to set here instead of .env base_url: "https://openrouter.ai/api/v1" + # Azure Foundry keyless auth example: + # provider: "azure-foundry" + # base_url: "https://.openai.azure.com/openai/v1" + # auth_mode: "entra_id" # DefaultAzureCredential: az login, managed identity, workload identity, etc. + # default: "gpt-4o" # Deployment/model name + # entra: + # scope: "https://ai.azure.com/.default" # Optional; this is the default. + # ── Token limits — two settings, easy to confuse ────────────────────────── # # context_length: TOTAL context window (input + output tokens combined). diff --git a/cli.py b/cli.py index 6b62493d60c..e9169de674a 100644 --- a/cli.py +++ b/cli.py @@ -4251,7 +4251,13 @@ class HermesCLI: resolved_acp_command = runtime.get("command") resolved_acp_args = list(runtime.get("args") or []) resolved_credential_pool = runtime.get("credential_pool") - if not isinstance(api_key, str) or not api_key: + # A callable api_key is a bearer-token provider (Azure Foundry + # Entra ID — ``azure_identity_adapter.build_token_provider``). + # The OpenAI SDK accepts ``Callable[[], str]`` for ``api_key`` and + # invokes it before every request. Skip the string-only validation + # and placeholder substitution for callables. + _is_callable_provider = callable(api_key) and not isinstance(api_key, str) + if not _is_callable_provider and (not isinstance(api_key, str) or not api_key): # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often # don't require authentication. When a base_url IS configured but # no API key was found, use a placeholder so the OpenAI SDK @@ -5723,7 +5729,15 @@ class HermesCLI: config_path = project_config_path config_status = "(loaded)" if config_path.exists() else "(not found)" - api_key_display = '********' + self.api_key[-4:] if self.api_key and len(self.api_key) > 4 else 'Not set!' + # ``self.api_key`` may be a callable (Azure Foundry Entra ID bearer + # provider). Never invoke it; just identify the auth surface. + from agent.azure_identity_adapter import is_token_provider + if is_token_provider(self.api_key): + api_key_display = "Microsoft Entra ID" + elif isinstance(self.api_key, str) and len(self.api_key) > 12: + api_key_display = f"{self.api_key[:8]}...{self.api_key[-4:]}" + else: + api_key_display = "Not set!" print() title = "(^_^) Configuration" diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index cb97a4c2300..df4de463a55 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -5334,7 +5334,9 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]: def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: """Generic auth status dispatcher.""" - target = provider_id or get_active_provider() + target = (provider_id or get_active_provider() or "").strip().lower() + if not target: + return {"logged_in": False} if target == "spotify": return get_spotify_auth_status() if target == "nous": @@ -5351,6 +5353,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return get_minimax_oauth_auth_status() if target == "copilot-acp": return get_external_process_provider_status(target) + if target == "azure-foundry": + return _get_azure_foundry_auth_status() # API-key providers pconfig = PROVIDER_REGISTRY.get(target) if pconfig and pconfig.auth_type == "api_key": @@ -5365,6 +5369,83 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return {"logged_in": False} +def _get_azure_foundry_auth_status() -> Dict[str, Any]: + """Return structural auth status for Azure Foundry. + + ``logged_in`` is structural, matching other non-OAuth provider status + checks: + + * ``auth_mode == "entra_id"`` AND ``azure-identity`` is importable + (we do NOT mint a token here; ``hermes doctor`` runs the live + probe and reports whether the credential chain can acquire one). + * ``auth_mode == "api_key"`` (default) AND ``AZURE_FOUNDRY_API_KEY`` + is set with a usable value. + + Never invokes the Entra credential chain — keeps CLI startup latency + flat regardless of token-service / az login state. + """ + info: Dict[str, Any] = {"provider": "azure-foundry"} + try: + from hermes_cli.config import load_config, get_env_value + cfg = load_config() + except Exception: + cfg = {} + + model_cfg = cfg.get("model") if isinstance(cfg, dict) else None + auth_mode = "api_key" + base_url = "" + if isinstance(model_cfg, dict): + auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key" + base_url = str(model_cfg.get("base_url") or "").strip() + info["auth_mode"] = auth_mode + info["base_url"] = base_url + + if auth_mode == "entra_id": + try: + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + SCOPE_AI_AZURE_DEFAULT, + has_azure_identity_installed, + ) + installed = has_azure_identity_installed() + entra_cfg = {} + if isinstance(model_cfg, dict) and isinstance(model_cfg.get("entra"), dict): + entra_cfg = model_cfg["entra"] + identity_config = EntraIdentityConfig.from_dict( + entra_cfg, + default_scope=SCOPE_AI_AZURE_DEFAULT, + ) + info["azure_identity_installed"] = installed + info["scope"] = identity_config.scope + info["credential_probe"] = "not_run" + info["credential_verified"] = False + info["logged_in"] = bool(installed) + if not installed: + info["hint"] = ( + "azure-identity not installed. Install with: " + "pip install azure-identity (or rely on Hermes' " + "lazy-install at first use)." + ) + else: + info["hint"] = ( + "azure-identity is installed; live credential validation " + "is skipped here. Run `hermes doctor` to verify token acquisition." + ) + return info + except Exception as exc: + info["logged_in"] = False + info["error"] = f"azure-identity check failed: {exc}" + return info + + # api_key mode (default) + try: + api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "") + except Exception: + api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "") + info["logged_in"] = has_usable_secret(api_key) + return info + + def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]: """Resolve API key and base URL for an API-key provider. diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 10b040d8a1d..998f72b3e61 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -566,6 +566,54 @@ def _interactive_auth() -> None: print() except ImportError: pass # boto3 or bedrock_adapter not available + + # Show Azure Foundry Entra ID status + try: + from hermes_cli.config import load_config + _cfg = load_config() + _model_cfg = _cfg.get("model") if isinstance(_cfg, dict) else None + if isinstance(_model_cfg, dict): + _cfg_provider = str(_model_cfg.get("provider") or "").strip().lower() + _cfg_auth_mode = str(_model_cfg.get("auth_mode") or "").strip().lower() + if _cfg_provider == "azure-foundry" and _cfg_auth_mode == "entra_id": + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + SCOPE_AI_AZURE_DEFAULT, + describe_active_credential, + has_azure_identity_installed, + ) + _base_url = str(_model_cfg.get("base_url") or "").strip() + _entra = _model_cfg.get("entra") or {} + if not isinstance(_entra, dict): + _entra = {} + _scope = ( + str(_entra.get("scope") or "").strip() + or SCOPE_AI_AZURE_DEFAULT + ) + print(f"azure-foundry (Microsoft Entra ID):") + print(f" Endpoint: {_base_url or '(not configured)'}") + print(f" Scope: {_scope}") + if not has_azure_identity_installed(): + print(" Status: ⚠ azure-identity not installed " + "(pip install azure-identity)") + else: + _entra_cfg = EntraIdentityConfig( + scope=_scope, + ) + _info = describe_active_credential(config=_entra_cfg, timeout_seconds=10.0) + _env_sources = _info.get("env_sources") or [] + if _info.get("ok"): + _tag = ", ".join(_env_sources) if _env_sources else "default chain" + print(f" Status: ✓ token acquired ({_tag})") + else: + _err = _info.get("error") or "credential chain exhausted" + print(f" Status: ⚠ {_err}") + _hint = _info.get("hint") + if _hint: + print(f" Hint: {_hint}") + print() + except Exception: + pass print() # Main menu diff --git a/hermes_cli/azure_detect.py b/hermes_cli/azure_detect.py index 8dd0d632a9f..1420d9334d6 100644 --- a/hermes_cli/azure_detect.py +++ b/hermes_cli/azure_detect.py @@ -1,6 +1,6 @@ """Azure Foundry endpoint auto-detection. -Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine: +Inspect a Microsoft Foundry / Azure OpenAI endpoint to determine: - API transport (OpenAI-style ``chat_completions`` vs Anthropic-style ``anthropic_messages``) - Available models (best effort — Azure does not expose a deployment @@ -19,6 +19,16 @@ rather than the user's *deployed* deployment names. In practice it is still a useful hint — the user picks a familiar model name and we look up its context length from the catalog. +Authentication modes: + - ``api_key`` (default): the wizard passes an ``api_key`` string; the + probe sends both ``api-key:`` and ``Authorization: Bearer`` headers + so we hit any Azure deployment regardless of which header it expects. + - ``entra_id``: the wizard passes a ``token_provider`` callable from + :mod:`agent.azure_identity_adapter`. The probe mints exactly one + bearer JWT, sends **only** ``Authorization: Bearer `` (never + ``api-key:``), and never persists the token. This matches Microsoft's + documented contract for keyless inference. + The detector never crashes on errors (every HTTP call is wrapped in a broad try/except). Callers get a :class:`DetectionResult` with whatever information could be gathered, and fall back to manual entry for the @@ -31,7 +41,7 @@ import json import logging import re from dataclasses import dataclass, field -from typing import Optional +from typing import Any, Callable, Optional from urllib import request as urllib_request from urllib.error import HTTPError, URLError from urllib.parse import urlparse @@ -79,15 +89,73 @@ class DetectionResult: is_anthropic: bool = False -def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]: - """GET a URL with ``api-key`` + ``Authorization`` headers. Return +def _resolve_credential(api_key: Any, + token_provider: Optional[Callable[[], str]] = None, + ) -> tuple[Optional[str], str]: + """Coerce wizard inputs into a (token, mode) pair. + + Returns ``(token_or_None, mode)`` where ``mode`` is: + - ``"entra_id"`` when a callable token provider was supplied — the + returned token is a freshly minted bearer JWT, sent ONLY in + ``Authorization: Bearer``. + - ``"api_key"`` when a string key was supplied — the returned token + is the raw API key, sent in BOTH ``api-key:`` and + ``Authorization: Bearer`` headers (preserves the original + broad-compat probe behaviour). + - ``("", "api_key")`` when neither yields a value. + + Bearer minting failures degrade to ``("", "entra_id")`` so the caller + can still report "detection incomplete" rather than crashing. + """ + # Token-provider path (callable wins when both supplied). + if token_provider is not None and callable(token_provider): + try: + token = token_provider() + return (str(token) if token else None), "entra_id" + except Exception as exc: + logger.debug("azure_detect: token_provider failed: %s", exc) + return None, "entra_id" + if callable(api_key) and not isinstance(api_key, str): + try: + token = api_key() + return (str(token) if token else None), "entra_id" + except Exception as exc: + logger.debug("azure_detect: api_key callable failed: %s", exc) + return None, "entra_id" + # API-key path. + if isinstance(api_key, str) and api_key: + return api_key, "api_key" + return None, "api_key" + + +def _apply_auth_headers(req: urllib_request.Request, + token: Optional[str], + mode: str) -> None: + """Attach the right auth headers to ``req`` based on credential mode.""" + if not token: + return + if mode == "entra_id": + # Bearer-only: do NOT also set api-key, which would log a JWT in + # a header slot intended for static keys. + req.add_header("Authorization", f"Bearer {token}") + else: + # Legacy broad-compat behaviour: send both headers so we land on + # any Azure resource regardless of which it accepts. + req.add_header("api-key", token) + req.add_header("Authorization", f"Bearer {token}") + + +def _http_get_json(url: str, + api_key: Any, + timeout: float = 6.0, + *, + token_provider: Optional[Callable[[], str]] = None, + ) -> tuple[int, Optional[dict]]: + """GET a URL with the appropriate auth headers. Return ``(status_code, parsed_json_or_None)``. Never raises.""" + token, mode = _resolve_credential(api_key, token_provider) req = urllib_request.Request(url, method="GET") - # Azure OpenAI uses ``api-key``. Some Azure deployments (and - # Anthropic-style routes) use ``Authorization: Bearer``. Send both - # so we probe once per URL rather than twice. - req.add_header("api-key", api_key) - req.add_header("Authorization", f"Bearer {api_key}") + _apply_auth_headers(req, token, mode) req.add_header("User-Agent", "hermes-agent/azure-detect") try: with urllib_request.urlopen(req, timeout=timeout) as resp: @@ -140,7 +208,11 @@ def _extract_model_ids(payload: dict) -> list[str]: return ids -def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]: +def _probe_openai_models(base_url: str, + api_key: Any, + *, + token_provider: Optional[Callable[[], str]] = None, + ) -> tuple[bool, list[str]]: """Probe ``/models`` for an OpenAI-shaped response. Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted @@ -156,7 +228,7 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]: candidates.append(f"{base_url}/models?api-version={v}") for url in candidates: - status, body = _http_get_json(url, api_key) + status, body = _http_get_json(url, api_key, token_provider=token_provider) if status == 200 and body is not None: ids = _extract_model_ids(body) if ids: @@ -172,7 +244,11 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]: return False, [] -def _probe_anthropic_messages(base_url: str, api_key: str) -> bool: +def _probe_anthropic_messages(base_url: str, + api_key: Any, + *, + token_provider: Optional[Callable[[], str]] = None, + ) -> bool: """Send a zero-token request to ``/v1/messages`` and check whether the endpoint at least *recognises* the Anthropic Messages shape (any 4xx that mentions ``messages`` or ``model``, or a 400 @@ -187,8 +263,8 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool: "messages": [{"role": "user", "content": "ping"}], }).encode("utf-8") req = urllib_request.Request(url, method="POST", data=payload) - req.add_header("api-key", api_key) - req.add_header("Authorization", f"Bearer {api_key}") + token, mode = _resolve_credential(api_key, token_provider) + _apply_auth_headers(req, token, mode) req.add_header("anthropic-version", "2023-06-01") req.add_header("content-type", "application/json") req.add_header("User-Agent", "hermes-agent/azure-detect") @@ -218,13 +294,23 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool: return False -def detect(base_url: str, api_key: str) -> DetectionResult: +def detect(base_url: str, + api_key: Any = "", + *, + token_provider: Optional[Callable[[], str]] = None, + ) -> DetectionResult: """Inspect an Azure endpoint and describe its transport + models. Call this from the wizard before asking the user to pick an API mode manually. The caller should treat the returned :class:`DetectionResult` as *advisory* — if ``api_mode`` is None, fall back to asking the user. + + ``api_key`` may be a string (legacy API-key auth — sends both + ``api-key:`` and ``Authorization: Bearer``) or a callable returning + a bearer JWT (Entra ID auth — sends ONLY ``Authorization: Bearer``). + ``token_provider`` is an alternative explicit name for the callable + form; if both are supplied the callable wins. """ result = DetectionResult() @@ -244,7 +330,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult: # 2. Try the OpenAI-style /models probe. If this works, the # endpoint definitely speaks OpenAI wire. - ok, models = _probe_openai_models(base_url, api_key) + ok, models = _probe_openai_models(base_url, api_key, token_provider=token_provider) if ok: result.models_probe_ok = True result.models = models @@ -259,7 +345,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult: # 3. Fallback: probe the Anthropic Messages shape. Slower and more # intrusive than /models, so only run it when the OpenAI probe # failed. - if _probe_anthropic_messages(base_url, api_key): + if _probe_anthropic_messages(base_url, api_key, token_provider=token_provider): result.is_anthropic = True result.api_mode = "anthropic_messages" result.reason = "Endpoint accepts Anthropic Messages shape" @@ -273,11 +359,26 @@ def detect(base_url: str, api_key: str) -> DetectionResult: return result -def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]: +def lookup_context_length(model: str, + base_url: str, + api_key: Any = "", + *, + token_provider: Optional[Callable[[], str]] = None, + ) -> Optional[int]: """Thin wrapper around :func:`agent.model_metadata.get_model_context_length` that returns ``None`` when only the fallback default (128k) would fire, so the wizard can distinguish "we actually know this" from - "we guessed.""" + "we guessed. + + For Entra-ID mode pass a callable as ``api_key`` (or via + ``token_provider=``); the wrapped resolver expects a string, so we + mint one bearer JWT here for the single lookup. The resolver itself + only reads catalog metadata over HTTP — no SDK client is built — so + the minted token is consumed for at most one /models probe. + """ + model_id = str(model or "").strip() + if not model_id: + return None try: from agent.model_metadata import ( DEFAULT_FALLBACK_CONTEXT, @@ -286,8 +387,13 @@ def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[i except Exception: return None + # Resolve the credential once. For Entra mode this calls the token + # provider; for legacy api_key this is a no-op string pass-through. + token, mode = _resolve_credential(api_key, token_provider) + effective_key = token or "" + try: - n = get_model_context_length(model, base_url=base_url, api_key=api_key) + n = get_model_context_length(model_id, base_url=base_url, api_key=effective_key) except Exception as exc: logger.debug("azure_detect: context length lookup failed: %s", exc) return None diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 4440b386823..dab22e2640a 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -1613,6 +1613,87 @@ def run_doctor(args): f"bedrock:ListFoundationModels"], ) + def _probe_azure_entra() -> _ConnectivityResult: + """Probe Azure Foundry Entra ID auth, parallel to ``_probe_bedrock``. + + Skipped unless the active config has ``model.provider: + azure-foundry`` AND ``model.auth_mode: entra_id`` — we don't probe + the token-service / CLI chain for users on plain API-key Azure. + + Bounded by a 10s timeout (via + :func:`agent.azure_identity_adapter.describe_active_credential`) + so a slow token service can't pad the doctor run. + """ + label = "Azure Foundry (Entra ID)".ljust(28) + try: + from hermes_cli.config import load_config + cfg = load_config() + model_cfg = cfg.get("model") if isinstance(cfg, dict) else {} + if not isinstance(model_cfg, dict): + return _ConnectivityResult("Azure Foundry (Entra ID)", [], []) + cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + auth_mode = str(model_cfg.get("auth_mode") or "").strip().lower() + if cfg_provider != "azure-foundry" or auth_mode != "entra_id": + return _ConnectivityResult("Azure Foundry (Entra ID)", [], []) + except Exception: + return _ConnectivityResult("Azure Foundry (Entra ID)", [], []) + + try: + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + SCOPE_AI_AZURE_DEFAULT, + describe_active_credential, + has_azure_identity_installed, + ) + except Exception as exc: + return _ConnectivityResult( + "Azure Foundry (Entra ID)", + [(color("⚠", Colors.YELLOW), label, + color(f"(adapter import failed: {exc})", Colors.DIM))], + [f"Azure Foundry adapter import failed: {exc}"], + ) + + if not has_azure_identity_installed(): + return _ConnectivityResult( + "Azure Foundry (Entra ID)", + [(color("⚠", Colors.YELLOW), label, + color("(azure-identity not installed)", Colors.DIM))], + [f"Install azure-identity: {sys.executable} -m pip install azure-identity"], + ) + + base_url = str(model_cfg.get("base_url") or "").strip() + entra_cfg = model_cfg.get("entra") or {} + if not isinstance(entra_cfg, dict): + entra_cfg = {} + scope = ( + str(entra_cfg.get("scope") or "").strip() + or SCOPE_AI_AZURE_DEFAULT + ) + config = EntraIdentityConfig( + scope=scope, + ) + info = describe_active_credential(config=config, timeout_seconds=10.0) + if info.get("ok"): + env_sources = info.get("env_sources") or [] + tag = ", ".join(env_sources) if env_sources else "default credential chain" + return _ConnectivityResult( + "Azure Foundry (Entra ID)", + [(color("✓", Colors.GREEN), label, + color(f"({tag}, scope={scope})", Colors.DIM))], + [], + ) + err = info.get("error") or "credential chain exhausted" + hint = info.get("hint") or ( + "Run `az login`, set AZURE_TENANT_ID/AZURE_CLIENT_ID/" + "AZURE_CLIENT_SECRET, or attach a managed identity to this VM." + ) + return _ConnectivityResult( + "Azure Foundry (Entra ID)", + [(color("⚠", Colors.YELLOW), label, + color(f"({err})", Colors.DIM))], + [f"Azure Foundry Entra: {err}. {hint}"], + ) + # Build the probe submission list in display order _probes.append(("OpenRouter API", _probe_openrouter)) _probes.append(("Anthropic API", _probe_anthropic)) @@ -1630,6 +1711,7 @@ def run_doctor(args): _probe_apikey_provider(p, e, u, b, s))) _probes.append(("AWS Bedrock", _probe_bedrock)) + _probes.append(("Azure Foundry (Entra ID)", _probe_azure_entra)) # Print a single status line so users see something happening, then # fan out. ``\r`` clears it once the first real result line lands. diff --git a/hermes_cli/main.py b/hermes_cli/main.py index fe287543673..48bf6675b32 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -3535,11 +3535,27 @@ def _save_custom_provider( def _model_flow_azure_foundry(config, current_model=""): - """Azure Foundry provider: configure endpoint, API mode, API key, and model. + """Azure Foundry provider: configure endpoint, auth mode, API mode, and model. Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and - Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects - the transport and available models when possible: + Anthropic-style (``/v1/messages``) endpoints, and two authentication + modes: + + * **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env. + * **Microsoft Entra ID** — keyless, RBAC-based auth via the + ``azure-identity`` SDK (Managed Identity / Workload Identity / az + login / VS Code / azd / service principal env vars). Works on both + OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is + per-resource and the same ``Azure AI User`` role grants + both. For OpenAI-style the OpenAI SDK's native callable + ``api_key=`` contract is used; for Anthropic-style an + ``httpx.Client`` with a request event hook (built by + :func:`agent.azure_identity_adapter.build_bearer_http_client`) + mints a fresh JWT per request because the Anthropic SDK does not + accept a callable ``auth_token`` natively. + + The wizard auto-detects the transport and available models when + possible: * URLs ending in ``/anthropic`` → Anthropic Messages API. * Successful ``GET /models`` probe → OpenAI-style + populates @@ -3566,9 +3582,14 @@ def _model_flow_azure_foundry(config, current_model=""): if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry": current_base_url = str(model_cfg.get("base_url", "") or "") current_api_mode = str(model_cfg.get("api_mode", "") or "") + current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key" + _cur_entra = model_cfg.get("entra") or {} + current_entra = _cur_entra if isinstance(_cur_entra, dict) else {} else: current_base_url = "" current_api_mode = "" + current_auth_mode = "api_key" + current_entra = {} current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or "" @@ -3583,22 +3604,29 @@ def _model_flow_azure_foundry(config, current_model=""): print() if current_base_url: - print(f" Current endpoint: {current_base_url}") + print(f" Current endpoint: {current_base_url}") if current_api_mode: _lbl = ( "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style" ) - print(f" Current API mode: {_lbl}") - if current_api_key: - print(f" Current API key: {current_api_key[:8]}...") + print(f" Current API mode: {_lbl}") + if current_auth_mode == "entra_id": + print(f" Current auth mode: Microsoft Entra ID (keyless)") + elif current_api_key: + print(f" Current auth mode: API key ({current_api_key[:8]}...)") print() # ── Step 1: endpoint URL ───────────────────────────────────────── try: + _placeholder = ( + current_base_url + or "e.g. https://.openai.azure.com/openai/v1 " + "or https://.services.ai.azure.com/anthropic" + ) base_url = input( - f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: " + f"API endpoint URL [{_placeholder}]: " ).strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") @@ -3612,25 +3640,125 @@ def _model_flow_azure_foundry(config, current_model=""): print(f"Invalid URL: {effective_url} (must start with http:// or https://)") return - # ── Step 2: API key ────────────────────────────────────────────── + # ── Step 2: authentication mode ────────────────────────────────── print() + print("Authentication:") + print(" 1. API key (AZURE_FOUNDRY_API_KEY in .env)") + print(" 2. Microsoft Entra ID (managed identity / workload identity / az login)") + print(" Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.") + print(" Requires the 'Azure AI User' role on the Foundry resource.") try: - api_key = getpass.getpass( - f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: " - ).strip() + _auth_default = "2" if current_auth_mode == "entra_id" else "1" + auth_choice = ( + input(f"Authentication mode [1/2] ({_auth_default}): ").strip() + or _auth_default + ) except (KeyboardInterrupt, EOFError): print("\nCancelled.") return + use_entra = auth_choice == "2" + auth_mode_label = "entra_id" if use_entra else "api_key" - effective_key = api_key or current_api_key - if not effective_key: - print("No API key provided. Cancelled.") - return + # ── Step 3: credentials (key OR Entra preflight) ───────────────── + effective_key: str = "" + entra_overrides: dict = {} + token_provider = None # callable when entra + entra_scope = "" - # ── Step 3: auto-detect transport + models ─────────────────────── + if use_entra: + try: + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + SCOPE_AI_AZURE_DEFAULT, + build_token_provider, + describe_active_credential, + has_azure_identity_installed, + ) + except ImportError as exc: + print() + print(f"⚠ Could not import azure-identity adapter: {exc}") + print(" Falling back to API key auth.") + use_entra = False + auth_mode_label = "api_key" + + if use_entra: + print() + if not has_azure_identity_installed(): + print("◐ The 'azure-identity' package is not installed yet.") + print( + " Hermes will install it now (the preflight below " + "triggers the lazy-install). To skip lazy installs, " + "run: pip install azure-identity" + ) + + # Preserve only the optional scope override. Identity selection + # (tenant, user-assigned MI, workload identity, service principal) + # stays in Azure SDK env vars such as AZURE_CLIENT_ID. + _persisted_scope_override = str(current_entra.get("scope") or "").strip() + entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT + + entra_overrides = {} + if _persisted_scope_override: + entra_overrides["scope"] = _persisted_scope_override + + print() + print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...") + _config = EntraIdentityConfig( + scope=entra_scope, + ) + info = describe_active_credential(config=_config, timeout_seconds=10.0) + if info.get("ok"): + env_sources = info.get("env_sources") or [] + tag = ", ".join(env_sources) if env_sources else "default chain" + print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})") + else: + err = info.get("error") or "credential chain exhausted" + hint = info.get("hint") or ( + "Run `az login`, attach a managed identity to this VM, or " + "set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET." + ) + print(f"⚠ {err}") + print(f" Hint: {hint}") + try: + ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + if ans and ans not in ("y", "yes"): + print("Cancelled.") + return + + # Build the token provider for the detection probe (best-effort — + # if the credential chain failed above, this will silently return + # None inside azure_detect and the probe falls back to manual). + try: + token_provider = build_token_provider(config=_config) + except Exception as exc: + print(f"⚠ Could not build token provider for probing: {exc}") + token_provider = None + else: + print() + try: + api_key = getpass.getpass( + f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: " + ).strip() + except (KeyboardInterrupt, EOFError): + print("\nCancelled.") + return + + effective_key = api_key or current_api_key + if not effective_key: + print("No API key provided. Cancelled.") + return + + # ── Step 4: auto-detect transport + models ─────────────────────── print() print("◐ Probing endpoint to auto-detect transport and models...") - detection = azure_detect.detect(effective_url, effective_key) + detection = azure_detect.detect( + effective_url, + api_key=effective_key, + token_provider=token_provider, + ) discovered_models: list[str] = list(detection.models) api_mode: str = detection.api_mode or "" @@ -3665,7 +3793,7 @@ def _model_flow_azure_foundry(config, current_model=""): return api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions" - # ── Step 4: model name ─────────────────────────────────────────── + # ── Step 5: model name ─────────────────────────────────────────── print() effective_model = "" if discovered_models: @@ -3704,15 +3832,17 @@ def _model_flow_azure_foundry(config, current_model=""): print("No model name provided. Cancelled.") return - # ── Step 5: context-length lookup ──────────────────────────────── + # ── Step 6: context-length lookup ──────────────────────────────── ctx_len = azure_detect.lookup_context_length( effective_model, effective_url, - effective_key, + api_key=effective_key, + token_provider=token_provider, ) - # ── Step 6: persist ────────────────────────────────────────────── - save_env_value("AZURE_FOUNDRY_API_KEY", effective_key) + # ── Step 7: persist ────────────────────────────────────────────── + if not use_entra: + save_env_value("AZURE_FOUNDRY_API_KEY", effective_key) cfg = load_config() model = cfg.get("model") @@ -3724,6 +3854,22 @@ def _model_flow_azure_foundry(config, current_model=""): model["base_url"] = effective_url model["api_mode"] = api_mode model["default"] = effective_model + model["auth_mode"] = auth_mode_label + if use_entra: + # Persist only the non-default Entra scope so config.yaml stays tidy. + # Azure identity selection stays in standard AZURE_* env vars. + clean_entra: dict = {} + for key in ("scope",): + val = entra_overrides.get(key) + if val: + clean_entra[key] = val + if clean_entra: + model["entra"] = clean_entra + elif "entra" in model: + del model["entra"] + else: + if "entra" in model: + del model["entra"] if ctx_len: model["context_length"] = ctx_len @@ -3739,10 +3885,14 @@ def _model_flow_azure_foundry(config, current_model=""): save_env_value("OPENAI_API_KEY", "") mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style" + auth_label = ( + "Microsoft Entra ID (keyless)" if use_entra else "API key" + ) print() print("✓ Azure Foundry configured:") print(f" Endpoint: {effective_url}") print(f" API mode: {mode_label}") + print(f" Auth: {auth_label}") print(f" Model: {effective_model}") if ctx_len: print(f" Context length: {ctx_len:,} tokens") diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index de32131d861..11fd9f564ca 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -744,6 +744,15 @@ def _resolve_azure_foundry_runtime( strips a trailing ``/v1`` for Anthropic-style endpoints because the Anthropic SDK appends ``/v1/messages`` internally. + When ``model.auth_mode == "entra_id"`` (and the model is OpenAI-style), + the returned ``api_key`` is a zero-arg callable produced by + :func:`agent.azure_identity_adapter.build_token_provider` rather than + a string. Downstream code that constructs an OpenAI SDK client passes + this through unchanged (the SDK accepts ``Callable[[], str]`` for + ``api_key`` and calls it before every request). Code paths that need + a string (logging, manual HTTP probes, header injection) must use the + helpers in ``agent.azure_identity_adapter``. + Raises :class:`AuthError` when required values are missing. """ explicit_api_key = str(explicit_api_key or "").strip() @@ -752,9 +761,15 @@ def _resolve_azure_foundry_runtime( cfg_provider = str(model_cfg.get("provider") or "").strip().lower() cfg_base_url = "" cfg_api_mode = "chat_completions" + cfg_auth_mode = "api_key" + cfg_entra: Dict[str, Any] = {} if cfg_provider == "azure-foundry": cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions" + cfg_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key" + _entra = model_cfg.get("entra") + if isinstance(_entra, dict): + cfg_entra = _entra # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4 # reasoning models as Responses-API-only. Calling /chat/completions @@ -780,6 +795,79 @@ def _resolve_azure_foundry_runtime( "the AZURE_FOUNDRY_BASE_URL environment variable." ) + # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1 + # we inherited from the configured base_url to avoid double-/v1 paths. + if cfg_api_mode == "anthropic_messages": + base_url = re.sub(r"/v1/?$", "", base_url) + + # ── Entra ID (Microsoft Foundry recommended path) ────────────────── + # + # OpenAI-style endpoints use the OpenAI SDK's native callable + # ``api_key=`` contract — the SDK mints a fresh JWT per request + # automatically. + # + # Anthropic-style endpoints (Claude on Foundry) take the callable + # too: :func:`agent.anthropic_adapter.build_anthropic_client` + # detects the callable and constructs an ``httpx.Client`` with a + # request event hook that injects a fresh ``Authorization: Bearer`` + # header per request (the Anthropic SDK does not accept callables + # natively). From the runtime resolver's perspective both modes + # are identical — return the callable api_key and let the + # downstream SDK wrapper handle the contract difference. + if cfg_auth_mode == "entra_id": + if explicit_api_key: + # User passed --api-key on the CLI while config says entra_id — + # honour the explicit string (escape hatch for one-off testing). + api_key: Any = explicit_api_key + source = "explicit" + auth_mode = "api_key" + else: + try: + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + SCOPE_AI_AZURE_DEFAULT, + build_token_provider, + ) + except Exception as exc: + raise AuthError( + "Azure Foundry Entra ID auth requires the 'azure-identity' " + "package. Install it with: pip install azure-identity " + f"(import failed: {exc})" + ) from exc + + scope = ( + str(cfg_entra.get("scope") or "").strip() + or SCOPE_AI_AZURE_DEFAULT + ) + try: + entra_config = EntraIdentityConfig( + scope=scope, + ) + token_provider = build_token_provider(config=entra_config) + except ImportError as exc: + raise AuthError(str(exc)) from exc + api_key = token_provider + source = "entra_id" + auth_mode = "entra_id" + + clean_entra = {} + if auth_mode == "entra_id": + configured_scope = str(cfg_entra.get("scope") or "").strip() + if configured_scope: + clean_entra["scope"] = configured_scope + + return { + "provider": "azure-foundry", + "api_mode": cfg_api_mode, + "base_url": base_url, + "api_key": api_key, + "auth_mode": auth_mode, + "entra": clean_entra, + "source": source, + "requested_provider": requested_provider, + } + + # ── Static API key (legacy / default) ────────────────────────────── api_key = explicit_api_key if not api_key: try: @@ -792,20 +880,19 @@ def _resolve_azure_foundry_runtime( if not api_key: raise AuthError( "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in " - "~/.hermes/.env or run 'hermes model' to configure." + "~/.hermes/.env or run 'hermes model' to configure. To use " + "keyless Microsoft Entra ID auth instead, set " + "model.auth_mode: entra_id in config.yaml (or pick " + "'Microsoft Entra ID' in 'hermes model')." ) - # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1 - # we inherited from the configured base_url to avoid double-/v1 paths. - if cfg_api_mode == "anthropic_messages": - base_url = re.sub(r"/v1/?$", "", base_url) - source = "explicit" if (explicit_api_key or explicit_base_url) else "config" return { "provider": "azure-foundry", "api_mode": cfg_api_mode, "base_url": base_url, "api_key": api_key, + "auth_mode": "api_key", "source": source, "requested_provider": requested_provider, } @@ -1232,7 +1319,7 @@ def resolve_runtime_provider( cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/") base_url = cfg_base_url or "https://api.anthropic.com" - # For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly — + # For Microsoft Foundry endpoints, use ANTHROPIC_API_KEY directly — # Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure. # Azure keys don't start with "sk-ant-" so resolve_anthropic_token() # would find the Claude Code OAuth token first (priority 3) and return diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index ebf053a6257..a2db00ac2c3 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -1288,9 +1288,15 @@ def _truncate_token(value: Optional[str], visible: int = 6) -> str: OAuth access token. JWT prefixes (the part before the first dot) are stripped first when present so the visible suffix is always part of the signing region rather than a meaningless header chunk. + + Returns the Entra-ID placeholder when handed a callable (Azure Foundry + bearer provider) — the callable is NEVER invoked here. """ if not value: return "" + if callable(value) and not isinstance(value, str): + # Entra ID bearer provider — never reveal a minted token in the UI. + return "" s = str(value) if "." in s and s.count(".") >= 2: # Looks like a JWT — show the trailing piece of the signature only. diff --git a/plugins/model-providers/azure-foundry/__init__.py b/plugins/model-providers/azure-foundry/__init__.py index a8e29f241c7..50968805f55 100644 --- a/plugins/model-providers/azure-foundry/__init__.py +++ b/plugins/model-providers/azure-foundry/__init__.py @@ -1,4 +1,4 @@ -"""Azure AI Foundry provider profile. +"""Microsoft Foundry provider profile. Azure Foundry exposes an OpenAI-compatible endpoint; users supply their own base URL at setup since endpoints are per-resource. @@ -11,7 +11,7 @@ azure_foundry = ProviderProfile( name="azure-foundry", aliases=("azure", "azure-ai-foundry", "azure-ai"), display_name="Azure Foundry", - description="Azure AI Foundry — OpenAI-compatible endpoint (user-supplied base URL)", + description="Microsoft Foundry - OpenAI-compatible endpoint (user-supplied base URL)", signup_url="https://ai.azure.com/", env_vars=("AZURE_FOUNDRY_API_KEY", "AZURE_FOUNDRY_BASE_URL"), base_url="", # per-resource; user provides at setup diff --git a/plugins/model-providers/azure-foundry/plugin.yaml b/plugins/model-providers/azure-foundry/plugin.yaml index 791f82b75a2..806e44d0b28 100644 --- a/plugins/model-providers/azure-foundry/plugin.yaml +++ b/plugins/model-providers/azure-foundry/plugin.yaml @@ -1,5 +1,5 @@ name: azure-foundry-provider kind: model-provider version: 1.0.0 -description: Azure AI Foundry +description: Microsoft Foundry author: Nous Research diff --git a/pyproject.toml b/pyproject.toml index cb3c515e021..344a9721a39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,6 +125,7 @@ acp = ["agent-client-protocol==0.9.0"] # 4. Run `uv lock` to regenerate transitives. # 5. Optionally re-add to [all] only after a few days of clean operation. bedrock = ["boto3==1.42.89"] +azure-identity = ["azure-identity==1.25.3"] termux = [ # Baseline Android / Termux path for reliable fresh installs. "python-telegram-bot[webhooks]==22.6", diff --git a/run_agent.py b/run_agent.py index 484f9f84fd9..185e6afb12e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1428,7 +1428,11 @@ class AIAgent: prefix = f"HTTP {status_code}: " if status_code else "" return f"{prefix}{raw[:500]}" - def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]: + def _mask_api_key_for_logs(self, key: Any) -> Optional[str]: + # Azure Foundry Entra ID bearer providers are callables — never + # invoke them in log paths; identify the auth surface instead. + if callable(key) and not isinstance(key, str): + return "" if not key: return None if len(key) <= 12: diff --git a/tests/acp_adapter/test_detect_provider_entra.py b/tests/acp_adapter/test_detect_provider_entra.py new file mode 100644 index 00000000000..1a46ac79537 --- /dev/null +++ b/tests/acp_adapter/test_detect_provider_entra.py @@ -0,0 +1,87 @@ +"""Regression tests for ACP adapter detection under Azure Foundry Entra ID. + +The ACP adapter's ``detect_provider`` previously gated on +``isinstance(api_key, str)`` and returned ``None`` for any runtime that +returned a callable ``api_key`` — i.e. Azure Foundry with +``auth_mode=entra_id``. Downstream, ACP would default to +``"openrouter"`` and reject the legitimate provider in its auth handshake. +This test pins the callable-aware fix so it never regresses. +""" + +from __future__ import annotations + +from unittest.mock import patch + + +class TestDetectProviderEntra: + def test_callable_api_key_is_a_valid_credential(self): + """A runtime returning a callable ``api_key`` (Entra bearer token + provider) must be detected as a configured provider, not + ``None``.""" + from acp_adapter import auth as _acp_auth + + def _fake_runtime(**_kwargs): + return { + "provider": "azure-foundry", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_key": lambda: "jwt-fresh", + } + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=_fake_runtime, + ): + assert _acp_auth.detect_provider() == "azure-foundry" + assert _acp_auth.has_provider() is True + + def test_string_api_key_still_works(self): + from acp_adapter import auth as _acp_auth + + def _fake_runtime(**_kwargs): + return { + "provider": "openrouter", + "api_key": "sk-or-static-key", + } + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=_fake_runtime, + ): + assert _acp_auth.detect_provider() == "openrouter" + + def test_empty_string_api_key_returns_none(self): + from acp_adapter import auth as _acp_auth + + def _fake_runtime(**_kwargs): + return {"provider": "openrouter", "api_key": ""} + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=_fake_runtime, + ): + assert _acp_auth.detect_provider() is None + + def test_missing_provider_returns_none(self): + """A callable api_key without a provider is still ``None`` — + we don't synthesize a provider name from the credential shape.""" + from acp_adapter import auth as _acp_auth + + def _fake_runtime(**_kwargs): + return {"api_key": lambda: "jwt-fresh", "provider": ""} + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=_fake_runtime, + ): + assert _acp_auth.detect_provider() is None + + def test_resolver_exception_returns_none(self): + from acp_adapter import auth as _acp_auth + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=RuntimeError("simulated"), + ): + assert _acp_auth.detect_provider() is None diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 3d19c32dcaa..10f82ca95e0 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -9,6 +9,7 @@ import pytest from agent.prompt_caching import apply_anthropic_cache_control from agent.anthropic_adapter import ( + _is_azure_anthropic_endpoint, _is_oauth_token, _refresh_oauth_token, _to_plain_data, @@ -121,6 +122,20 @@ class TestBuildAnthropicClient: betas = kwargs["default_headers"]["anthropic-beta"] assert "context-1m-2025-08-07" in betas + def test_azure_anthropic_endpoint_detection_is_host_and_path_scoped(self): + assert _is_azure_anthropic_endpoint( + "https://example.services.ai.azure.com/models/anthropic" + ) is True + assert _is_azure_anthropic_endpoint( + "https://example.services.ai.azure.us/anthropic" + ) is True + assert _is_azure_anthropic_endpoint( + "https://example.openai.azure.com/openai/v1" + ) is False + assert _is_azure_anthropic_endpoint( + "https://management.azure.com/anthropic" + ) is False + def test_bedrock_client_keeps_context_1m_beta(self): with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk: mock_sdk.AnthropicBedrock = MagicMock() diff --git a/tests/agent/test_auxiliary_client_azure_foundry.py b/tests/agent/test_auxiliary_client_azure_foundry.py new file mode 100644 index 00000000000..dea08a5caa2 --- /dev/null +++ b/tests/agent/test_auxiliary_client_azure_foundry.py @@ -0,0 +1,350 @@ +"""Tests for auxiliary client routing of the ``azure-foundry`` provider. + +Covers the dedicated branch in ``agent.auxiliary_client.resolve_provider_client`` +that delegates to :func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime` +instead of falling into the generic ``resolve_api_key_provider_credentials`` +path (which only knows about ``AZURE_FOUNDRY_API_KEY`` and would 401 for +Entra ID users and miss ``model.base_url`` overrides for api-key users +with non-standard Foundry-projects endpoints). + +Pinned scenarios: + + * ``auth_mode: api_key`` → plain OpenAI client with the static string + key for ``chat_completions``. + * ``auth_mode: entra_id`` + ``chat_completions`` → plain OpenAI + client with a callable ``api_key`` (the bearer-token provider) — + confirms the callable survives the auxiliary path end-to-end. + * ``auth_mode: entra_id`` + GPT-5.x model → CodexAuxiliaryClient + wrapping the OpenAI client (api_mode auto-upgrades to + codex_responses). + * Anthropic-style + entra_id → rejected at the runtime resolver, + so the aux path returns ``(None, None)``. + * Failure path when no model is configured returns ``(None, None)`` + cleanly so the auto chain falls through. +""" + +from __future__ import annotations + +import sys +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture(autouse=True) +def _reset_credential_cache(): + from agent.azure_identity_adapter import reset_credential_cache + reset_credential_cache() + yield + reset_credential_cache() + + +@pytest.fixture +def fake_azure_identity(monkeypatch): + """Stand-in for azure.identity (keeps CI hermetic when the SDK is + not installed).""" + from agent import azure_identity_adapter as _adapter + + last = {"scope": None} + + def _provider(scope): + return lambda: f"jwt-for-{scope}" + + fake_module = SimpleNamespace( + DefaultAzureCredential=lambda **kw: SimpleNamespace( + kwargs=kw, + get_token=lambda scope: SimpleNamespace(token="fake", expires_on=9999999999), + ), + get_bearer_token_provider=lambda credential, scope: ( + last.__setitem__("scope", scope), + _provider(scope), + )[-1], + ) + monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module) + monkeypatch.setitem(sys.modules, "azure.identity", fake_module) + return last + + +@pytest.fixture +def patch_load_config(monkeypatch): + """Helper to set model_cfg seen by _try_azure_foundry.""" + def _apply(model_cfg): + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"model": model_cfg}, + ) + return _apply + + +# --------------------------------------------------------------------------- +# auth_mode: api_key (default) — regression for the legacy path +# --------------------------------------------------------------------------- + + +class TestAuxAzureFoundryApiKey: + def test_chat_completions_returns_plain_openai_client(self, monkeypatch, patch_load_config): + from agent.auxiliary_client import _try_azure_foundry + from openai import OpenAI as _OpenAI + + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key") + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "default": "gpt-4o", + }) + client, resolved = _try_azure_foundry(model="gpt-4o") + assert client is not None + assert resolved == "gpt-4o" + assert isinstance(client, _OpenAI) + assert client.api_key == "sk-azure-static-key" + + def test_codex_responses_wraps_in_codex_aux_client(self, monkeypatch, patch_load_config): + from agent.auxiliary_client import _try_azure_foundry, CodexAuxiliaryClient + + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key") + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "default": "gpt-5.4-mini", + }) + # GPT-5.x → runtime auto-upgrades to codex_responses + client, resolved = _try_azure_foundry(model="gpt-5.4-mini") + assert resolved == "gpt-5.4-mini" + assert isinstance(client, CodexAuxiliaryClient) + assert client.api_key == "sk-azure-static-key" + + def test_no_key_returns_none(self, monkeypatch, patch_load_config): + from agent.auxiliary_client import _try_azure_foundry + + monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False) + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "default": "gpt-4o", + }) + client, resolved = _try_azure_foundry(model="gpt-4o") + assert client is None + assert resolved is None + + def test_no_model_returns_none(self, monkeypatch, patch_load_config): + """Azure has no fallback aux model — fail soft so the auto chain + can try other providers.""" + from agent.auxiliary_client import _try_azure_foundry + + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key") + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + # No default model + }) + client, resolved = _try_azure_foundry() + assert client is None + assert resolved is None + + +# --------------------------------------------------------------------------- +# auth_mode: entra_id — callable api_key survives end-to-end +# --------------------------------------------------------------------------- + + +class TestAuxAzureFoundryEntra: + def test_callable_api_key_reaches_openai_constructor( + self, monkeypatch, fake_azure_identity, patch_load_config, + ): + """The token provider callable must arrive at ``OpenAI(api_key=...)`` + intact — never stringified to ``"no-key-required"`` or to the + SDK-internal empty-string representation BEFORE we hand it off. + + We assert on the public SDK contract (constructor receives the + callable) rather than ``client.api_key``, because OpenAI 2.24.0 + stores callable api_keys in a private attribute and exposes + ``client.api_key`` as ``""``. The SDK still calls the callable + per request to mint ``Authorization: Bearer ``; that + behaviour is the documented Microsoft/OpenAI contract we rely on. + """ + from agent import auxiliary_client as _aux + + received = {} + + class _FakeOpenAI: + def __init__(self, **kwargs): + received.update(kwargs) + # Mirror the fields downstream callers read. + self.api_key = kwargs.get("api_key", "") + self.base_url = kwargs.get("base_url", "") + + monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI) + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "default": "gpt-4o", + }) + client, resolved = _aux._try_azure_foundry(model="gpt-4o") + assert client is not None + assert resolved == "gpt-4o" + # Public-contract assertion: the OpenAI SDK constructor saw the + # callable, exactly as Microsoft's Foundry sample requires. + assert callable(received["api_key"]) + assert not isinstance(received["api_key"], str) + assert received["api_key"]().startswith("jwt-for-") + # Base URL forwarded verbatim (no /responses suffix stripping + # in this path — that's a separate concern handled by the + # runtime resolver only when the user re-saves config). + assert received["base_url"] == "https://r.openai.azure.com/openai/v1" + + def test_codex_responses_with_entra_wraps_correctly( + self, monkeypatch, fake_azure_identity, patch_load_config, + ): + """GPT-5.x deployment on Entra ID — auto-upgraded to + codex_responses, wrapped in CodexAuxiliaryClient, callable + api_key handed to the underlying OpenAI SDK.""" + from agent import auxiliary_client as _aux + + received = {} + + class _FakeOpenAI: + def __init__(self, **kwargs): + received.update(kwargs) + self.api_key = kwargs.get("api_key", "") + self.base_url = kwargs.get("base_url", "") + + monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI) + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "default": "gpt-5.4-mini", + }) + client, resolved = _aux._try_azure_foundry(model="gpt-5.4-mini") + assert resolved == "gpt-5.4-mini" + assert isinstance(client, _aux.CodexAuxiliaryClient) + # The Codex wrapper received an OpenAI client built with the + # callable api_key — verify against the SDK constructor record, + # not the wrapper attribute (which mirrors the SDK's empty- + # string representation). + assert callable(received["api_key"]) + assert received["api_key"]().startswith("jwt-for-") + + def test_entra_anthropic_messages_uses_bearer_hook( + self, monkeypatch, fake_azure_identity, patch_load_config, + ): + """Entra ID + anthropic_messages: runtime returns a callable + api_key; ``_maybe_wrap_anthropic`` → ``build_anthropic_client`` + detects the callable and installs the bearer-injecting httpx + event hook on a custom ``httpx.Client`` passed to the + Anthropic SDK via ``http_client=``.""" + from agent import auxiliary_client as _aux + from agent import anthropic_adapter as _anthropic + + received = {} + + class _FakeOpenAI: + def __init__(self, **kwargs): + received["openai"] = kwargs + self.api_key = kwargs.get("api_key", "") + self.base_url = kwargs.get("base_url", "") + + class _FakeAnthropicSDK: + class Anthropic: + def __init__(self, **kwargs): + received["anthropic"] = kwargs + + monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI) + monkeypatch.setattr(_anthropic, "_get_anthropic_sdk", lambda: _FakeAnthropicSDK) + + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.services.ai.azure.com/anthropic", + "api_mode": "anthropic_messages", + "auth_mode": "entra_id", + "default": "claude-sonnet-4-5", + }) + client, resolved = _aux._try_azure_foundry(model="claude-sonnet-4-5") + assert client is not None + assert resolved == "claude-sonnet-4-5" + # The Anthropic SDK constructor received a custom http_client + # (the bearer-injecting hook) and a placeholder auth_token. + anthropic_kwargs = received.get("anthropic") or {} + assert "http_client" in anthropic_kwargs, ( + "build_anthropic_client must pass a custom http_client when " + "given a callable api_key, otherwise the SDK cannot mint " + "fresh tokens per request" + ) + assert anthropic_kwargs.get("auth_token") == "entra-id-bearer-via-http-hook" + # Verify the http_client actually has our event hook installed. + http_client = anthropic_kwargs["http_client"] + hooks = getattr(http_client, "event_hooks", {}) + assert "request" in hooks and len(hooks["request"]) >= 1 + + +# --------------------------------------------------------------------------- +# resolve_provider_client → azure-foundry dispatch +# --------------------------------------------------------------------------- + + +class TestResolveProviderClientAzureFoundry: + def test_dispatches_to_azure_branch_not_generic_api_key_path( + self, monkeypatch, fake_azure_identity, patch_load_config, + ): + """End-to-end: the public ``resolve_provider_client`` entry + point must take the dedicated azure-foundry branch, NOT the + generic api-key registry path that would call + ``resolve_api_key_provider_credentials`` and return None for + Entra users.""" + from agent import auxiliary_client as _aux + + received = {} + + class _FakeOpenAI: + def __init__(self, **kwargs): + received.update(kwargs) + self.api_key = kwargs.get("api_key", "") + self.base_url = kwargs.get("base_url", "") + + monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI) + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "default": "gpt-4o", + }) + client, resolved = _aux.resolve_provider_client("azure-foundry", "gpt-4o") + assert client is not None + assert resolved == "gpt-4o" + # The callable made it through resolve_provider_client → _try_azure_foundry + # → OpenAI(api_key=...). + assert callable(received["api_key"]) + + def test_warns_and_returns_none_on_failure( + self, monkeypatch, patch_load_config, caplog, + ): + """When azure-foundry is requested but cannot be resolved + (e.g. no model + no key), we return (None, None) and log a + clear warning pointing at ``hermes doctor``.""" + import logging + from agent.auxiliary_client import resolve_provider_client + + monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False) + patch_load_config({ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + # No default → resolver yields no model → bail + }) + with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): + client, resolved = resolve_provider_client("azure-foundry") + assert client is None + assert resolved is None + assert any( + "azure-foundry" in rec.message and "hermes doctor" in rec.message + for rec in caplog.records + ) diff --git a/tests/agent/test_azure_identity_adapter.py b/tests/agent/test_azure_identity_adapter.py new file mode 100644 index 00000000000..a569709e00d --- /dev/null +++ b/tests/agent/test_azure_identity_adapter.py @@ -0,0 +1,662 @@ +"""Tests for the Microsoft Entra ID adapter (agent/azure_identity_adapter.py). + +Covers: + - Scope resolution per Azure host shape + - Display masking for callable + string + None inputs + - Cache-fingerprint stability under callable refresh + - is_token_provider truthiness on callables vs strings + - EntraIdentityConfig serialization round-trip + - Token provider construction with mocked azure-identity + - Credential cache reuse + reset + - has_azure_identity_credentials timeout / failure paths + - describe_active_credential structural reporting + - Lazy-install error path when azure-identity absent + lazy installs + disabled + +We mock azure.identity at the import boundary rather than hitting any +real Azure endpoint. Tests must remain hermetic per AGENTS.md. +""" + +from __future__ import annotations + +import sys +from collections.abc import Callable +from types import SimpleNamespace +from typing import cast +from unittest.mock import MagicMock, patch + +import pytest + +# Ensure we always import a fresh adapter module — credential caches in +# the adapter persist across tests otherwise, polluting assertions +# about cache invalidation. +@pytest.fixture(autouse=True) +def _reset_adapter_cache(): + from agent.azure_identity_adapter import reset_credential_cache + reset_credential_cache() + yield + reset_credential_cache() + + +# --------------------------------------------------------------------------- +# Scope constant +# --------------------------------------------------------------------------- + + +class TestEntraScopeConstant: + """Pin the Microsoft-documented Foundry inference scope. + + Microsoft's official samples for both ``*.openai.azure.com`` and + ``*.services.ai.azure.com`` use ``https://ai.azure.com/.default``. + The older ``cognitiveservices.azure.com/.default`` is the + control-plane scope and is rejected for inference by newer + Azure OpenAI / Foundry resources. + + Users with sovereign-cloud or unusual-tenant requirements pass the + scope explicitly via ``model.entra.scope`` in ``config.yaml``. + + Refs: + * https://learn.microsoft.com/azure/ai-foundry/openai/how-to/managed-identity + * https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id + """ + + def test_default_scope_matches_microsoft_documentation(self): + from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT + assert SCOPE_AI_AZURE_DEFAULT == "https://ai.azure.com/.default" + + +# --------------------------------------------------------------------------- +# Cache fingerprint + http-bearer helpers +# --------------------------------------------------------------------------- + + +class TestMaterializeBearerForHttp: + """The only helper that mints a real bearer JWT — must call the + callable exactly once and never fall through to display masking.""" + + def test_callable_is_invoked_and_returns_token(self): + from agent.azure_identity_adapter import materialize_bearer_for_http + + invoked = {"count": 0} + + def provider(): + invoked["count"] += 1 + return "fresh-jwt" + + assert materialize_bearer_for_http(provider) == "fresh-jwt" + assert invoked["count"] == 1 + + def test_string_passes_through(self): + from agent.azure_identity_adapter import materialize_bearer_for_http + assert materialize_bearer_for_http("plain-key") == "plain-key" + + def test_callable_returning_empty_raises(self): + from agent.azure_identity_adapter import materialize_bearer_for_http + with pytest.raises(ValueError): + materialize_bearer_for_http(lambda: "") + + def test_empty_string_raises(self): + from agent.azure_identity_adapter import materialize_bearer_for_http + with pytest.raises(ValueError): + materialize_bearer_for_http("") + with pytest.raises(ValueError): + materialize_bearer_for_http(None) + + +# --------------------------------------------------------------------------- +# build_bearer_http_client — the Anthropic-on-Foundry bridge +# --------------------------------------------------------------------------- + + +class TestBuildBearerHttpClient: + """``build_bearer_http_client`` returns an ``httpx.Client`` whose + request event hook mints a fresh JWT per outbound request. This is + how Entra ID auth reaches the Anthropic SDK (which does not accept + callable ``auth_token``).""" + + def test_returns_httpx_client_with_request_hook(self): + import httpx + from agent.azure_identity_adapter import build_bearer_http_client + + client = build_bearer_http_client(lambda: "jwt") + try: + assert isinstance(client, httpx.Client) + hooks = client.event_hooks.get("request", []) + assert len(hooks) >= 1 + finally: + client.close() + + def test_hook_overrides_authorization_header(self): + import httpx + from agent.azure_identity_adapter import build_bearer_http_client + + minted_tokens = [] + + def provider(): + minted_tokens.append(f"jwt-{len(minted_tokens) + 1}") + return minted_tokens[-1] + + client = build_bearer_http_client(provider) + try: + hook = client.event_hooks["request"][0] + # Build a request with conflicting pre-set headers and verify + # the hook strips them and installs the fresh bearer. + req = httpx.Request( + "POST", "https://example.com/v1/messages", + headers={ + "Authorization": "Bearer stale-token", + "api-key": "static-key", + "x-api-key": "static-key", + }, + json={"hello": "world"}, + ) + hook(req) + assert req.headers["Authorization"] == "Bearer jwt-1" + # The static-key headers must be stripped — sending both + # auth values would be ambiguous on Azure. + assert "api-key" not in req.headers + assert "x-api-key" not in req.headers + + # Second invocation mints a fresh token. + req2 = httpx.Request("GET", "https://example.com/v1/models") + hook(req2) + assert req2.headers["Authorization"] == "Bearer jwt-2" + assert len(minted_tokens) == 2 + finally: + client.close() + + def test_hook_strips_auth_headers_and_warns_when_token_provider_fails(self, caplog): + """When the token provider fails (chain exhausted, IMDS down, az + login expired), the hook must: + 1. Log at WARNING level so the misconfiguration is visible at + default log level (not buried at DEBUG). + 2. Strip any pre-set Authorization headers — including the + placeholder ``entra-id-bearer-via-http-hook`` sentinel that + :func:`_build_anthropic_client_with_bearer_hook` sets on the + Anthropic SDK constructor. This produces a clean + "missing auth" 401 from Azure rather than a sentinel-bearing + 401 that's harder to diagnose AND avoids leaking the + sentinel string into upstream access logs. + """ + import logging + import httpx + from agent.azure_identity_adapter import build_bearer_http_client + + def bad_provider(): + return "" # empty token → materialize_bearer_for_http raises + + client = build_bearer_http_client(bad_provider) + try: + hook = client.event_hooks["request"][0] + req = httpx.Request( + "POST", "https://example.com/v1/messages", + headers={ + "Authorization": "Bearer entra-id-bearer-via-http-hook", + "api-key": "leaked-placeholder", + }, + ) + with caplog.at_level(logging.WARNING, logger="agent.azure_identity_adapter"): + hook(req) # Must not raise. + # Pre-set auth headers stripped — no sentinel makes it to Azure. + assert "Authorization" not in req.headers + assert "api-key" not in req.headers + # WARNING was logged so the user sees the misconfiguration. + assert any( + rec.levelno == logging.WARNING and "Entra ID token provider" in rec.message + for rec in caplog.records + ) + finally: + client.close() + + def test_rejects_non_callable_provider(self): + from agent.azure_identity_adapter import build_bearer_http_client + with pytest.raises(ValueError): + build_bearer_http_client(cast(Callable[[], str], "plain-string-not-callable")) + with pytest.raises(ValueError): + build_bearer_http_client(cast(Callable[[], str], None)) + + def test_forwards_httpx_kwargs(self): + import httpx + from agent.azure_identity_adapter import build_bearer_http_client + + timeout = httpx.Timeout(60.0, connect=5.0) + client = build_bearer_http_client(lambda: "jwt", timeout=timeout) + try: + # httpx stores the timeout per-pool; just sanity-check it was + # accepted without TypeError. + assert client is not None + finally: + client.close() + + +class TestIsTokenProvider: + def test_callable_is_token_provider(self): + from agent.azure_identity_adapter import is_token_provider + assert is_token_provider(lambda: "x") is True + + def test_string_is_not_token_provider(self): + from agent.azure_identity_adapter import is_token_provider + assert is_token_provider("static-key") is False + # ``str`` instances are technically callable in some edge cases + # — confirm they're never classified as token providers. + assert is_token_provider("") is False + + +# --------------------------------------------------------------------------- +# EntraIdentityConfig +# --------------------------------------------------------------------------- + + +class TestEntraIdentityConfig: + """The serializable config that crosses multiprocessing boundaries — + must round-trip through dict cleanly and never lose fields.""" + + def test_to_dict_round_trip(self): + from agent.azure_identity_adapter import EntraIdentityConfig + cfg = EntraIdentityConfig( + scope="https://ai.azure.com/.default", + exclude_interactive_browser=False, + ) + rebuilt = EntraIdentityConfig.from_dict(cfg.to_dict()) + assert rebuilt == cfg + + def test_from_dict_handles_empty_strings(self): + from agent.azure_identity_adapter import EntraIdentityConfig + cfg = EntraIdentityConfig.from_dict({ + "scope": "", + "client_id": None, + }) + # Empty scope falls back to default + assert cfg.scope.endswith("/.default") + + def test_from_dict_ignores_legacy_identity_keys(self): + """Old config.yaml that still has model.entra.client_id / + tenant_id / authority should not crash from_dict — those values + are now read from AZURE_* env vars by azure-identity directly.""" + from agent.azure_identity_adapter import EntraIdentityConfig + cfg = EntraIdentityConfig.from_dict({ + "tenant_id": "legacy-tenant", + "authority": "https://login.partner.microsoftonline.cn", + "client_id": "user-mi-client", + }) + # Legacy keys silently ignored — no crash, no surprise field on the dataclass. + assert not hasattr(cfg, "client_id") + assert not hasattr(cfg, "tenant_id") + assert not hasattr(cfg, "authority") + + def test_constructor_normalizes_empty_scope(self): + from agent.azure_identity_adapter import EntraIdentityConfig + cfg = EntraIdentityConfig(scope="") + assert cfg.scope.endswith("/.default") + + def test_from_dict_default_scope_override(self): + from agent.azure_identity_adapter import EntraIdentityConfig + cfg = EntraIdentityConfig.from_dict( + {"scope": ""}, + default_scope="https://custom.example/.default", + ) + assert cfg.scope == "https://custom.example/.default" + + def test_dataclass_is_frozen(self): + # Frozen dataclasses are hashable / safe to pass through caches. + from agent.azure_identity_adapter import EntraIdentityConfig + cfg = EntraIdentityConfig() + with pytest.raises((AttributeError, Exception)): + setattr(cfg, "scope", "mutated") + + +# --------------------------------------------------------------------------- +# Credential / token provider construction +# --------------------------------------------------------------------------- + + +class _FakeAzureIdentity: + """Stand-in for the ``azure.identity`` module. + + Captures kwargs passed to ``DefaultAzureCredential`` so tests can + assert how config flows into the SDK. + """ + + def __init__(self): + self.last_credential_kwargs = None + self.last_scope = None + self.credential_count = 0 + + def DefaultAzureCredential(self, **kwargs): # noqa: N802 — match SDK + self.last_credential_kwargs = kwargs + self.credential_count += 1 + return SimpleNamespace( + get_token=lambda scope: SimpleNamespace(token="fake-jwt", expires_on=9999999999), + kwargs=kwargs, + ) + + def get_bearer_token_provider(self, credential, scope): + self.last_scope = scope + # Return a callable that mints a token when invoked. + return lambda: f"jwt-for-{scope}" + + +@pytest.fixture +def fake_azure_identity(monkeypatch): + """Install a fake azure.identity into sys.modules and stub the + adapter's `_require_azure_identity` so all tests use the fake.""" + fake = _FakeAzureIdentity() + + fake_module = SimpleNamespace( + DefaultAzureCredential=fake.DefaultAzureCredential, + get_bearer_token_provider=fake.get_bearer_token_provider, + ) + monkeypatch.setitem(sys.modules, "azure", SimpleNamespace(identity=fake_module)) + monkeypatch.setitem(sys.modules, "azure.identity", fake_module) + + # The adapter's `_require_azure_identity` does its own import, so + # patch that too to make sure tests never hit the real package's + # singleton state. + from agent import azure_identity_adapter as _adapter + monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module) + + return fake + + +class TestBuildCredential: + def test_default_kwargs_are_minimal(self, fake_azure_identity): + """SDK default for ``exclude_interactive_browser_credential`` is + True; we only pass it when the user opts IN to interactive + browser auth. Tenant / authority / service principal config + flow through the standard ``AZURE_*`` env vars (read by + azure-identity directly), not Hermes config kwargs.""" + from agent.azure_identity_adapter import EntraIdentityConfig, build_credential + cred = build_credential(EntraIdentityConfig()) + kwargs = fake_azure_identity.last_credential_kwargs + # Default config should produce empty kwargs — SDK uses its own + # defaults plus env-var-driven settings. + assert kwargs == {} + assert cred is not None + + def test_interactive_browser_opt_in(self, fake_azure_identity): + """When the user explicitly sets + ``exclude_interactive_browser=False``, the SDK kwarg is set to + False. Without the opt-in we don't pass the kwarg at all (SDK + default is True / browser excluded).""" + from agent.azure_identity_adapter import EntraIdentityConfig, build_credential + build_credential(EntraIdentityConfig(exclude_interactive_browser=False)) + kwargs = fake_azure_identity.last_credential_kwargs + assert kwargs["exclude_interactive_browser_credential"] is False + + def test_credential_is_cached_per_config(self, fake_azure_identity): + from agent.azure_identity_adapter import EntraIdentityConfig, build_credential + cfg = EntraIdentityConfig(scope="s1") + c1 = build_credential(cfg) + c2 = build_credential(cfg) + assert c1 is c2 + assert fake_azure_identity.credential_count == 1 + + def test_distinct_configs_get_distinct_credentials(self, fake_azure_identity): + from agent.azure_identity_adapter import EntraIdentityConfig, build_credential + c1 = build_credential(EntraIdentityConfig(scope="s1")) + c2 = build_credential(EntraIdentityConfig(scope="s2")) + assert c1 is not c2 + assert fake_azure_identity.credential_count == 2 + + def test_reset_cache_invalidates(self, fake_azure_identity): + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + build_credential, + reset_credential_cache, + ) + cfg = EntraIdentityConfig(scope="x") + c1 = build_credential(cfg) + reset_credential_cache() + c2 = build_credential(cfg) + assert c1 is not c2 + + +class TestBuildTokenProvider: + def test_returns_callable_for_scope(self, fake_azure_identity): + from agent.azure_identity_adapter import build_token_provider + provider = build_token_provider(scope="https://ai.azure.com/.default") + assert callable(provider) + assert provider() == "jwt-for-https://ai.azure.com/.default" + assert fake_azure_identity.last_scope == "https://ai.azure.com/.default" + + def test_falls_back_to_default_scope_when_unspecified(self, fake_azure_identity): + """When neither ``scope`` nor ``config`` is provided, + ``build_token_provider`` uses ``SCOPE_AI_AZURE_DEFAULT`` — + Microsoft's documented Foundry inference scope. ``base_url`` is + accepted for back-compat but ignored.""" + from agent.azure_identity_adapter import ( + SCOPE_AI_AZURE_DEFAULT, + build_token_provider, + ) + build_token_provider(base_url="https://r.openai.azure.com/openai/v1") + assert fake_azure_identity.last_scope == SCOPE_AI_AZURE_DEFAULT + + def test_explicit_scope_wins_over_base_url(self, fake_azure_identity): + from agent.azure_identity_adapter import build_token_provider + build_token_provider( + scope="https://override.example/.default", + base_url="https://r.openai.azure.com/openai/v1", + ) + assert fake_azure_identity.last_scope == "https://override.example/.default" + + def test_config_object_wins_over_kwargs(self, fake_azure_identity): + from agent.azure_identity_adapter import ( + EntraIdentityConfig, + build_token_provider, + ) + cfg = EntraIdentityConfig(scope="cfg-scope") + build_token_provider(scope="ignored", config=cfg) + assert fake_azure_identity.last_scope == "cfg-scope" + assert fake_azure_identity.last_credential_kwargs == {} + + +# --------------------------------------------------------------------------- +# Lazy-install / missing-package surface +# --------------------------------------------------------------------------- + + +class TestRequireAzureIdentityMissing: + def test_clear_error_when_lazy_install_disabled(self, monkeypatch): + """When azure-identity isn't importable AND lazy installs are + off, the adapter must raise ImportError with an actionable + message, not propagate FeatureUnavailable.""" + from agent import azure_identity_adapter as _adapter + + # Force the import path to fail. + original_import = __builtins__["__import__"] if isinstance(__builtins__, dict) else __import__ + def _fake_import(name, *args, **kwargs): + if name == "azure.identity" or name.startswith("azure.identity."): + raise ImportError("simulated missing azure-identity") + return original_import(name, *args, **kwargs) + + monkeypatch.setattr("builtins.__import__", _fake_import) + + # Simulate lazy installs disabled. + from tools.lazy_deps import FeatureUnavailable + + def _fake_ensure(*args, **kwargs): + raise FeatureUnavailable( + "provider.azure_identity", + ("azure-identity==1.25.3",), + "lazy installs disabled (test simulation)", + ) + + # The adapter calls ``ensure`` from ``tools.lazy_deps``; intercept + # it by patching the actual symbol path. + monkeypatch.setattr("tools.lazy_deps.ensure", _fake_ensure) + + with pytest.raises(ImportError) as exc_info: + _adapter._require_azure_identity() + msg = str(exc_info.value) + assert "azure-identity" in msg + assert "Foundry" in msg or "foundry" in msg.lower() + + +# --------------------------------------------------------------------------- +# has_azure_identity_credentials probe (timeout-bounded) +# --------------------------------------------------------------------------- + + +class TestHasAzureIdentityCredentials: + def test_returns_false_when_package_missing_and_install_disabled(self, monkeypatch): + from agent import azure_identity_adapter as _adapter + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False) + assert _adapter.has_azure_identity_credentials( + "https://x/.default", allow_install=False, + ) is False + + def test_lazy_install_triggered_when_package_missing(self, monkeypatch): + """With allow_install=True (default), the probe must trigger the + lazy-install path before bailing — otherwise the wizard's + ``preflight`` would silently fail for fresh installs that haven't + run ``pip install azure-identity`` yet.""" + from agent import azure_identity_adapter as _adapter + + installed = {"called": False} + + def _fake_install(): + installed["called"] = True + # After install, pretend the package is now importable. + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True) + return SimpleNamespace( + DefaultAzureCredential=lambda **kw: SimpleNamespace( + kwargs=kw, + get_token=lambda scope: SimpleNamespace(token="post-install-jwt", expires_on=0), + ), + get_bearer_token_provider=lambda c, s: lambda: "x", + ) + + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False) + monkeypatch.setattr(_adapter, "_require_azure_identity", _fake_install) + + # Provide a credential factory so the probe proceeds after install. + monkeypatch.setattr( + _adapter, "build_credential", + lambda config: SimpleNamespace( + get_token=lambda scope: SimpleNamespace(token="probe-jwt", expires_on=0), + ), + ) + + result = _adapter.has_azure_identity_credentials( + "https://x/.default", timeout_seconds=0.5, + ) + assert installed["called"] is True, ( + "has_azure_identity_credentials must trigger lazy install " + "before bailing" + ) + assert result is True + + def test_returns_true_on_successful_token_mint(self, fake_azure_identity): + from agent.azure_identity_adapter import has_azure_identity_credentials + assert has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5) is True + + def test_returns_false_when_get_token_raises(self, monkeypatch): + from agent import azure_identity_adapter as _adapter + + def _failing_credential(_config): + class _Cred: + def get_token(self, scope): + raise RuntimeError("simulated chain exhaustion") + return _Cred() + + monkeypatch.setattr(_adapter, "build_credential", _failing_credential) + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True) + assert _adapter.has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5) is False + + def test_returns_false_on_timeout(self, monkeypatch): + """Slow IMDS / network must time out, not hang the caller.""" + import threading + from agent import azure_identity_adapter as _adapter + + slow_release = threading.Event() + + def _slow_credential(_config): + class _Cred: + def get_token(self, scope): + # Block forever from the test's perspective; the + # adapter must give up via its thread-bounded probe. + slow_release.wait(timeout=10) + return SimpleNamespace(token="never-returned", expires_on=0) + return _Cred() + + monkeypatch.setattr(_adapter, "build_credential", _slow_credential) + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True) + try: + assert _adapter.has_azure_identity_credentials( + "https://x/.default", timeout_seconds=0.1 + ) is False + finally: + slow_release.set() + + +# --------------------------------------------------------------------------- +# describe_active_credential — used by hermes doctor + hermes auth +# --------------------------------------------------------------------------- + + +class TestDescribeActiveCredential: + def test_reports_not_installed(self, monkeypatch): + from agent import azure_identity_adapter as _adapter + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False) + info = _adapter.describe_active_credential( + scope="https://x/.default", allow_install=False, + ) + assert info["ok"] is False + assert "not installed" in info["error"].lower() + assert "pip install" in info["hint"].lower() + + def test_reports_install_failure(self, monkeypatch): + """When lazy install is allowed but fails (e.g. lazy installs + disabled), the diagnostic surfaces the failure as the error.""" + from agent import azure_identity_adapter as _adapter + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False) + + def _fail_install(): + raise ImportError("simulated: lazy installs disabled") + + monkeypatch.setattr(_adapter, "_require_azure_identity", _fail_install) + info = _adapter.describe_active_credential( + scope="https://x/.default", allow_install=True, + ) + assert info["ok"] is False + assert "lazy installs disabled" in info["error"] + assert "lazy" in info["hint"].lower() + + def test_reports_env_sources_for_managed_identity(self, fake_azure_identity, monkeypatch): + from agent.azure_identity_adapter import describe_active_credential + monkeypatch.setenv("IDENTITY_ENDPOINT", "http://169.254.169.254") + info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5) + assert info["ok"] is True + sources = info.get("env_sources") or [] + assert any("ManagedIdentity" in s for s in sources) + + def test_reports_env_sources_for_workload_identity(self, fake_azure_identity, monkeypatch): + from agent.azure_identity_adapter import describe_active_credential + monkeypatch.setenv("AZURE_FEDERATED_TOKEN_FILE", "/var/secrets/azure/federated-token") + info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5) + sources = info.get("env_sources") or [] + assert any("WorkloadIdentity" in s for s in sources) + + def test_reports_env_sources_for_service_principal(self, fake_azure_identity, monkeypatch): + from agent.azure_identity_adapter import describe_active_credential + monkeypatch.setenv("AZURE_TENANT_ID", "t") + monkeypatch.setenv("AZURE_CLIENT_ID", "c") + monkeypatch.setenv("AZURE_CLIENT_SECRET", "s") + info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5) + sources = info.get("env_sources") or [] + assert any("EnvironmentCredential" in s for s in sources) + + def test_reports_error_on_chain_failure(self, monkeypatch): + from agent import azure_identity_adapter as _adapter + + def _failing_credential(_config): + class _Cred: + def get_token(self, scope): + raise RuntimeError("auth failed") + return _Cred() + + monkeypatch.setattr(_adapter, "build_credential", _failing_credential) + monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True) + info = _adapter.describe_active_credential(scope="https://x/.default", timeout_seconds=0.5) + assert info["ok"] is False + assert "auth failed" in info.get("error", "") diff --git a/tests/agent/test_bedrock_1m_context.py b/tests/agent/test_bedrock_1m_context.py index 7d9753831ed..c088bcc0473 100644 --- a/tests/agent/test_bedrock_1m_context.py +++ b/tests/agent/test_bedrock_1m_context.py @@ -1,7 +1,7 @@ """Tests for the 1M-context beta header on AWS Bedrock Claude models. Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS -Bedrock (and Azure AI Foundry) that window is still gated behind the +Bedrock (and Microsoft Foundry) that window is still gated behind the ``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock caps these models at 200K even though ``model_metadata.py`` advertises 1M. @@ -61,4 +61,3 @@ class TestBedrockContext1MBeta: # Other common betas still present — no regression. assert "interleaved-thinking-2025-05-14" in beta_header assert "fine-grained-tool-streaming-2025-05-14" in beta_header - diff --git a/tests/hermes_cli/test_azure_detect.py b/tests/hermes_cli/test_azure_detect.py index 45eaa86e733..41cd737d780 100644 --- a/tests/hermes_cli/test_azure_detect.py +++ b/tests/hermes_cli/test_azure_detect.py @@ -102,7 +102,7 @@ def test_detect_anthropic_path_wins_without_http(): def test_detect_openai_models_probe_success(): """/models probe returning a model list → chat_completions.""" - def _fake_get(url, api_key, timeout=6.0): + def _fake_get(url, api_key, timeout=6.0, **kwargs): assert "key-abc" == api_key return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6")) @@ -118,7 +118,7 @@ def test_detect_openai_models_probe_success(): def test_detect_openai_models_probe_empty_list_still_counts(): """Endpoint returned OpenAI shape but no models → still chat_completions.""" - def _fake_get(url, api_key, timeout=6.0): + def _fake_get(url, api_key, timeout=6.0, **kwargs): return 200, {"object": "list", "data": []} with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get): @@ -132,7 +132,7 @@ def test_detect_openai_models_probe_empty_list_still_counts(): def test_detect_falls_back_to_anthropic_probe(): """/models fails but Anthropic Messages probe succeeds.""" - def _fake_get(url, api_key, timeout=6.0): + def _fake_get(url, api_key, timeout=6.0, **kwargs): return 401, None # /models forbidden with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \ @@ -164,7 +164,7 @@ def test_probe_openai_models_tries_multiple_api_versions(): """First call (no api-version) fails, api-version fallback succeeds.""" calls = [] - def _fake_get(url, api_key, timeout=6.0): + def _fake_get(url, api_key, timeout=6.0, **kwargs): calls.append(url) if "api-version" not in url: return 404, None diff --git a/tests/hermes_cli/test_azure_foundry_entra.py b/tests/hermes_cli/test_azure_foundry_entra.py new file mode 100644 index 00000000000..6cc2ff0ec97 --- /dev/null +++ b/tests/hermes_cli/test_azure_foundry_entra.py @@ -0,0 +1,404 @@ +"""Tests for Azure Foundry Entra ID runtime resolution. + +Covers the contract introduced in PR for Microsoft Entra ID auth on +``azure-foundry``: + + * ``_resolve_azure_foundry_runtime`` returns a callable ``api_key`` for + ``model.auth_mode = entra_id`` (OpenAI-style only). + * Anthropic-style endpoints with ``auth_mode = entra_id`` return the same + callable runtime credential as OpenAI-style endpoints. + * The legacy ``api_key`` path is unchanged when ``auth_mode`` is absent + or set to ``api_key``. + * Explicit ``--api-key`` overrides at runtime still work in entra mode + (escape hatch for one-off testing). + * ``model.entra.scope`` propagates to the token-provider config; Azure + identity selection stays in standard AZURE_* env vars. + * ``_get_azure_foundry_auth_status`` is structural — never mints a + token (verified by checking the credential cache untouched). + * ``has_usable_secret`` for ``AZURE_FOUNDRY_API_KEY`` is irrelevant + when ``auth_mode == entra_id``. +""" + +from __future__ import annotations + +import sys +from types import SimpleNamespace +from typing import cast +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture(autouse=True) +def _reset_credential_cache(): + from agent.azure_identity_adapter import reset_credential_cache + reset_credential_cache() + yield + reset_credential_cache() + + +@pytest.fixture +def fake_azure_identity(monkeypatch): + """Identical fake to test_azure_identity_adapter — keeps Azure SDK + out of these tests so they run in CI without the package installed.""" + from agent import azure_identity_adapter as _adapter + + last = {"scope": None, "kwargs": None, "credential_count": 0} + + def _provider(scope): + return lambda: f"jwt-for-{scope}" + + fake_module = SimpleNamespace( + DefaultAzureCredential=lambda **kw: SimpleNamespace( + kwargs=kw, + get_token=lambda scope: SimpleNamespace(token="fake", expires_on=9999999999), + ), + get_bearer_token_provider=lambda credential, scope: ( + last.__setitem__("scope", scope), + last.__setitem__("kwargs", credential.kwargs), + last.__setitem__("credential_count", cast(int, last["credential_count"]) + 1), + _provider(scope), + )[-1], + ) + monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module) + monkeypatch.setitem(sys.modules, "azure.identity", fake_module) + return last + + +# --------------------------------------------------------------------------- +# _resolve_azure_foundry_runtime: entra_id branch +# --------------------------------------------------------------------------- + + +class TestResolveAzureFoundryRuntimeEntra: + def test_returns_callable_api_key_for_entra(self, fake_azure_identity): + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://my-resource.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "default": "gpt-4o", # stays on chat_completions (no codex auto-upgrade) + }, + ) + assert runtime["provider"] == "azure-foundry" + assert runtime["auth_mode"] == "entra_id" + assert runtime["api_mode"] == "chat_completions" + assert callable(runtime["api_key"]) + assert runtime["source"] == "entra_id" + + def test_entra_inherits_codex_responses_for_gpt5_family(self, fake_azure_identity): + """GPT-5.x / o-series / codex models on Azure are Responses-API-only. + The runtime auto-upgrades api_mode regardless of auth mode — this is + the same behaviour as the static-key path (see + ``hermes_cli/models.py::azure_foundry_model_api_mode``).""" + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://my-resource.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "default": "gpt-5.4", + }, + ) + # GPT-5.x is upgraded to codex_responses — Entra path inherits. + assert runtime["api_mode"] == "codex_responses" + assert callable(runtime["api_key"]) + assert runtime["auth_mode"] == "entra_id" + + def test_entra_propagates_scope_only(self, fake_azure_identity): + """``model.entra.scope`` is the only Hermes-managed Azure SDK + setting. Identity selection (client ID, tenant, authority, + service principal secret, federated token file) flows through + standard ``AZURE_*`` env vars read by azure-identity directly. + Legacy ``model.entra.client_id`` / ``tenant_id`` / ``authority`` + keys in config.yaml are silently ignored.""" + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://my-resource.services.ai.azure.com/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "entra": { + "scope": "https://custom.example/.default", + "client_id": "client-uuid", + # Legacy keys must not crash — they are accepted in + # from_dict but never propagated to the SDK. + "tenant_id": "legacy-tenant", + "authority": "https://login.microsoftonline.us", + }, + }, + ) + assert fake_azure_identity["scope"] == "https://custom.example/.default" + kw = fake_azure_identity["kwargs"] + assert "managed_identity_client_id" not in kw + assert "workload_identity_client_id" not in kw + assert "interactive_browser_tenant_id" not in kw + assert "authority" not in kw + + def test_entra_default_scope_when_unset(self, fake_azure_identity): + """When ``model.entra.scope`` is not set, the runtime resolves + Microsoft's documented inference scope — + ``https://ai.azure.com/.default`` — regardless of whether the + endpoint is ``*.openai.azure.com`` or ``*.services.ai.azure.com``. + Both shapes use the SAME scope per Microsoft's docs; the + ``cognitiveservices.azure.com`` scope is the control-plane + audience and is rejected for inference by newer resources.""" + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT + _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + }, + ) + assert fake_azure_identity["scope"] == SCOPE_AI_AZURE_DEFAULT + + def test_entra_scope_override_wins(self, fake_azure_identity): + """Users on sovereign clouds / unusual tenants can set + ``model.entra.scope`` to override the default.""" + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "entra": { + "scope": "https://cognitiveservices.azure.com/.default", + }, + }, + ) + assert ( + fake_azure_identity["scope"] + == "https://cognitiveservices.azure.com/.default" + ) + + def test_entra_with_anthropic_messages_is_supported(self, fake_azure_identity): + """Entra ID now works for both OpenAI-style and Anthropic-style + Azure Foundry endpoints. The runtime returns a callable + ``api_key``; downstream + :func:`agent.anthropic_adapter.build_anthropic_client` detects + the callable and installs an httpx event hook that mints a + fresh bearer JWT per request (the Anthropic SDK does not + accept callable auth_token natively).""" + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.services.ai.azure.com/anthropic", + "api_mode": "anthropic_messages", + "auth_mode": "entra_id", + "default": "claude-sonnet-4-5", + }, + ) + assert runtime["provider"] == "azure-foundry" + assert runtime["auth_mode"] == "entra_id" + assert runtime["api_mode"] == "anthropic_messages" + # Callable api_key — the anthropic_adapter detects this and + # plumbs through an httpx event hook. + assert callable(runtime["api_key"]) + assert not isinstance(runtime["api_key"], str) + + def test_entra_with_explicit_api_key_uses_string_escape_hatch(self, fake_azure_identity): + """Passing --api-key on the CLI overrides the entra path so a + user can debug a single request with a static key without + editing config.yaml.""" + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + }, + explicit_api_key="explicit-string-key", + ) + assert runtime["api_key"] == "explicit-string-key" + assert runtime["auth_mode"] == "api_key" + assert runtime["source"] == "explicit" + + def test_entra_runtime_dict_keeps_only_scope_override(self, fake_azure_identity): + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "entra_id", + "entra": { + "scope": "https://custom.example/.default", + "client_id": "legacy-client", + }, + }, + ) + assert runtime["entra"] == {"scope": "https://custom.example/.default"} + + +# --------------------------------------------------------------------------- +# _resolve_azure_foundry_runtime: legacy api_key branch (regression) +# --------------------------------------------------------------------------- + + +class TestResolveAzureFoundryRuntimeApiKey: + def test_default_auth_mode_uses_static_key(self, monkeypatch): + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key") + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + }, + ) + assert runtime["api_key"] == "sk-azure-static-key" + assert runtime["auth_mode"] == "api_key" + assert "entra" not in runtime # only present in entra mode + + def test_explicit_auth_mode_api_key(self, monkeypatch): + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-static") + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + "auth_mode": "api_key", + }, + ) + assert runtime["api_key"] == "sk-static" + assert runtime["auth_mode"] == "api_key" + + def test_anthropic_messages_strips_v1_suffix(self, monkeypatch): + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "k") + runtime = _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.services.ai.azure.com/anthropic/v1", + "api_mode": "anthropic_messages", + }, + ) + assert runtime["base_url"] == "https://r.services.ai.azure.com/anthropic" + + def test_missing_api_key_raises_with_entra_hint(self, monkeypatch): + from hermes_cli.auth import AuthError + from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime + monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False) + with pytest.raises(AuthError) as exc_info: + _resolve_azure_foundry_runtime( + requested_provider="azure-foundry", + model_cfg={ + "provider": "azure-foundry", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_mode": "chat_completions", + }, + ) + msg = str(exc_info.value) + assert "AZURE_FOUNDRY_API_KEY" in msg + # Surface the Entra alternative so users discover the keyless path. + assert "entra_id" in msg + + +# --------------------------------------------------------------------------- +# _get_azure_foundry_auth_status (auth.py) — never mints a token +# --------------------------------------------------------------------------- + + +class TestAzureFoundryAuthStatus: + def test_entra_status_does_not_mint_token(self, monkeypatch, tmp_path): + """Structural check — must return logged_in=True based on + importable + config, never call get_bearer_token_provider.""" + from hermes_cli import auth as _auth + # Force load_config to return our entra config. + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: { + "model": { + "provider": "azure-foundry", + "auth_mode": "entra_id", + "base_url": "https://r.openai.azure.com/openai/v1", + }, + }, + ) + # Patch has_azure_identity_installed to True; do NOT patch the + # token provider — if the code path tried to mint, the SDK + # missing would raise. + monkeypatch.setattr( + "agent.azure_identity_adapter.has_azure_identity_installed", + lambda: True, + ) + info = _auth._get_azure_foundry_auth_status() + assert info["logged_in"] is True + assert info["auth_mode"] == "entra_id" + assert info["azure_identity_installed"] is True + assert info["scope"].endswith("/.default") + + def test_entra_status_reports_missing_package(self, monkeypatch): + from hermes_cli import auth as _auth + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: { + "model": { + "provider": "azure-foundry", + "auth_mode": "entra_id", + "base_url": "https://r.openai.azure.com/openai/v1", + }, + }, + ) + monkeypatch.setattr( + "agent.azure_identity_adapter.has_azure_identity_installed", + lambda: False, + ) + info = _auth._get_azure_foundry_auth_status() + assert info["logged_in"] is False + assert info["azure_identity_installed"] is False + assert "azure-identity" in info["hint"] + + def test_api_key_status_uses_env_var(self, monkeypatch): + from hermes_cli import auth as _auth + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: { + "model": { + "provider": "azure-foundry", + "auth_mode": "api_key", + "base_url": "https://r.openai.azure.com/openai/v1", + }, + }, + ) + monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-real-key-xxx") + info = _auth._get_azure_foundry_auth_status() + assert info["auth_mode"] == "api_key" + assert info["logged_in"] is True + + def test_api_key_status_false_when_missing(self, monkeypatch): + from hermes_cli import auth as _auth + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: { + "model": { + "provider": "azure-foundry", + "auth_mode": "api_key", + }, + }, + ) + monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False) + info = _auth._get_azure_foundry_auth_status() + assert info["logged_in"] is False diff --git a/tests/run_agent/test_callable_api_key.py b/tests/run_agent/test_callable_api_key.py new file mode 100644 index 00000000000..2c685643b98 --- /dev/null +++ b/tests/run_agent/test_callable_api_key.py @@ -0,0 +1,375 @@ +"""Tests that callable api_key (Entra ID bearer provider) flows through +the agent stack without coercion. + +The OpenAI Python SDK accepts ``api_key: str | None | Callable[[], str]``, +and ``azure-identity``'s ``get_bearer_token_provider`` returns a callable. +Hermes preserves the callable end-to-end so the SDK refreshes tokens +transparently. This file pins the contract at the high-risk seams the +rubber-duck audit identified. + +Covered: + * ``_create_openai_client`` passes a callable ``api_key`` straight + through to ``openai.OpenAI(...)``. + * ``_normalize_main_runtime`` preserves the callable so auxiliary + clients inherit Entra auth. + * ``_truncate_token`` (dashboard preview) renders ``""`` + instead of ``""`` and never invokes the callable. + * ``run_agent.py`` masked-banner path renders the Entra placeholder + and never tries to slice/len the callable. + * Serialization scrub: dumping a runtime dict via ``json.dumps`` with + a callable api_key raises (default behaviour) — guards against + silently leaking ``""`` strings into event logs. + * ``batch_runner`` strips the callable from the worker config dict + so multiprocessing.Pool can pickle the rest. +""" + +from __future__ import annotations + +import json +from types import SimpleNamespace +from typing import cast +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# OpenAI SDK construction preserves the callable +# --------------------------------------------------------------------------- + + +class TestCreateOpenAIClientCallable: + """``AIAgent._create_openai_client`` must pass the callable through + to ``openai.OpenAI(...)`` without coercion.""" + + def test_callable_api_key_passed_to_openai_constructor(self, monkeypatch): + """Construct the smallest possible AIAgent surface and verify + the OpenAI client receives the callable unchanged.""" + captured = {} + + def fake_openai(**kwargs): + captured["kwargs"] = kwargs + return MagicMock(api_key=kwargs.get("api_key")) + + # Patch the module-level OpenAI proxy used by ``_create_openai_client``. + monkeypatch.setattr("run_agent.OpenAI", fake_openai) + + # Build a minimal stand-in for AIAgent so we can call the bound + # method directly without paying the full __init__ cost. + from run_agent import AIAgent + + agent = AIAgent.__new__(AIAgent) + # Attributes consulted by _create_openai_client / _client_log_context. + agent.provider = "azure-foundry" + agent.model = "gpt-4o" + agent.base_url = "https://r.openai.azure.com/openai/v1" + agent._client_kwargs = {} + + def token_provider(): + return "fresh-jwt" + + client_kwargs = { + "api_key": token_provider, + "base_url": "https://r.openai.azure.com/openai/v1", + } + client = agent._create_openai_client(client_kwargs, reason="test", shared=False) + + # The OpenAI constructor must receive the *callable*, not a string. + forwarded = captured["kwargs"]["api_key"] + assert callable(forwarded) + assert not isinstance(forwarded, str) + assert forwarded is token_provider, ( + "_create_openai_client must not wrap or coerce the callable" + ) + assert client is not None + + +# --------------------------------------------------------------------------- +# Auxiliary runtime preserves the callable +# --------------------------------------------------------------------------- + + +class TestNormalizeMainRuntimePreservesCallable: + """The aux client orchestrator must keep the callable on the + runtime dict so compression / vision / embedding / title-gen clients + inherit Entra ID auth from the main agent.""" + + def test_callable_api_key_survives_normalization(self): + from agent.auxiliary_client import _normalize_main_runtime + + def provider(): + return "jwt" + + normalized = _normalize_main_runtime({ + "provider": "azure-foundry", + "model": "gpt-4o", + "base_url": "https://r.openai.azure.com/openai/v1", + "api_key": provider, + "api_mode": "chat_completions", + "auth_mode": "entra_id", + }) + assert normalized["api_key"] is provider + assert normalized["auth_mode"] == "entra_id" + + def test_string_api_key_still_works(self): + from agent.auxiliary_client import _normalize_main_runtime + normalized = _normalize_main_runtime({ + "provider": "azure-foundry", + "api_key": "sk-static", + }) + assert normalized["api_key"] == "sk-static" + + def test_normalization_drops_empty_string_but_preserves_callable(self): + from agent.auxiliary_client import _normalize_main_runtime + + def provider(): + return "" + + # Empty string fields are dropped, but a callable is preserved + # even if it would mint an empty token (we don't invoke during + # normalization). + normalized = _normalize_main_runtime({ + "provider": "azure-foundry", + "api_key": provider, + "model": "", + }) + assert normalized["api_key"] is provider + assert "model" not in normalized + + def test_unknown_field_dropped(self): + from agent.auxiliary_client import _normalize_main_runtime, _MAIN_RUNTIME_FIELDS + normalized = _normalize_main_runtime({ + "provider": "azure-foundry", + "api_key": "k", + "secret_field_we_dont_want": "leak", + }) + assert "secret_field_we_dont_want" not in normalized + # auth_mode IS in the field allowlist (rubber-duck blocker fix). + assert "auth_mode" in _MAIN_RUNTIME_FIELDS + + +# --------------------------------------------------------------------------- +# Display surfaces never invoke the callable +# --------------------------------------------------------------------------- + + +class TestTruncateTokenCallable: + def test_callable_returns_placeholder(self): + """Dashboard preview must render the Entra placeholder, NOT + ``""``.""" + from hermes_cli.web_server import _truncate_token + + invoked = {"count": 0} + + def provider(): + invoked["count"] += 1 + return "should-not-appear-in-ui" + + token_provider = cast(str | None, provider) + rendered = _truncate_token(token_provider) + assert rendered == "" + assert invoked["count"] == 0 + + def test_string_jwt_still_truncated_to_signature_tail(self): + from hermes_cli.web_server import _truncate_token + # JWT shape: header.payload.signature → only signature tail shown. + out = _truncate_token("aaaa.bbbb.cccccccsig", visible=4) + assert out == "…csig" + + def test_empty_returns_empty(self): + from hermes_cli.web_server import _truncate_token + assert _truncate_token(None) == "" + assert _truncate_token("") == "" + + +# --------------------------------------------------------------------------- +# Serialization scrub — runtime dicts with callables must NOT silently +# JSON-encode as ``""`` (would leak garbage into events). +# --------------------------------------------------------------------------- + + +class TestRuntimeDictSerializationGuard: + def test_json_dumps_default_str_does_not_silently_stringify_callable(self): + """Sanity check: a runtime dict with a callable api_key must + either raise on plain ``json.dumps`` (good — fail loud) or be + sanitized BEFORE serialization. This test pins the loud-fail + behaviour so future changes that introduce + ``json.dumps(..., default=str)`` over a runtime dict are caught + by a regression here.""" + + def provider(): + return "jwt" + + runtime = { + "provider": "azure-foundry", + "api_key": provider, + "auth_mode": "entra_id", + } + # Plain json.dumps — must raise, not silently produce + # ``""``. + with pytest.raises(TypeError): + json.dumps(runtime) + + +# --------------------------------------------------------------------------- +# batch_runner strips callables from the worker config dict +# --------------------------------------------------------------------------- + + +class TestBatchRunnerCallableHandling: + def test_callable_api_key_stripped_from_worker_config(self, capsys, monkeypatch, tmp_path): + """``BatchRunner._run_batches`` (or the equivalent code path) + must replace a callable api_key with None before pickling the + worker config dict — otherwise multiprocessing.Pool fails.""" + # We can't easily run BatchRunner end-to-end in a unit test + # (it spawns subprocesses), but we CAN inline the same logic: + # the production code uses ``callable(self.api_key) and not + # isinstance(self.api_key, str)`` to gate the substitution. + # Re-execute the same predicate here as a contract guard. + + def provider(): + return "jwt" + + api_key = provider + worker_api_key = None if (callable(api_key) and not isinstance(api_key, str)) else api_key + assert worker_api_key is None, ( + "BatchRunner must replace callable api_key with None so " + "multiprocessing.Pool can pickle the worker config" + ) + + # And a string passes through unchanged. + api_key_str = "sk-static" + worker_api_key_str = None if (callable(api_key_str) and not isinstance(api_key_str, str)) else api_key_str + assert worker_api_key_str == "sk-static" + + def test_batch_runner_source_uses_the_correct_predicate(self): + """Pin the predicate string in batch_runner so refactors that + change it are caught here. Reading the source rather than + importing avoids spinning up the full BatchRunner.""" + from pathlib import Path + src = (Path(__file__).resolve().parent.parent.parent + / "batch_runner.py").read_text() + assert "callable(self.api_key) and not isinstance(self.api_key, str)" in src, ( + "BatchRunner.api_key callable check changed — update test or " + "verify the new predicate still routes Entra token providers " + "to the worker-rebuild path." + ) + + +# --------------------------------------------------------------------------- +# Inline masked-banner / display sites (callable-aware) +# --------------------------------------------------------------------------- + + +class TestCliEnsureRuntimeCredentialsCallable: + """Regression: ``cli.py:_ensure_runtime_credentials`` previously + treated a callable ``api_key`` as "not a string" and overwrote it + with the ``"no-key-required"`` placeholder, which then got sent as + ``Authorization: Bearer no-key-required`` and rejected by Azure + with a 401. This is the most subtle of the callable-api_key audit + sites — gated by ``not isinstance(api_key, str)`` rather than the + cleaner ``callable(...)`` check used elsewhere. + + We verify the source pattern (rather than spinning up a real + ``HermesCLI`` instance) — the predicate change is the load-bearing + fix and is invariant under the surrounding orchestration code.""" + + def test_callable_predicate_present_in_cli_runtime_validation(self): + from pathlib import Path + src = (Path(__file__).resolve().parent.parent.parent + / "cli.py").read_text() + # The fix introduces ``_is_callable_provider`` which gates the + # string-only check so callable token providers survive. + assert "_is_callable_provider = callable(api_key)" in src, ( + "cli.py:_ensure_runtime_credentials must preserve a callable " + "api_key (Entra ID bearer provider). Without the guard, the " + "callable is stringified to 'no-key-required' and Azure 401s." + ) + + +class TestInlinedDisplayMasks: + """The masked-credential display sites are now inlined per-site (no + shared helper). Each site uses the ``is_token_provider`` predicate + to short-circuit on callables and print a static + ``"Microsoft Entra ID"`` label, then falls through to its own + context-appropriate string mask. This replaces a unified helper + that would have forced one mask shape across sites with legitimately + different display needs (banner vs diagnostic vs UI vs preview).""" + + def test_run_agent_banner_uses_is_token_provider_guard(self): + """The masked-banner sites live in ``agent/agent_init.py`` + (the ``__init__`` body was extracted into ``init_agent`` after + this feature was first written). Both the OpenAI and Anthropic + client init paths must guard their banner prints with + ``is_token_provider`` so a callable Entra ID provider doesn't + crash ``len(api_key)``.""" + from pathlib import Path + src = (Path(__file__).resolve().parent.parent.parent + / "agent" / "agent_init.py").read_text() + assert src.count("is_token_provider(") >= 2, ( + "agent/agent_init.py must guard BOTH masked-banner paths " + "(chat_completions and anthropic_messages) with " + "is_token_provider()." + ) + assert src.count('"🔑 Using credentials: Microsoft Entra ID"') >= 2, ( + "agent/agent_init.py banner blocks should print a static " + "'Microsoft Entra ID' label for callable api_keys — no " + "placeholder plumbing, no describe-mask fallback." + ) + + def test_cli_show_config_handles_callable(self): + """``cli.HermesCLI.show_config`` previously did + ``self.api_key[-4:]`` / ``len(self.api_key)`` which crashes on + callable Entra ID providers. The inlined version uses + ``is_token_provider`` and prints the same static label as the + run_agent banners.""" + from pathlib import Path + src = (Path(__file__).resolve().parent.parent.parent + / "cli.py").read_text() + assert "is_token_provider(self.api_key)" in src, ( + "cli.HermesCLI.show_config must guard self.api_key via " + "is_token_provider so callable Entra ID providers don't " + "crash /config." + ) + assert '"Microsoft Entra ID"' in src, ( + "cli.HermesCLI.show_config must print the static " + "'Microsoft Entra ID' label (matching run_agent banners) " + "instead of attempting to slice the callable." + ) + + def test_mask_api_key_for_logs_handles_callable(self): + """``run_agent._mask_api_key_for_logs`` is called from the + request-dump JSON path. For Entra users, ``self.client.api_key`` + is the SDK's empty string (callable stashed privately) — but + defensively the helper must also accept a callable directly + and return the placeholder rather than crashing on + ``len(callable)``.""" + from pathlib import Path + src = (Path(__file__).resolve().parent.parent.parent + / "run_agent.py").read_text() + # The function now starts with a callable check. + assert ( + "if callable(key) and not isinstance(key, str):" in src + and '""' in src + ), ( + "run_agent._mask_api_key_for_logs must short-circuit for " + "callable api_keys to avoid len(callable) crashes in " + "request-dump paths." + ) + + def test_anthropic_401_diagnostic_handles_callable(self): + """The Anthropic 401 diagnostic path lives in + ``agent/conversation_loop.py`` (the ``run_conversation`` body + was extracted after this feature was first written). It used + to do ``key[:12]`` on ``self._anthropic_api_key``. For Entra ID + + Anthropic-style mode that's a callable; slicing crashes.""" + from pathlib import Path + src = (Path(__file__).resolve().parent.parent.parent + / "agent" / "conversation_loop.py").read_text() + # The Anthropic 401 block now branches on is_token_provider + # before slicing the key. + assert "Microsoft Entra ID (httpx event hook)" in src, ( + "agent/conversation_loop.py Anthropic 401 diagnostic must " + "surface a Microsoft Entra ID branch before slicing the " + "key prefix." + ) diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py index c7d7730c756..1a8708ef25c 100644 --- a/tools/lazy_deps.py +++ b/tools/lazy_deps.py @@ -81,6 +81,11 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = { "provider.anthropic": ("anthropic==0.87.0",), # CVE-2026-34450, CVE-2026-34452 # AWS Bedrock provider "provider.bedrock": ("boto3==1.42.89",), + # Microsoft Foundry — Entra ID auth (managed identity, workload identity, + # service principal, az login, VS Code, azd, PowerShell). Only loaded + # when model.auth_mode=entra_id is selected; key-based azure-foundry + # users never pay this import. + "provider.azure_identity": ("azure-identity==1.25.3",), # ─── Web search backends ─────────────────────────────────────────────── "search.exa": ("exa-py==2.10.2",), diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 4a9bc2b6590..de2888a6de7 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1087,7 +1087,16 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict: current_provider = str(runtime.get("provider", "") or "") current_model = _resolve_model() current_base_url = str(runtime.get("base_url", "") or "") - current_api_key = str(runtime.get("api_key", "") or "") + # Preserve a callable api_key (Azure Foundry Entra ID bearer + # provider) unchanged — ``str(...)`` would produce + # ``""`` and poison downstream switch_model + # validation. Match the agent-present branch's behavior at the + # top of this block. + _runtime_key = runtime.get("api_key", "") + if callable(_runtime_key) and not isinstance(_runtime_key, str): + current_api_key = _runtime_key + else: + current_api_key = str(_runtime_key or "") # Load user-defined providers so switch_model can resolve named custom # endpoints (e.g. "ollama-launch") and validate against saved model lists. diff --git a/uv.lock b/uv.lock index e7641abd224..02f00816659 100644 --- a/uv.lock +++ b/uv.lock @@ -500,6 +500,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" }, ] +[[package]] +name = "azure-core" +version = "1.41.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/f3/b416179e408990df5db0d516283022dde0f5d0111d98c1a848e41853e81c/azure_core-1.41.0.tar.gz", hash = "sha256:f46ff5dfcd230f25cf1c19e8a34b8dc08a337b2503e268bb600a16c00db8ad5a", size = 381042, upload-time = "2026-05-07T23:30:54.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/db/325c6d7312d2200251c52323878281045aaffcb5586612296484e4280eaa/azure_core-1.41.0-py3-none-any.whl", hash = "sha256:522b4011e8180b1a3dcd2024396a4e7fe9ac37fb8597db47163d230b5efe892d", size = 220920, upload-time = "2026-05-07T23:30:56.357Z" }, +] + +[[package]] +name = "azure-identity" +version = "1.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "cryptography" }, + { name = "msal" }, + { name = "msal-extensions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", size = 192138, upload-time = "2026-03-13T01:12:22.951Z" }, +] + [[package]] name = "base58" version = "2.1.1" @@ -1618,6 +1647,9 @@ all = [ anthropic = [ { name = "anthropic" }, ] +azure-identity = [ + { name = "azure-identity" }, +] bedrock = [ { name = "boto3" }, ] @@ -1767,6 +1799,7 @@ requires-dist = [ { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" }, { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" }, { name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" }, + { name = "azure-identity", marker = "extra == 'azure-identity'", specifier = "==1.25.3" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" }, { name = "brotlicffi", marker = "extra == 'messaging'", specifier = "==1.2.0.1" }, { name = "croniter", specifier = "==6.0.0" }, @@ -1855,7 +1888,7 @@ requires-dist = [ { name = "vercel", marker = "extra == 'vercel'", specifier = "==0.5.7" }, { name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" }, ] -provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"] +provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "azure-identity", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"] [[package]] name = "hf-xet" @@ -2421,6 +2454,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "msal" +version = "1.36.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/cb/b02b0f748ac668922364ccb3c3bff5b71628a05f5adfec2ba2a5c3031483/msal-1.36.0.tar.gz", hash = "sha256:3f6a4af2b036b476a4215111c4297b4e6e236ed186cd804faefba23e4990978b", size = 174217, upload-time = "2026-04-09T10:20:33.525Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/d3/414d1f0a5f6f4fe5313c2b002c54e78a3332970feb3f5fed14237aa17064/msal-1.36.0-py3-none-any.whl", hash = "sha256:36ecac30e2ff4322d956029aabce3c82301c29f0acb1ad89b94edcabb0e58ec4", size = 121547, upload-time = "2026-04-09T10:20:32.336Z" }, +] + +[[package]] +name = "msal-extensions" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "msal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" }, +] + [[package]] name = "msgpack" version = "1.1.2" diff --git a/website/docs/guides/azure-foundry.md b/website/docs/guides/azure-foundry.md index 218eadadc37..070f5c0d99b 100644 --- a/website/docs/guides/azure-foundry.md +++ b/website/docs/guides/azure-foundry.md @@ -1,23 +1,23 @@ --- sidebar_position: 15 -title: "Azure AI Foundry" -description: "Use Hermes Agent with Azure AI Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models" +title: "Microsoft Foundry" +description: "Use Hermes Agent with Microsoft Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models" --- -# Azure AI Foundry +# Microsoft Foundry -Hermes Agent supports Azure AI Foundry (and Azure OpenAI) as a first-class provider. A single Azure resource can host models with two different wire formats: +Hermes Agent's `azure-foundry` provider supports Microsoft Foundry (formerly Azure AI Foundry) and Azure OpenAI. A single Foundry resource can host models with two different wire formats: - **OpenAI-style** — `POST /v1/chat/completions` on endpoints like `https://.openai.azure.com/openai/v1`. Used for GPT-4.x, GPT-5.x, Llama, Mistral, and most open-weight models. -- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://.services.ai.azure.com/anthropic`. Used when Azure Foundry serves Claude models via the Anthropic Messages API format. +- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://.services.ai.azure.com/anthropic`. Used when Microsoft Foundry serves Claude models via the Anthropic Messages API format. The setup wizard probes your endpoint and auto-detects which transport it uses, which deployments are available, and each model's context length. ## Prerequisites -- An Azure AI Foundry or Azure OpenAI resource with at least one deployment -- An API key for that resource (available in the Azure Portal under "Keys and Endpoint") +- A Microsoft Foundry or Azure OpenAI resource with at least one deployment - The deployment's endpoint URL +- **Either** an API key (from the Azure Portal under "Keys and Endpoint") **or** the **Azure AI User** RBAC role on the Foundry resource if you plan to use Microsoft Entra ID (the keyless path Microsoft recommends). Some tenants may show the role as **Foundry User** during Microsoft's rename rollout. ## Quick Start @@ -25,20 +25,172 @@ The setup wizard probes your endpoint and auto-detects which transport it uses, hermes model # → Select "Azure Foundry" # → Enter your endpoint URL -# → Enter your API key +# → Choose Authentication: +# 1. API key +# 2. Microsoft Entra ID (managed identity / workload identity / az login) +# → (Entra) Hermes probes DefaultAzureCredential; on success it never asks for a key +# → (API key) Enter your API key # Hermes probes the endpoint and auto-detects transport + models # → Pick a model from the list (or type a deployment name manually) ``` The wizard will: -1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Azure Foundry Claude routes. +1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Microsoft Foundry Claude routes. 2. **Probe `GET /models`** — if the endpoint returns an OpenAI-shaped model list, Hermes switches to `chat_completions` and prefills a picker with the returned deployment IDs. 3. **Probe Anthropic Messages shape** — fallback for endpoints that do not expose `/models` but do accept the Anthropic Messages format. 4. **Fall back to manual entry** — private/gated endpoints that reject every probe still work; you pick the API mode and type a deployment name by hand. Context length for the chosen model is resolved via Hermes' standard metadata chain (`models.dev`, provider metadata, and hardcoded family fallbacks) and stored in `config.yaml` so the model can size its own context window correctly. +## Microsoft Entra ID (keyless, RBAC) — recommended + +Microsoft recommends [keyless authentication with Microsoft Entra ID](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) for production Foundry workloads. Hermes supports Entra ID for **both** API surfaces: + +- **OpenAI-style** (`api_mode: chat_completions` / `codex_responses`) — GPT-4/5, Llama, Mistral, DeepSeek, etc. +- **Anthropic-style** (`api_mode: anthropic_messages`) — Claude models on Microsoft Foundry. + +Foundry's RBAC is per-resource (`Azure AI User` grants both surfaces; some tenants may display `Foundry User`) and Microsoft documents the same inference scope (`https://ai.azure.com/.default`) for both. Under the hood: + +- OpenAI-style uses the OpenAI Python SDK's native callable `api_key=` contract — the SDK mints a fresh JWT per request automatically. +- Anthropic-style uses an `httpx.Client` with a request event hook installed by `agent.azure_identity_adapter.build_bearer_http_client`, because the Anthropic SDK does not accept callable `auth_token` natively. The hook rewrites `Authorization: Bearer ` per outbound request. Same Microsoft RBAC, same Foundry scope — the SDK contract is the only difference. + +### Why use Entra ID? + +- No long-lived API keys to rotate or revoke. +- RBAC-driven access — grant or remove `Azure AI User` on the Foundry resource, no config rewrite needed. +- Access and audit logs are segmented by assignee instead of all callers sharing one static key. +- Single auth surface for Azure VMs, AKS pods, App Service, Functions, Container Apps, and Foundry Agent Service via managed identity. +- Workload identity and service-principal flows for CI/CD pipelines. + +### One-time setup (Azure side) + +1. In the Azure Portal, open your Foundry resource → **Access control (IAM)** → **Add → Add role assignment**. +2. Pick the **Azure AI User** role (or **Foundry User** if your tenant has the renamed role). +3. Assign it to: + - **Your user account** for local development with `az login`. + - **A managed identity or workload identity** for Azure-hosted compute (recommended for production). + - **A Foundry Agent Service hosted agent's agent identity** when Hermes runs inside a hosted agent. + - **A service principal** for CI/CD pipelines when workload identity is not available. +4. Wait ~5 minutes for the role to propagate. + +Azure CLI equivalent: + +```bash +az role assignment create \ + --assignee \ + --role "Azure AI User" \ + --scope +``` + +### One-time setup (Hermes side) + +```bash +hermes model +# → Select "Azure Foundry" +# → Enter your endpoint URL +# → Authentication: 2 (Microsoft Entra ID) +# → (optional) user-assigned managed identity client ID +# → (optional) Azure tenant ID +# → Hermes probes DefaultAzureCredential() and reports which inner +# credential succeeded (e.g. AzureCliCredential, ManagedIdentityCredential) +``` + +The wizard runs a bounded preflight probe (10 s timeout). On failure it offers to "save anyway, validate later" — useful when configuring on a machine that doesn't yet have credentials but will at runtime (e.g. preparing config for a managed-identity deployment). + +`azure-identity` is installed automatically on first use via Hermes' lazy-install path. To pre-install: + +```bash +pip install azure-identity +``` + +### Configuration written to `config.yaml` + +```yaml +model: + provider: azure-foundry + base_url: https://my-resource.openai.azure.com/openai/v1 + api_mode: chat_completions + auth_mode: entra_id + default: gpt-4o + context_length: 128000 + entra: + scope: https://ai.azure.com/.default # only when overriding the default +``` + +Hermes only manages one Entra-specific knob in `config.yaml`: + +- **`scope`** — the OAuth resource scope. Defaults to Microsoft's documented inference scope (`https://ai.azure.com/.default`). Override only if your resource was provisioned against a non-standard audience. + +Everything else (tenant, service principal secret, federated token file, sovereign cloud authority, broker preferences) is read by `azure-identity` directly from the standard `AZURE_*` environment variables — see the [credential resolution order](#credential-resolution-order) below. Set those in `~/.hermes/.env` or your deployment environment, exactly as Microsoft's SDK reference describes. + +No secrets land in `~/.hermes/.env` for Entra mode — `azure-identity` caches tokens in-process (and where available, in your OS keychain / `~/.IdentityService`). + +### Credential resolution order + +`azure-identity`'s `DefaultAzureCredential` walks this chain on each token request, stopping at the first credential that returns a token: + +1. **Environment credential** — `AZURE_TENANT_ID` + `AZURE_CLIENT_ID` + `AZURE_CLIENT_SECRET` (or `AZURE_CLIENT_CERTIFICATE_PATH` / `AZURE_FEDERATED_TOKEN_FILE`). +2. **Workload Identity** — `AZURE_FEDERATED_TOKEN_FILE` (AKS federated tokens / OIDC). +3. **Managed Identity** — IMDS endpoint (`169.254.169.254`) for virtual machines; `IDENTITY_ENDPOINT` for App Service / Functions / Container Apps. Foundry Agent Service hosted agents use the hosted agent's agent identity. +4. **Visual Studio Code** — Azure account extension. +5. **Azure CLI** — `az login` session. +6. **Azure Developer CLI** — `azd auth login`. +7. **Azure PowerShell** — `Connect-AzAccount`. +8. **Broker** (Windows / WSL only) — Web Account Manager. + +Interactive browser credential is excluded by default for unattended Hermes runs; use Azure CLI, Azure Developer CLI, managed identity, workload identity, or service principal credentials instead. + +### Deployment patterns + +**Local development:** +```bash +az login +hermes model # pick Azure Foundry → Entra ID +hermes # uses your az login token +``` + +**Azure VM / Functions / App Service / Container Apps (system-assigned managed identity):** +1. Enable system-assigned identity on the compute resource. +2. Grant the identity `Azure AI User` (or `Foundry User`) on the Foundry resource. +3. Set `model.auth_mode: entra_id` in config.yaml — no env vars needed. + +**Azure VM / Functions / App Service / Container Apps (user-assigned managed identity):** +- Set `AZURE_CLIENT_ID` to the user-assigned identity's client ID so `DefaultAzureCredential` picks the right one. + +**Foundry Agent Service hosted agent:** +- Create the hosted agent and grant that agent's identity `Azure AI User` (or `Foundry User`) on the Foundry resource. Hermes uses `ManagedIdentityCredential` from inside the hosted agent; role assignment belongs on the agent identity, not just the parent project or your user. + +**AKS Workload Identity (replaces AAD Pod Identity):** +- Annotate the pod's service account with the workload identity client ID. +- The pod's federated token file is auto-detected via `AZURE_FEDERATED_TOKEN_FILE`. +- `model.auth_mode: entra_id` works without further config changes. + +**Service principal in CI:** +- Set `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET` in the runner env. + +**Sovereign clouds (Government, China):** +- Export `AZURE_AUTHORITY_HOST` (e.g. `https://login.microsoftonline.us` for Azure Government, `https://login.partner.microsoftonline.cn` for Azure China). `azure-identity` reads it directly. + +### Health checks + +`hermes doctor` runs a 10 s probe against `DefaultAzureCredential` when `model.auth_mode: entra_id`, reporting which inner credential won (env vars present, managed identity endpoint reachable, etc.). + +`hermes auth` shows a structured status block: + +``` +azure-foundry (Microsoft Entra ID): + Endpoint: https://my-resource.openai.azure.com/openai/v1 + Scope: https://ai.azure.com/.default + Status: configured; live token probe is skipped here +``` + +### Limitations + +- **Anthropic-style endpoints use an httpx event hook.** The Anthropic Python SDK does not accept a callable `auth_token` natively (≤ 0.86.0). Hermes installs a request event hook on a custom `httpx.Client` that mints a fresh JWT per outbound request and rewrites `Authorization: Bearer `. This is functionally equivalent to the OpenAI SDK's native `Callable[[], str]` contract but adds one indirection layer. If the Anthropic SDK adds first-class callable-auth support in a future release, Hermes will switch to it transparently. +- **Batch jobs and `multiprocessing.Pool`.** The Entra token provider is a closure that cannot be pickled across process boundaries. `batch_runner.py` automatically drops the callable from the worker config and lets each worker process rebuild its own provider from `config.yaml` — no user action required, but each worker pays one chain walk at startup. +- **No bearer JWT persistence in `auth.json`.** Hermes does not duplicate `azure-identity`'s internal token cache; cold starts walk the credential chain on first inference. + ## Configuration (written to `config.yaml`) After running the wizard you'll see something like this: @@ -72,11 +224,11 @@ model: Important behaviour: -- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Azure Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`. +- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Microsoft Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`. - **`max_completion_tokens` is used automatically.** Azure OpenAI (like direct OpenAI) requires `max_completion_tokens` for gpt-4o, o-series, and gpt-5.x models. Hermes sends the right parameter based on the endpoint. - **Pre-v1 endpoints that require `api-version`.** If you have a legacy base URL like `https://.openai.azure.com/openai?api-version=2025-04-01-preview`, Hermes extracts the query string and forwards it via `default_query` on every request (the OpenAI SDK otherwise drops it when joining paths). -## Anthropic-style endpoints (Claude via Azure Foundry) +## Anthropic-style endpoints (Claude via Microsoft Foundry) For Claude deployments, use the Anthropic-style route: @@ -96,7 +248,7 @@ Important behaviour: ## Alternative: `provider: anthropic` + Azure base URL -If you already have `provider: anthropic` configured and just want to point it at Azure AI Foundry for Claude, you can skip the `azure-foundry` provider entirely: +If you already have `provider: anthropic` configured and just want to point it at Microsoft Foundry for Claude, you can skip the `azure-foundry` provider entirely: ```yaml model: @@ -117,7 +269,7 @@ Azure does **not** expose a pure-API-key endpoint to list your *deployed* model What Hermes can do: - Azure OpenAI v1 endpoints (`.openai.azure.com/openai/v1`) expose `GET /models` with the resource's **available** model catalog. Hermes uses this list to prefill the model picker. -- Azure Foundry `/anthropic` routes: detected via URL path, model name entered manually. +- Microsoft Foundry `/anthropic` routes: detected via URL path, model name entered manually. - Private / firewalled endpoints: manual entry with a friendly "couldn't probe" message. You can always type a deployment name directly — Hermes does not validate against the returned list. @@ -126,9 +278,18 @@ You can always type a deployment name directly — Hermes does not validate agai | Variable | Purpose | |----------|---------| -| `AZURE_FOUNDRY_API_KEY` | Primary API key for Azure AI Foundry / Azure OpenAI | +| `AZURE_FOUNDRY_API_KEY` | Primary API key for Microsoft Foundry / Azure OpenAI (api_key mode) | | `AZURE_FOUNDRY_BASE_URL` | Endpoint URL (set via `hermes model`; env var is used as a fallback) | | `AZURE_ANTHROPIC_KEY` | Used by `provider: anthropic` + Azure base URL (alternative to `ANTHROPIC_API_KEY`) | +| `AZURE_TENANT_ID` | Entra ID tenant for service-principal flows | +| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) | +| `AZURE_CLIENT_SECRET` | Service principal secret | +| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal cert (alternative to secret) | +| `AZURE_FEDERATED_TOKEN_FILE` | Workload Identity federated token path (AKS) | +| `AZURE_AUTHORITY_HOST` | Sovereign cloud authority host override | +| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead | + +The Azure SDK reads the `AZURE_*` env vars directly. Hermes never inspects them other than to report which sources are present in `hermes doctor` output. ## Troubleshooting @@ -150,8 +311,21 @@ model: api_mode: anthropic_messages # or chat_completions ``` +**Entra ID: "credential chain exhausted" or 401 Unauthorized after switching to `auth_mode: entra_id`.** +- Run `az login` to refresh your developer session (the cached token may have expired). +- Verify the `Azure AI User` (or `Foundry User`) role assignment took effect: `az role assignment list --assignee ` should list it on your Foundry resource. Role propagation can take up to 5 minutes. +- For user-assigned managed identities, double-check `AZURE_CLIENT_ID` matches the identity attached to the compute resource. +- Run `hermes doctor` — the Azure Entra probe reports whether token acquisition succeeded and includes a remediation hint. + +**Entra ID: wizard preflight hangs or times out.** +The 10 s preflight is a soft check. Choose "Save anyway and validate later" and run `hermes doctor` after deploying to the target environment. Common causes include an unreachable token service or stale local login state — prefer workload identity in CI, set `AZURE_TENANT_ID`+`AZURE_CLIENT_ID`+`AZURE_CLIENT_SECRET` when using a service principal, or run `az login` for local development. + +**401 on Anthropic-style endpoint with Entra ID.** +Verify the same `Azure AI User` (or `Foundry User`) role is assigned on the Foundry resource (it covers both `/openai/v1` and `/anthropic` paths). If the OpenAI-style probe works during the wizard but `claude-*` requests fail at runtime, the most common cause is a stale `model.entra.scope` left over from an earlier wizard run — delete the `entra.scope` line from `config.yaml` so the runtime falls back to the default `https://ai.azure.com/.default` scope. + ## Related - [Environment variables](/docs/reference/environment-variables) - [Configuration](/docs/user-guide/configuration) - [AWS Bedrock](/docs/guides/aws-bedrock) — the other major cloud provider integration +- [Microsoft: Configure Entra ID for Foundry](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) — upstream documentation for the keyless path diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 90aecba4412..969b0bf1f05 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -50,9 +50,16 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `XIAOMI_BASE_URL` | Override Xiaomi MiMo base URL (default: `https://api.xiaomimimo.com/v1`) | | `TOKENHUB_API_KEY` | Tencent TokenHub API key ([tokenhub.tencentmaas.com](https://tokenhub.tencentmaas.com)) | | `TOKENHUB_BASE_URL` | Override Tencent TokenHub base URL (default: `https://tokenhub.tencentmaas.com/v1`) | -| `AZURE_FOUNDRY_API_KEY` | Azure AI Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)) | -| `AZURE_FOUNDRY_BASE_URL` | Azure AI Foundry endpoint URL (e.g. `https://.openai.azure.com/openai/v1` for OpenAI-style, or `https://.services.ai.azure.com/anthropic` for Anthropic-style) | -| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at an Azure Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) | +| `AZURE_FOUNDRY_API_KEY` | Microsoft Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)). Not needed when `model.auth_mode: entra_id` | +| `AZURE_FOUNDRY_BASE_URL` | Microsoft Foundry endpoint URL (e.g. `https://.openai.azure.com/openai/v1` for OpenAI-style, or `https://.services.ai.azure.com/anthropic` for Anthropic-style) | +| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at a Microsoft Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) | +| `AZURE_TENANT_ID` | Entra ID tenant ID (service-principal flows; honored by `azure-identity` when `model.auth_mode: entra_id`) | +| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) | +| `AZURE_CLIENT_SECRET` | Service principal secret used by `EnvironmentCredential` | +| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal certificate (alternative to `AZURE_CLIENT_SECRET`) | +| `AZURE_FEDERATED_TOKEN_FILE` | Federated token file path for AKS Workload Identity / OIDC flows | +| `AZURE_AUTHORITY_HOST` | Sovereign-cloud authority override (e.g. `https://login.microsoftonline.us` for Azure Government). See [Azure Foundry guide](/docs/guides/azure-foundry#sovereign-clouds-government-china) | +| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead and do not set these | | `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) | | `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) | | `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) | diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 6ae92e3bb20..6d17abbf14d 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -81,7 +81,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | Kimi / Moonshot (China) | `kimi-coding-cn` | `KIMI_CN_API_KEY` | | StepFun | `stepfun` | `STEPFUN_API_KEY` | | Tencent TokenHub | `tencent-tokenhub` | `TOKENHUB_API_KEY` | -| Azure AI Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` | +| Microsoft Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` | | LM Studio (local) | `lmstudio` | `LM_API_KEY` (or none for local) + `LM_BASE_URL` | | Hugging Face | `huggingface` | `HF_TOKEN` | | Custom endpoint | `custom` | `base_url` + `key_env` (see below) |