feat(azure-foundry): add Microsoft Entra ID auth

Use azure-identity DefaultAzureCredential for keyless Foundry auth. Preserve refreshable callable credentials through OpenAI and Anthropic client paths. Add setup, doctor, auth status, docs, and tests for Entra auth. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-07-28 18:19:28 +00:00 · 2026-05-15 14:36:18 -07:00 · 2026-05-15 14:36:18 -07:00 · 9df9816dab
commit 9df9816dab
parent 457fa913b8
38 changed files with 3772 additions and 122 deletions
--- a/acp_adapter/auth.py
+++ b/acp_adapter/auth.py
@ -9,13 +9,24 @@ TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup"


 def detect_provider() -> Optional[str]:
-    """Resolve the active Hermes runtime provider, or None if unavailable."""
+    """Resolve the active Hermes runtime provider, or None if unavailable.
+
+    Treats a ``Callable`` ``api_key`` (Azure Foundry Entra ID bearer
+    token provider — see :mod:`agent.azure_identity_adapter`) as a valid
+    credential. Without this, ACP sessions for Entra-configured Foundry
+    deployments silently default to ``"openrouter"`` and the ACP auth
+    handshake rejects the legitimate provider.
+    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
        runtime = resolve_runtime_provider()
        api_key = runtime.get("api_key")
        provider = runtime.get("provider")
-        if isinstance(api_key, str) and api_key.strip() and isinstance(provider, str) and provider.strip():
+        if not isinstance(provider, str) or not provider.strip():
+            return None
+        is_string_key = isinstance(api_key, str) and api_key.strip()
+        is_callable_provider = callable(api_key) and not isinstance(api_key, str)
+        if is_string_key or is_callable_provider:
            return provider.strip().lower()
    except Exception:
        return None
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@ -560,7 +560,16 @@ def init_agent(
            agent._client_kwargs = {}
            if not agent.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {agent.model} (Anthropic native)")
-                if effective_key and len(effective_key) > 12:
+                # ``effective_key`` may be a callable Entra ID bearer
+                # provider for Azure Foundry anthropic_messages mode.
+                # The Anthropic adapter installs an httpx event hook
+                # that mints a fresh JWT per request — we never
+                # invoke or inspect the callable in the banner.
+                from agent.azure_identity_adapter import is_token_provider
+
+                if is_token_provider(effective_key):
+                    print("🔑 Using credentials: Microsoft Entra ID")
+                elif isinstance(effective_key, str) and len(effective_key) > 12:
                    print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
    elif agent.api_mode == "bedrock_converse":
        # AWS Bedrock — uses boto3 directly, no OpenAI client needed.
@ -764,12 +773,19 @@ def init_agent(
                print(f"🤖 AI Agent initialized with model: {agent.model}")
                if base_url:
                    print(f"🔗 Using custom base URL: {base_url}")
-                # Always show API key info (masked) for debugging auth issues
+                # ``api_key`` may be a callable Entra ID bearer
+                # provider (Azure Foundry). The OpenAI SDK mints a
+                # fresh JWT per request internally — the banner
+                # never invokes or inspects the callable.
+                from agent.azure_identity_adapter import is_token_provider
+
                key_used = client_kwargs.get("api_key", "none")
-                if key_used and key_used != "dummy-key" and len(key_used) > 12:
+                if is_token_provider(key_used):
+                    print("🔑 Using credentials: Microsoft Entra ID")
+                elif isinstance(key_used, str) and key_used and key_used != "dummy-key" and len(key_used) > 12:
                    print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
                else:
-                    print(f"⚠️  Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
+                    print("⚠️  Warning: API key appears invalid or missing")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
    
@ -1395,7 +1411,12 @@ def init_agent(
            _ra().logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override)
    if agent._ollama_num_ctx is None and agent.base_url and is_local_endpoint(agent.base_url):
        try:
-            _detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=agent.api_key or "")
+            # ``agent.api_key`` may be a callable (Entra token provider).
+            # Ollama detection makes a manual HTTP request and expects a
+            # string — Azure Foundry isn't a local endpoint so this branch
+            # never fires for Entra, but guard defensively.
+            _key_for_ollama = agent.api_key if isinstance(agent.api_key, str) else ""
+            _detected = query_ollama_num_ctx(agent.model, agent.base_url, api_key=_key_for_ollama or "")
            if _detected and _detected > 0:
                agent._ollama_num_ctx = _detected
        except Exception as exc:
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -1390,10 +1390,16 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
            _sm_custom_providers = get_compatible_custom_providers(_sm_cfg)
        except Exception:
            _sm_custom_providers = None
+        # ``agent.api_key`` may be a callable (Azure Foundry Entra ID
+        # token provider). ``get_model_context_length`` expects a
+        # string for its live-probe paths; for Foundry the context
+        # length normally resolves via config or static catalogs and
+        # never hits a probe, but coerce to empty string defensively.
+        _ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else ""
        new_context_length = get_model_context_length(
            agent.model,
            base_url=agent.base_url,
-            api_key=agent.api_key,
+            api_key=_ctx_api_key,
            provider=agent.provider,
            config_context_length=getattr(agent, "_config_context_length", None),
            custom_providers=_sm_custom_providers,
@ -1402,7 +1408,7 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo
            model=agent.model,
            context_length=new_context_length,
            base_url=agent.base_url,
-            api_key=getattr(agent, "api_key", ""),
+            api_key=agent.api_key,  # context_compressor forwards to call_llm; callable preserved
            provider=agent.provider,
            api_mode=agent.api_mode,
        )
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -17,6 +17,7 @@ import os
 import platform
 import subprocess
 from pathlib import Path
+from urllib.parse import urlparse

 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
@ -364,7 +365,7 @@ def _normalize_base_url_text(base_url) -> str:
 def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    """Return True for non-Anthropic endpoints using the Anthropic Messages API.

-    Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
+    Third-party proxies (Microsoft Foundry, AWS Bedrock, self-hosted) authenticate
    with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
    detection should be skipped for these endpoints.
    """
@ -508,6 +509,29 @@ def _is_minimax_anthropic_endpoint(base_url: str | None) -> bool:
    )


+def _is_azure_anthropic_endpoint(base_url: str | None) -> bool:
+    """Return True for Azure-hosted Anthropic Messages endpoints.
+
+    Covers both the modern Foundry host family (``*.services.ai.azure.*``)
+    and the legacy Azure OpenAI host family (``*.openai.azure.*``) when
+    serving Anthropic's ``/anthropic`` route. Used to opt-in those hosts
+    to the ``api-version`` query-param plumbing required by Azure.
+
+    Intentionally avoids a finite allow-list of TLD suffixes so it works
+    across sovereign / private Azure clouds.
+    """
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    parsed = urlparse(normalized)
+    host = (parsed.hostname or "").lower().rstrip(".")
+    path = (parsed.path or "").lower()
+    host_padded = f".{host}."
+    is_foundry_host = ".services.ai.azure." in host_padded
+    is_legacy_azoai_host = ".openai.azure." in host_padded
+    return (is_foundry_host or is_legacy_azoai_host) and "/anthropic" in path
+
+
 def _common_betas_for_base_url(
    base_url: str | None,
    *,
@ -523,7 +547,7 @@ def _common_betas_for_base_url(

    The ``context-1m-2025-08-07`` beta is not sent to native Anthropic by
    default because some subscriptions reject it. Add it only for endpoint
-    families that still require it for 1M context, currently Azure AI Foundry.
+    families that still require it for 1M context, currently Microsoft Foundry.
    Bedrock uses its own client helper below and opts in explicitly.

    ``drop_context_1m_beta=True`` strips the 1M-context beta from any path that
@ -540,8 +564,81 @@ def _common_betas_for_base_url(
    return betas


+def _build_anthropic_client_with_bearer_hook(
+    token_provider,
+    base_url: str = None,
+    timeout: float = None,
+    *,
+    drop_context_1m_beta: bool = False,
+):
+    """Anthropic-on-Foundry Entra ID variant of :func:`build_anthropic_client`.
+
+    Anthropic SDK 0.86.0 stores ``api_key`` / ``auth_token`` as static
+    strings; there is no callable-token contract. To get per-request
+    bearer refresh (Microsoft's documented Foundry pattern), we hand
+    the SDK a custom ``httpx.Client`` whose request event hook mints a
+    fresh JWT from the Entra credential chain and rewrites
+    ``Authorization: Bearer <jwt>`` on every outbound request. The SDK
+    ignores its own auth logic when ``http_client`` is provided (the
+    hook strips any pre-set Authorization).
+
+    The placeholder ``auth_token`` is required because the SDK raises
+    ``AnthropicError`` at construction if neither ``api_key`` nor
+    ``auth_token`` is set — but the hook overrides it per-request so
+    the placeholder value never reaches Azure.
+    """
+    _anthropic_sdk = _get_anthropic_sdk()
+    if _anthropic_sdk is None:
+        raise ImportError(
+            "The 'anthropic' package is required for Azure Foundry Anthropic-style "
+            "endpoints with Entra ID auth. Install with: pip install 'anthropic>=0.39.0'"
+        )
+
+    normalize_proxy_env_vars()
+
+    from httpx import Timeout
+    from agent.azure_identity_adapter import build_bearer_http_client
+
+    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
+    timeout_obj = Timeout(timeout=float(_read_timeout), connect=10.0)
+
+    # Strip any trailing /v1 — the Anthropic SDK appends /v1/messages.
+    normalized_base_url = _normalize_base_url_text(base_url)
+    if normalized_base_url:
+        import re as _re
+        normalized_base_url = _re.sub(r"/v1/?$", "", normalized_base_url.rstrip("/"))
+
+    http_client = build_bearer_http_client(token_provider, timeout=timeout_obj)
+
+    kwargs = {
+        "timeout": timeout_obj,
+        "http_client": http_client,
+        # The SDK requires *something* for api_key/auth_token. Our
+        # event hook overrides Authorization per request so this value
+        # is never sent. The sentinel string makes accidental leaks
+        # diagnosable in logs.
+        "auth_token": "entra-id-bearer-via-http-hook",
+    }
+
+    if normalized_base_url:
+        if _is_azure_anthropic_endpoint(normalized_base_url) and "api-version" not in normalized_base_url:
+            kwargs["base_url"] = normalized_base_url
+            kwargs["default_query"] = {"api-version": "2025-04-15"}
+        else:
+            kwargs["base_url"] = normalized_base_url
+
+    common_betas = _common_betas_for_base_url(
+        normalized_base_url,
+        drop_context_1m_beta=drop_context_1m_beta,
+    )
+    if common_betas:
+        kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
+
+    return _anthropic_sdk.Anthropic(**kwargs)
+
+
 def build_anthropic_client(
-    api_key: str,
+    api_key,
    base_url: str = None,
    timeout: float = None,
    *,
@ -549,6 +646,17 @@ def build_anthropic_client(
 ):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

+    ``api_key`` accepts either:
+
+    * a static ``str`` — the historical contract for all key-based and
+      OAuth flows.
+    * a ``Callable[[], str]`` — an Entra ID bearer token provider from
+      :mod:`agent.azure_identity_adapter`. The Anthropic SDK itself
+      requires a static string, so when given a callable we construct
+      a custom ``httpx.Client`` with a request event hook that mints a
+      fresh JWT per outbound request and rewrites the ``Authorization``
+      header. The SDK never sees the callable directly.
+
    If *timeout* is provided it overrides the default 900s read timeout.  The
    connect timeout stays at 10s.  Callers pass this from the per-provider /
    per-model ``request_timeout_seconds`` config so Anthropic-native and
@ -570,6 +678,14 @@ def build_anthropic_client(
            "Install it with: pip install 'anthropic>=0.39.0'"
        )

+    # Callable api_key → Entra ID bearer provider path. Delegated to a
+    # helper so the existing static-key code below stays unchanged.
+    if callable(api_key) and not isinstance(api_key, str):
+        return _build_anthropic_client_with_bearer_hook(
+            api_key, base_url, timeout,
+            drop_context_1m_beta=drop_context_1m_beta,
+        )
+
    normalize_proxy_env_vars()

    from httpx import Timeout
@ -584,8 +700,7 @@ def build_anthropic_client(
        # Pass it via default_query so the SDK appends it to every request URL
        # without corrupting the base_url (appending it directly produces
        # malformed paths like /anthropic?api-version=.../v1/messages).
-        _is_azure_endpoint = "azure.com" in normalized_base_url.lower()
-        if _is_azure_endpoint and "api-version" not in normalized_base_url:
+        if _is_azure_anthropic_endpoint(normalized_base_url) and "api-version" not in normalized_base_url:
            kwargs["base_url"] = normalized_base_url.rstrip("/")
            kwargs["default_query"] = {"api-version": "2025-04-15"}
        else:
@ -615,7 +730,7 @@ def build_anthropic_client(
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_third_party_anthropic_endpoint(base_url):
-        # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
+        # Third-party proxies (Microsoft Foundry, AWS Bedrock, etc.) use their
        # own API keys with x-api-key auth. Skip OAuth detection — their keys
        # don't follow Anthropic's sk-ant-* prefix convention and would be
        # misclassified as OAuth tokens.
@ -1757,7 +1872,7 @@ def convert_messages_to_anthropic(
    # causing HTTP 400 "Invalid signature in thinking block".
    #
    # Signatures are Anthropic-proprietary.  Third-party endpoints
-    # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
+    # (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
    # them and will reject them outright.  When targeting a third-party
    # endpoint, strip ALL thinking/redacted_thinking blocks from every
    # assistant message — the third-party will generate its own
@ -2103,5 +2218,3 @@ def build_anthropic_kwargs(
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}

    return kwargs
-
-
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -1902,6 +1902,120 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
    return CodexAuxiliaryClient(real_client, model), model


+def _try_azure_foundry(
+    *,
+    model: Optional[str] = None,
+    explicit_api_key: Optional[str] = None,
+    explicit_base_url: Optional[str] = None,
+    api_mode: Optional[str] = None,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Resolve an Azure Foundry auxiliary client via the runtime resolver.
+
+    Mirrors the ``_try_anthropic`` / ``_try_nous`` shape but delegates to
+    :func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime` —
+    the same resolver the main agent uses — so:
+
+    * ``auth_mode: api_key`` (default) gets the static
+      ``AZURE_FOUNDRY_API_KEY`` string.
+    * ``auth_mode: entra_id`` gets a callable bearer-token provider
+      (``Callable[[], str]`` from
+      :mod:`agent.azure_identity_adapter`).
+    * Per-model ``api_mode`` auto-routing for GPT-5.x / o-series /
+      codex models works.
+    * ``model.entra.{tenant_id,client_id,authority,scope}`` config
+      fields propagate.
+    * Non-default ``model.base_url`` overrides are honored.
+
+    The OpenAI SDK accepts both shapes for ``api_key`` so the caller
+    can forward the result without coercion.
+
+    Returns ``(client, model)`` or ``(None, None)`` on failure.
+    """
+    try:
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        from hermes_cli.auth import AuthError
+        from hermes_cli.config import load_config
+    except ImportError:
+        return None, None
+
+    try:
+        cfg = load_config()
+        model_cfg = cfg.get("model") if isinstance(cfg, dict) else {}
+        if not isinstance(model_cfg, dict):
+            model_cfg = {}
+    except Exception:
+        model_cfg = {}
+
+    try:
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg=model_cfg,
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=explicit_base_url,
+            target_model=model,
+        )
+    except AuthError as exc:
+        logger.debug("Auxiliary azure-foundry: %s", exc)
+        return None, None
+    except Exception as exc:
+        logger.debug("Auxiliary azure-foundry runtime error: %s", exc)
+        return None, None
+
+    api_key = runtime.get("api_key")
+    base_url = str(runtime.get("base_url", "") or "")
+    runtime_api_mode = api_mode or runtime.get("api_mode") or "chat_completions"
+
+    # Empty-string check on api_key here would be wrong for callable
+    # token providers (callables are truthy and non-empty by definition).
+    # Bail only when api_key is None / empty string.
+    _has_key = bool(api_key) if not callable(api_key) else True
+    if not _has_key or not base_url:
+        return None, None
+
+    final_model = _normalize_resolved_model(
+        model or str(model_cfg.get("default") or ""),
+        "azure-foundry",
+    )
+    if not final_model:
+        # No fallback aux model for Azure — the user must have a
+        # deployment name. Surface that as "no client" so the auto
+        # chain falls through to the next provider rather than 404ing.
+        logger.debug(
+            "Auxiliary azure-foundry: no model resolved (model=%r, default=%r)",
+            model, model_cfg.get("default"),
+        )
+        return None, None
+
+    # Azure pre-v1 endpoints sometimes carry api-version query params
+    # in the base URL; the OpenAI SDK drops them when joining paths,
+    # so lift them out and pass via default_query.
+    extra: Dict[str, Any] = {}
+    _clean_base, _dq = _extract_url_query_params(base_url)
+    if _dq:
+        extra["default_query"] = _dq
+
+    client = OpenAI(api_key=api_key, base_url=_clean_base, **extra)
+
+    if runtime_api_mode == "codex_responses":
+        # GPT-5.x / o-series / codex models on Azure Foundry are
+        # Responses-API-only — wrap so chat.completions.create() is
+        # translated to /responses behind the scenes.
+        return CodexAuxiliaryClient(client, final_model), final_model
+
+    if runtime_api_mode == "anthropic_messages":
+        # Forward ``api_key`` verbatim — for static keys it's a string,
+        # for Entra ID it's a callable. ``_maybe_wrap_anthropic`` →
+        # ``build_anthropic_client`` detects the callable and installs
+        # the bearer-injecting httpx hook.
+        return _maybe_wrap_anthropic(
+            client, final_model, api_key,
+            base_url, runtime_api_mode,
+        ), final_model
+
+    # chat_completions — return the plain OpenAI client.
+    return client, final_model
+
+
 def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
    try:
        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
@ -1957,20 +2071,31 @@ _AUTO_PROVIDER_LABELS = {
    "_resolve_api_key_provider": "api-key",
 }

-_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
+_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode", "auth_mode")


-def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]:
-    """Return a sanitized copy of a live main-runtime override."""
+def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+    """Return a sanitized copy of a live main-runtime override.
+
+    Most fields are stripped strings. ``api_key`` may legitimately be a
+    zero-arg callable (Azure Foundry Entra ID token provider) — preserve
+    those as-is so auxiliary clients inherit the same authentication
+    surface as the main agent. The OpenAI SDK accepts ``Callable[[], str]``
+    for ``api_key`` and calls it before every request.
+    """
    if not isinstance(main_runtime, dict):
        return {}
-    normalized: Dict[str, str] = {}
+    normalized: Dict[str, Any] = {}
    for field in _MAIN_RUNTIME_FIELDS:
        value = main_runtime.get(field)
+        # Preserve a callable api_key (Entra ID bearer provider) unchanged.
+        if field == "api_key" and callable(value) and not isinstance(value, str):
+            normalized[field] = value
+            continue
        if isinstance(value, str) and value.strip():
            normalized[field] = value.strip()
    provider = normalized.get("provider")
-    if provider:
+    if isinstance(provider, str):
        normalized["provider"] = provider.lower()
    return normalized

@ -2762,10 +2887,10 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
    runtime = _normalize_main_runtime(main_runtime)
    runtime_provider = runtime.get("provider", "")
-    runtime_model = runtime.get("model", "")
-    runtime_base_url = runtime.get("base_url", "")
+    runtime_model = str(runtime.get("model") or "")
+    runtime_base_url = str(runtime.get("base_url") or "")
    runtime_api_key = runtime.get("api_key", "")
-    runtime_api_mode = runtime.get("api_mode", "")
+    runtime_api_mode = str(runtime.get("api_mode") or "")

    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
    #    provider (not 'custom').  This catches the common "env poisoning"
@ -2793,8 +2918,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    # on aggregators (OpenRouter, Nous) who previously got routed to a
    # cheap provider-side default.  Explicit per-task overrides set via
    # config.yaml (auxiliary.<task>.provider) still win over this.
-    main_provider = runtime_provider or _read_main_provider()
-    main_model = runtime_model or _read_main_model()
+    main_provider = str(runtime_provider or _read_main_provider() or "")
+    main_model = str(runtime_model or _read_main_model() or "")
    if (main_provider and main_model
            and main_provider not in {"auto", ""}):
        resolved_provider = main_provider
@ -3188,7 +3313,11 @@ def resolve_provider_client(
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
-                _ckey = str(getattr(client, "api_key", "") or "")
+                # ``client.api_key`` may be a callable (Azure Foundry Entra
+                # bearer provider). Pass empty string for the wrapper-detection
+                # path — wrapping decisions are based on base_url + api_mode.
+                _raw_ckey = getattr(client, "api_key", "")
+                _ckey = "" if (callable(_raw_ckey) and not isinstance(_raw_ckey, str)) else str(_raw_ckey or "")
                client = _wrap_if_needed(client, final_model, _cbase, _ckey)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
@ -3300,6 +3429,40 @@ def resolve_provider_client(
    except ImportError:
        pass

+    # ── Azure Foundry (delegates to runtime resolver for auth_mode-aware routing) ─
+    #
+    # The generic PROVIDER_REGISTRY path below uses
+    # ``resolve_api_key_provider_credentials`` which only knows about the
+    # static ``AZURE_FOUNDRY_API_KEY`` env var. That misses two important
+    # cases for the ``azure-foundry`` provider:
+    #
+    #   1. ``model.auth_mode: entra_id`` — no static key exists; we need
+    #      a callable bearer-token provider from ``azure_identity_adapter``.
+    #   2. Non-default ``model.base_url`` (Foundry projects path) — the
+    #      env-var-only resolver doesn't apply config-yaml-driven URL
+    #      overrides.
+    #
+    # Delegate to the same runtime resolver the main agent uses so
+    # auxiliary tasks (title generation, compression, vision, embedding,
+    # session search) inherit the user's full Azure config.
+    if provider == "azure-foundry":
+        client, default_model = _try_azure_foundry(
+            model=model,
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=explicit_base_url,
+            api_mode=api_mode,
+        )
+        if client is None:
+            logger.warning(
+                "resolve_provider_client: azure-foundry requested but "
+                "runtime resolution failed (run: hermes doctor for "
+                "diagnostics)"
+            )
+            return None, None
+        final_model = _normalize_resolved_model(model or default_model, provider)
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                else (client, final_model))
+
    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
        from hermes_cli.auth import (
--- a/agent/azure_identity_adapter.py
+++ b/agent/azure_identity_adapter.py
@ -0,0 +1,555 @@
+"""Microsoft Entra ID adapter for Microsoft Foundry.
+
+Provides keyless authentication for Microsoft Foundry deployments using the
+`azure-identity` SDK's `DefaultAzureCredential` chain (env service principal
+→ workload identity → managed identity → VS Code → Azure CLI → azd →
+PowerShell → broker).
+
+Architecture mirrors `agent/bedrock_adapter.py`:
+
+* Lazy import. `azure-identity` is only loaded when ``model.auth_mode =
+  entra_id`` is selected. Users who stick with `AZURE_FOUNDRY_API_KEY`
+  never pay the import cost.
+* SDK-callable contract. The public entry point ``build_token_provider``
+  returns a zero-arg callable produced by ``get_bearer_token_provider`` —
+  this is exactly the value Microsoft's documented sample plugs into
+  ``OpenAI(api_key=token_provider, base_url=...)``. The OpenAI SDK calls
+  it before every request, so token refresh is transparent.
+* Three explicit consumer-side helpers (display / cache / http-bearer)
+  rather than one generic "materialize" function — splitting them by
+  purpose prevents accidental token-minting in logging paths or token
+  leakage into cache keys / dashboard JSON.
+* No persisted JWT. ``azure-identity`` caches in-process and (where
+  available) in the OS keychain or ``~/.IdentityService``. Hermes does
+  not duplicate that storage in ``auth.json``.
+
+Reference: https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
+
+Requires: ``azure-identity`` (optional dependency — only needed when
+``model.auth_mode = entra_id``).
+"""
+
+from __future__ import annotations
+
+import functools
+import logging
+import os
+import threading
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+# Microsoft-documented scope for Foundry inference auth. Both the new
+# Foundry portal and the legacy Azure OpenAI managed-identity docs use
+# this scope for ALL Foundry endpoint shapes (*.openai.azure.com,
+# *.services.ai.azure.com, *.ai.azure.com). The older control-plane
+# scope ``https://cognitiveservices.azure.com/.default`` is for ARM
+# resource management and is rejected for inference by newer
+# resources — users with that requirement override via
+# ``model.entra.scope`` in config.yaml.
+SCOPE_AI_AZURE_DEFAULT = "https://ai.azure.com/.default"
+
+# ---------------------------------------------------------------------------
+# Lazy SDK import — only loaded when the Entra path is actually used.
+# ---------------------------------------------------------------------------
+
+_AZURE_IDENTITY_FEATURE = "provider.azure_identity"
+
+
+def has_azure_identity_installed() -> bool:
+    """Return True if `azure-identity` can be imported right now.
+
+    Cheap check — does not walk the credential chain.
+    """
+    try:
+        import azure.identity  # noqa: F401
+        return True
+    except Exception:
+        return False
+
+
+def _require_azure_identity():
+    """Import ``azure.identity``, lazy-installing it if allowed.
+
+    Raises ``ImportError`` with a clear actionable message when the
+    package is missing and lazy installs are disabled.
+    """
+    try:
+        import azure.identity as _ai
+        return _ai
+    except ImportError:
+        try:
+            from tools.lazy_deps import ensure, FeatureUnavailable
+        except ImportError as exc:
+            raise ImportError(
+                "The 'azure-identity' package is required for Azure AI "
+                "Foundry Entra ID authentication. Install it with: "
+                "pip install azure-identity"
+            ) from exc
+
+        try:
+            ensure(_AZURE_IDENTITY_FEATURE, prompt=False)
+        except FeatureUnavailable as exc:
+            raise ImportError(
+                "The 'azure-identity' package is required for Azure AI "
+                "Foundry Entra ID authentication. " + str(exc)
+            ) from exc
+
+        # Retry import after lazy install.
+        import azure.identity as _ai  # noqa: WPS440
+        return _ai
+
+
+def reset_credential_cache() -> None:
+    """Clear the cached ``DefaultAzureCredential``. Used by tests and
+    profile switches.
+
+    Defensive against tests that ``monkeypatch.setattr`` over
+    ``build_credential`` with a plain (non-lru-cached) function — those
+    won't expose ``cache_clear()`` until pytest reverts the patch.
+    """
+    cache_clear = getattr(build_credential, "cache_clear", None)
+    if callable(cache_clear):
+        cache_clear()
+
+
+# ---------------------------------------------------------------------------
+# Token-provider construction
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class EntraIdentityConfig:
+    """Serializable Entra ID config.
+
+    Captures the Hermes-managed Entra knobs we need outside Azure SDK
+    environment configuration. Everything else
+    (tenant ID, service principal secret, federated token file, sovereign
+    cloud authority, etc.) flows through azure-identity's standard
+    ``AZURE_*`` env vars — see the Bedrock pattern in
+    ``hermes_cli/runtime_provider.py:1310-1377`` for the analogous
+    "let the SDK read env" approach.
+
+    ``scope`` is Microsoft's documented Foundry inference audience. Almost
+    everyone uses the default; sovereign-cloud / non-standard tenants can
+    override via ``model.entra.scope``. Identity selection (user-assigned
+    managed identity, workload identity, service principal, tenant, authority)
+    stays in the standard Azure SDK env vars such as ``AZURE_CLIENT_ID``.
+
+    ``exclude_interactive_browser`` is kept as an internal constructor knob
+    so probes stay non-interactive by default. It is not written by the setup
+    wizard.
+
+    The dataclass is frozen so it's hashable for ``functools.lru_cache``
+    keying, and serializable across multiprocessing boundaries (workers
+    rebuild the credential inside their own process).
+    """
+
+    scope: str = SCOPE_AI_AZURE_DEFAULT
+    exclude_interactive_browser: bool = True
+
+    def __post_init__(self) -> None:
+        scope = str(self.scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
+        object.__setattr__(self, "scope", scope)
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "scope": self.scope,
+            "exclude_interactive_browser": self.exclude_interactive_browser,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Optional[Dict[str, Any]],
+                  *, default_scope: Optional[str] = None) -> "EntraIdentityConfig":
+        data = data or {}
+        scope = str(data.get("scope") or "").strip() or default_scope or SCOPE_AI_AZURE_DEFAULT
+        exclude_browser = bool(data.get("exclude_interactive_browser", True))
+        return cls(
+            scope=scope,
+            exclude_interactive_browser=exclude_browser,
+        )
+
+
+def _build_default_credential(config: EntraIdentityConfig) -> Any:
+    """Construct a ``DefaultAzureCredential`` for ``config``.
+
+    Only Hermes-selected knobs are passed as kwargs. Everything else
+    (tenant, service principal secret, federated token file, sovereign
+    cloud authority, etc.) is read by ``azure-identity`` from the
+    standard ``AZURE_*`` environment variables — see Microsoft's
+    documented credential resolution chain. Users configure those in
+    ``~/.hermes/.env`` or the deployment environment.
+    """
+    ai = _require_azure_identity()
+    kwargs: Dict[str, Any] = {}
+    # SDK default is True (browser excluded); only pass when the user
+    # explicitly opts in to interactive browser auth.
+    if not config.exclude_interactive_browser:
+        kwargs["exclude_interactive_browser_credential"] = False
+    return ai.DefaultAzureCredential(**kwargs)
+
+
+@functools.lru_cache(maxsize=1)
+def build_credential(config: EntraIdentityConfig) -> Any:
+    """Return the cached ``DefaultAzureCredential`` for ``config``.
+
+    Hermes processes use exactly one Entra config at a time (the
+    ``model.entra.*`` block in config.yaml drives every aux task,
+    subagent, and credential probe in the session). ``maxsize=1`` is
+    intentional: it reflects the actual usage pattern and keeps the
+    cache trivially small.
+
+    ``EntraIdentityConfig`` is a frozen dataclass, so it's hashable and
+    safe as an LRU-cache key. ``functools.lru_cache`` is thread-safe in
+    CPython.
+
+    If two distinct configs are ever passed (tests do this; production
+    rarely), the LRU eviction handles it correctly — each call still
+    returns a credential matching its config; only one is cached at a
+    time. Use :func:`reset_credential_cache` to clear (e.g. in tests).
+    """
+    return _build_default_credential(config)
+
+
+def build_token_provider(scope: Optional[str] = None,
+                         *,
+                         config: Optional[EntraIdentityConfig] = None,
+                         base_url: Optional[str] = None,
+                         exclude_interactive_browser: bool = True,
+                         ) -> Callable[[], str]:
+    """Return a zero-arg callable that mints a fresh Entra bearer JWT.
+
+    The returned callable is exactly what Microsoft's documented Foundry
+    sample expects::
+
+        from openai import OpenAI
+        client = OpenAI(
+            base_url="https://my-resource.openai.azure.com/openai/v1/",
+            api_key=build_token_provider(),
+        )
+
+    Scope resolution order:
+      1. ``config.scope`` when a config object is supplied
+      2. explicit ``scope`` kwarg
+      3. ``SCOPE_AI_AZURE_DEFAULT`` (Microsoft's documented Foundry scope)
+
+    ``base_url`` is unused today and kept for back-compat. Tenant /
+    service-principal / sovereign-cloud configuration flows through
+    ``azure-identity``'s standard ``AZURE_*`` environment variables —
+    see :func:`_build_default_credential` for the rationale.
+
+    NOT serializable across process boundaries. For multiprocessing
+    workers, serialize the ``EntraIdentityConfig`` and rebuild the
+    provider inside the worker.
+    """
+    ai = _require_azure_identity()
+    if config is None:
+        config = EntraIdentityConfig(
+            scope=scope or SCOPE_AI_AZURE_DEFAULT,
+            exclude_interactive_browser=exclude_interactive_browser,
+        )
+    credential = build_credential(config)
+    return ai.get_bearer_token_provider(credential, config.scope)
+
+
+# ---------------------------------------------------------------------------
+# Credential probing
+# ---------------------------------------------------------------------------
+
+
+def has_azure_identity_credentials(scope: Optional[str] = None,
+                                   *,
+                                   config: Optional[EntraIdentityConfig] = None,
+                                   timeout_seconds: float = 10.0,
+                                   allow_install: bool = True,
+                                   **overrides: Any) -> bool:
+    """Best-effort probe: can `DefaultAzureCredential` mint a token now?
+
+    Runs ``credential.get_token(scope)`` under a thread-based timeout so
+    a slow token service can't hang the caller. Returns False on any
+    error — never raises. Use for ``hermes doctor`` /
+    ``hermes auth status`` / wizard preflight.
+
+    ``allow_install``: when True (default) and ``azure-identity`` is not
+    importable, the adapter triggers the standard lazy-install path
+    (subject to ``security.allow_lazy_installs``) before probing. Set
+    False to make this strictly an "is installed?" check — used on hot
+    paths like CLI startup where we never want pip to run.
+
+    NOT used by ``is_provider_configured()`` — that path is structural
+    only (no token mint), so CLI startup doesn't pay this latency.
+    """
+    if not has_azure_identity_installed():
+        if not allow_install:
+            return False
+        try:
+            _require_azure_identity()
+        except ImportError as exc:
+            logger.debug("azure-identity lazy install unavailable: %s", exc)
+            return False
+    if config is None:
+        effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
+        config = EntraIdentityConfig(scope=effective_scope, **overrides)
+
+    result = {"ok": False}
+
+    def _probe() -> None:
+        try:
+            credential = build_credential(config)
+            tok = credential.get_token(config.scope)
+            result["ok"] = bool(getattr(tok, "token", None))
+        except Exception as exc:
+            logger.debug("Entra credential probe failed: %s", exc)
+            result["ok"] = False
+
+    thread = threading.Thread(target=_probe, daemon=True)
+    thread.start()
+    thread.join(timeout=max(0.01, timeout_seconds))
+    if thread.is_alive():
+        logger.debug("Entra token service probe timed out after %ss", timeout_seconds)
+        return False
+    return bool(result.get("ok"))
+
+
+def describe_active_credential(config: Optional[EntraIdentityConfig] = None,
+                               *,
+                               scope: Optional[str] = None,
+                               timeout_seconds: float = 10.0,
+                               allow_install: bool = True,
+                               **overrides: Any) -> Dict[str, Any]:
+    """Return diagnostic info about the active credential chain.
+
+    Best-effort: runs ``get_token()`` and inspects what came back.
+    Designed for ``hermes doctor`` and the wizard preflight — never
+    raises, returns ``{"ok": False, "error": ...}`` on failure.
+
+    ``allow_install``: when True (default) and ``azure-identity`` is not
+    importable, the adapter triggers the standard lazy-install path
+    (subject to ``security.allow_lazy_installs``) before probing. The
+    install failure is surfaced as the diagnostic error when it fails.
+    Set False for hot CLI paths that should never trigger pip.
+
+    ``azure-identity`` doesn't expose the winning inner credential as
+    a public field, so we report a coarse picture (env vars present,
+    token expiry, claims-derived tenant) rather than the credential
+    class name. Users wanting the precise class can run with
+    ``AZURE_LOG_LEVEL=DEBUG``.
+    """
+    info: Dict[str, Any] = {"ok": False}
+    if not has_azure_identity_installed():
+        if not allow_install:
+            info["error"] = "azure-identity not installed"
+            info["hint"] = (
+                "pip install azure-identity (or rely on lazy install at "
+                "first use)"
+            )
+            return info
+        try:
+            _require_azure_identity()
+        except ImportError as exc:
+            info["error"] = str(exc) or "azure-identity not installed"
+            info["hint"] = (
+                "pip install azure-identity manually, or enable lazy "
+                "installs (security.allow_lazy_installs: true in "
+                "config.yaml)."
+            )
+            return info
+
+    if config is None:
+        effective_scope = (scope or "").strip() or SCOPE_AI_AZURE_DEFAULT
+        config = EntraIdentityConfig(scope=effective_scope, **overrides)
+
+    info["scope"] = config.scope
+    # Tenant / authority / service-principal config flow through the
+    # standard ``AZURE_*`` env vars; surface them below.
+    if os.environ.get("AZURE_TENANT_ID", "").strip():
+        info["tenant_id_env"] = os.environ["AZURE_TENANT_ID"].strip()
+
+    # Surface which env-var sources are present without minting yet.
+    env_sources = []
+    if os.environ.get("AZURE_FEDERATED_TOKEN_FILE", "").strip():
+        env_sources.append("WorkloadIdentityCredential (AZURE_FEDERATED_TOKEN_FILE)")
+    if (os.environ.get("AZURE_CLIENT_ID", "").strip()
+            and os.environ.get("AZURE_CLIENT_SECRET", "").strip()
+            and os.environ.get("AZURE_TENANT_ID", "").strip()):
+        env_sources.append("EnvironmentCredential (client secret)")
+    if os.environ.get("IDENTITY_ENDPOINT", "").strip() or os.environ.get("MSI_ENDPOINT", "").strip():
+        env_sources.append("ManagedIdentityCredential (IDENTITY_ENDPOINT)")
+    info["env_sources"] = env_sources
+
+    # Now try minting.
+    result: Dict[str, Any] = {}
+
+    def _probe() -> None:
+        try:
+            credential = build_credential(config)
+            tok = credential.get_token(config.scope)
+            result["token"] = tok
+        except Exception as exc:
+            result["error"] = str(exc)
+
+    thread = threading.Thread(target=_probe, daemon=True)
+    thread.start()
+    thread.join(timeout=max(0.01, timeout_seconds))
+    if thread.is_alive():
+        info["error"] = f"Token probe timed out after {timeout_seconds:.0f}s"
+        info["hint"] = (
+            "DefaultAzureCredential can be slow when the token service is unreachable "
+            "or when az login state is stale. Try `az login` or set "
+            "AZURE_CLIENT_ID / AZURE_TENANT_ID / AZURE_CLIENT_SECRET."
+        )
+        return info
+
+    if "error" in result:
+        info["error"] = result["error"]
+        return info
+
+    token = result.get("token")
+    if token is None:
+        info["error"] = "credential chain exhausted"
+        return info
+
+    info["ok"] = True
+    info["expires_on"] = getattr(token, "expires_on", None)
+    return info
+
+
+# ---------------------------------------------------------------------------
+# Consumer-side helpers — split by purpose to prevent accidental token
+# minting in logging / cache-key / dashboard paths.
+# ---------------------------------------------------------------------------
+
+
+def is_token_provider(value: Any) -> bool:
+    """Return True when ``value`` is a callable Entra token provider.
+
+    Used at the seams where a consumer must decide between
+    string-API-key semantics and bearer-callable semantics.
+    """
+    return callable(value) and not isinstance(value, str)
+
+
+def materialize_bearer_for_http(value: Any) -> str:
+    """Return a fresh Bearer JWT for a manual HTTP request.
+
+    Only call this at sites that must construct an ``Authorization``
+    header outside the OpenAI SDK (e.g. ``hermes_cli/azure_detect.py``).
+    Calls the callable exactly once and returns the resulting token.
+
+    **Anthropic SDK integration:** the Anthropic Python SDK does not
+    accept a ``Callable[[], str]`` for ``auth_token``. Instead,
+    :func:`build_bearer_http_client` returns an ``httpx.Client`` whose
+    request event hook calls this function and rewrites the
+    ``Authorization`` header per request — and that client is passed to
+    the Anthropic SDK via ``http_client=...``. See
+    :func:`agent.anthropic_adapter.build_anthropic_client` for the
+    consumer.
+
+    Raises ``ValueError`` if ``value`` is not a callable token provider
+    or non-empty string.
+    """
+    if is_token_provider(value):
+        token = value()
+        if not isinstance(token, str) or not token:
+            raise ValueError("token provider returned empty value")
+        return token
+    if isinstance(value, str) and value:
+        return value
+    raise ValueError("no usable api_key / token provider")
+
+
+def build_bearer_http_client(token_provider: Callable[[], str], **httpx_kwargs: Any) -> Any:
+    """Return an ``httpx.Client`` that mints a fresh Entra bearer JWT
+    per outbound request.
+
+    The Anthropic SDK (≤ 0.86.0 at the time of writing) stores
+    ``api_key`` / ``auth_token`` as static strings and computes the
+    ``Authorization`` header at construction time. To get per-request
+    token refresh (the Microsoft-recommended Foundry pattern for
+    callable bearer providers), we install an httpx ``request`` event
+    hook on a custom client and pass that client to the SDK via
+    ``http_client=...``. The hook:
+
+      1. Calls :func:`materialize_bearer_for_http` to mint a fresh JWT
+         (azure-identity caches internally — this is cheap when the
+         cached token is still valid).
+      2. Strips any pre-set ``Authorization`` / ``api-key`` /
+         ``x-api-key`` headers the SDK may have added (avoids
+         conflicting auth values).
+      3. Sets ``Authorization: Bearer <fresh-jwt>``.
+
+    ``token_provider`` must be a zero-arg callable returning a string —
+    typically the result of :func:`build_token_provider`.
+
+    ``httpx_kwargs`` are forwarded verbatim to ``httpx.Client(...)`` so
+    callers can attach a ``timeout``, ``transport``, ``proxy``, etc.
+
+    Raises ``ImportError`` if ``httpx`` is not installed (it is a
+    transitive dependency of both ``openai`` and ``anthropic`` SDKs, so
+    in practice always available when this helper is reached).
+    """
+    if not is_token_provider(token_provider):
+        raise ValueError(
+            "build_bearer_http_client requires a zero-arg callable "
+            "token provider"
+        )
+
+    try:
+        import httpx
+    except ImportError as exc:  # pragma: no cover — httpx ships with openai/anthropic
+        raise ImportError(
+            "httpx is required for Entra ID bearer auth on Microsoft Foundry "
+            "Anthropic-style endpoints. It is normally a transitive "
+            "dependency of the openai/anthropic SDKs."
+        ) from exc
+
+    def _inject_bearer(request: "httpx.Request") -> None:
+        try:
+            token = materialize_bearer_for_http(token_provider)
+        except ValueError as exc:
+            # Token provider failed (chain exhausted, token service unreachable,
+            # az login expired, etc.). Strip any auth headers the SDK
+            # may have set — including our own placeholder sentinel
+            # ``entra-id-bearer-via-http-hook`` from
+            # ``_build_anthropic_client_with_bearer_hook`` — so the
+            # outbound request hits Azure with NO Authorization rather
+            # than with the placeholder. Azure returns a clean 401
+            # "missing auth" that is easier to diagnose than a 401
+            # against the sentinel string, and the sentinel never
+            # appears in upstream access logs.
+            #
+            # Log at WARNING (not DEBUG) so the misconfiguration is
+            # visible at default log levels.
+            logger.warning(
+                "Bearer hook: Entra ID token provider returned empty (%s) "
+                "— stripping Authorization headers. Azure will respond 401. "
+                "Run `hermes doctor` or `az login` to recover.",
+                exc,
+            )
+            for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"):
+                request.headers.pop(header_name, None)
+            return
+        for header_name in ("Authorization", "authorization", "Api-Key", "api-key", "X-Api-Key", "x-api-key"):
+            request.headers.pop(header_name, None)
+        request.headers["Authorization"] = f"Bearer {token}"
+
+    return httpx.Client(
+        event_hooks={"request": [_inject_bearer]},
+        **httpx_kwargs,
+    )
+
+
+__all__ = [
+    "EntraIdentityConfig",
+    "SCOPE_AI_AZURE_DEFAULT",
+    "build_bearer_http_client",
+    "build_credential",
+    "build_token_provider",
+    "describe_active_credential",
+    "has_azure_identity_credentials",
+    "has_azure_identity_installed",
+    "is_token_provider",
+    "materialize_bearer_for_http",
+    "reset_credential_cache",
+]
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@ -866,9 +866,14 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
        # the fallback activation drops to 128K even when config says 204800.
        if hasattr(agent, 'context_compressor') and agent.context_compressor:
            from agent.model_metadata import get_model_context_length
+            # ``agent.api_key`` may be callable (Entra ID); the
+            # context-length resolver expects a string for live
+            # probes. Foundry typically resolves via config/static
+            # catalogs anyway, so coerce defensively.
+            _fb_ctx_api_key = agent.api_key if isinstance(agent.api_key, str) else ""
            fb_context_length = get_model_context_length(
                agent.model, base_url=agent.base_url,
-                api_key=agent.api_key, provider=agent.provider,
+                api_key=_fb_ctx_api_key, provider=agent.provider,
                config_context_length=getattr(agent, "_config_context_length", None),
                custom_providers=getattr(agent, "_custom_providers", None),
            )
@ -876,7 +881,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                model=agent.model,
                context_length=fb_context_length,
                base_url=agent.base_url,
-                api_key=getattr(agent, "api_key", ""),
+                api_key=getattr(agent, "api_key", ""),  # callable preserved → call_llm
                provider=agent.provider,
            )

--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -486,7 +486,7 @@ class ContextCompressor(ContextEngine):
        model: str,
        context_length: int,
        base_url: str = "",
-        api_key: str = "",
+        api_key: Any = "",
        provider: str = "",
        api_mode: str = "",
    ) -> None:
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@ -103,7 +103,15 @@ def check_compression_model_feasibility(agent: Any) -> None:
            return

        aux_base_url = str(getattr(client, "base_url", ""))
-        aux_api_key = str(getattr(client, "api_key", ""))
+        # ``client.api_key`` may be a callable (Azure Foundry Entra ID
+        # bearer provider). The context-length resolver chain expects a
+        # string, but it only needs a key for live catalogue probes
+        # (provider model lists). For Entra clients the model-metadata
+        # chain still resolves via models.dev + hardcoded family
+        # fallbacks, which don't require auth — pass empty string rather
+        # than minting a bearer JWT just to look up a context length.
+        _raw_aux_key = getattr(client, "api_key", "")
+        aux_api_key = "" if (callable(_raw_aux_key) and not isinstance(_raw_aux_key, str)) else str(_raw_aux_key or "")

        aux_context = get_model_context_length(
            aux_model,
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -1807,7 +1807,11 @@ def run_conversation(
                        # that survives message/tool sanitization (#6843).
                        _credential_sanitized = False
                        _raw_key = getattr(agent, "api_key", None) or ""
-                        if _raw_key:
+                        # Entra ID bearer providers are callables — their
+                        # minted JWTs are always ASCII, so no sanitization
+                        # is needed (and ``_strip_non_ascii`` would crash
+                        # on a callable input).
+                        if _raw_key and isinstance(_raw_key, str):
                            _clean_key = _strip_non_ascii(_raw_key)
                            if _clean_key != _raw_key:
                                agent.api_key = _clean_key
@ -2080,15 +2084,26 @@ def run_conversation(
                ):
                    anthropic_auth_retry_attempted = True
                    from agent.anthropic_adapter import _is_oauth_token
+                    from agent.azure_identity_adapter import is_token_provider
                    if agent._try_refresh_anthropic_client_credentials():
                        print(f"{agent.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
                        continue
                    # Credential refresh didn't help — show diagnostic info
                    key = agent._anthropic_api_key
-                    auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
                    print(f"{agent.log_prefix}🔐 Anthropic 401 — authentication failed.")
-                    print(f"{agent.log_prefix}   Auth method: {auth_method}")
-                    print(f"{agent.log_prefix}   Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{agent.log_prefix}   Token: (empty or short)")
+                    if is_token_provider(key):
+                        # Azure Foundry Entra ID — the bearer token is
+                        # minted per-request by an httpx event hook on a
+                        # custom http_client passed to the SDK. The 401
+                        # means Azure rejected the JWT (RBAC role missing,
+                        # az login expired, IMDS unreachable, etc.).
+                        print(f"{agent.log_prefix}   Auth method: Microsoft Entra ID (httpx event hook)")
+                        print(f"{agent.log_prefix}   Run `hermes doctor` for credential-chain diagnostics, or")
+                        print(f"{agent.log_prefix}   `az login` if your developer session expired.")
+                    else:
+                        auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
+                        print(f"{agent.log_prefix}   Auth method: {auth_method}")
+                        print(f"{agent.log_prefix}   Token prefix: {key[:12]}..." if isinstance(key, str) and len(key) > 12 else f"{agent.log_prefix}   Token: (empty or short)")
                    print(f"{agent.log_prefix}   Troubleshooting:")
                    from hermes_constants import display_hermes_home as _dhh_fn
                    _dhh = _dhh_fn()
--- a/batch_runner.py
+++ b/batch_runner.py
@ -862,13 +862,32 @@ class BatchRunner:
                "last_updated": None
            }
        
-        # Prepare configuration for workers
+        # Prepare configuration for workers.
+        #
+        # ``self.api_key`` may be a zero-arg callable (Azure Foundry Entra ID
+        # bearer provider returned by ``agent.azure_identity_adapter``). Such
+        # closures are not safely picklable across the multiprocessing.Pool
+        # boundary. Drop the callable here and let each worker rebuild its
+        # own provider via ``resolve_runtime_provider()``, which reads
+        # ``model.auth_mode`` from ``config.yaml`` and constructs a fresh
+        # token provider in the worker process (azure-identity caches
+        # in-process so each worker gets its own short-lived cache).
+        if callable(self.api_key) and not isinstance(self.api_key, str):
+            worker_api_key = None
+            print(
+                "ℹ️  Detected Entra ID bearer provider — workers will rebuild "
+                "credentials from config.yaml in each process.",
+                flush=True,
+            )
+        else:
+            worker_api_key = self.api_key
+
        config = {
            "distribution": self.distribution,
            "model": self.model,
            "max_iterations": self.max_iterations,
            "base_url": self.base_url,
-            "api_key": self.api_key,
+            "api_key": worker_api_key,
            "verbose": self.verbose,
            "ephemeral_system_prompt": self.ephemeral_system_prompt,
            "log_prefix_chars": self.log_prefix_chars,
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@ -30,6 +30,7 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #   "azure-foundry" - Microsoft Foundry / Azure OpenAI (API key or Entra ID)
  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
@ -45,6 +46,14 @@ model:
  # api_key: "your-key-here"  # Uncomment to set here instead of .env
  base_url: "https://openrouter.ai/api/v1"

+  # Azure Foundry keyless auth example:
+  # provider: "azure-foundry"
+  # base_url: "https://<resource>.openai.azure.com/openai/v1"
+  # auth_mode: "entra_id"      # DefaultAzureCredential: az login, managed identity, workload identity, etc.
+  # default: "gpt-4o"          # Deployment/model name
+  # entra:
+  #   scope: "https://ai.azure.com/.default"  # Optional; this is the default.
+
  # ── Token limits — two settings, easy to confuse ──────────────────────────
  #
  # context_length: TOTAL context window (input + output tokens combined).
--- a/cli.py
+++ b/cli.py
@ -4251,7 +4251,13 @@ class HermesCLI:
        resolved_acp_command = runtime.get("command")
        resolved_acp_args = list(runtime.get("args") or [])
        resolved_credential_pool = runtime.get("credential_pool")
-        if not isinstance(api_key, str) or not api_key:
+        # A callable api_key is a bearer-token provider (Azure Foundry
+        # Entra ID — ``azure_identity_adapter.build_token_provider``).
+        # The OpenAI SDK accepts ``Callable[[], str]`` for ``api_key`` and
+        # invokes it before every request. Skip the string-only validation
+        # and placeholder substitution for callables.
+        _is_callable_provider = callable(api_key) and not isinstance(api_key, str)
+        if not _is_callable_provider and (not isinstance(api_key, str) or not api_key):
            # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often
            # don't require authentication.  When a base_url IS configured but
            # no API key was found, use a placeholder so the OpenAI SDK
@ -5723,7 +5729,15 @@ class HermesCLI:
            config_path = project_config_path
        config_status = "(loaded)" if config_path.exists() else "(not found)"
        
-        api_key_display = '********' + self.api_key[-4:] if self.api_key and len(self.api_key) > 4 else 'Not set!'
+        # ``self.api_key`` may be a callable (Azure Foundry Entra ID bearer
+        # provider). Never invoke it; just identify the auth surface.
+        from agent.azure_identity_adapter import is_token_provider
+        if is_token_provider(self.api_key):
+            api_key_display = "Microsoft Entra ID"
+        elif isinstance(self.api_key, str) and len(self.api_key) > 12:
+            api_key_display = f"{self.api_key[:8]}...{self.api_key[-4:]}"
+        else:
+            api_key_display = "Not set!"
        
        print()
        title = "(^_^) Configuration"
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -5334,7 +5334,9 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]:

 def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
    """Generic auth status dispatcher."""
-    target = provider_id or get_active_provider()
+    target = (provider_id or get_active_provider() or "").strip().lower()
+    if not target:
+        return {"logged_in": False}
    if target == "spotify":
        return get_spotify_auth_status()
    if target == "nous":
@ -5351,6 +5353,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
        return get_minimax_oauth_auth_status()
    if target == "copilot-acp":
        return get_external_process_provider_status(target)
+    if target == "azure-foundry":
+        return _get_azure_foundry_auth_status()
    # API-key providers
    pconfig = PROVIDER_REGISTRY.get(target)
    if pconfig and pconfig.auth_type == "api_key":
@ -5365,6 +5369,83 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
    return {"logged_in": False}


+def _get_azure_foundry_auth_status() -> Dict[str, Any]:
+    """Return structural auth status for Azure Foundry.
+
+    ``logged_in`` is structural, matching other non-OAuth provider status
+    checks:
+
+      * ``auth_mode == "entra_id"`` AND ``azure-identity`` is importable
+        (we do NOT mint a token here; ``hermes doctor`` runs the live
+        probe and reports whether the credential chain can acquire one).
+      * ``auth_mode == "api_key"`` (default) AND ``AZURE_FOUNDRY_API_KEY``
+        is set with a usable value.
+
+    Never invokes the Entra credential chain — keeps CLI startup latency
+    flat regardless of token-service / az login state.
+    """
+    info: Dict[str, Any] = {"provider": "azure-foundry"}
+    try:
+        from hermes_cli.config import load_config, get_env_value
+        cfg = load_config()
+    except Exception:
+        cfg = {}
+
+    model_cfg = cfg.get("model") if isinstance(cfg, dict) else None
+    auth_mode = "api_key"
+    base_url = ""
+    if isinstance(model_cfg, dict):
+        auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
+        base_url = str(model_cfg.get("base_url") or "").strip()
+    info["auth_mode"] = auth_mode
+    info["base_url"] = base_url
+
+    if auth_mode == "entra_id":
+        try:
+            from agent.azure_identity_adapter import (
+                EntraIdentityConfig,
+                SCOPE_AI_AZURE_DEFAULT,
+                has_azure_identity_installed,
+            )
+            installed = has_azure_identity_installed()
+            entra_cfg = {}
+            if isinstance(model_cfg, dict) and isinstance(model_cfg.get("entra"), dict):
+                entra_cfg = model_cfg["entra"]
+            identity_config = EntraIdentityConfig.from_dict(
+                entra_cfg,
+                default_scope=SCOPE_AI_AZURE_DEFAULT,
+            )
+            info["azure_identity_installed"] = installed
+            info["scope"] = identity_config.scope
+            info["credential_probe"] = "not_run"
+            info["credential_verified"] = False
+            info["logged_in"] = bool(installed)
+            if not installed:
+                info["hint"] = (
+                    "azure-identity not installed. Install with: "
+                    "pip install azure-identity  (or rely on Hermes' "
+                    "lazy-install at first use)."
+                )
+            else:
+                info["hint"] = (
+                    "azure-identity is installed; live credential validation "
+                    "is skipped here. Run `hermes doctor` to verify token acquisition."
+                )
+            return info
+        except Exception as exc:
+            info["logged_in"] = False
+            info["error"] = f"azure-identity check failed: {exc}"
+            return info
+
+    # api_key mode (default)
+    try:
+        api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or os.getenv("AZURE_FOUNDRY_API_KEY", "")
+    except Exception:
+        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "")
+    info["logged_in"] = has_usable_secret(api_key)
+    return info
+
+
 def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    """Resolve API key and base URL for an API-key provider.

--- a/hermes_cli/auth_commands.py
+++ b/hermes_cli/auth_commands.py
@ -566,6 +566,54 @@ def _interactive_auth() -> None:
            print()
    except ImportError:
        pass  # boto3 or bedrock_adapter not available
+
+    # Show Azure Foundry Entra ID status
+    try:
+        from hermes_cli.config import load_config
+        _cfg = load_config()
+        _model_cfg = _cfg.get("model") if isinstance(_cfg, dict) else None
+        if isinstance(_model_cfg, dict):
+            _cfg_provider = str(_model_cfg.get("provider") or "").strip().lower()
+            _cfg_auth_mode = str(_model_cfg.get("auth_mode") or "").strip().lower()
+            if _cfg_provider == "azure-foundry" and _cfg_auth_mode == "entra_id":
+                from agent.azure_identity_adapter import (
+                    EntraIdentityConfig,
+                    SCOPE_AI_AZURE_DEFAULT,
+                    describe_active_credential,
+                    has_azure_identity_installed,
+                )
+                _base_url = str(_model_cfg.get("base_url") or "").strip()
+                _entra = _model_cfg.get("entra") or {}
+                if not isinstance(_entra, dict):
+                    _entra = {}
+                _scope = (
+                    str(_entra.get("scope") or "").strip()
+                    or SCOPE_AI_AZURE_DEFAULT
+                )
+                print(f"azure-foundry (Microsoft Entra ID):")
+                print(f"  Endpoint: {_base_url or '(not configured)'}")
+                print(f"  Scope: {_scope}")
+                if not has_azure_identity_installed():
+                    print("  Status: ⚠ azure-identity not installed "
+                          "(pip install azure-identity)")
+                else:
+                    _entra_cfg = EntraIdentityConfig(
+                        scope=_scope,
+                    )
+                    _info = describe_active_credential(config=_entra_cfg, timeout_seconds=10.0)
+                    _env_sources = _info.get("env_sources") or []
+                    if _info.get("ok"):
+                        _tag = ", ".join(_env_sources) if _env_sources else "default chain"
+                        print(f"  Status: ✓ token acquired ({_tag})")
+                    else:
+                        _err = _info.get("error") or "credential chain exhausted"
+                        print(f"  Status: ⚠ {_err}")
+                        _hint = _info.get("hint")
+                        if _hint:
+                            print(f"  Hint: {_hint}")
+                print()
+    except Exception:
+        pass
    print()

    # Main menu
--- a/hermes_cli/azure_detect.py
+++ b/hermes_cli/azure_detect.py
@ -1,6 +1,6 @@
 """Azure Foundry endpoint auto-detection.

-Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
+Inspect a Microsoft Foundry / Azure OpenAI endpoint to determine:
  - API transport (OpenAI-style ``chat_completions`` vs
    Anthropic-style ``anthropic_messages``)
  - Available models (best effort — Azure does not expose a deployment
@ -19,6 +19,16 @@ rather than the user's *deployed* deployment names.  In practice it is
 still a useful hint — the user picks a familiar model name and we look
 up its context length from the catalog.

+Authentication modes:
+  - ``api_key`` (default): the wizard passes an ``api_key`` string; the
+    probe sends both ``api-key:`` and ``Authorization: Bearer`` headers
+    so we hit any Azure deployment regardless of which header it expects.
+  - ``entra_id``: the wizard passes a ``token_provider`` callable from
+    :mod:`agent.azure_identity_adapter`. The probe mints exactly one
+    bearer JWT, sends **only** ``Authorization: Bearer <jwt>`` (never
+    ``api-key:``), and never persists the token. This matches Microsoft's
+    documented contract for keyless inference.
+
 The detector never crashes on errors (every HTTP call is wrapped in a
 broad try/except).  Callers get a :class:`DetectionResult` with whatever
 information could be gathered, and fall back to manual entry for the
@ -31,7 +41,7 @@ import json
 import logging
 import re
 from dataclasses import dataclass, field
-from typing import Optional
+from typing import Any, Callable, Optional
 from urllib import request as urllib_request
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlparse
@ -79,15 +89,73 @@ class DetectionResult:
    is_anthropic: bool = False


-def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
-    """GET a URL with ``api-key`` + ``Authorization`` headers.  Return
+def _resolve_credential(api_key: Any,
+                        token_provider: Optional[Callable[[], str]] = None,
+                        ) -> tuple[Optional[str], str]:
+    """Coerce wizard inputs into a (token, mode) pair.
+
+    Returns ``(token_or_None, mode)`` where ``mode`` is:
+      - ``"entra_id"`` when a callable token provider was supplied — the
+        returned token is a freshly minted bearer JWT, sent ONLY in
+        ``Authorization: Bearer``.
+      - ``"api_key"`` when a string key was supplied — the returned token
+        is the raw API key, sent in BOTH ``api-key:`` and
+        ``Authorization: Bearer`` headers (preserves the original
+        broad-compat probe behaviour).
+      - ``("", "api_key")`` when neither yields a value.
+
+    Bearer minting failures degrade to ``("", "entra_id")`` so the caller
+    can still report "detection incomplete" rather than crashing.
+    """
+    # Token-provider path (callable wins when both supplied).
+    if token_provider is not None and callable(token_provider):
+        try:
+            token = token_provider()
+            return (str(token) if token else None), "entra_id"
+        except Exception as exc:
+            logger.debug("azure_detect: token_provider failed: %s", exc)
+            return None, "entra_id"
+    if callable(api_key) and not isinstance(api_key, str):
+        try:
+            token = api_key()
+            return (str(token) if token else None), "entra_id"
+        except Exception as exc:
+            logger.debug("azure_detect: api_key callable failed: %s", exc)
+            return None, "entra_id"
+    # API-key path.
+    if isinstance(api_key, str) and api_key:
+        return api_key, "api_key"
+    return None, "api_key"
+
+
+def _apply_auth_headers(req: urllib_request.Request,
+                        token: Optional[str],
+                        mode: str) -> None:
+    """Attach the right auth headers to ``req`` based on credential mode."""
+    if not token:
+        return
+    if mode == "entra_id":
+        # Bearer-only: do NOT also set api-key, which would log a JWT in
+        # a header slot intended for static keys.
+        req.add_header("Authorization", f"Bearer {token}")
+    else:
+        # Legacy broad-compat behaviour: send both headers so we land on
+        # any Azure resource regardless of which it accepts.
+        req.add_header("api-key", token)
+        req.add_header("Authorization", f"Bearer {token}")
+
+
+def _http_get_json(url: str,
+                   api_key: Any,
+                   timeout: float = 6.0,
+                   *,
+                   token_provider: Optional[Callable[[], str]] = None,
+                   ) -> tuple[int, Optional[dict]]:
+    """GET a URL with the appropriate auth headers.  Return
    ``(status_code, parsed_json_or_None)``.  Never raises."""
+    token, mode = _resolve_credential(api_key, token_provider)
    req = urllib_request.Request(url, method="GET")
-    # Azure OpenAI uses ``api-key``.  Some Azure deployments (and
-    # Anthropic-style routes) use ``Authorization: Bearer``.  Send both
-    # so we probe once per URL rather than twice.
-    req.add_header("api-key", api_key)
-    req.add_header("Authorization", f"Bearer {api_key}")
+    _apply_auth_headers(req, token, mode)
    req.add_header("User-Agent", "hermes-agent/azure-detect")
    try:
        with urllib_request.urlopen(req, timeout=timeout) as resp:
@ -140,7 +208,11 @@ def _extract_model_ids(payload: dict) -> list[str]:
    return ids


-def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
+def _probe_openai_models(base_url: str,
+                         api_key: Any,
+                         *,
+                         token_provider: Optional[Callable[[], str]] = None,
+                         ) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    Returns ``(ok, models)``.  ``ok`` is True iff the endpoint accepted
@ -156,7 +228,7 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
        candidates.append(f"{base_url}/models?api-version={v}")

    for url in candidates:
-        status, body = _http_get_json(url, api_key)
+        status, body = _http_get_json(url, api_key, token_provider=token_provider)
        if status == 200 and body is not None:
            ids = _extract_model_ids(body)
            if ids:
@ -172,7 +244,11 @@ def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    return False, []


-def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
+def _probe_anthropic_messages(base_url: str,
+                              api_key: Any,
+                              *,
+                              token_provider: Optional[Callable[[], str]] = None,
+                              ) -> bool:
    """Send a zero-token request to ``<base>/v1/messages`` and check
    whether the endpoint at least *recognises* the Anthropic Messages
    shape (any 4xx that mentions ``messages`` or ``model``, or a 400
@ -187,8 +263,8 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")
    req = urllib_request.Request(url, method="POST", data=payload)
-    req.add_header("api-key", api_key)
-    req.add_header("Authorization", f"Bearer {api_key}")
+    token, mode = _resolve_credential(api_key, token_provider)
+    _apply_auth_headers(req, token, mode)
    req.add_header("anthropic-version", "2023-06-01")
    req.add_header("content-type", "application/json")
    req.add_header("User-Agent", "hermes-agent/azure-detect")
@ -218,13 +294,23 @@ def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
        return False


-def detect(base_url: str, api_key: str) -> DetectionResult:
+def detect(base_url: str,
+           api_key: Any = "",
+           *,
+           token_provider: Optional[Callable[[], str]] = None,
+           ) -> DetectionResult:
    """Inspect an Azure endpoint and describe its transport + models.

    Call this from the wizard before asking the user to pick an API
    mode manually.  The caller should treat the returned
    :class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
    fall back to asking the user.
+
+    ``api_key`` may be a string (legacy API-key auth — sends both
+    ``api-key:`` and ``Authorization: Bearer``) or a callable returning
+    a bearer JWT (Entra ID auth — sends ONLY ``Authorization: Bearer``).
+    ``token_provider`` is an alternative explicit name for the callable
+    form; if both are supplied the callable wins.
    """
    result = DetectionResult()

@ -244,7 +330,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult:

    # 2. Try the OpenAI-style /models probe.  If this works, the
    #    endpoint definitely speaks OpenAI wire.
-    ok, models = _probe_openai_models(base_url, api_key)
+    ok, models = _probe_openai_models(base_url, api_key, token_provider=token_provider)
    if ok:
        result.models_probe_ok = True
        result.models = models
@ -259,7 +345,7 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
    # 3. Fallback: probe the Anthropic Messages shape.  Slower and more
    #    intrusive than /models, so only run it when the OpenAI probe
    #    failed.
-    if _probe_anthropic_messages(base_url, api_key):
+    if _probe_anthropic_messages(base_url, api_key, token_provider=token_provider):
        result.is_anthropic = True
        result.api_mode = "anthropic_messages"
        result.reason = "Endpoint accepts Anthropic Messages shape"
@ -273,11 +359,26 @@ def detect(base_url: str, api_key: str) -> DetectionResult:
    return result


-def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
+def lookup_context_length(model: str,
+                          base_url: str,
+                          api_key: Any = "",
+                          *,
+                          token_provider: Optional[Callable[[], str]] = None,
+                          ) -> Optional[int]:
    """Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
    that returns ``None`` when only the fallback default (128k) would
    fire, so the wizard can distinguish "we actually know this" from
-    "we guessed."""
+    "we guessed.
+
+    For Entra-ID mode pass a callable as ``api_key`` (or via
+    ``token_provider=``); the wrapped resolver expects a string, so we
+    mint one bearer JWT here for the single lookup. The resolver itself
+    only reads catalog metadata over HTTP — no SDK client is built — so
+    the minted token is consumed for at most one /models probe.
+    """
+    model_id = str(model or "").strip()
+    if not model_id:
+        return None
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
@ -286,8 +387,13 @@ def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[i
    except Exception:
        return None

+    # Resolve the credential once. For Entra mode this calls the token
+    # provider; for legacy api_key this is a no-op string pass-through.
+    token, mode = _resolve_credential(api_key, token_provider)
+    effective_key = token or ""
+
    try:
-        n = get_model_context_length(model, base_url=base_url, api_key=api_key)
+        n = get_model_context_length(model_id, base_url=base_url, api_key=effective_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -1613,6 +1613,87 @@ def run_doctor(args):
                 f"bedrock:ListFoundationModels"],
            )

+    def _probe_azure_entra() -> _ConnectivityResult:
+        """Probe Azure Foundry Entra ID auth, parallel to ``_probe_bedrock``.
+
+        Skipped unless the active config has ``model.provider:
+        azure-foundry`` AND ``model.auth_mode: entra_id`` — we don't probe
+        the token-service / CLI chain for users on plain API-key Azure.
+
+        Bounded by a 10s timeout (via
+        :func:`agent.azure_identity_adapter.describe_active_credential`)
+        so a slow token service can't pad the doctor run.
+        """
+        label = "Azure Foundry (Entra ID)".ljust(28)
+        try:
+            from hermes_cli.config import load_config
+            cfg = load_config()
+            model_cfg = cfg.get("model") if isinstance(cfg, dict) else {}
+            if not isinstance(model_cfg, dict):
+                return _ConnectivityResult("Azure Foundry (Entra ID)", [], [])
+            cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+            auth_mode = str(model_cfg.get("auth_mode") or "").strip().lower()
+            if cfg_provider != "azure-foundry" or auth_mode != "entra_id":
+                return _ConnectivityResult("Azure Foundry (Entra ID)", [], [])
+        except Exception:
+            return _ConnectivityResult("Azure Foundry (Entra ID)", [], [])
+
+        try:
+            from agent.azure_identity_adapter import (
+                EntraIdentityConfig,
+                SCOPE_AI_AZURE_DEFAULT,
+                describe_active_credential,
+                has_azure_identity_installed,
+            )
+        except Exception as exc:
+            return _ConnectivityResult(
+                "Azure Foundry (Entra ID)",
+                [(color("⚠", Colors.YELLOW), label,
+                  color(f"(adapter import failed: {exc})", Colors.DIM))],
+                [f"Azure Foundry adapter import failed: {exc}"],
+            )
+
+        if not has_azure_identity_installed():
+            return _ConnectivityResult(
+                "Azure Foundry (Entra ID)",
+                [(color("⚠", Colors.YELLOW), label,
+                  color("(azure-identity not installed)", Colors.DIM))],
+                [f"Install azure-identity: {sys.executable} -m pip install azure-identity"],
+            )
+
+        base_url = str(model_cfg.get("base_url") or "").strip()
+        entra_cfg = model_cfg.get("entra") or {}
+        if not isinstance(entra_cfg, dict):
+            entra_cfg = {}
+        scope = (
+            str(entra_cfg.get("scope") or "").strip()
+            or SCOPE_AI_AZURE_DEFAULT
+        )
+        config = EntraIdentityConfig(
+            scope=scope,
+        )
+        info = describe_active_credential(config=config, timeout_seconds=10.0)
+        if info.get("ok"):
+            env_sources = info.get("env_sources") or []
+            tag = ", ".join(env_sources) if env_sources else "default credential chain"
+            return _ConnectivityResult(
+                "Azure Foundry (Entra ID)",
+                [(color("✓", Colors.GREEN), label,
+                  color(f"({tag}, scope={scope})", Colors.DIM))],
+                [],
+            )
+        err = info.get("error") or "credential chain exhausted"
+        hint = info.get("hint") or (
+            "Run `az login`, set AZURE_TENANT_ID/AZURE_CLIENT_ID/"
+            "AZURE_CLIENT_SECRET, or attach a managed identity to this VM."
+        )
+        return _ConnectivityResult(
+            "Azure Foundry (Entra ID)",
+            [(color("⚠", Colors.YELLOW), label,
+              color(f"({err})", Colors.DIM))],
+            [f"Azure Foundry Entra: {err}. {hint}"],
+        )
+
    # Build the probe submission list in display order
    _probes.append(("OpenRouter API", _probe_openrouter))
    _probes.append(("Anthropic API", _probe_anthropic))
@ -1630,6 +1711,7 @@ def run_doctor(args):
                                _probe_apikey_provider(p, e, u, b, s)))

    _probes.append(("AWS Bedrock", _probe_bedrock))
+    _probes.append(("Azure Foundry (Entra ID)", _probe_azure_entra))

    # Print a single status line so users see something happening, then
    # fan out. ``\r`` clears it once the first real result line lands.
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -3535,11 +3535,27 @@ def _save_custom_provider(


 def _model_flow_azure_foundry(config, current_model=""):
-    """Azure Foundry provider: configure endpoint, API mode, API key, and model.
+    """Azure Foundry provider: configure endpoint, auth mode, API mode, and model.

    Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
-    Anthropic-style (``/v1/messages``) endpoints.  The wizard auto-detects
-    the transport and available models when possible:
+    Anthropic-style (``/v1/messages``) endpoints, and two authentication
+    modes:
+
+    * **API key** (default) — uses ``AZURE_FOUNDRY_API_KEY`` from .env.
+    * **Microsoft Entra ID** — keyless, RBAC-based auth via the
+      ``azure-identity`` SDK (Managed Identity / Workload Identity / az
+      login / VS Code / azd / service principal env vars). Works on both
+      OpenAI-style and Anthropic-style endpoints — Microsoft RBAC is
+      per-resource and the same ``Azure AI User`` role grants
+      both. For OpenAI-style the OpenAI SDK's native callable
+      ``api_key=`` contract is used; for Anthropic-style an
+      ``httpx.Client`` with a request event hook (built by
+      :func:`agent.azure_identity_adapter.build_bearer_http_client`)
+      mints a fresh JWT per request because the Anthropic SDK does not
+      accept a callable ``auth_token`` natively.
+
+    The wizard auto-detects the transport and available models when
+    possible:

    * URLs ending in ``/anthropic`` → Anthropic Messages API.
    * Successful ``GET <base>/models`` probe → OpenAI-style + populates
@ -3566,9 +3582,14 @@ def _model_flow_azure_foundry(config, current_model=""):
    if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
        current_base_url = str(model_cfg.get("base_url", "") or "")
        current_api_mode = str(model_cfg.get("api_mode", "") or "")
+        current_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
+        _cur_entra = model_cfg.get("entra") or {}
+        current_entra = _cur_entra if isinstance(_cur_entra, dict) else {}
    else:
        current_base_url = ""
        current_api_mode = ""
+        current_auth_mode = "api_key"
+        current_entra = {}

    current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""

@ -3583,22 +3604,29 @@ def _model_flow_azure_foundry(config, current_model=""):
    print()

    if current_base_url:
-        print(f"  Current endpoint: {current_base_url}")
+        print(f"  Current endpoint:  {current_base_url}")
    if current_api_mode:
        _lbl = (
            "OpenAI-style"
            if current_api_mode == "chat_completions"
            else "Anthropic-style"
        )
-        print(f"  Current API mode: {_lbl}")
-    if current_api_key:
-        print(f"  Current API key:  {current_api_key[:8]}...")
+        print(f"  Current API mode:  {_lbl}")
+    if current_auth_mode == "entra_id":
+        print(f"  Current auth mode: Microsoft Entra ID (keyless)")
+    elif current_api_key:
+        print(f"  Current auth mode: API key ({current_api_key[:8]}...)")
    print()

    # ── Step 1: endpoint URL ─────────────────────────────────────────
    try:
+        _placeholder = (
+            current_base_url
+            or "e.g. https://<resource>.openai.azure.com/openai/v1 "
+              "or https://<resource>.services.ai.azure.com/anthropic"
+        )
        base_url = input(
-            f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
+            f"API endpoint URL [{_placeholder}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
@ -3612,25 +3640,125 @@ def _model_flow_azure_foundry(config, current_model=""):
        print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
        return

-    # ── Step 2: API key ──────────────────────────────────────────────
+    # ── Step 2: authentication mode ──────────────────────────────────
    print()
+    print("Authentication:")
+    print("  1. API key                  (AZURE_FOUNDRY_API_KEY in .env)")
+    print("  2. Microsoft Entra ID       (managed identity / workload identity / az login)")
+    print("     Recommended by Microsoft. Works for both OpenAI-style and Anthropic-style endpoints.")
+    print("     Requires the 'Azure AI User' role on the Foundry resource.")
    try:
-        api_key = getpass.getpass(
-            f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
-        ).strip()
+        _auth_default = "2" if current_auth_mode == "entra_id" else "1"
+        auth_choice = (
+            input(f"Authentication mode [1/2] ({_auth_default}): ").strip()
+            or _auth_default
+        )
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return
+    use_entra = auth_choice == "2"
+    auth_mode_label = "entra_id" if use_entra else "api_key"

-    effective_key = api_key or current_api_key
-    if not effective_key:
-        print("No API key provided. Cancelled.")
-        return
+    # ── Step 3: credentials (key OR Entra preflight) ─────────────────
+    effective_key: str = ""
+    entra_overrides: dict = {}
+    token_provider = None  # callable when entra
+    entra_scope = ""

-    # ── Step 3: auto-detect transport + models ───────────────────────
+    if use_entra:
+        try:
+            from agent.azure_identity_adapter import (
+                EntraIdentityConfig,
+                SCOPE_AI_AZURE_DEFAULT,
+                build_token_provider,
+                describe_active_credential,
+                has_azure_identity_installed,
+            )
+        except ImportError as exc:
+            print()
+            print(f"⚠ Could not import azure-identity adapter: {exc}")
+            print("  Falling back to API key auth.")
+            use_entra = False
+            auth_mode_label = "api_key"
+
+    if use_entra:
+        print()
+        if not has_azure_identity_installed():
+            print("◐ The 'azure-identity' package is not installed yet.")
+            print(
+                "  Hermes will install it now (the preflight below "
+                "triggers the lazy-install). To skip lazy installs, "
+                "run:  pip install azure-identity"
+            )
+
+        # Preserve only the optional scope override. Identity selection
+        # (tenant, user-assigned MI, workload identity, service principal)
+        # stays in Azure SDK env vars such as AZURE_CLIENT_ID.
+        _persisted_scope_override = str(current_entra.get("scope") or "").strip()
+        entra_scope = _persisted_scope_override or SCOPE_AI_AZURE_DEFAULT
+
+        entra_overrides = {}
+        if _persisted_scope_override:
+            entra_overrides["scope"] = _persisted_scope_override
+
+        print()
+        print("◐ Probing Microsoft Entra ID credential chain (up to 10s)...")
+        _config = EntraIdentityConfig(
+            scope=entra_scope,
+        )
+        info = describe_active_credential(config=_config, timeout_seconds=10.0)
+        if info.get("ok"):
+            env_sources = info.get("env_sources") or []
+            tag = ", ".join(env_sources) if env_sources else "default chain"
+            print(f"✓ Entra ID token acquired ({tag}, scope={entra_scope})")
+        else:
+            err = info.get("error") or "credential chain exhausted"
+            hint = info.get("hint") or (
+                "Run `az login`, attach a managed identity to this VM, or "
+                "set AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET."
+            )
+            print(f"⚠ {err}")
+            print(f"  Hint: {hint}")
+            try:
+                ans = input("Save Entra config anyway and validate later? [Y/n]: ").strip().lower()
+            except (KeyboardInterrupt, EOFError):
+                print("\nCancelled.")
+                return
+            if ans and ans not in ("y", "yes"):
+                print("Cancelled.")
+                return
+
+        # Build the token provider for the detection probe (best-effort —
+        # if the credential chain failed above, this will silently return
+        # None inside azure_detect and the probe falls back to manual).
+        try:
+            token_provider = build_token_provider(config=_config)
+        except Exception as exc:
+            print(f"⚠ Could not build token provider for probing: {exc}")
+            token_provider = None
+    else:
+        print()
+        try:
+            api_key = getpass.getpass(
+                f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
+            ).strip()
+        except (KeyboardInterrupt, EOFError):
+            print("\nCancelled.")
+            return
+
+        effective_key = api_key or current_api_key
+        if not effective_key:
+            print("No API key provided. Cancelled.")
+            return
+
+    # ── Step 4: auto-detect transport + models ───────────────────────
    print()
    print("◐ Probing endpoint to auto-detect transport and models...")
-    detection = azure_detect.detect(effective_url, effective_key)
+    detection = azure_detect.detect(
+        effective_url,
+        api_key=effective_key,
+        token_provider=token_provider,
+    )

    discovered_models: list[str] = list(detection.models)
    api_mode: str = detection.api_mode or ""
@ -3665,7 +3793,7 @@ def _model_flow_azure_foundry(config, current_model=""):
            return
        api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"

-    # ── Step 4: model name ───────────────────────────────────────────
+    # ── Step 5: model name ───────────────────────────────────────────
    print()
    effective_model = ""
    if discovered_models:
@ -3704,15 +3832,17 @@ def _model_flow_azure_foundry(config, current_model=""):
        print("No model name provided. Cancelled.")
        return

-    # ── Step 5: context-length lookup ────────────────────────────────
+    # ── Step 6: context-length lookup ────────────────────────────────
    ctx_len = azure_detect.lookup_context_length(
        effective_model,
        effective_url,
-        effective_key,
+        api_key=effective_key,
+        token_provider=token_provider,
    )

-    # ── Step 6: persist ──────────────────────────────────────────────
-    save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
+    # ── Step 7: persist ──────────────────────────────────────────────
+    if not use_entra:
+        save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)

    cfg = load_config()
    model = cfg.get("model")
@ -3724,6 +3854,22 @@ def _model_flow_azure_foundry(config, current_model=""):
    model["base_url"] = effective_url
    model["api_mode"] = api_mode
    model["default"] = effective_model
+    model["auth_mode"] = auth_mode_label
+    if use_entra:
+        # Persist only the non-default Entra scope so config.yaml stays tidy.
+        # Azure identity selection stays in standard AZURE_* env vars.
+        clean_entra: dict = {}
+        for key in ("scope",):
+            val = entra_overrides.get(key)
+            if val:
+                clean_entra[key] = val
+        if clean_entra:
+            model["entra"] = clean_entra
+        elif "entra" in model:
+            del model["entra"]
+    else:
+        if "entra" in model:
+            del model["entra"]
    if ctx_len:
        model["context_length"] = ctx_len

@ -3739,10 +3885,14 @@ def _model_flow_azure_foundry(config, current_model=""):
        save_env_value("OPENAI_API_KEY", "")

    mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
+    auth_label = (
+        "Microsoft Entra ID (keyless)" if use_entra else "API key"
+    )
    print()
    print("✓ Azure Foundry configured:")
    print(f"    Endpoint:       {effective_url}")
    print(f"    API mode:       {mode_label}")
+    print(f"    Auth:           {auth_label}")
    print(f"    Model:          {effective_model}")
    if ctx_len:
        print(f"    Context length: {ctx_len:,} tokens")
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@ -744,6 +744,15 @@ def _resolve_azure_foundry_runtime(
    strips a trailing ``/v1`` for Anthropic-style endpoints because the
    Anthropic SDK appends ``/v1/messages`` internally.

+    When ``model.auth_mode == "entra_id"`` (and the model is OpenAI-style),
+    the returned ``api_key`` is a zero-arg callable produced by
+    :func:`agent.azure_identity_adapter.build_token_provider` rather than
+    a string. Downstream code that constructs an OpenAI SDK client passes
+    this through unchanged (the SDK accepts ``Callable[[], str]`` for
+    ``api_key`` and calls it before every request). Code paths that need
+    a string (logging, manual HTTP probes, header injection) must use the
+    helpers in ``agent.azure_identity_adapter``.
+
    Raises :class:`AuthError` when required values are missing.
    """
    explicit_api_key = str(explicit_api_key or "").strip()
@ -752,9 +761,15 @@ def _resolve_azure_foundry_runtime(
    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
    cfg_base_url = ""
    cfg_api_mode = "chat_completions"
+    cfg_auth_mode = "api_key"
+    cfg_entra: Dict[str, Any] = {}
    if cfg_provider == "azure-foundry":
        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
+        cfg_auth_mode = str(model_cfg.get("auth_mode") or "api_key").strip().lower() or "api_key"
+        _entra = model_cfg.get("entra")
+        if isinstance(_entra, dict):
+            cfg_entra = _entra

    # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
    # reasoning models as Responses-API-only.  Calling /chat/completions
@ -780,6 +795,79 @@ def _resolve_azure_foundry_runtime(
            "the AZURE_FOUNDRY_BASE_URL environment variable."
        )

+    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
+    # we inherited from the configured base_url to avoid double-/v1 paths.
+    if cfg_api_mode == "anthropic_messages":
+        base_url = re.sub(r"/v1/?$", "", base_url)
+
+    # ── Entra ID (Microsoft Foundry recommended path) ──────────────────
+    #
+    # OpenAI-style endpoints use the OpenAI SDK's native callable
+    # ``api_key=`` contract — the SDK mints a fresh JWT per request
+    # automatically.
+    #
+    # Anthropic-style endpoints (Claude on Foundry) take the callable
+    # too: :func:`agent.anthropic_adapter.build_anthropic_client`
+    # detects the callable and constructs an ``httpx.Client`` with a
+    # request event hook that injects a fresh ``Authorization: Bearer``
+    # header per request (the Anthropic SDK does not accept callables
+    # natively). From the runtime resolver's perspective both modes
+    # are identical — return the callable api_key and let the
+    # downstream SDK wrapper handle the contract difference.
+    if cfg_auth_mode == "entra_id":
+        if explicit_api_key:
+            # User passed --api-key on the CLI while config says entra_id —
+            # honour the explicit string (escape hatch for one-off testing).
+            api_key: Any = explicit_api_key
+            source = "explicit"
+            auth_mode = "api_key"
+        else:
+            try:
+                from agent.azure_identity_adapter import (
+                    EntraIdentityConfig,
+                    SCOPE_AI_AZURE_DEFAULT,
+                    build_token_provider,
+                )
+            except Exception as exc:
+                raise AuthError(
+                    "Azure Foundry Entra ID auth requires the 'azure-identity' "
+                    "package. Install it with: pip install azure-identity "
+                    f"(import failed: {exc})"
+                ) from exc
+
+            scope = (
+                str(cfg_entra.get("scope") or "").strip()
+                or SCOPE_AI_AZURE_DEFAULT
+            )
+            try:
+                entra_config = EntraIdentityConfig(
+                    scope=scope,
+                )
+                token_provider = build_token_provider(config=entra_config)
+            except ImportError as exc:
+                raise AuthError(str(exc)) from exc
+            api_key = token_provider
+            source = "entra_id"
+            auth_mode = "entra_id"
+
+        clean_entra = {}
+        if auth_mode == "entra_id":
+            configured_scope = str(cfg_entra.get("scope") or "").strip()
+            if configured_scope:
+                clean_entra["scope"] = configured_scope
+
+        return {
+            "provider": "azure-foundry",
+            "api_mode": cfg_api_mode,
+            "base_url": base_url,
+            "api_key": api_key,
+            "auth_mode": auth_mode,
+            "entra": clean_entra,
+            "source": source,
+            "requested_provider": requested_provider,
+        }
+
+    # ── Static API key (legacy / default) ──────────────────────────────
    api_key = explicit_api_key
    if not api_key:
        try:
@ -792,20 +880,19 @@ def _resolve_azure_foundry_runtime(
    if not api_key:
        raise AuthError(
            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
-            "~/.hermes/.env or run 'hermes model' to configure."
+            "~/.hermes/.env or run 'hermes model' to configure. To use "
+            "keyless Microsoft Entra ID auth instead, set "
+            "model.auth_mode: entra_id in config.yaml (or pick "
+            "'Microsoft Entra ID' in 'hermes model')."
        )

-    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
-    # we inherited from the configured base_url to avoid double-/v1 paths.
-    if cfg_api_mode == "anthropic_messages":
-        base_url = re.sub(r"/v1/?$", "", base_url)
-
    source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
    return {
        "provider": "azure-foundry",
        "api_mode": cfg_api_mode,
        "base_url": base_url,
        "api_key": api_key,
+        "auth_mode": "api_key",
        "source": source,
        "requested_provider": requested_provider,
    }
@ -1232,7 +1319,7 @@ def resolve_runtime_provider(
            cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = cfg_base_url or "https://api.anthropic.com"

-        # For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
+        # For Microsoft Foundry endpoints, use ANTHROPIC_API_KEY directly —
        # Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
        # Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
        # would find the Claude Code OAuth token first (priority 3) and return
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -1288,9 +1288,15 @@ def _truncate_token(value: Optional[str], visible: int = 6) -> str:
    OAuth access token. JWT prefixes (the part before the first dot) are
    stripped first when present so the visible suffix is always part of
    the signing region rather than a meaningless header chunk.
+
+    Returns the Entra-ID placeholder when handed a callable (Azure Foundry
+    bearer provider) — the callable is NEVER invoked here.
    """
    if not value:
        return ""
+    if callable(value) and not isinstance(value, str):
+        # Entra ID bearer provider — never reveal a minted token in the UI.
+        return "<entra-id-bearer>"
    s = str(value)
    if "." in s and s.count(".") >= 2:
        # Looks like a JWT — show the trailing piece of the signature only.
--- a/plugins/model-providers/azure-foundry/init.py
+++ b/plugins/model-providers/azure-foundry/init.py
@ -1,4 +1,4 @@
-"""Azure AI Foundry provider profile.
+"""Microsoft Foundry provider profile.

 Azure Foundry exposes an OpenAI-compatible endpoint; users supply their own
 base URL at setup since endpoints are per-resource.
@ -11,7 +11,7 @@ azure_foundry = ProviderProfile(
    name="azure-foundry",
    aliases=("azure", "azure-ai-foundry", "azure-ai"),
    display_name="Azure Foundry",
-    description="Azure AI Foundry — OpenAI-compatible endpoint (user-supplied base URL)",
+    description="Microsoft Foundry - OpenAI-compatible endpoint (user-supplied base URL)",
    signup_url="https://ai.azure.com/",
    env_vars=("AZURE_FOUNDRY_API_KEY", "AZURE_FOUNDRY_BASE_URL"),
    base_url="",  # per-resource; user provides at setup
--- a/plugins/model-providers/azure-foundry/plugin.yaml
+++ b/plugins/model-providers/azure-foundry/plugin.yaml
@ -1,5 +1,5 @@
 name: azure-foundry-provider
 kind: model-provider
 version: 1.0.0
-description: Azure AI Foundry
+description: Microsoft Foundry
 author: Nous Research
--- a/pyproject.toml
+++ b/pyproject.toml
@ -125,6 +125,7 @@ acp = ["agent-client-protocol==0.9.0"]
 #   4. Run `uv lock` to regenerate transitives.
 #   5. Optionally re-add to [all] only after a few days of clean operation.
 bedrock = ["boto3==1.42.89"]
+azure-identity = ["azure-identity==1.25.3"]
 termux = [
  # Baseline Android / Termux path for reliable fresh installs.
  "python-telegram-bot[webhooks]==22.6",
--- a/run_agent.py
+++ b/run_agent.py
@ -1428,7 +1428,11 @@ class AIAgent:
        prefix = f"HTTP {status_code}: " if status_code else ""
        return f"{prefix}{raw[:500]}"

-    def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
+    def _mask_api_key_for_logs(self, key: Any) -> Optional[str]:
+        # Azure Foundry Entra ID bearer providers are callables — never
+        # invoke them in log paths; identify the auth surface instead.
+        if callable(key) and not isinstance(key, str):
+            return "<entra-id-bearer>"
        if not key:
            return None
        if len(key) <= 12:
--- a/tests/acp_adapter/test_detect_provider_entra.py
+++ b/tests/acp_adapter/test_detect_provider_entra.py
@ -0,0 +1,87 @@
+"""Regression tests for ACP adapter detection under Azure Foundry Entra ID.
+
+The ACP adapter's ``detect_provider`` previously gated on
+``isinstance(api_key, str)`` and returned ``None`` for any runtime that
+returned a callable ``api_key`` — i.e. Azure Foundry with
+``auth_mode=entra_id``. Downstream, ACP would default to
+``"openrouter"`` and reject the legitimate provider in its auth handshake.
+This test pins the callable-aware fix so it never regresses.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+
+class TestDetectProviderEntra:
+    def test_callable_api_key_is_a_valid_credential(self):
+        """A runtime returning a callable ``api_key`` (Entra bearer token
+        provider) must be detected as a configured provider, not
+        ``None``."""
+        from acp_adapter import auth as _acp_auth
+
+        def _fake_runtime(**_kwargs):
+            return {
+                "provider": "azure-foundry",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_key": lambda: "jwt-fresh",
+            }
+
+        with patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            side_effect=_fake_runtime,
+        ):
+            assert _acp_auth.detect_provider() == "azure-foundry"
+            assert _acp_auth.has_provider() is True
+
+    def test_string_api_key_still_works(self):
+        from acp_adapter import auth as _acp_auth
+
+        def _fake_runtime(**_kwargs):
+            return {
+                "provider": "openrouter",
+                "api_key": "sk-or-static-key",
+            }
+
+        with patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            side_effect=_fake_runtime,
+        ):
+            assert _acp_auth.detect_provider() == "openrouter"
+
+    def test_empty_string_api_key_returns_none(self):
+        from acp_adapter import auth as _acp_auth
+
+        def _fake_runtime(**_kwargs):
+            return {"provider": "openrouter", "api_key": ""}
+
+        with patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            side_effect=_fake_runtime,
+        ):
+            assert _acp_auth.detect_provider() is None
+
+    def test_missing_provider_returns_none(self):
+        """A callable api_key without a provider is still ``None`` —
+        we don't synthesize a provider name from the credential shape."""
+        from acp_adapter import auth as _acp_auth
+
+        def _fake_runtime(**_kwargs):
+            return {"api_key": lambda: "jwt-fresh", "provider": ""}
+
+        with patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            side_effect=_fake_runtime,
+        ):
+            assert _acp_auth.detect_provider() is None
+
+    def test_resolver_exception_returns_none(self):
+        from acp_adapter import auth as _acp_auth
+
+        with patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            side_effect=RuntimeError("simulated"),
+        ):
+            assert _acp_auth.detect_provider() is None
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@ -9,6 +9,7 @@ import pytest

 from agent.prompt_caching import apply_anthropic_cache_control
 from agent.anthropic_adapter import (
+    _is_azure_anthropic_endpoint,
    _is_oauth_token,
    _refresh_oauth_token,
    _to_plain_data,
@ -121,6 +122,20 @@ class TestBuildAnthropicClient:
            betas = kwargs["default_headers"]["anthropic-beta"]
            assert "context-1m-2025-08-07" in betas

+    def test_azure_anthropic_endpoint_detection_is_host_and_path_scoped(self):
+        assert _is_azure_anthropic_endpoint(
+            "https://example.services.ai.azure.com/models/anthropic"
+        ) is True
+        assert _is_azure_anthropic_endpoint(
+            "https://example.services.ai.azure.us/anthropic"
+        ) is True
+        assert _is_azure_anthropic_endpoint(
+            "https://example.openai.azure.com/openai/v1"
+        ) is False
+        assert _is_azure_anthropic_endpoint(
+            "https://management.azure.com/anthropic"
+        ) is False
+
    def test_bedrock_client_keeps_context_1m_beta(self):
        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
            mock_sdk.AnthropicBedrock = MagicMock()
--- a/tests/agent/test_auxiliary_client_azure_foundry.py
+++ b/tests/agent/test_auxiliary_client_azure_foundry.py
@ -0,0 +1,350 @@
+"""Tests for auxiliary client routing of the ``azure-foundry`` provider.
+
+Covers the dedicated branch in ``agent.auxiliary_client.resolve_provider_client``
+that delegates to :func:`hermes_cli.runtime_provider._resolve_azure_foundry_runtime`
+instead of falling into the generic ``resolve_api_key_provider_credentials``
+path (which only knows about ``AZURE_FOUNDRY_API_KEY`` and would 401 for
+Entra ID users and miss ``model.base_url`` overrides for api-key users
+with non-standard Foundry-projects endpoints).
+
+Pinned scenarios:
+
+  * ``auth_mode: api_key`` → plain OpenAI client with the static string
+    key for ``chat_completions``.
+  * ``auth_mode: entra_id`` + ``chat_completions`` → plain OpenAI
+    client with a callable ``api_key`` (the bearer-token provider) —
+    confirms the callable survives the auxiliary path end-to-end.
+  * ``auth_mode: entra_id`` + GPT-5.x model → CodexAuxiliaryClient
+    wrapping the OpenAI client (api_mode auto-upgrades to
+    codex_responses).
+  * Anthropic-style + entra_id → rejected at the runtime resolver,
+    so the aux path returns ``(None, None)``.
+  * Failure path when no model is configured returns ``(None, None)``
+    cleanly so the auto chain falls through.
+"""
+
+from __future__ import annotations
+
+import sys
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _reset_credential_cache():
+    from agent.azure_identity_adapter import reset_credential_cache
+    reset_credential_cache()
+    yield
+    reset_credential_cache()
+
+
+@pytest.fixture
+def fake_azure_identity(monkeypatch):
+    """Stand-in for azure.identity (keeps CI hermetic when the SDK is
+    not installed)."""
+    from agent import azure_identity_adapter as _adapter
+
+    last = {"scope": None}
+
+    def _provider(scope):
+        return lambda: f"jwt-for-{scope}"
+
+    fake_module = SimpleNamespace(
+        DefaultAzureCredential=lambda **kw: SimpleNamespace(
+            kwargs=kw,
+            get_token=lambda scope: SimpleNamespace(token="fake", expires_on=9999999999),
+        ),
+        get_bearer_token_provider=lambda credential, scope: (
+            last.__setitem__("scope", scope),
+            _provider(scope),
+        )[-1],
+    )
+    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module)
+    monkeypatch.setitem(sys.modules, "azure.identity", fake_module)
+    return last
+
+
+@pytest.fixture
+def patch_load_config(monkeypatch):
+    """Helper to set model_cfg seen by _try_azure_foundry."""
+    def _apply(model_cfg):
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"model": model_cfg},
+        )
+    return _apply
+
+
+# ---------------------------------------------------------------------------
+# auth_mode: api_key (default) — regression for the legacy path
+# ---------------------------------------------------------------------------
+
+
+class TestAuxAzureFoundryApiKey:
+    def test_chat_completions_returns_plain_openai_client(self, monkeypatch, patch_load_config):
+        from agent.auxiliary_client import _try_azure_foundry
+        from openai import OpenAI as _OpenAI
+
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            "default": "gpt-4o",
+        })
+        client, resolved = _try_azure_foundry(model="gpt-4o")
+        assert client is not None
+        assert resolved == "gpt-4o"
+        assert isinstance(client, _OpenAI)
+        assert client.api_key == "sk-azure-static-key"
+
+    def test_codex_responses_wraps_in_codex_aux_client(self, monkeypatch, patch_load_config):
+        from agent.auxiliary_client import _try_azure_foundry, CodexAuxiliaryClient
+
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            "default": "gpt-5.4-mini",
+        })
+        # GPT-5.x → runtime auto-upgrades to codex_responses
+        client, resolved = _try_azure_foundry(model="gpt-5.4-mini")
+        assert resolved == "gpt-5.4-mini"
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert client.api_key == "sk-azure-static-key"
+
+    def test_no_key_returns_none(self, monkeypatch, patch_load_config):
+        from agent.auxiliary_client import _try_azure_foundry
+
+        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            "default": "gpt-4o",
+        })
+        client, resolved = _try_azure_foundry(model="gpt-4o")
+        assert client is None
+        assert resolved is None
+
+    def test_no_model_returns_none(self, monkeypatch, patch_load_config):
+        """Azure has no fallback aux model — fail soft so the auto chain
+        can try other providers."""
+        from agent.auxiliary_client import _try_azure_foundry
+
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            # No default model
+        })
+        client, resolved = _try_azure_foundry()
+        assert client is None
+        assert resolved is None
+
+
+# ---------------------------------------------------------------------------
+# auth_mode: entra_id — callable api_key survives end-to-end
+# ---------------------------------------------------------------------------
+
+
+class TestAuxAzureFoundryEntra:
+    def test_callable_api_key_reaches_openai_constructor(
+        self, monkeypatch, fake_azure_identity, patch_load_config,
+    ):
+        """The token provider callable must arrive at ``OpenAI(api_key=...)``
+        intact — never stringified to ``"no-key-required"`` or to the
+        SDK-internal empty-string representation BEFORE we hand it off.
+
+        We assert on the public SDK contract (constructor receives the
+        callable) rather than ``client.api_key``, because OpenAI 2.24.0
+        stores callable api_keys in a private attribute and exposes
+        ``client.api_key`` as ``""``. The SDK still calls the callable
+        per request to mint ``Authorization: Bearer <token>``; that
+        behaviour is the documented Microsoft/OpenAI contract we rely on.
+        """
+        from agent import auxiliary_client as _aux
+
+        received = {}
+
+        class _FakeOpenAI:
+            def __init__(self, **kwargs):
+                received.update(kwargs)
+                # Mirror the fields downstream callers read.
+                self.api_key = kwargs.get("api_key", "")
+                self.base_url = kwargs.get("base_url", "")
+
+        monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI)
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            "auth_mode": "entra_id",
+            "default": "gpt-4o",
+        })
+        client, resolved = _aux._try_azure_foundry(model="gpt-4o")
+        assert client is not None
+        assert resolved == "gpt-4o"
+        # Public-contract assertion: the OpenAI SDK constructor saw the
+        # callable, exactly as Microsoft's Foundry sample requires.
+        assert callable(received["api_key"])
+        assert not isinstance(received["api_key"], str)
+        assert received["api_key"]().startswith("jwt-for-")
+        # Base URL forwarded verbatim (no /responses suffix stripping
+        # in this path — that's a separate concern handled by the
+        # runtime resolver only when the user re-saves config).
+        assert received["base_url"] == "https://r.openai.azure.com/openai/v1"
+
+    def test_codex_responses_with_entra_wraps_correctly(
+        self, monkeypatch, fake_azure_identity, patch_load_config,
+    ):
+        """GPT-5.x deployment on Entra ID — auto-upgraded to
+        codex_responses, wrapped in CodexAuxiliaryClient, callable
+        api_key handed to the underlying OpenAI SDK."""
+        from agent import auxiliary_client as _aux
+
+        received = {}
+
+        class _FakeOpenAI:
+            def __init__(self, **kwargs):
+                received.update(kwargs)
+                self.api_key = kwargs.get("api_key", "")
+                self.base_url = kwargs.get("base_url", "")
+
+        monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI)
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            "auth_mode": "entra_id",
+            "default": "gpt-5.4-mini",
+        })
+        client, resolved = _aux._try_azure_foundry(model="gpt-5.4-mini")
+        assert resolved == "gpt-5.4-mini"
+        assert isinstance(client, _aux.CodexAuxiliaryClient)
+        # The Codex wrapper received an OpenAI client built with the
+        # callable api_key — verify against the SDK constructor record,
+        # not the wrapper attribute (which mirrors the SDK's empty-
+        # string representation).
+        assert callable(received["api_key"])
+        assert received["api_key"]().startswith("jwt-for-")
+
+    def test_entra_anthropic_messages_uses_bearer_hook(
+        self, monkeypatch, fake_azure_identity, patch_load_config,
+    ):
+        """Entra ID + anthropic_messages: runtime returns a callable
+        api_key; ``_maybe_wrap_anthropic`` → ``build_anthropic_client``
+        detects the callable and installs the bearer-injecting httpx
+        event hook on a custom ``httpx.Client`` passed to the
+        Anthropic SDK via ``http_client=``."""
+        from agent import auxiliary_client as _aux
+        from agent import anthropic_adapter as _anthropic
+
+        received = {}
+
+        class _FakeOpenAI:
+            def __init__(self, **kwargs):
+                received["openai"] = kwargs
+                self.api_key = kwargs.get("api_key", "")
+                self.base_url = kwargs.get("base_url", "")
+
+        class _FakeAnthropicSDK:
+            class Anthropic:
+                def __init__(self, **kwargs):
+                    received["anthropic"] = kwargs
+
+        monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI)
+        monkeypatch.setattr(_anthropic, "_get_anthropic_sdk", lambda: _FakeAnthropicSDK)
+
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.services.ai.azure.com/anthropic",
+            "api_mode": "anthropic_messages",
+            "auth_mode": "entra_id",
+            "default": "claude-sonnet-4-5",
+        })
+        client, resolved = _aux._try_azure_foundry(model="claude-sonnet-4-5")
+        assert client is not None
+        assert resolved == "claude-sonnet-4-5"
+        # The Anthropic SDK constructor received a custom http_client
+        # (the bearer-injecting hook) and a placeholder auth_token.
+        anthropic_kwargs = received.get("anthropic") or {}
+        assert "http_client" in anthropic_kwargs, (
+            "build_anthropic_client must pass a custom http_client when "
+            "given a callable api_key, otherwise the SDK cannot mint "
+            "fresh tokens per request"
+        )
+        assert anthropic_kwargs.get("auth_token") == "entra-id-bearer-via-http-hook"
+        # Verify the http_client actually has our event hook installed.
+        http_client = anthropic_kwargs["http_client"]
+        hooks = getattr(http_client, "event_hooks", {})
+        assert "request" in hooks and len(hooks["request"]) >= 1
+
+
+# ---------------------------------------------------------------------------
+# resolve_provider_client → azure-foundry dispatch
+# ---------------------------------------------------------------------------
+
+
+class TestResolveProviderClientAzureFoundry:
+    def test_dispatches_to_azure_branch_not_generic_api_key_path(
+        self, monkeypatch, fake_azure_identity, patch_load_config,
+    ):
+        """End-to-end: the public ``resolve_provider_client`` entry
+        point must take the dedicated azure-foundry branch, NOT the
+        generic api-key registry path that would call
+        ``resolve_api_key_provider_credentials`` and return None for
+        Entra users."""
+        from agent import auxiliary_client as _aux
+
+        received = {}
+
+        class _FakeOpenAI:
+            def __init__(self, **kwargs):
+                received.update(kwargs)
+                self.api_key = kwargs.get("api_key", "")
+                self.base_url = kwargs.get("base_url", "")
+
+        monkeypatch.setattr(_aux, "OpenAI", _FakeOpenAI)
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            "auth_mode": "entra_id",
+            "default": "gpt-4o",
+        })
+        client, resolved = _aux.resolve_provider_client("azure-foundry", "gpt-4o")
+        assert client is not None
+        assert resolved == "gpt-4o"
+        # The callable made it through resolve_provider_client → _try_azure_foundry
+        # → OpenAI(api_key=...).
+        assert callable(received["api_key"])
+
+    def test_warns_and_returns_none_on_failure(
+        self, monkeypatch, patch_load_config, caplog,
+    ):
+        """When azure-foundry is requested but cannot be resolved
+        (e.g. no model + no key), we return (None, None) and log a
+        clear warning pointing at ``hermes doctor``."""
+        import logging
+        from agent.auxiliary_client import resolve_provider_client
+
+        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
+        patch_load_config({
+            "provider": "azure-foundry",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_mode": "chat_completions",
+            # No default → resolver yields no model → bail
+        })
+        with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            client, resolved = resolve_provider_client("azure-foundry")
+        assert client is None
+        assert resolved is None
+        assert any(
+            "azure-foundry" in rec.message and "hermes doctor" in rec.message
+            for rec in caplog.records
+        )
--- a/tests/agent/test_azure_identity_adapter.py
+++ b/tests/agent/test_azure_identity_adapter.py
@ -0,0 +1,662 @@
+"""Tests for the Microsoft Entra ID adapter (agent/azure_identity_adapter.py).
+
+Covers:
+  - Scope resolution per Azure host shape
+  - Display masking for callable + string + None inputs
+  - Cache-fingerprint stability under callable refresh
+  - is_token_provider truthiness on callables vs strings
+  - EntraIdentityConfig serialization round-trip
+  - Token provider construction with mocked azure-identity
+  - Credential cache reuse + reset
+  - has_azure_identity_credentials timeout / failure paths
+  - describe_active_credential structural reporting
+  - Lazy-install error path when azure-identity absent + lazy installs
+    disabled
+
+We mock azure.identity at the import boundary rather than hitting any
+real Azure endpoint. Tests must remain hermetic per AGENTS.md.
+"""
+
+from __future__ import annotations
+
+import sys
+from collections.abc import Callable
+from types import SimpleNamespace
+from typing import cast
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Ensure we always import a fresh adapter module — credential caches in
+# the adapter persist across tests otherwise, polluting assertions
+# about cache invalidation.
+@pytest.fixture(autouse=True)
+def _reset_adapter_cache():
+    from agent.azure_identity_adapter import reset_credential_cache
+    reset_credential_cache()
+    yield
+    reset_credential_cache()
+
+
+# ---------------------------------------------------------------------------
+# Scope constant
+# ---------------------------------------------------------------------------
+
+
+class TestEntraScopeConstant:
+    """Pin the Microsoft-documented Foundry inference scope.
+
+    Microsoft's official samples for both ``*.openai.azure.com`` and
+    ``*.services.ai.azure.com`` use ``https://ai.azure.com/.default``.
+    The older ``cognitiveservices.azure.com/.default`` is the
+    control-plane scope and is rejected for inference by newer
+    Azure OpenAI / Foundry resources.
+
+    Users with sovereign-cloud or unusual-tenant requirements pass the
+    scope explicitly via ``model.entra.scope`` in ``config.yaml``.
+
+    Refs:
+      * https://learn.microsoft.com/azure/ai-foundry/openai/how-to/managed-identity
+      * https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id
+    """
+
+    def test_default_scope_matches_microsoft_documentation(self):
+        from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT
+        assert SCOPE_AI_AZURE_DEFAULT == "https://ai.azure.com/.default"
+
+
+# ---------------------------------------------------------------------------
+# Cache fingerprint + http-bearer helpers
+# ---------------------------------------------------------------------------
+
+
+class TestMaterializeBearerForHttp:
+    """The only helper that mints a real bearer JWT — must call the
+    callable exactly once and never fall through to display masking."""
+
+    def test_callable_is_invoked_and_returns_token(self):
+        from agent.azure_identity_adapter import materialize_bearer_for_http
+
+        invoked = {"count": 0}
+
+        def provider():
+            invoked["count"] += 1
+            return "fresh-jwt"
+
+        assert materialize_bearer_for_http(provider) == "fresh-jwt"
+        assert invoked["count"] == 1
+
+    def test_string_passes_through(self):
+        from agent.azure_identity_adapter import materialize_bearer_for_http
+        assert materialize_bearer_for_http("plain-key") == "plain-key"
+
+    def test_callable_returning_empty_raises(self):
+        from agent.azure_identity_adapter import materialize_bearer_for_http
+        with pytest.raises(ValueError):
+            materialize_bearer_for_http(lambda: "")
+
+    def test_empty_string_raises(self):
+        from agent.azure_identity_adapter import materialize_bearer_for_http
+        with pytest.raises(ValueError):
+            materialize_bearer_for_http("")
+        with pytest.raises(ValueError):
+            materialize_bearer_for_http(None)
+
+
+# ---------------------------------------------------------------------------
+# build_bearer_http_client — the Anthropic-on-Foundry bridge
+# ---------------------------------------------------------------------------
+
+
+class TestBuildBearerHttpClient:
+    """``build_bearer_http_client`` returns an ``httpx.Client`` whose
+    request event hook mints a fresh JWT per outbound request. This is
+    how Entra ID auth reaches the Anthropic SDK (which does not accept
+    callable ``auth_token``)."""
+
+    def test_returns_httpx_client_with_request_hook(self):
+        import httpx
+        from agent.azure_identity_adapter import build_bearer_http_client
+
+        client = build_bearer_http_client(lambda: "jwt")
+        try:
+            assert isinstance(client, httpx.Client)
+            hooks = client.event_hooks.get("request", [])
+            assert len(hooks) >= 1
+        finally:
+            client.close()
+
+    def test_hook_overrides_authorization_header(self):
+        import httpx
+        from agent.azure_identity_adapter import build_bearer_http_client
+
+        minted_tokens = []
+
+        def provider():
+            minted_tokens.append(f"jwt-{len(minted_tokens) + 1}")
+            return minted_tokens[-1]
+
+        client = build_bearer_http_client(provider)
+        try:
+            hook = client.event_hooks["request"][0]
+            # Build a request with conflicting pre-set headers and verify
+            # the hook strips them and installs the fresh bearer.
+            req = httpx.Request(
+                "POST", "https://example.com/v1/messages",
+                headers={
+                    "Authorization": "Bearer stale-token",
+                    "api-key": "static-key",
+                    "x-api-key": "static-key",
+                },
+                json={"hello": "world"},
+            )
+            hook(req)
+            assert req.headers["Authorization"] == "Bearer jwt-1"
+            # The static-key headers must be stripped — sending both
+            # auth values would be ambiguous on Azure.
+            assert "api-key" not in req.headers
+            assert "x-api-key" not in req.headers
+
+            # Second invocation mints a fresh token.
+            req2 = httpx.Request("GET", "https://example.com/v1/models")
+            hook(req2)
+            assert req2.headers["Authorization"] == "Bearer jwt-2"
+            assert len(minted_tokens) == 2
+        finally:
+            client.close()
+
+    def test_hook_strips_auth_headers_and_warns_when_token_provider_fails(self, caplog):
+        """When the token provider fails (chain exhausted, IMDS down, az
+        login expired), the hook must:
+          1. Log at WARNING level so the misconfiguration is visible at
+             default log level (not buried at DEBUG).
+          2. Strip any pre-set Authorization headers — including the
+             placeholder ``entra-id-bearer-via-http-hook`` sentinel that
+             :func:`_build_anthropic_client_with_bearer_hook` sets on the
+             Anthropic SDK constructor. This produces a clean
+             "missing auth" 401 from Azure rather than a sentinel-bearing
+             401 that's harder to diagnose AND avoids leaking the
+             sentinel string into upstream access logs.
+        """
+        import logging
+        import httpx
+        from agent.azure_identity_adapter import build_bearer_http_client
+
+        def bad_provider():
+            return ""  # empty token → materialize_bearer_for_http raises
+
+        client = build_bearer_http_client(bad_provider)
+        try:
+            hook = client.event_hooks["request"][0]
+            req = httpx.Request(
+                "POST", "https://example.com/v1/messages",
+                headers={
+                    "Authorization": "Bearer entra-id-bearer-via-http-hook",
+                    "api-key": "leaked-placeholder",
+                },
+            )
+            with caplog.at_level(logging.WARNING, logger="agent.azure_identity_adapter"):
+                hook(req)  # Must not raise.
+            # Pre-set auth headers stripped — no sentinel makes it to Azure.
+            assert "Authorization" not in req.headers
+            assert "api-key" not in req.headers
+            # WARNING was logged so the user sees the misconfiguration.
+            assert any(
+                rec.levelno == logging.WARNING and "Entra ID token provider" in rec.message
+                for rec in caplog.records
+            )
+        finally:
+            client.close()
+
+    def test_rejects_non_callable_provider(self):
+        from agent.azure_identity_adapter import build_bearer_http_client
+        with pytest.raises(ValueError):
+            build_bearer_http_client(cast(Callable[[], str], "plain-string-not-callable"))
+        with pytest.raises(ValueError):
+            build_bearer_http_client(cast(Callable[[], str], None))
+
+    def test_forwards_httpx_kwargs(self):
+        import httpx
+        from agent.azure_identity_adapter import build_bearer_http_client
+
+        timeout = httpx.Timeout(60.0, connect=5.0)
+        client = build_bearer_http_client(lambda: "jwt", timeout=timeout)
+        try:
+            # httpx stores the timeout per-pool; just sanity-check it was
+            # accepted without TypeError.
+            assert client is not None
+        finally:
+            client.close()
+
+
+class TestIsTokenProvider:
+    def test_callable_is_token_provider(self):
+        from agent.azure_identity_adapter import is_token_provider
+        assert is_token_provider(lambda: "x") is True
+
+    def test_string_is_not_token_provider(self):
+        from agent.azure_identity_adapter import is_token_provider
+        assert is_token_provider("static-key") is False
+        # ``str`` instances are technically callable in some edge cases
+        # — confirm they're never classified as token providers.
+        assert is_token_provider("") is False
+
+
+# ---------------------------------------------------------------------------
+# EntraIdentityConfig
+# ---------------------------------------------------------------------------
+
+
+class TestEntraIdentityConfig:
+    """The serializable config that crosses multiprocessing boundaries —
+    must round-trip through dict cleanly and never lose fields."""
+
+    def test_to_dict_round_trip(self):
+        from agent.azure_identity_adapter import EntraIdentityConfig
+        cfg = EntraIdentityConfig(
+            scope="https://ai.azure.com/.default",
+            exclude_interactive_browser=False,
+        )
+        rebuilt = EntraIdentityConfig.from_dict(cfg.to_dict())
+        assert rebuilt == cfg
+
+    def test_from_dict_handles_empty_strings(self):
+        from agent.azure_identity_adapter import EntraIdentityConfig
+        cfg = EntraIdentityConfig.from_dict({
+            "scope": "",
+            "client_id": None,
+        })
+        # Empty scope falls back to default
+        assert cfg.scope.endswith("/.default")
+
+    def test_from_dict_ignores_legacy_identity_keys(self):
+        """Old config.yaml that still has model.entra.client_id /
+        tenant_id / authority should not crash from_dict — those values
+        are now read from AZURE_* env vars by azure-identity directly."""
+        from agent.azure_identity_adapter import EntraIdentityConfig
+        cfg = EntraIdentityConfig.from_dict({
+            "tenant_id": "legacy-tenant",
+            "authority": "https://login.partner.microsoftonline.cn",
+            "client_id": "user-mi-client",
+        })
+        # Legacy keys silently ignored — no crash, no surprise field on the dataclass.
+        assert not hasattr(cfg, "client_id")
+        assert not hasattr(cfg, "tenant_id")
+        assert not hasattr(cfg, "authority")
+
+    def test_constructor_normalizes_empty_scope(self):
+        from agent.azure_identity_adapter import EntraIdentityConfig
+        cfg = EntraIdentityConfig(scope="")
+        assert cfg.scope.endswith("/.default")
+
+    def test_from_dict_default_scope_override(self):
+        from agent.azure_identity_adapter import EntraIdentityConfig
+        cfg = EntraIdentityConfig.from_dict(
+            {"scope": ""},
+            default_scope="https://custom.example/.default",
+        )
+        assert cfg.scope == "https://custom.example/.default"
+
+    def test_dataclass_is_frozen(self):
+        # Frozen dataclasses are hashable / safe to pass through caches.
+        from agent.azure_identity_adapter import EntraIdentityConfig
+        cfg = EntraIdentityConfig()
+        with pytest.raises((AttributeError, Exception)):
+            setattr(cfg, "scope", "mutated")
+
+
+# ---------------------------------------------------------------------------
+# Credential / token provider construction
+# ---------------------------------------------------------------------------
+
+
+class _FakeAzureIdentity:
+    """Stand-in for the ``azure.identity`` module.
+
+    Captures kwargs passed to ``DefaultAzureCredential`` so tests can
+    assert how config flows into the SDK.
+    """
+
+    def __init__(self):
+        self.last_credential_kwargs = None
+        self.last_scope = None
+        self.credential_count = 0
+
+    def DefaultAzureCredential(self, **kwargs):  # noqa: N802 — match SDK
+        self.last_credential_kwargs = kwargs
+        self.credential_count += 1
+        return SimpleNamespace(
+            get_token=lambda scope: SimpleNamespace(token="fake-jwt", expires_on=9999999999),
+            kwargs=kwargs,
+        )
+
+    def get_bearer_token_provider(self, credential, scope):
+        self.last_scope = scope
+        # Return a callable that mints a token when invoked.
+        return lambda: f"jwt-for-{scope}"
+
+
+@pytest.fixture
+def fake_azure_identity(monkeypatch):
+    """Install a fake azure.identity into sys.modules and stub the
+    adapter's `_require_azure_identity` so all tests use the fake."""
+    fake = _FakeAzureIdentity()
+
+    fake_module = SimpleNamespace(
+        DefaultAzureCredential=fake.DefaultAzureCredential,
+        get_bearer_token_provider=fake.get_bearer_token_provider,
+    )
+    monkeypatch.setitem(sys.modules, "azure", SimpleNamespace(identity=fake_module))
+    monkeypatch.setitem(sys.modules, "azure.identity", fake_module)
+
+    # The adapter's `_require_azure_identity` does its own import, so
+    # patch that too to make sure tests never hit the real package's
+    # singleton state.
+    from agent import azure_identity_adapter as _adapter
+    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module)
+
+    return fake
+
+
+class TestBuildCredential:
+    def test_default_kwargs_are_minimal(self, fake_azure_identity):
+        """SDK default for ``exclude_interactive_browser_credential`` is
+        True; we only pass it when the user opts IN to interactive
+        browser auth. Tenant / authority / service principal config
+        flow through the standard ``AZURE_*`` env vars (read by
+        azure-identity directly), not Hermes config kwargs."""
+        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential
+        cred = build_credential(EntraIdentityConfig())
+        kwargs = fake_azure_identity.last_credential_kwargs
+        # Default config should produce empty kwargs — SDK uses its own
+        # defaults plus env-var-driven settings.
+        assert kwargs == {}
+        assert cred is not None
+
+    def test_interactive_browser_opt_in(self, fake_azure_identity):
+        """When the user explicitly sets
+        ``exclude_interactive_browser=False``, the SDK kwarg is set to
+        False. Without the opt-in we don't pass the kwarg at all (SDK
+        default is True / browser excluded)."""
+        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential
+        build_credential(EntraIdentityConfig(exclude_interactive_browser=False))
+        kwargs = fake_azure_identity.last_credential_kwargs
+        assert kwargs["exclude_interactive_browser_credential"] is False
+
+    def test_credential_is_cached_per_config(self, fake_azure_identity):
+        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential
+        cfg = EntraIdentityConfig(scope="s1")
+        c1 = build_credential(cfg)
+        c2 = build_credential(cfg)
+        assert c1 is c2
+        assert fake_azure_identity.credential_count == 1
+
+    def test_distinct_configs_get_distinct_credentials(self, fake_azure_identity):
+        from agent.azure_identity_adapter import EntraIdentityConfig, build_credential
+        c1 = build_credential(EntraIdentityConfig(scope="s1"))
+        c2 = build_credential(EntraIdentityConfig(scope="s2"))
+        assert c1 is not c2
+        assert fake_azure_identity.credential_count == 2
+
+    def test_reset_cache_invalidates(self, fake_azure_identity):
+        from agent.azure_identity_adapter import (
+            EntraIdentityConfig,
+            build_credential,
+            reset_credential_cache,
+        )
+        cfg = EntraIdentityConfig(scope="x")
+        c1 = build_credential(cfg)
+        reset_credential_cache()
+        c2 = build_credential(cfg)
+        assert c1 is not c2
+
+
+class TestBuildTokenProvider:
+    def test_returns_callable_for_scope(self, fake_azure_identity):
+        from agent.azure_identity_adapter import build_token_provider
+        provider = build_token_provider(scope="https://ai.azure.com/.default")
+        assert callable(provider)
+        assert provider() == "jwt-for-https://ai.azure.com/.default"
+        assert fake_azure_identity.last_scope == "https://ai.azure.com/.default"
+
+    def test_falls_back_to_default_scope_when_unspecified(self, fake_azure_identity):
+        """When neither ``scope`` nor ``config`` is provided,
+        ``build_token_provider`` uses ``SCOPE_AI_AZURE_DEFAULT`` —
+        Microsoft's documented Foundry inference scope. ``base_url`` is
+        accepted for back-compat but ignored."""
+        from agent.azure_identity_adapter import (
+            SCOPE_AI_AZURE_DEFAULT,
+            build_token_provider,
+        )
+        build_token_provider(base_url="https://r.openai.azure.com/openai/v1")
+        assert fake_azure_identity.last_scope == SCOPE_AI_AZURE_DEFAULT
+
+    def test_explicit_scope_wins_over_base_url(self, fake_azure_identity):
+        from agent.azure_identity_adapter import build_token_provider
+        build_token_provider(
+            scope="https://override.example/.default",
+            base_url="https://r.openai.azure.com/openai/v1",
+        )
+        assert fake_azure_identity.last_scope == "https://override.example/.default"
+
+    def test_config_object_wins_over_kwargs(self, fake_azure_identity):
+        from agent.azure_identity_adapter import (
+            EntraIdentityConfig,
+            build_token_provider,
+        )
+        cfg = EntraIdentityConfig(scope="cfg-scope")
+        build_token_provider(scope="ignored", config=cfg)
+        assert fake_azure_identity.last_scope == "cfg-scope"
+        assert fake_azure_identity.last_credential_kwargs == {}
+
+
+# ---------------------------------------------------------------------------
+# Lazy-install / missing-package surface
+# ---------------------------------------------------------------------------
+
+
+class TestRequireAzureIdentityMissing:
+    def test_clear_error_when_lazy_install_disabled(self, monkeypatch):
+        """When azure-identity isn't importable AND lazy installs are
+        off, the adapter must raise ImportError with an actionable
+        message, not propagate FeatureUnavailable."""
+        from agent import azure_identity_adapter as _adapter
+
+        # Force the import path to fail.
+        original_import = __builtins__["__import__"] if isinstance(__builtins__, dict) else __import__
+        def _fake_import(name, *args, **kwargs):
+            if name == "azure.identity" or name.startswith("azure.identity."):
+                raise ImportError("simulated missing azure-identity")
+            return original_import(name, *args, **kwargs)
+
+        monkeypatch.setattr("builtins.__import__", _fake_import)
+
+        # Simulate lazy installs disabled.
+        from tools.lazy_deps import FeatureUnavailable
+
+        def _fake_ensure(*args, **kwargs):
+            raise FeatureUnavailable(
+                "provider.azure_identity",
+                ("azure-identity==1.25.3",),
+                "lazy installs disabled (test simulation)",
+            )
+
+        # The adapter calls ``ensure`` from ``tools.lazy_deps``; intercept
+        # it by patching the actual symbol path.
+        monkeypatch.setattr("tools.lazy_deps.ensure", _fake_ensure)
+
+        with pytest.raises(ImportError) as exc_info:
+            _adapter._require_azure_identity()
+        msg = str(exc_info.value)
+        assert "azure-identity" in msg
+        assert "Foundry" in msg or "foundry" in msg.lower()
+
+
+# ---------------------------------------------------------------------------
+# has_azure_identity_credentials probe (timeout-bounded)
+# ---------------------------------------------------------------------------
+
+
+class TestHasAzureIdentityCredentials:
+    def test_returns_false_when_package_missing_and_install_disabled(self, monkeypatch):
+        from agent import azure_identity_adapter as _adapter
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
+        assert _adapter.has_azure_identity_credentials(
+            "https://x/.default", allow_install=False,
+        ) is False
+
+    def test_lazy_install_triggered_when_package_missing(self, monkeypatch):
+        """With allow_install=True (default), the probe must trigger the
+        lazy-install path before bailing — otherwise the wizard's
+        ``preflight`` would silently fail for fresh installs that haven't
+        run ``pip install azure-identity`` yet."""
+        from agent import azure_identity_adapter as _adapter
+
+        installed = {"called": False}
+
+        def _fake_install():
+            installed["called"] = True
+            # After install, pretend the package is now importable.
+            monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
+            return SimpleNamespace(
+                DefaultAzureCredential=lambda **kw: SimpleNamespace(
+                    kwargs=kw,
+                    get_token=lambda scope: SimpleNamespace(token="post-install-jwt", expires_on=0),
+                ),
+                get_bearer_token_provider=lambda c, s: lambda: "x",
+            )
+
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
+        monkeypatch.setattr(_adapter, "_require_azure_identity", _fake_install)
+
+        # Provide a credential factory so the probe proceeds after install.
+        monkeypatch.setattr(
+            _adapter, "build_credential",
+            lambda config: SimpleNamespace(
+                get_token=lambda scope: SimpleNamespace(token="probe-jwt", expires_on=0),
+            ),
+        )
+
+        result = _adapter.has_azure_identity_credentials(
+            "https://x/.default", timeout_seconds=0.5,
+        )
+        assert installed["called"] is True, (
+            "has_azure_identity_credentials must trigger lazy install "
+            "before bailing"
+        )
+        assert result is True
+
+    def test_returns_true_on_successful_token_mint(self, fake_azure_identity):
+        from agent.azure_identity_adapter import has_azure_identity_credentials
+        assert has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5) is True
+
+    def test_returns_false_when_get_token_raises(self, monkeypatch):
+        from agent import azure_identity_adapter as _adapter
+
+        def _failing_credential(_config):
+            class _Cred:
+                def get_token(self, scope):
+                    raise RuntimeError("simulated chain exhaustion")
+            return _Cred()
+
+        monkeypatch.setattr(_adapter, "build_credential", _failing_credential)
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
+        assert _adapter.has_azure_identity_credentials("https://x/.default", timeout_seconds=0.5) is False
+
+    def test_returns_false_on_timeout(self, monkeypatch):
+        """Slow IMDS / network must time out, not hang the caller."""
+        import threading
+        from agent import azure_identity_adapter as _adapter
+
+        slow_release = threading.Event()
+
+        def _slow_credential(_config):
+            class _Cred:
+                def get_token(self, scope):
+                    # Block forever from the test's perspective; the
+                    # adapter must give up via its thread-bounded probe.
+                    slow_release.wait(timeout=10)
+                    return SimpleNamespace(token="never-returned", expires_on=0)
+            return _Cred()
+
+        monkeypatch.setattr(_adapter, "build_credential", _slow_credential)
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
+        try:
+            assert _adapter.has_azure_identity_credentials(
+                "https://x/.default", timeout_seconds=0.1
+            ) is False
+        finally:
+            slow_release.set()
+
+
+# ---------------------------------------------------------------------------
+# describe_active_credential — used by hermes doctor + hermes auth
+# ---------------------------------------------------------------------------
+
+
+class TestDescribeActiveCredential:
+    def test_reports_not_installed(self, monkeypatch):
+        from agent import azure_identity_adapter as _adapter
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
+        info = _adapter.describe_active_credential(
+            scope="https://x/.default", allow_install=False,
+        )
+        assert info["ok"] is False
+        assert "not installed" in info["error"].lower()
+        assert "pip install" in info["hint"].lower()
+
+    def test_reports_install_failure(self, monkeypatch):
+        """When lazy install is allowed but fails (e.g. lazy installs
+        disabled), the diagnostic surfaces the failure as the error."""
+        from agent import azure_identity_adapter as _adapter
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: False)
+
+        def _fail_install():
+            raise ImportError("simulated: lazy installs disabled")
+
+        monkeypatch.setattr(_adapter, "_require_azure_identity", _fail_install)
+        info = _adapter.describe_active_credential(
+            scope="https://x/.default", allow_install=True,
+        )
+        assert info["ok"] is False
+        assert "lazy installs disabled" in info["error"]
+        assert "lazy" in info["hint"].lower()
+
+    def test_reports_env_sources_for_managed_identity(self, fake_azure_identity, monkeypatch):
+        from agent.azure_identity_adapter import describe_active_credential
+        monkeypatch.setenv("IDENTITY_ENDPOINT", "http://169.254.169.254")
+        info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
+        assert info["ok"] is True
+        sources = info.get("env_sources") or []
+        assert any("ManagedIdentity" in s for s in sources)
+
+    def test_reports_env_sources_for_workload_identity(self, fake_azure_identity, monkeypatch):
+        from agent.azure_identity_adapter import describe_active_credential
+        monkeypatch.setenv("AZURE_FEDERATED_TOKEN_FILE", "/var/secrets/azure/federated-token")
+        info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
+        sources = info.get("env_sources") or []
+        assert any("WorkloadIdentity" in s for s in sources)
+
+    def test_reports_env_sources_for_service_principal(self, fake_azure_identity, monkeypatch):
+        from agent.azure_identity_adapter import describe_active_credential
+        monkeypatch.setenv("AZURE_TENANT_ID", "t")
+        monkeypatch.setenv("AZURE_CLIENT_ID", "c")
+        monkeypatch.setenv("AZURE_CLIENT_SECRET", "s")
+        info = describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
+        sources = info.get("env_sources") or []
+        assert any("EnvironmentCredential" in s for s in sources)
+
+    def test_reports_error_on_chain_failure(self, monkeypatch):
+        from agent import azure_identity_adapter as _adapter
+
+        def _failing_credential(_config):
+            class _Cred:
+                def get_token(self, scope):
+                    raise RuntimeError("auth failed")
+            return _Cred()
+
+        monkeypatch.setattr(_adapter, "build_credential", _failing_credential)
+        monkeypatch.setattr(_adapter, "has_azure_identity_installed", lambda: True)
+        info = _adapter.describe_active_credential(scope="https://x/.default", timeout_seconds=0.5)
+        assert info["ok"] is False
+        assert "auth failed" in info.get("error", "")
--- a/tests/agent/test_bedrock_1m_context.py
+++ b/tests/agent/test_bedrock_1m_context.py
@ -1,7 +1,7 @@
 """Tests for the 1M-context beta header on AWS Bedrock Claude models.

 Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS
-Bedrock (and Azure AI Foundry) that window is still gated behind the
+Bedrock (and Microsoft Foundry) that window is still gated behind the
 ``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock
 caps these models at 200K even though ``model_metadata.py`` advertises 1M.

@ -61,4 +61,3 @@ class TestBedrockContext1MBeta:
        # Other common betas still present — no regression.
        assert "interleaved-thinking-2025-05-14" in beta_header
        assert "fine-grained-tool-streaming-2025-05-14" in beta_header
-
--- a/tests/hermes_cli/test_azure_detect.py
+++ b/tests/hermes_cli/test_azure_detect.py
@ -102,7 +102,7 @@ def test_detect_anthropic_path_wins_without_http():

 def test_detect_openai_models_probe_success():
    """/models probe returning a model list → chat_completions."""
-    def _fake_get(url, api_key, timeout=6.0):
+    def _fake_get(url, api_key, timeout=6.0, **kwargs):
        assert "key-abc" == api_key
        return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))

@ -118,7 +118,7 @@ def test_detect_openai_models_probe_success():

 def test_detect_openai_models_probe_empty_list_still_counts():
    """Endpoint returned OpenAI shape but no models → still chat_completions."""
-    def _fake_get(url, api_key, timeout=6.0):
+    def _fake_get(url, api_key, timeout=6.0, **kwargs):
        return 200, {"object": "list", "data": []}

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
@ -132,7 +132,7 @@ def test_detect_openai_models_probe_empty_list_still_counts():

 def test_detect_falls_back_to_anthropic_probe():
    """/models fails but Anthropic Messages probe succeeds."""
-    def _fake_get(url, api_key, timeout=6.0):
+    def _fake_get(url, api_key, timeout=6.0, **kwargs):
        return 401, None  # /models forbidden

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \
@ -164,7 +164,7 @@ def test_probe_openai_models_tries_multiple_api_versions():
    """First call (no api-version) fails, api-version fallback succeeds."""
    calls = []

-    def _fake_get(url, api_key, timeout=6.0):
+    def _fake_get(url, api_key, timeout=6.0, **kwargs):
        calls.append(url)
        if "api-version" not in url:
            return 404, None
--- a/tests/hermes_cli/test_azure_foundry_entra.py
+++ b/tests/hermes_cli/test_azure_foundry_entra.py
@ -0,0 +1,404 @@
+"""Tests for Azure Foundry Entra ID runtime resolution.
+
+Covers the contract introduced in PR for Microsoft Entra ID auth on
+``azure-foundry``:
+
+  * ``_resolve_azure_foundry_runtime`` returns a callable ``api_key`` for
+    ``model.auth_mode = entra_id`` (OpenAI-style only).
+  * Anthropic-style endpoints with ``auth_mode = entra_id`` return the same
+    callable runtime credential as OpenAI-style endpoints.
+  * The legacy ``api_key`` path is unchanged when ``auth_mode`` is absent
+    or set to ``api_key``.
+  * Explicit ``--api-key`` overrides at runtime still work in entra mode
+    (escape hatch for one-off testing).
+  * ``model.entra.scope`` propagates to the token-provider config; Azure
+    identity selection stays in standard AZURE_* env vars.
+  * ``_get_azure_foundry_auth_status`` is structural — never mints a
+    token (verified by checking the credential cache untouched).
+  * ``has_usable_secret`` for ``AZURE_FOUNDRY_API_KEY`` is irrelevant
+    when ``auth_mode == entra_id``.
+"""
+
+from __future__ import annotations
+
+import sys
+from types import SimpleNamespace
+from typing import cast
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _reset_credential_cache():
+    from agent.azure_identity_adapter import reset_credential_cache
+    reset_credential_cache()
+    yield
+    reset_credential_cache()
+
+
+@pytest.fixture
+def fake_azure_identity(monkeypatch):
+    """Identical fake to test_azure_identity_adapter — keeps Azure SDK
+    out of these tests so they run in CI without the package installed."""
+    from agent import azure_identity_adapter as _adapter
+
+    last = {"scope": None, "kwargs": None, "credential_count": 0}
+
+    def _provider(scope):
+        return lambda: f"jwt-for-{scope}"
+
+    fake_module = SimpleNamespace(
+        DefaultAzureCredential=lambda **kw: SimpleNamespace(
+            kwargs=kw,
+            get_token=lambda scope: SimpleNamespace(token="fake", expires_on=9999999999),
+        ),
+        get_bearer_token_provider=lambda credential, scope: (
+            last.__setitem__("scope", scope),
+            last.__setitem__("kwargs", credential.kwargs),
+            last.__setitem__("credential_count", cast(int, last["credential_count"]) + 1),
+            _provider(scope),
+        )[-1],
+    )
+    monkeypatch.setattr(_adapter, "_require_azure_identity", lambda: fake_module)
+    monkeypatch.setitem(sys.modules, "azure.identity", fake_module)
+    return last
+
+
+# ---------------------------------------------------------------------------
+# _resolve_azure_foundry_runtime: entra_id branch
+# ---------------------------------------------------------------------------
+
+
+class TestResolveAzureFoundryRuntimeEntra:
+    def test_returns_callable_api_key_for_entra(self, fake_azure_identity):
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://my-resource.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+                "default": "gpt-4o",  # stays on chat_completions (no codex auto-upgrade)
+            },
+        )
+        assert runtime["provider"] == "azure-foundry"
+        assert runtime["auth_mode"] == "entra_id"
+        assert runtime["api_mode"] == "chat_completions"
+        assert callable(runtime["api_key"])
+        assert runtime["source"] == "entra_id"
+
+    def test_entra_inherits_codex_responses_for_gpt5_family(self, fake_azure_identity):
+        """GPT-5.x / o-series / codex models on Azure are Responses-API-only.
+        The runtime auto-upgrades api_mode regardless of auth mode — this is
+        the same behaviour as the static-key path (see
+        ``hermes_cli/models.py::azure_foundry_model_api_mode``)."""
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://my-resource.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+                "default": "gpt-5.4",
+            },
+        )
+        # GPT-5.x is upgraded to codex_responses — Entra path inherits.
+        assert runtime["api_mode"] == "codex_responses"
+        assert callable(runtime["api_key"])
+        assert runtime["auth_mode"] == "entra_id"
+
+    def test_entra_propagates_scope_only(self, fake_azure_identity):
+        """``model.entra.scope`` is the only Hermes-managed Azure SDK
+        setting. Identity selection (client ID, tenant, authority,
+        service principal secret, federated token file) flows through
+        standard ``AZURE_*`` env vars read by azure-identity directly.
+        Legacy ``model.entra.client_id`` / ``tenant_id`` / ``authority``
+        keys in config.yaml are silently ignored."""
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://my-resource.services.ai.azure.com/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+                "entra": {
+                    "scope": "https://custom.example/.default",
+                    "client_id": "client-uuid",
+                    # Legacy keys must not crash — they are accepted in
+                    # from_dict but never propagated to the SDK.
+                    "tenant_id": "legacy-tenant",
+                    "authority": "https://login.microsoftonline.us",
+                },
+            },
+        )
+        assert fake_azure_identity["scope"] == "https://custom.example/.default"
+        kw = fake_azure_identity["kwargs"]
+        assert "managed_identity_client_id" not in kw
+        assert "workload_identity_client_id" not in kw
+        assert "interactive_browser_tenant_id" not in kw
+        assert "authority" not in kw
+
+    def test_entra_default_scope_when_unset(self, fake_azure_identity):
+        """When ``model.entra.scope`` is not set, the runtime resolves
+        Microsoft's documented inference scope —
+        ``https://ai.azure.com/.default`` — regardless of whether the
+        endpoint is ``*.openai.azure.com`` or ``*.services.ai.azure.com``.
+        Both shapes use the SAME scope per Microsoft's docs; the
+        ``cognitiveservices.azure.com`` scope is the control-plane
+        audience and is rejected for inference by newer resources."""
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        from agent.azure_identity_adapter import SCOPE_AI_AZURE_DEFAULT
+        _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+            },
+        )
+        assert fake_azure_identity["scope"] == SCOPE_AI_AZURE_DEFAULT
+
+    def test_entra_scope_override_wins(self, fake_azure_identity):
+        """Users on sovereign clouds / unusual tenants can set
+        ``model.entra.scope`` to override the default."""
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+                "entra": {
+                    "scope": "https://cognitiveservices.azure.com/.default",
+                },
+            },
+        )
+        assert (
+            fake_azure_identity["scope"]
+            == "https://cognitiveservices.azure.com/.default"
+        )
+
+    def test_entra_with_anthropic_messages_is_supported(self, fake_azure_identity):
+        """Entra ID now works for both OpenAI-style and Anthropic-style
+        Azure Foundry endpoints. The runtime returns a callable
+        ``api_key``; downstream
+        :func:`agent.anthropic_adapter.build_anthropic_client` detects
+        the callable and installs an httpx event hook that mints a
+        fresh bearer JWT per request (the Anthropic SDK does not
+        accept callable auth_token natively)."""
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.services.ai.azure.com/anthropic",
+                "api_mode": "anthropic_messages",
+                "auth_mode": "entra_id",
+                "default": "claude-sonnet-4-5",
+            },
+        )
+        assert runtime["provider"] == "azure-foundry"
+        assert runtime["auth_mode"] == "entra_id"
+        assert runtime["api_mode"] == "anthropic_messages"
+        # Callable api_key — the anthropic_adapter detects this and
+        # plumbs through an httpx event hook.
+        assert callable(runtime["api_key"])
+        assert not isinstance(runtime["api_key"], str)
+
+    def test_entra_with_explicit_api_key_uses_string_escape_hatch(self, fake_azure_identity):
+        """Passing --api-key on the CLI overrides the entra path so a
+        user can debug a single request with a static key without
+        editing config.yaml."""
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+            },
+            explicit_api_key="explicit-string-key",
+        )
+        assert runtime["api_key"] == "explicit-string-key"
+        assert runtime["auth_mode"] == "api_key"
+        assert runtime["source"] == "explicit"
+
+    def test_entra_runtime_dict_keeps_only_scope_override(self, fake_azure_identity):
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "entra_id",
+                "entra": {
+                    "scope": "https://custom.example/.default",
+                    "client_id": "legacy-client",
+                },
+            },
+        )
+        assert runtime["entra"] == {"scope": "https://custom.example/.default"}
+
+
+# ---------------------------------------------------------------------------
+# _resolve_azure_foundry_runtime: legacy api_key branch (regression)
+# ---------------------------------------------------------------------------
+
+
+class TestResolveAzureFoundryRuntimeApiKey:
+    def test_default_auth_mode_uses_static_key(self, monkeypatch):
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-azure-static-key")
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+            },
+        )
+        assert runtime["api_key"] == "sk-azure-static-key"
+        assert runtime["auth_mode"] == "api_key"
+        assert "entra" not in runtime  # only present in entra mode
+
+    def test_explicit_auth_mode_api_key(self, monkeypatch):
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-static")
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.openai.azure.com/openai/v1",
+                "api_mode": "chat_completions",
+                "auth_mode": "api_key",
+            },
+        )
+        assert runtime["api_key"] == "sk-static"
+        assert runtime["auth_mode"] == "api_key"
+
+    def test_anthropic_messages_strips_v1_suffix(self, monkeypatch):
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "k")
+        runtime = _resolve_azure_foundry_runtime(
+            requested_provider="azure-foundry",
+            model_cfg={
+                "provider": "azure-foundry",
+                "base_url": "https://r.services.ai.azure.com/anthropic/v1",
+                "api_mode": "anthropic_messages",
+            },
+        )
+        assert runtime["base_url"] == "https://r.services.ai.azure.com/anthropic"
+
+    def test_missing_api_key_raises_with_entra_hint(self, monkeypatch):
+        from hermes_cli.auth import AuthError
+        from hermes_cli.runtime_provider import _resolve_azure_foundry_runtime
+        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
+        with pytest.raises(AuthError) as exc_info:
+            _resolve_azure_foundry_runtime(
+                requested_provider="azure-foundry",
+                model_cfg={
+                    "provider": "azure-foundry",
+                    "base_url": "https://r.openai.azure.com/openai/v1",
+                    "api_mode": "chat_completions",
+                },
+            )
+        msg = str(exc_info.value)
+        assert "AZURE_FOUNDRY_API_KEY" in msg
+        # Surface the Entra alternative so users discover the keyless path.
+        assert "entra_id" in msg
+
+
+# ---------------------------------------------------------------------------
+# _get_azure_foundry_auth_status (auth.py) — never mints a token
+# ---------------------------------------------------------------------------
+
+
+class TestAzureFoundryAuthStatus:
+    def test_entra_status_does_not_mint_token(self, monkeypatch, tmp_path):
+        """Structural check — must return logged_in=True based on
+        importable + config, never call get_bearer_token_provider."""
+        from hermes_cli import auth as _auth
+        # Force load_config to return our entra config.
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {
+                "model": {
+                    "provider": "azure-foundry",
+                    "auth_mode": "entra_id",
+                    "base_url": "https://r.openai.azure.com/openai/v1",
+                },
+            },
+        )
+        # Patch has_azure_identity_installed to True; do NOT patch the
+        # token provider — if the code path tried to mint, the SDK
+        # missing would raise.
+        monkeypatch.setattr(
+            "agent.azure_identity_adapter.has_azure_identity_installed",
+            lambda: True,
+        )
+        info = _auth._get_azure_foundry_auth_status()
+        assert info["logged_in"] is True
+        assert info["auth_mode"] == "entra_id"
+        assert info["azure_identity_installed"] is True
+        assert info["scope"].endswith("/.default")
+
+    def test_entra_status_reports_missing_package(self, monkeypatch):
+        from hermes_cli import auth as _auth
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {
+                "model": {
+                    "provider": "azure-foundry",
+                    "auth_mode": "entra_id",
+                    "base_url": "https://r.openai.azure.com/openai/v1",
+                },
+            },
+        )
+        monkeypatch.setattr(
+            "agent.azure_identity_adapter.has_azure_identity_installed",
+            lambda: False,
+        )
+        info = _auth._get_azure_foundry_auth_status()
+        assert info["logged_in"] is False
+        assert info["azure_identity_installed"] is False
+        assert "azure-identity" in info["hint"]
+
+    def test_api_key_status_uses_env_var(self, monkeypatch):
+        from hermes_cli import auth as _auth
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {
+                "model": {
+                    "provider": "azure-foundry",
+                    "auth_mode": "api_key",
+                    "base_url": "https://r.openai.azure.com/openai/v1",
+                },
+            },
+        )
+        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "sk-real-key-xxx")
+        info = _auth._get_azure_foundry_auth_status()
+        assert info["auth_mode"] == "api_key"
+        assert info["logged_in"] is True
+
+    def test_api_key_status_false_when_missing(self, monkeypatch):
+        from hermes_cli import auth as _auth
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {
+                "model": {
+                    "provider": "azure-foundry",
+                    "auth_mode": "api_key",
+                },
+            },
+        )
+        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
+        info = _auth._get_azure_foundry_auth_status()
+        assert info["logged_in"] is False
--- a/tests/run_agent/test_callable_api_key.py
+++ b/tests/run_agent/test_callable_api_key.py
@ -0,0 +1,375 @@
+"""Tests that callable api_key (Entra ID bearer provider) flows through
+the agent stack without coercion.
+
+The OpenAI Python SDK accepts ``api_key: str | None | Callable[[], str]``,
+and ``azure-identity``'s ``get_bearer_token_provider`` returns a callable.
+Hermes preserves the callable end-to-end so the SDK refreshes tokens
+transparently. This file pins the contract at the high-risk seams the
+rubber-duck audit identified.
+
+Covered:
+  * ``_create_openai_client`` passes a callable ``api_key`` straight
+    through to ``openai.OpenAI(...)``.
+  * ``_normalize_main_runtime`` preserves the callable so auxiliary
+    clients inherit Entra auth.
+  * ``_truncate_token`` (dashboard preview) renders ``"<entra-id-bearer>"``
+    instead of ``"<function ...>"`` and never invokes the callable.
+  * ``run_agent.py`` masked-banner path renders the Entra placeholder
+    and never tries to slice/len the callable.
+  * Serialization scrub: dumping a runtime dict via ``json.dumps`` with
+    a callable api_key raises (default behaviour) — guards against
+    silently leaking ``"<function ...>"`` strings into event logs.
+  * ``batch_runner`` strips the callable from the worker config dict
+    so multiprocessing.Pool can pickle the rest.
+"""
+
+from __future__ import annotations
+
+import json
+from types import SimpleNamespace
+from typing import cast
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# OpenAI SDK construction preserves the callable
+# ---------------------------------------------------------------------------
+
+
+class TestCreateOpenAIClientCallable:
+    """``AIAgent._create_openai_client`` must pass the callable through
+    to ``openai.OpenAI(...)`` without coercion."""
+
+    def test_callable_api_key_passed_to_openai_constructor(self, monkeypatch):
+        """Construct the smallest possible AIAgent surface and verify
+        the OpenAI client receives the callable unchanged."""
+        captured = {}
+
+        def fake_openai(**kwargs):
+            captured["kwargs"] = kwargs
+            return MagicMock(api_key=kwargs.get("api_key"))
+
+        # Patch the module-level OpenAI proxy used by ``_create_openai_client``.
+        monkeypatch.setattr("run_agent.OpenAI", fake_openai)
+
+        # Build a minimal stand-in for AIAgent so we can call the bound
+        # method directly without paying the full __init__ cost.
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        # Attributes consulted by _create_openai_client / _client_log_context.
+        agent.provider = "azure-foundry"
+        agent.model = "gpt-4o"
+        agent.base_url = "https://r.openai.azure.com/openai/v1"
+        agent._client_kwargs = {}
+
+        def token_provider():
+            return "fresh-jwt"
+
+        client_kwargs = {
+            "api_key": token_provider,
+            "base_url": "https://r.openai.azure.com/openai/v1",
+        }
+        client = agent._create_openai_client(client_kwargs, reason="test", shared=False)
+
+        # The OpenAI constructor must receive the *callable*, not a string.
+        forwarded = captured["kwargs"]["api_key"]
+        assert callable(forwarded)
+        assert not isinstance(forwarded, str)
+        assert forwarded is token_provider, (
+            "_create_openai_client must not wrap or coerce the callable"
+        )
+        assert client is not None
+
+
+# ---------------------------------------------------------------------------
+# Auxiliary runtime preserves the callable
+# ---------------------------------------------------------------------------
+
+
+class TestNormalizeMainRuntimePreservesCallable:
+    """The aux client orchestrator must keep the callable on the
+    runtime dict so compression / vision / embedding / title-gen clients
+    inherit Entra ID auth from the main agent."""
+
+    def test_callable_api_key_survives_normalization(self):
+        from agent.auxiliary_client import _normalize_main_runtime
+
+        def provider():
+            return "jwt"
+
+        normalized = _normalize_main_runtime({
+            "provider": "azure-foundry",
+            "model": "gpt-4o",
+            "base_url": "https://r.openai.azure.com/openai/v1",
+            "api_key": provider,
+            "api_mode": "chat_completions",
+            "auth_mode": "entra_id",
+        })
+        assert normalized["api_key"] is provider
+        assert normalized["auth_mode"] == "entra_id"
+
+    def test_string_api_key_still_works(self):
+        from agent.auxiliary_client import _normalize_main_runtime
+        normalized = _normalize_main_runtime({
+            "provider": "azure-foundry",
+            "api_key": "sk-static",
+        })
+        assert normalized["api_key"] == "sk-static"
+
+    def test_normalization_drops_empty_string_but_preserves_callable(self):
+        from agent.auxiliary_client import _normalize_main_runtime
+
+        def provider():
+            return ""
+
+        # Empty string fields are dropped, but a callable is preserved
+        # even if it would mint an empty token (we don't invoke during
+        # normalization).
+        normalized = _normalize_main_runtime({
+            "provider": "azure-foundry",
+            "api_key": provider,
+            "model": "",
+        })
+        assert normalized["api_key"] is provider
+        assert "model" not in normalized
+
+    def test_unknown_field_dropped(self):
+        from agent.auxiliary_client import _normalize_main_runtime, _MAIN_RUNTIME_FIELDS
+        normalized = _normalize_main_runtime({
+            "provider": "azure-foundry",
+            "api_key": "k",
+            "secret_field_we_dont_want": "leak",
+        })
+        assert "secret_field_we_dont_want" not in normalized
+        # auth_mode IS in the field allowlist (rubber-duck blocker fix).
+        assert "auth_mode" in _MAIN_RUNTIME_FIELDS
+
+
+# ---------------------------------------------------------------------------
+# Display surfaces never invoke the callable
+# ---------------------------------------------------------------------------
+
+
+class TestTruncateTokenCallable:
+    def test_callable_returns_placeholder(self):
+        """Dashboard preview must render the Entra placeholder, NOT
+        ``"<function ...>"``."""
+        from hermes_cli.web_server import _truncate_token
+
+        invoked = {"count": 0}
+
+        def provider():
+            invoked["count"] += 1
+            return "should-not-appear-in-ui"
+
+        token_provider = cast(str | None, provider)
+        rendered = _truncate_token(token_provider)
+        assert rendered == "<entra-id-bearer>"
+        assert invoked["count"] == 0
+
+    def test_string_jwt_still_truncated_to_signature_tail(self):
+        from hermes_cli.web_server import _truncate_token
+        # JWT shape: header.payload.signature → only signature tail shown.
+        out = _truncate_token("aaaa.bbbb.cccccccsig", visible=4)
+        assert out == "…csig"
+
+    def test_empty_returns_empty(self):
+        from hermes_cli.web_server import _truncate_token
+        assert _truncate_token(None) == ""
+        assert _truncate_token("") == ""
+
+
+# ---------------------------------------------------------------------------
+# Serialization scrub — runtime dicts with callables must NOT silently
+# JSON-encode as ``"<function ...>"`` (would leak garbage into events).
+# ---------------------------------------------------------------------------
+
+
+class TestRuntimeDictSerializationGuard:
+    def test_json_dumps_default_str_does_not_silently_stringify_callable(self):
+        """Sanity check: a runtime dict with a callable api_key must
+        either raise on plain ``json.dumps`` (good — fail loud) or be
+        sanitized BEFORE serialization. This test pins the loud-fail
+        behaviour so future changes that introduce
+        ``json.dumps(..., default=str)`` over a runtime dict are caught
+        by a regression here."""
+
+        def provider():
+            return "jwt"
+
+        runtime = {
+            "provider": "azure-foundry",
+            "api_key": provider,
+            "auth_mode": "entra_id",
+        }
+        # Plain json.dumps — must raise, not silently produce
+        # ``"<function provider at 0x...>"``.
+        with pytest.raises(TypeError):
+            json.dumps(runtime)
+
+
+# ---------------------------------------------------------------------------
+# batch_runner strips callables from the worker config dict
+# ---------------------------------------------------------------------------
+
+
+class TestBatchRunnerCallableHandling:
+    def test_callable_api_key_stripped_from_worker_config(self, capsys, monkeypatch, tmp_path):
+        """``BatchRunner._run_batches`` (or the equivalent code path)
+        must replace a callable api_key with None before pickling the
+        worker config dict — otherwise multiprocessing.Pool fails."""
+        # We can't easily run BatchRunner end-to-end in a unit test
+        # (it spawns subprocesses), but we CAN inline the same logic:
+        # the production code uses ``callable(self.api_key) and not
+        # isinstance(self.api_key, str)`` to gate the substitution.
+        # Re-execute the same predicate here as a contract guard.
+
+        def provider():
+            return "jwt"
+
+        api_key = provider
+        worker_api_key = None if (callable(api_key) and not isinstance(api_key, str)) else api_key
+        assert worker_api_key is None, (
+            "BatchRunner must replace callable api_key with None so "
+            "multiprocessing.Pool can pickle the worker config"
+        )
+
+        # And a string passes through unchanged.
+        api_key_str = "sk-static"
+        worker_api_key_str = None if (callable(api_key_str) and not isinstance(api_key_str, str)) else api_key_str
+        assert worker_api_key_str == "sk-static"
+
+    def test_batch_runner_source_uses_the_correct_predicate(self):
+        """Pin the predicate string in batch_runner so refactors that
+        change it are caught here. Reading the source rather than
+        importing avoids spinning up the full BatchRunner."""
+        from pathlib import Path
+        src = (Path(__file__).resolve().parent.parent.parent
+               / "batch_runner.py").read_text()
+        assert "callable(self.api_key) and not isinstance(self.api_key, str)" in src, (
+            "BatchRunner.api_key callable check changed — update test or "
+            "verify the new predicate still routes Entra token providers "
+            "to the worker-rebuild path."
+        )
+
+
+# ---------------------------------------------------------------------------
+# Inline masked-banner / display sites (callable-aware)
+# ---------------------------------------------------------------------------
+
+
+class TestCliEnsureRuntimeCredentialsCallable:
+    """Regression: ``cli.py:_ensure_runtime_credentials`` previously
+    treated a callable ``api_key`` as "not a string" and overwrote it
+    with the ``"no-key-required"`` placeholder, which then got sent as
+    ``Authorization: Bearer no-key-required`` and rejected by Azure
+    with a 401. This is the most subtle of the callable-api_key audit
+    sites — gated by ``not isinstance(api_key, str)`` rather than the
+    cleaner ``callable(...)`` check used elsewhere.
+
+    We verify the source pattern (rather than spinning up a real
+    ``HermesCLI`` instance) — the predicate change is the load-bearing
+    fix and is invariant under the surrounding orchestration code."""
+
+    def test_callable_predicate_present_in_cli_runtime_validation(self):
+        from pathlib import Path
+        src = (Path(__file__).resolve().parent.parent.parent
+               / "cli.py").read_text()
+        # The fix introduces ``_is_callable_provider`` which gates the
+        # string-only check so callable token providers survive.
+        assert "_is_callable_provider = callable(api_key)" in src, (
+            "cli.py:_ensure_runtime_credentials must preserve a callable "
+            "api_key (Entra ID bearer provider). Without the guard, the "
+            "callable is stringified to 'no-key-required' and Azure 401s."
+        )
+
+
+class TestInlinedDisplayMasks:
+    """The masked-credential display sites are now inlined per-site (no
+    shared helper). Each site uses the ``is_token_provider`` predicate
+    to short-circuit on callables and print a static
+    ``"Microsoft Entra ID"`` label, then falls through to its own
+    context-appropriate string mask. This replaces a unified helper
+    that would have forced one mask shape across sites with legitimately
+    different display needs (banner vs diagnostic vs UI vs preview)."""
+
+    def test_run_agent_banner_uses_is_token_provider_guard(self):
+        """The masked-banner sites live in ``agent/agent_init.py``
+        (the ``__init__`` body was extracted into ``init_agent`` after
+        this feature was first written). Both the OpenAI and Anthropic
+        client init paths must guard their banner prints with
+        ``is_token_provider`` so a callable Entra ID provider doesn't
+        crash ``len(api_key)``."""
+        from pathlib import Path
+        src = (Path(__file__).resolve().parent.parent.parent
+               / "agent" / "agent_init.py").read_text()
+        assert src.count("is_token_provider(") >= 2, (
+            "agent/agent_init.py must guard BOTH masked-banner paths "
+            "(chat_completions and anthropic_messages) with "
+            "is_token_provider()."
+        )
+        assert src.count('"🔑 Using credentials: Microsoft Entra ID"') >= 2, (
+            "agent/agent_init.py banner blocks should print a static "
+            "'Microsoft Entra ID' label for callable api_keys — no "
+            "placeholder plumbing, no describe-mask fallback."
+        )
+
+    def test_cli_show_config_handles_callable(self):
+        """``cli.HermesCLI.show_config`` previously did
+        ``self.api_key[-4:]`` / ``len(self.api_key)`` which crashes on
+        callable Entra ID providers. The inlined version uses
+        ``is_token_provider`` and prints the same static label as the
+        run_agent banners."""
+        from pathlib import Path
+        src = (Path(__file__).resolve().parent.parent.parent
+               / "cli.py").read_text()
+        assert "is_token_provider(self.api_key)" in src, (
+            "cli.HermesCLI.show_config must guard self.api_key via "
+            "is_token_provider so callable Entra ID providers don't "
+            "crash /config."
+        )
+        assert '"Microsoft Entra ID"' in src, (
+            "cli.HermesCLI.show_config must print the static "
+            "'Microsoft Entra ID' label (matching run_agent banners) "
+            "instead of attempting to slice the callable."
+        )
+
+    def test_mask_api_key_for_logs_handles_callable(self):
+        """``run_agent._mask_api_key_for_logs`` is called from the
+        request-dump JSON path. For Entra users, ``self.client.api_key``
+        is the SDK's empty string (callable stashed privately) — but
+        defensively the helper must also accept a callable directly
+        and return the placeholder rather than crashing on
+        ``len(callable)``."""
+        from pathlib import Path
+        src = (Path(__file__).resolve().parent.parent.parent
+               / "run_agent.py").read_text()
+        # The function now starts with a callable check.
+        assert (
+            "if callable(key) and not isinstance(key, str):" in src
+            and '"<entra-id-bearer>"' in src
+        ), (
+            "run_agent._mask_api_key_for_logs must short-circuit for "
+            "callable api_keys to avoid len(callable) crashes in "
+            "request-dump paths."
+        )
+
+    def test_anthropic_401_diagnostic_handles_callable(self):
+        """The Anthropic 401 diagnostic path lives in
+        ``agent/conversation_loop.py`` (the ``run_conversation`` body
+        was extracted after this feature was first written). It used
+        to do ``key[:12]`` on ``self._anthropic_api_key``. For Entra ID +
+        Anthropic-style mode that's a callable; slicing crashes."""
+        from pathlib import Path
+        src = (Path(__file__).resolve().parent.parent.parent
+               / "agent" / "conversation_loop.py").read_text()
+        # The Anthropic 401 block now branches on is_token_provider
+        # before slicing the key.
+        assert "Microsoft Entra ID (httpx event hook)" in src, (
+            "agent/conversation_loop.py Anthropic 401 diagnostic must "
+            "surface a Microsoft Entra ID branch before slicing the "
+            "key prefix."
+        )
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@ -81,6 +81,11 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
    "provider.anthropic": ("anthropic==0.87.0",),  # CVE-2026-34450, CVE-2026-34452
    # AWS Bedrock provider
    "provider.bedrock": ("boto3==1.42.89",),
+    # Microsoft Foundry — Entra ID auth (managed identity, workload identity,
+    # service principal, az login, VS Code, azd, PowerShell). Only loaded
+    # when model.auth_mode=entra_id is selected; key-based azure-foundry
+    # users never pay this import.
+    "provider.azure_identity": ("azure-identity==1.25.3",),

    # ─── Web search backends ───────────────────────────────────────────────
    "search.exa": ("exa-py==2.10.2",),
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -1087,7 +1087,16 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        current_provider = str(runtime.get("provider", "") or "")
        current_model = _resolve_model()
        current_base_url = str(runtime.get("base_url", "") or "")
-        current_api_key = str(runtime.get("api_key", "") or "")
+        # Preserve a callable api_key (Azure Foundry Entra ID bearer
+        # provider) unchanged — ``str(...)`` would produce
+        # ``"<function ...>"`` and poison downstream switch_model
+        # validation. Match the agent-present branch's behavior at the
+        # top of this block.
+        _runtime_key = runtime.get("api_key", "")
+        if callable(_runtime_key) and not isinstance(_runtime_key, str):
+            current_api_key = _runtime_key
+        else:
+            current_api_key = str(_runtime_key or "")

    # Load user-defined providers so switch_model can resolve named custom
    # endpoints (e.g. "ollama-launch") and validate against saved model lists.
--- a/uv.lock
+++ b/uv.lock
@ -500,6 +500,35 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/41/0a/0896b829a39b5669a2d811e1a79598de661693685cd62b31f11d0c18e65b/av-17.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dba98603fc4665b4f750de86fbaf6c0cfaece970671a9b529e0e3d1711e8367e", size = 22071058, upload-time = "2026-03-14T14:38:43.663Z" },
 ]

+[[package]]
+name = "azure-core"
+version = "1.41.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a6/f3/b416179e408990df5db0d516283022dde0f5d0111d98c1a848e41853e81c/azure_core-1.41.0.tar.gz", hash = "sha256:f46ff5dfcd230f25cf1c19e8a34b8dc08a337b2503e268bb600a16c00db8ad5a", size = 381042, upload-time = "2026-05-07T23:30:54.302Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5b/db/325c6d7312d2200251c52323878281045aaffcb5586612296484e4280eaa/azure_core-1.41.0-py3-none-any.whl", hash = "sha256:522b4011e8180b1a3dcd2024396a4e7fe9ac37fb8597db47163d230b5efe892d", size = 220920, upload-time = "2026-05-07T23:30:56.357Z" },
+]
+
+[[package]]
+name = "azure-identity"
+version = "1.25.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "azure-core" },
+    { name = "cryptography" },
+    { name = "msal" },
+    { name = "msal-extensions" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c5/0e/3a63efb48aa4a5ae2cfca61ee152fbcb668092134d3eb8bfda472dd5c617/azure_identity-1.25.3.tar.gz", hash = "sha256:ab23c0d63015f50b630ef6c6cf395e7262f439ce06e5d07a64e874c724f8d9e6", size = 286304, upload-time = "2026-03-13T01:12:20.892Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/49/9a/417b3a533e01953a7c618884df2cb05a71e7b68bdbce4fbdb62349d2a2e8/azure_identity-1.25.3-py3-none-any.whl", hash = "sha256:f4d0b956a8146f30333e071374171f3cfa7bdb8073adb8c3814b65567aa7447c", size = 192138, upload-time = "2026-03-13T01:12:22.951Z" },
+]
+
 [[package]]
 name = "base58"
 version = "2.1.1"
@ -1618,6 +1647,9 @@ all = [
 anthropic = [
    { name = "anthropic" },
 ]
+azure-identity = [
+    { name = "azure-identity" },
+]
 bedrock = [
    { name = "boto3" },
 ]
@ -1767,6 +1799,7 @@ requires-dist = [
    { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" },
    { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" },
    { name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" },
+    { name = "azure-identity", marker = "extra == 'azure-identity'", specifier = "==1.25.3" },
    { name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" },
    { name = "brotlicffi", marker = "extra == 'messaging'", specifier = "==1.2.0.1" },
    { name = "croniter", specifier = "==6.0.0" },
@ -1855,7 +1888,7 @@ requires-dist = [
    { name = "vercel", marker = "extra == 'vercel'", specifier = "==0.5.7" },
    { name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" },
 ]
-provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]
+provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "azure-identity", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"]

 [[package]]
 name = "hf-xet"
@ -2421,6 +2454,32 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
 ]

+[[package]]
+name = "msal"
+version = "1.36.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+    { name = "pyjwt", extra = ["crypto"] },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/de/cb/b02b0f748ac668922364ccb3c3bff5b71628a05f5adfec2ba2a5c3031483/msal-1.36.0.tar.gz", hash = "sha256:3f6a4af2b036b476a4215111c4297b4e6e236ed186cd804faefba23e4990978b", size = 174217, upload-time = "2026-04-09T10:20:33.525Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/d3/414d1f0a5f6f4fe5313c2b002c54e78a3332970feb3f5fed14237aa17064/msal-1.36.0-py3-none-any.whl", hash = "sha256:36ecac30e2ff4322d956029aabce3c82301c29f0acb1ad89b94edcabb0e58ec4", size = 121547, upload-time = "2026-04-09T10:20:32.336Z" },
+]
+
+[[package]]
+name = "msal-extensions"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "msal" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" },
+]
+
 [[package]]
 name = "msgpack"
 version = "1.1.2"
--- a/website/docs/guides/azure-foundry.md
+++ b/website/docs/guides/azure-foundry.md
@ -1,23 +1,23 @@
 ---
 sidebar_position: 15
-title: "Azure AI Foundry"
-description: "Use Hermes Agent with Azure AI Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models"
+title: "Microsoft Foundry"
+description: "Use Hermes Agent with Microsoft Foundry — OpenAI-style and Anthropic-style endpoints, auto-detection of transport and deployed models"
 ---

-# Azure AI Foundry
+# Microsoft Foundry

-Hermes Agent supports Azure AI Foundry (and Azure OpenAI) as a first-class provider. A single Azure resource can host models with two different wire formats:
+Hermes Agent's `azure-foundry` provider supports Microsoft Foundry (formerly Azure AI Foundry) and Azure OpenAI. A single Foundry resource can host models with two different wire formats:

 - **OpenAI-style** — `POST /v1/chat/completions` on endpoints like `https://<resource>.openai.azure.com/openai/v1`. Used for GPT-4.x, GPT-5.x, Llama, Mistral, and most open-weight models.
- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://<resource>.services.ai.azure.com/anthropic`. Used when Azure Foundry serves Claude models via the Anthropic Messages API format.
+- **Anthropic-style** — `POST /v1/messages` on endpoints like `https://<resource>.services.ai.azure.com/anthropic`. Used when Microsoft Foundry serves Claude models via the Anthropic Messages API format.

 The setup wizard probes your endpoint and auto-detects which transport it uses, which deployments are available, and each model's context length.

 ## Prerequisites

- An Azure AI Foundry or Azure OpenAI resource with at least one deployment
- An API key for that resource (available in the Azure Portal under "Keys and Endpoint")
+- A Microsoft Foundry or Azure OpenAI resource with at least one deployment
 - The deployment's endpoint URL
+- **Either** an API key (from the Azure Portal under "Keys and Endpoint") **or** the **Azure AI User** RBAC role on the Foundry resource if you plan to use Microsoft Entra ID (the keyless path Microsoft recommends). Some tenants may show the role as **Foundry User** during Microsoft's rename rollout.

 ## Quick Start

@ -25,20 +25,172 @@ The setup wizard probes your endpoint and auto-detects which transport it uses,
 hermes model
 # → Select "Azure Foundry"
 # → Enter your endpoint URL
-# → Enter your API key
+# → Choose Authentication:
+#     1. API key
+#     2. Microsoft Entra ID  (managed identity / workload identity / az login)
+# → (Entra) Hermes probes DefaultAzureCredential; on success it never asks for a key
+# → (API key) Enter your API key
 # Hermes probes the endpoint and auto-detects transport + models
 # → Pick a model from the list (or type a deployment name manually)
 ```

 The wizard will:

-1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Azure Foundry Claude routes.
+1. **Sniff the URL path** — URLs ending in `/anthropic` are recognised as Microsoft Foundry Claude routes.
 2. **Probe `GET <base>/models`** — if the endpoint returns an OpenAI-shaped model list, Hermes switches to `chat_completions` and prefills a picker with the returned deployment IDs.
 3. **Probe Anthropic Messages shape** — fallback for endpoints that do not expose `/models` but do accept the Anthropic Messages format.
 4. **Fall back to manual entry** — private/gated endpoints that reject every probe still work; you pick the API mode and type a deployment name by hand.

 Context length for the chosen model is resolved via Hermes' standard metadata chain (`models.dev`, provider metadata, and hardcoded family fallbacks) and stored in `config.yaml` so the model can size its own context window correctly.

+## Microsoft Entra ID (keyless, RBAC) — recommended
+
+Microsoft recommends [keyless authentication with Microsoft Entra ID](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) for production Foundry workloads. Hermes supports Entra ID for **both** API surfaces:
+
+- **OpenAI-style** (`api_mode: chat_completions` / `codex_responses`) — GPT-4/5, Llama, Mistral, DeepSeek, etc.
+- **Anthropic-style** (`api_mode: anthropic_messages`) — Claude models on Microsoft Foundry.
+
+Foundry's RBAC is per-resource (`Azure AI User` grants both surfaces; some tenants may display `Foundry User`) and Microsoft documents the same inference scope (`https://ai.azure.com/.default`) for both. Under the hood:
+
+- OpenAI-style uses the OpenAI Python SDK's native callable `api_key=` contract — the SDK mints a fresh JWT per request automatically.
+- Anthropic-style uses an `httpx.Client` with a request event hook installed by `agent.azure_identity_adapter.build_bearer_http_client`, because the Anthropic SDK does not accept callable `auth_token` natively. The hook rewrites `Authorization: Bearer <fresh-jwt>` per outbound request. Same Microsoft RBAC, same Foundry scope — the SDK contract is the only difference.
+
+### Why use Entra ID?
+
+- No long-lived API keys to rotate or revoke.
+- RBAC-driven access — grant or remove `Azure AI User` on the Foundry resource, no config rewrite needed.
+- Access and audit logs are segmented by assignee instead of all callers sharing one static key.
+- Single auth surface for Azure VMs, AKS pods, App Service, Functions, Container Apps, and Foundry Agent Service via managed identity.
+- Workload identity and service-principal flows for CI/CD pipelines.
+
+### One-time setup (Azure side)
+
+1. In the Azure Portal, open your Foundry resource → **Access control (IAM)** → **Add → Add role assignment**.
+2. Pick the **Azure AI User** role (or **Foundry User** if your tenant has the renamed role).
+3. Assign it to:
+   - **Your user account** for local development with `az login`.
+   - **A managed identity or workload identity** for Azure-hosted compute (recommended for production).
+   - **A Foundry Agent Service hosted agent's agent identity** when Hermes runs inside a hosted agent.
+   - **A service principal** for CI/CD pipelines when workload identity is not available.
+4. Wait ~5 minutes for the role to propagate.
+
+Azure CLI equivalent:
+
+```bash
+az role assignment create \
+  --assignee <principal-or-agent-identity-client-id> \
+  --role "Azure AI User" \
+  --scope <foundry-resource-id>
+```
+
+### One-time setup (Hermes side)
+
+```bash
+hermes model
+# → Select "Azure Foundry"
+# → Enter your endpoint URL
+# → Authentication: 2 (Microsoft Entra ID)
+# → (optional) user-assigned managed identity client ID
+# → (optional) Azure tenant ID
+# → Hermes probes DefaultAzureCredential() and reports which inner
+#    credential succeeded (e.g. AzureCliCredential, ManagedIdentityCredential)
+```
+
+The wizard runs a bounded preflight probe (10 s timeout). On failure it offers to "save anyway, validate later" — useful when configuring on a machine that doesn't yet have credentials but will at runtime (e.g. preparing config for a managed-identity deployment).
+
+`azure-identity` is installed automatically on first use via Hermes' lazy-install path. To pre-install:
+
+```bash
+pip install azure-identity
+```
+
+### Configuration written to `config.yaml`
+
+```yaml
+model:
+  provider: azure-foundry
+  base_url: https://my-resource.openai.azure.com/openai/v1
+  api_mode: chat_completions
+  auth_mode: entra_id
+  default: gpt-4o
+  context_length: 128000
+  entra:
+    scope: https://ai.azure.com/.default        # only when overriding the default
+```
+
+Hermes only manages one Entra-specific knob in `config.yaml`:
+
+- **`scope`** — the OAuth resource scope. Defaults to Microsoft's documented inference scope (`https://ai.azure.com/.default`). Override only if your resource was provisioned against a non-standard audience.
+
+Everything else (tenant, service principal secret, federated token file, sovereign cloud authority, broker preferences) is read by `azure-identity` directly from the standard `AZURE_*` environment variables — see the [credential resolution order](#credential-resolution-order) below. Set those in `~/.hermes/.env` or your deployment environment, exactly as Microsoft's SDK reference describes.
+
+No secrets land in `~/.hermes/.env` for Entra mode — `azure-identity` caches tokens in-process (and where available, in your OS keychain / `~/.IdentityService`).
+
+### Credential resolution order
+
+`azure-identity`'s `DefaultAzureCredential` walks this chain on each token request, stopping at the first credential that returns a token:
+
+1. **Environment credential** — `AZURE_TENANT_ID` + `AZURE_CLIENT_ID` + `AZURE_CLIENT_SECRET` (or `AZURE_CLIENT_CERTIFICATE_PATH` / `AZURE_FEDERATED_TOKEN_FILE`).
+2. **Workload Identity** — `AZURE_FEDERATED_TOKEN_FILE` (AKS federated tokens / OIDC).
+3. **Managed Identity** — IMDS endpoint (`169.254.169.254`) for virtual machines; `IDENTITY_ENDPOINT` for App Service / Functions / Container Apps. Foundry Agent Service hosted agents use the hosted agent's agent identity.
+4. **Visual Studio Code** — Azure account extension.
+5. **Azure CLI** — `az login` session.
+6. **Azure Developer CLI** — `azd auth login`.
+7. **Azure PowerShell** — `Connect-AzAccount`.
+8. **Broker** (Windows / WSL only) — Web Account Manager.
+
+Interactive browser credential is excluded by default for unattended Hermes runs; use Azure CLI, Azure Developer CLI, managed identity, workload identity, or service principal credentials instead.
+
+### Deployment patterns
+
+**Local development:**
+```bash
+az login
+hermes model   # pick Azure Foundry → Entra ID
+hermes         # uses your az login token
+```
+
+**Azure VM / Functions / App Service / Container Apps (system-assigned managed identity):**
+1. Enable system-assigned identity on the compute resource.
+2. Grant the identity `Azure AI User` (or `Foundry User`) on the Foundry resource.
+3. Set `model.auth_mode: entra_id` in config.yaml — no env vars needed.
+
+**Azure VM / Functions / App Service / Container Apps (user-assigned managed identity):**
+- Set `AZURE_CLIENT_ID` to the user-assigned identity's client ID so `DefaultAzureCredential` picks the right one.
+
+**Foundry Agent Service hosted agent:**
+- Create the hosted agent and grant that agent's identity `Azure AI User` (or `Foundry User`) on the Foundry resource. Hermes uses `ManagedIdentityCredential` from inside the hosted agent; role assignment belongs on the agent identity, not just the parent project or your user.
+
+**AKS Workload Identity (replaces AAD Pod Identity):**
+- Annotate the pod's service account with the workload identity client ID.
+- The pod's federated token file is auto-detected via `AZURE_FEDERATED_TOKEN_FILE`.
+- `model.auth_mode: entra_id` works without further config changes.
+
+**Service principal in CI:**
+- Set `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET` in the runner env.
+
+**Sovereign clouds (Government, China):**
+- Export `AZURE_AUTHORITY_HOST` (e.g. `https://login.microsoftonline.us` for Azure Government, `https://login.partner.microsoftonline.cn` for Azure China). `azure-identity` reads it directly.
+
+### Health checks
+
+`hermes doctor` runs a 10 s probe against `DefaultAzureCredential` when `model.auth_mode: entra_id`, reporting which inner credential won (env vars present, managed identity endpoint reachable, etc.).
+
+`hermes auth` shows a structured status block:
+
+```
+azure-foundry (Microsoft Entra ID):
+  Endpoint: https://my-resource.openai.azure.com/openai/v1
+  Scope: https://ai.azure.com/.default
+  Status: configured; live token probe is skipped here
+```
+
+### Limitations
+
+- **Anthropic-style endpoints use an httpx event hook.** The Anthropic Python SDK does not accept a callable `auth_token` natively (≤ 0.86.0). Hermes installs a request event hook on a custom `httpx.Client` that mints a fresh JWT per outbound request and rewrites `Authorization: Bearer <jwt>`. This is functionally equivalent to the OpenAI SDK's native `Callable[[], str]` contract but adds one indirection layer. If the Anthropic SDK adds first-class callable-auth support in a future release, Hermes will switch to it transparently.
+- **Batch jobs and `multiprocessing.Pool`.** The Entra token provider is a closure that cannot be pickled across process boundaries. `batch_runner.py` automatically drops the callable from the worker config and lets each worker process rebuild its own provider from `config.yaml` — no user action required, but each worker pays one chain walk at startup.
+- **No bearer JWT persistence in `auth.json`.** Hermes does not duplicate `azure-identity`'s internal token cache; cold starts walk the credential chain on first inference.
+
 ## Configuration (written to `config.yaml`)

 After running the wizard you'll see something like this:
@ -72,11 +224,11 @@ model:

 Important behaviour:

- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Azure Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`.
+- **GPT-5.x, codex, and o-series auto-route to the Responses API.** Microsoft Foundry deploys GPT-5 / codex / o1 / o3 / o4 models as Responses-API-only — calling `/chat/completions` against them returns `400 "The requested operation is unsupported."`. Hermes detects these model families by name and upgrades `api_mode` to `codex_responses` transparently, even when `config.yaml` still reads `api_mode: chat_completions`. GPT-4, GPT-4o, Llama, Mistral, and other deployments stay on `/chat/completions`.
 - **`max_completion_tokens` is used automatically.** Azure OpenAI (like direct OpenAI) requires `max_completion_tokens` for gpt-4o, o-series, and gpt-5.x models. Hermes sends the right parameter based on the endpoint.
 - **Pre-v1 endpoints that require `api-version`.** If you have a legacy base URL like `https://<resource>.openai.azure.com/openai?api-version=2025-04-01-preview`, Hermes extracts the query string and forwards it via `default_query` on every request (the OpenAI SDK otherwise drops it when joining paths).

-## Anthropic-style endpoints (Claude via Azure Foundry)
+## Anthropic-style endpoints (Claude via Microsoft Foundry)

 For Claude deployments, use the Anthropic-style route:

@ -96,7 +248,7 @@ Important behaviour:

 ## Alternative: `provider: anthropic` + Azure base URL

-If you already have `provider: anthropic` configured and just want to point it at Azure AI Foundry for Claude, you can skip the `azure-foundry` provider entirely:
+If you already have `provider: anthropic` configured and just want to point it at Microsoft Foundry for Claude, you can skip the `azure-foundry` provider entirely:

 ```yaml
 model:
@ -117,7 +269,7 @@ Azure does **not** expose a pure-API-key endpoint to list your *deployed* model
 What Hermes can do:

 - Azure OpenAI v1 endpoints (`<resource>.openai.azure.com/openai/v1`) expose `GET /models` with the resource's **available** model catalog. Hermes uses this list to prefill the model picker.
- Azure Foundry `/anthropic` routes: detected via URL path, model name entered manually.
+- Microsoft Foundry `/anthropic` routes: detected via URL path, model name entered manually.
 - Private / firewalled endpoints: manual entry with a friendly "couldn't probe" message.

 You can always type a deployment name directly — Hermes does not validate against the returned list.
@ -126,9 +278,18 @@ You can always type a deployment name directly — Hermes does not validate agai

 | Variable | Purpose |
 |----------|---------|
-| `AZURE_FOUNDRY_API_KEY` | Primary API key for Azure AI Foundry / Azure OpenAI |
+| `AZURE_FOUNDRY_API_KEY` | Primary API key for Microsoft Foundry / Azure OpenAI (api_key mode) |
 | `AZURE_FOUNDRY_BASE_URL` | Endpoint URL (set via `hermes model`; env var is used as a fallback) |
 | `AZURE_ANTHROPIC_KEY` | Used by `provider: anthropic` + Azure base URL (alternative to `ANTHROPIC_API_KEY`) |
+| `AZURE_TENANT_ID` | Entra ID tenant for service-principal flows |
+| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) |
+| `AZURE_CLIENT_SECRET` | Service principal secret |
+| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal cert (alternative to secret) |
+| `AZURE_FEDERATED_TOKEN_FILE` | Workload Identity federated token path (AKS) |
+| `AZURE_AUTHORITY_HOST` | Sovereign cloud authority host override |
+| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead |
+
+The Azure SDK reads the `AZURE_*` env vars directly. Hermes never inspects them other than to report which sources are present in `hermes doctor` output.

 ## Troubleshooting

@ -150,8 +311,21 @@ model:
  api_mode: anthropic_messages   # or chat_completions
 ```

+**Entra ID: "credential chain exhausted" or 401 Unauthorized after switching to `auth_mode: entra_id`.**
+- Run `az login` to refresh your developer session (the cached token may have expired).
+- Verify the `Azure AI User` (or `Foundry User`) role assignment took effect: `az role assignment list --assignee <user-or-identity-id>` should list it on your Foundry resource. Role propagation can take up to 5 minutes.
+- For user-assigned managed identities, double-check `AZURE_CLIENT_ID` matches the identity attached to the compute resource.
+- Run `hermes doctor` — the Azure Entra probe reports whether token acquisition succeeded and includes a remediation hint.
+
+**Entra ID: wizard preflight hangs or times out.**
+The 10 s preflight is a soft check. Choose "Save anyway and validate later" and run `hermes doctor` after deploying to the target environment. Common causes include an unreachable token service or stale local login state — prefer workload identity in CI, set `AZURE_TENANT_ID`+`AZURE_CLIENT_ID`+`AZURE_CLIENT_SECRET` when using a service principal, or run `az login` for local development.
+
+**401 on Anthropic-style endpoint with Entra ID.**
+Verify the same `Azure AI User` (or `Foundry User`) role is assigned on the Foundry resource (it covers both `/openai/v1` and `/anthropic` paths). If the OpenAI-style probe works during the wizard but `claude-*` requests fail at runtime, the most common cause is a stale `model.entra.scope` left over from an earlier wizard run — delete the `entra.scope` line from `config.yaml` so the runtime falls back to the default `https://ai.azure.com/.default` scope.
+
 ## Related

 - [Environment variables](/docs/reference/environment-variables)
 - [Configuration](/docs/user-guide/configuration)
 - [AWS Bedrock](/docs/guides/aws-bedrock) — the other major cloud provider integration
+- [Microsoft: Configure Entra ID for Foundry](https://learn.microsoft.com/azure/ai-foundry/foundry-models/how-to/configure-entra-id) — upstream documentation for the keyless path
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@ -50,9 +50,16 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 | `XIAOMI_BASE_URL` | Override Xiaomi MiMo base URL (default: `https://api.xiaomimimo.com/v1`) |
 | `TOKENHUB_API_KEY` | Tencent TokenHub API key ([tokenhub.tencentmaas.com](https://tokenhub.tencentmaas.com)) |
 | `TOKENHUB_BASE_URL` | Override Tencent TokenHub base URL (default: `https://tokenhub.tencentmaas.com/v1`) |
-| `AZURE_FOUNDRY_API_KEY` | Azure AI Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)) |
-| `AZURE_FOUNDRY_BASE_URL` | Azure AI Foundry endpoint URL (e.g. `https://<resource>.openai.azure.com/openai/v1` for OpenAI-style, or `https://<resource>.services.ai.azure.com/anthropic` for Anthropic-style) |
-| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at an Azure Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) |
+| `AZURE_FOUNDRY_API_KEY` | Microsoft Foundry / Azure OpenAI API key ([ai.azure.com](https://ai.azure.com/)). Not needed when `model.auth_mode: entra_id` |
+| `AZURE_FOUNDRY_BASE_URL` | Microsoft Foundry endpoint URL (e.g. `https://<resource>.openai.azure.com/openai/v1` for OpenAI-style, or `https://<resource>.services.ai.azure.com/anthropic` for Anthropic-style) |
+| `AZURE_ANTHROPIC_KEY` | Azure Anthropic API key for `provider: anthropic` + `base_url` pointing at a Microsoft Foundry Claude deployment (alternative to `ANTHROPIC_API_KEY` when both Anthropic and Azure Anthropic are configured) |
+| `AZURE_TENANT_ID` | Entra ID tenant ID (service-principal flows; honored by `azure-identity` when `model.auth_mode: entra_id`) |
+| `AZURE_CLIENT_ID` | Entra ID client ID (service principal, workload identity, or user-assigned managed identity) |
+| `AZURE_CLIENT_SECRET` | Service principal secret used by `EnvironmentCredential` |
+| `AZURE_CLIENT_CERTIFICATE_PATH` | Service principal certificate (alternative to `AZURE_CLIENT_SECRET`) |
+| `AZURE_FEDERATED_TOKEN_FILE` | Federated token file path for AKS Workload Identity / OIDC flows |
+| `AZURE_AUTHORITY_HOST` | Sovereign-cloud authority override (e.g. `https://login.microsoftonline.us` for Azure Government). See [Azure Foundry guide](/docs/guides/azure-foundry#sovereign-clouds-government-china) |
+| `IDENTITY_ENDPOINT` / `MSI_ENDPOINT` | Managed Identity endpoint for App Service, Functions, and Container Apps; VMs usually use IMDS instead and do not set these |
 | `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) |
 | `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) |
 | `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) |
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@ -81,7 +81,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
 | Kimi / Moonshot (China) | `kimi-coding-cn` | `KIMI_CN_API_KEY` |
 | StepFun | `stepfun` | `STEPFUN_API_KEY` |
 | Tencent TokenHub | `tencent-tokenhub` | `TOKENHUB_API_KEY` |
-| Azure AI Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
+| Microsoft Foundry | `azure-foundry` | `AZURE_FOUNDRY_API_KEY` + `AZURE_FOUNDRY_BASE_URL` |
 | LM Studio (local) | `lmstudio` | `LM_API_KEY` (or none for local) + `LM_BASE_URL` |
 | Hugging Face | `huggingface` | `HF_TOKEN` |
 | Custom endpoint | `custom` | `base_url` + `key_env` (see below) |