mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-08 08:11:38 +00:00
feat(azure-foundry): add Microsoft Entra ID auth
Use azure-identity DefaultAzureCredential for keyless Foundry auth. Preserve refreshable callable credentials through OpenAI and Anthropic client paths. Add setup, doctor, auth status, docs, and tests for Entra auth. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
457fa913b8
commit
9df9816dab
38 changed files with 3772 additions and 122 deletions
375
tests/run_agent/test_callable_api_key.py
Normal file
375
tests/run_agent/test_callable_api_key.py
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
"""Tests that callable api_key (Entra ID bearer provider) flows through
|
||||
the agent stack without coercion.
|
||||
|
||||
The OpenAI Python SDK accepts ``api_key: str | None | Callable[[], str]``,
|
||||
and ``azure-identity``'s ``get_bearer_token_provider`` returns a callable.
|
||||
Hermes preserves the callable end-to-end so the SDK refreshes tokens
|
||||
transparently. This file pins the contract at the high-risk seams the
|
||||
rubber-duck audit identified.
|
||||
|
||||
Covered:
|
||||
* ``_create_openai_client`` passes a callable ``api_key`` straight
|
||||
through to ``openai.OpenAI(...)``.
|
||||
* ``_normalize_main_runtime`` preserves the callable so auxiliary
|
||||
clients inherit Entra auth.
|
||||
* ``_truncate_token`` (dashboard preview) renders ``"<entra-id-bearer>"``
|
||||
instead of ``"<function ...>"`` and never invokes the callable.
|
||||
* ``run_agent.py`` masked-banner path renders the Entra placeholder
|
||||
and never tries to slice/len the callable.
|
||||
* Serialization scrub: dumping a runtime dict via ``json.dumps`` with
|
||||
a callable api_key raises (default behaviour) — guards against
|
||||
silently leaking ``"<function ...>"`` strings into event logs.
|
||||
* ``batch_runner`` strips the callable from the worker config dict
|
||||
so multiprocessing.Pool can pickle the rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenAI SDK construction preserves the callable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCreateOpenAIClientCallable:
|
||||
"""``AIAgent._create_openai_client`` must pass the callable through
|
||||
to ``openai.OpenAI(...)`` without coercion."""
|
||||
|
||||
def test_callable_api_key_passed_to_openai_constructor(self, monkeypatch):
|
||||
"""Construct the smallest possible AIAgent surface and verify
|
||||
the OpenAI client receives the callable unchanged."""
|
||||
captured = {}
|
||||
|
||||
def fake_openai(**kwargs):
|
||||
captured["kwargs"] = kwargs
|
||||
return MagicMock(api_key=kwargs.get("api_key"))
|
||||
|
||||
# Patch the module-level OpenAI proxy used by ``_create_openai_client``.
|
||||
monkeypatch.setattr("run_agent.OpenAI", fake_openai)
|
||||
|
||||
# Build a minimal stand-in for AIAgent so we can call the bound
|
||||
# method directly without paying the full __init__ cost.
|
||||
from run_agent import AIAgent
|
||||
|
||||
agent = AIAgent.__new__(AIAgent)
|
||||
# Attributes consulted by _create_openai_client / _client_log_context.
|
||||
agent.provider = "azure-foundry"
|
||||
agent.model = "gpt-4o"
|
||||
agent.base_url = "https://r.openai.azure.com/openai/v1"
|
||||
agent._client_kwargs = {}
|
||||
|
||||
def token_provider():
|
||||
return "fresh-jwt"
|
||||
|
||||
client_kwargs = {
|
||||
"api_key": token_provider,
|
||||
"base_url": "https://r.openai.azure.com/openai/v1",
|
||||
}
|
||||
client = agent._create_openai_client(client_kwargs, reason="test", shared=False)
|
||||
|
||||
# The OpenAI constructor must receive the *callable*, not a string.
|
||||
forwarded = captured["kwargs"]["api_key"]
|
||||
assert callable(forwarded)
|
||||
assert not isinstance(forwarded, str)
|
||||
assert forwarded is token_provider, (
|
||||
"_create_openai_client must not wrap or coerce the callable"
|
||||
)
|
||||
assert client is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auxiliary runtime preserves the callable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNormalizeMainRuntimePreservesCallable:
|
||||
"""The aux client orchestrator must keep the callable on the
|
||||
runtime dict so compression / vision / embedding / title-gen clients
|
||||
inherit Entra ID auth from the main agent."""
|
||||
|
||||
def test_callable_api_key_survives_normalization(self):
|
||||
from agent.auxiliary_client import _normalize_main_runtime
|
||||
|
||||
def provider():
|
||||
return "jwt"
|
||||
|
||||
normalized = _normalize_main_runtime({
|
||||
"provider": "azure-foundry",
|
||||
"model": "gpt-4o",
|
||||
"base_url": "https://r.openai.azure.com/openai/v1",
|
||||
"api_key": provider,
|
||||
"api_mode": "chat_completions",
|
||||
"auth_mode": "entra_id",
|
||||
})
|
||||
assert normalized["api_key"] is provider
|
||||
assert normalized["auth_mode"] == "entra_id"
|
||||
|
||||
def test_string_api_key_still_works(self):
|
||||
from agent.auxiliary_client import _normalize_main_runtime
|
||||
normalized = _normalize_main_runtime({
|
||||
"provider": "azure-foundry",
|
||||
"api_key": "sk-static",
|
||||
})
|
||||
assert normalized["api_key"] == "sk-static"
|
||||
|
||||
def test_normalization_drops_empty_string_but_preserves_callable(self):
|
||||
from agent.auxiliary_client import _normalize_main_runtime
|
||||
|
||||
def provider():
|
||||
return ""
|
||||
|
||||
# Empty string fields are dropped, but a callable is preserved
|
||||
# even if it would mint an empty token (we don't invoke during
|
||||
# normalization).
|
||||
normalized = _normalize_main_runtime({
|
||||
"provider": "azure-foundry",
|
||||
"api_key": provider,
|
||||
"model": "",
|
||||
})
|
||||
assert normalized["api_key"] is provider
|
||||
assert "model" not in normalized
|
||||
|
||||
def test_unknown_field_dropped(self):
|
||||
from agent.auxiliary_client import _normalize_main_runtime, _MAIN_RUNTIME_FIELDS
|
||||
normalized = _normalize_main_runtime({
|
||||
"provider": "azure-foundry",
|
||||
"api_key": "k",
|
||||
"secret_field_we_dont_want": "leak",
|
||||
})
|
||||
assert "secret_field_we_dont_want" not in normalized
|
||||
# auth_mode IS in the field allowlist (rubber-duck blocker fix).
|
||||
assert "auth_mode" in _MAIN_RUNTIME_FIELDS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Display surfaces never invoke the callable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTruncateTokenCallable:
|
||||
def test_callable_returns_placeholder(self):
|
||||
"""Dashboard preview must render the Entra placeholder, NOT
|
||||
``"<function ...>"``."""
|
||||
from hermes_cli.web_server import _truncate_token
|
||||
|
||||
invoked = {"count": 0}
|
||||
|
||||
def provider():
|
||||
invoked["count"] += 1
|
||||
return "should-not-appear-in-ui"
|
||||
|
||||
token_provider = cast(str | None, provider)
|
||||
rendered = _truncate_token(token_provider)
|
||||
assert rendered == "<entra-id-bearer>"
|
||||
assert invoked["count"] == 0
|
||||
|
||||
def test_string_jwt_still_truncated_to_signature_tail(self):
|
||||
from hermes_cli.web_server import _truncate_token
|
||||
# JWT shape: header.payload.signature → only signature tail shown.
|
||||
out = _truncate_token("aaaa.bbbb.cccccccsig", visible=4)
|
||||
assert out == "…csig"
|
||||
|
||||
def test_empty_returns_empty(self):
|
||||
from hermes_cli.web_server import _truncate_token
|
||||
assert _truncate_token(None) == ""
|
||||
assert _truncate_token("") == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization scrub — runtime dicts with callables must NOT silently
|
||||
# JSON-encode as ``"<function ...>"`` (would leak garbage into events).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRuntimeDictSerializationGuard:
|
||||
def test_json_dumps_default_str_does_not_silently_stringify_callable(self):
|
||||
"""Sanity check: a runtime dict with a callable api_key must
|
||||
either raise on plain ``json.dumps`` (good — fail loud) or be
|
||||
sanitized BEFORE serialization. This test pins the loud-fail
|
||||
behaviour so future changes that introduce
|
||||
``json.dumps(..., default=str)`` over a runtime dict are caught
|
||||
by a regression here."""
|
||||
|
||||
def provider():
|
||||
return "jwt"
|
||||
|
||||
runtime = {
|
||||
"provider": "azure-foundry",
|
||||
"api_key": provider,
|
||||
"auth_mode": "entra_id",
|
||||
}
|
||||
# Plain json.dumps — must raise, not silently produce
|
||||
# ``"<function provider at 0x...>"``.
|
||||
with pytest.raises(TypeError):
|
||||
json.dumps(runtime)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# batch_runner strips callables from the worker config dict
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBatchRunnerCallableHandling:
|
||||
def test_callable_api_key_stripped_from_worker_config(self, capsys, monkeypatch, tmp_path):
|
||||
"""``BatchRunner._run_batches`` (or the equivalent code path)
|
||||
must replace a callable api_key with None before pickling the
|
||||
worker config dict — otherwise multiprocessing.Pool fails."""
|
||||
# We can't easily run BatchRunner end-to-end in a unit test
|
||||
# (it spawns subprocesses), but we CAN inline the same logic:
|
||||
# the production code uses ``callable(self.api_key) and not
|
||||
# isinstance(self.api_key, str)`` to gate the substitution.
|
||||
# Re-execute the same predicate here as a contract guard.
|
||||
|
||||
def provider():
|
||||
return "jwt"
|
||||
|
||||
api_key = provider
|
||||
worker_api_key = None if (callable(api_key) and not isinstance(api_key, str)) else api_key
|
||||
assert worker_api_key is None, (
|
||||
"BatchRunner must replace callable api_key with None so "
|
||||
"multiprocessing.Pool can pickle the worker config"
|
||||
)
|
||||
|
||||
# And a string passes through unchanged.
|
||||
api_key_str = "sk-static"
|
||||
worker_api_key_str = None if (callable(api_key_str) and not isinstance(api_key_str, str)) else api_key_str
|
||||
assert worker_api_key_str == "sk-static"
|
||||
|
||||
def test_batch_runner_source_uses_the_correct_predicate(self):
|
||||
"""Pin the predicate string in batch_runner so refactors that
|
||||
change it are caught here. Reading the source rather than
|
||||
importing avoids spinning up the full BatchRunner."""
|
||||
from pathlib import Path
|
||||
src = (Path(__file__).resolve().parent.parent.parent
|
||||
/ "batch_runner.py").read_text()
|
||||
assert "callable(self.api_key) and not isinstance(self.api_key, str)" in src, (
|
||||
"BatchRunner.api_key callable check changed — update test or "
|
||||
"verify the new predicate still routes Entra token providers "
|
||||
"to the worker-rebuild path."
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inline masked-banner / display sites (callable-aware)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCliEnsureRuntimeCredentialsCallable:
|
||||
"""Regression: ``cli.py:_ensure_runtime_credentials`` previously
|
||||
treated a callable ``api_key`` as "not a string" and overwrote it
|
||||
with the ``"no-key-required"`` placeholder, which then got sent as
|
||||
``Authorization: Bearer no-key-required`` and rejected by Azure
|
||||
with a 401. This is the most subtle of the callable-api_key audit
|
||||
sites — gated by ``not isinstance(api_key, str)`` rather than the
|
||||
cleaner ``callable(...)`` check used elsewhere.
|
||||
|
||||
We verify the source pattern (rather than spinning up a real
|
||||
``HermesCLI`` instance) — the predicate change is the load-bearing
|
||||
fix and is invariant under the surrounding orchestration code."""
|
||||
|
||||
def test_callable_predicate_present_in_cli_runtime_validation(self):
|
||||
from pathlib import Path
|
||||
src = (Path(__file__).resolve().parent.parent.parent
|
||||
/ "cli.py").read_text()
|
||||
# The fix introduces ``_is_callable_provider`` which gates the
|
||||
# string-only check so callable token providers survive.
|
||||
assert "_is_callable_provider = callable(api_key)" in src, (
|
||||
"cli.py:_ensure_runtime_credentials must preserve a callable "
|
||||
"api_key (Entra ID bearer provider). Without the guard, the "
|
||||
"callable is stringified to 'no-key-required' and Azure 401s."
|
||||
)
|
||||
|
||||
|
||||
class TestInlinedDisplayMasks:
|
||||
"""The masked-credential display sites are now inlined per-site (no
|
||||
shared helper). Each site uses the ``is_token_provider`` predicate
|
||||
to short-circuit on callables and print a static
|
||||
``"Microsoft Entra ID"`` label, then falls through to its own
|
||||
context-appropriate string mask. This replaces a unified helper
|
||||
that would have forced one mask shape across sites with legitimately
|
||||
different display needs (banner vs diagnostic vs UI vs preview)."""
|
||||
|
||||
def test_run_agent_banner_uses_is_token_provider_guard(self):
|
||||
"""The masked-banner sites live in ``agent/agent_init.py``
|
||||
(the ``__init__`` body was extracted into ``init_agent`` after
|
||||
this feature was first written). Both the OpenAI and Anthropic
|
||||
client init paths must guard their banner prints with
|
||||
``is_token_provider`` so a callable Entra ID provider doesn't
|
||||
crash ``len(api_key)``."""
|
||||
from pathlib import Path
|
||||
src = (Path(__file__).resolve().parent.parent.parent
|
||||
/ "agent" / "agent_init.py").read_text()
|
||||
assert src.count("is_token_provider(") >= 2, (
|
||||
"agent/agent_init.py must guard BOTH masked-banner paths "
|
||||
"(chat_completions and anthropic_messages) with "
|
||||
"is_token_provider()."
|
||||
)
|
||||
assert src.count('"🔑 Using credentials: Microsoft Entra ID"') >= 2, (
|
||||
"agent/agent_init.py banner blocks should print a static "
|
||||
"'Microsoft Entra ID' label for callable api_keys — no "
|
||||
"placeholder plumbing, no describe-mask fallback."
|
||||
)
|
||||
|
||||
def test_cli_show_config_handles_callable(self):
|
||||
"""``cli.HermesCLI.show_config`` previously did
|
||||
``self.api_key[-4:]`` / ``len(self.api_key)`` which crashes on
|
||||
callable Entra ID providers. The inlined version uses
|
||||
``is_token_provider`` and prints the same static label as the
|
||||
run_agent banners."""
|
||||
from pathlib import Path
|
||||
src = (Path(__file__).resolve().parent.parent.parent
|
||||
/ "cli.py").read_text()
|
||||
assert "is_token_provider(self.api_key)" in src, (
|
||||
"cli.HermesCLI.show_config must guard self.api_key via "
|
||||
"is_token_provider so callable Entra ID providers don't "
|
||||
"crash /config."
|
||||
)
|
||||
assert '"Microsoft Entra ID"' in src, (
|
||||
"cli.HermesCLI.show_config must print the static "
|
||||
"'Microsoft Entra ID' label (matching run_agent banners) "
|
||||
"instead of attempting to slice the callable."
|
||||
)
|
||||
|
||||
def test_mask_api_key_for_logs_handles_callable(self):
|
||||
"""``run_agent._mask_api_key_for_logs`` is called from the
|
||||
request-dump JSON path. For Entra users, ``self.client.api_key``
|
||||
is the SDK's empty string (callable stashed privately) — but
|
||||
defensively the helper must also accept a callable directly
|
||||
and return the placeholder rather than crashing on
|
||||
``len(callable)``."""
|
||||
from pathlib import Path
|
||||
src = (Path(__file__).resolve().parent.parent.parent
|
||||
/ "run_agent.py").read_text()
|
||||
# The function now starts with a callable check.
|
||||
assert (
|
||||
"if callable(key) and not isinstance(key, str):" in src
|
||||
and '"<entra-id-bearer>"' in src
|
||||
), (
|
||||
"run_agent._mask_api_key_for_logs must short-circuit for "
|
||||
"callable api_keys to avoid len(callable) crashes in "
|
||||
"request-dump paths."
|
||||
)
|
||||
|
||||
def test_anthropic_401_diagnostic_handles_callable(self):
|
||||
"""The Anthropic 401 diagnostic path lives in
|
||||
``agent/conversation_loop.py`` (the ``run_conversation`` body
|
||||
was extracted after this feature was first written). It used
|
||||
to do ``key[:12]`` on ``self._anthropic_api_key``. For Entra ID +
|
||||
Anthropic-style mode that's a callable; slicing crashes."""
|
||||
from pathlib import Path
|
||||
src = (Path(__file__).resolve().parent.parent.parent
|
||||
/ "agent" / "conversation_loop.py").read_text()
|
||||
# The Anthropic 401 block now branches on is_token_provider
|
||||
# before slicing the key.
|
||||
assert "Microsoft Entra ID (httpx event hook)" in src, (
|
||||
"agent/conversation_loop.py Anthropic 401 diagnostic must "
|
||||
"surface a Microsoft Entra ID branch before slicing the "
|
||||
"key prefix."
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue