mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(xai-oauth): add xAI Grok OAuth (SuperGrok Subscription) provider
Adds a new authentication provider that lets SuperGrok subscribers sign in to Hermes with their xAI account via the standard OAuth 2.0 PKCE loopback flow, instead of pasting a raw API key from console.x.ai.

Highlights
----------
* OAuth 2.0 PKCE loopback login against accounts.x.ai with discovery, state/nonce, and a strict CORS-origin allowlist on the callback.
* Authorize URL carries `plan=generic` (required for non-allowlisted loopback clients) and `referrer=hermes-agent` for best-effort attribution in xAI's OAuth server logs.
* Token storage in `auth.json` with file-locked atomic writes; JWT `exp`-based expiry detection with skew; refresh-token rotation synced both ways between the singleton store and the credential pool so multi-process / multi-profile setups don't tear each other's refresh tokens.
* Reactive 401 retry: on a 401 from the xAI Responses API, the agent refreshes the token, swaps it back into `self.api_key`, and retries the call once. Guarded against silent account swaps when the active key was sourced from a different (manual) pool entry.
* Auxiliary tasks (curator, vision, embeddings, etc.) route through a dedicated xAI Responses-mode auxiliary client instead of falling back to OpenRouter billing.
* Direct HTTP tools (`tools/xai_http.py`, transcription, TTS, image-gen plugin) resolve credentials through a unified runtime → singleton → env-var fallback chain so xai-oauth users get them for free.
* `hermes auth add xai-oauth` and `hermes auth remove xai-oauth N` are wired through the standard auth-commands surface; remove cleans up the singleton loopback_pkce entry so it doesn't silently reinstate.
* `hermes model` provider picker shows "xAI Grok OAuth (SuperGrok Subscription)" and the model-flow falls back to pool credentials when the singleton is missing.
Hardening
---------
* Discovery and refresh responses validate the returned `token_endpoint` host against the same `*.x.ai` allowlist as the authorization endpoint, blocking MITM persistence of a hostile endpoint.
* Discovery / refresh / token-exchange `response.json()` calls are wrapped to raise typed `AuthError` on malformed bodies (captive portals, proxy error pages) instead of leaking JSONDecodeError tracebacks.
* `prompt_cache_key` is routed through `extra_body` on the codex transport (sending it as a top-level kwarg trips xAI's SDK with a TypeError).
* Credential-pool sync-back preserves `active_provider` so refreshing an OAuth entry doesn't silently flip the active provider out from under the running agent.

Testing
-------
* New `tests/hermes_cli/test_auth_xai_oauth_provider.py` (~63 tests) covers JWT expiry, OAuth URL params (plan + referrer), CORS origins, redirect URI validation, singleton↔pool sync, concurrency races, refresh error paths, runtime resolution, and malformed-JSON guards.
* Extended `test_credential_pool.py`, `test_codex_transport.py`, and `test_run_agent_codex_responses.py` cover the pool sync-back, `extra_body` routing, and 401 reactive refresh paths.
* 165 tests passing on this branch via `scripts/run_tests.sh`.
This commit is contained in:
parent
9fb40e6a3d
commit
b62c997973
27 changed files with 3843 additions and 131 deletions
|
|
@ -578,6 +578,197 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
|
|||
assert result["final_response"] == "Recovered after refresh"
|
||||
|
||||
|
||||
def _build_xai_oauth_agent(monkeypatch):
    """Return an ``AIAgent`` wired for the xai-oauth provider in tests.

    The agent bootstrap is patched through ``_patch_agent_bootstrap`` and
    the agent is constructed against ``https://api.x.ai/v1`` in
    ``codex_responses`` mode with the placeholder key ``xai-oauth-token``.
    Its cleanup and persistence hooks are then replaced with callables
    that do nothing and return ``None``.
    """
    _patch_agent_bootstrap(monkeypatch)

    agent_options = {
        "model": "grok-code-fast-1",
        "provider": "xai-oauth",
        "api_mode": "codex_responses",
        "base_url": "https://api.x.ai/v1",
        "api_key": "xai-oauth-token",
        "quiet_mode": True,
        "max_iterations": 4,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**agent_options)

    # Replace the cleanup / persistence hooks with no-ops. Each stub keeps
    # the parameter names of the hook it stands in for.
    no_op_hooks = {
        "_cleanup_task_resources": lambda task_id: None,
        "_persist_session": lambda messages, history=None: None,
        "_save_trajectory": lambda messages, user_message, completed: None,
        "_save_session_log": lambda messages: None,
    }
    for hook_name, stub in no_op_hooks.items():
        setattr(agent, hook_name, stub)

    return agent
|
||||
|
||||
|
||||
def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch):
    """Prompt-cache routing for xai-oauth must ride in ``extra_body``.

    On /v1/responses the cache key is the ``prompt_cache_key`` body field
    (xAI's documented Responses-API cache key — see docs.x.ai
    prompt-caching/maximizing-cache-hits). ``_build_api_kwargs`` has to
    place it under ``extra_body`` instead of emitting a top-level kwarg:
    the field then lands in the JSON body whether or not the installed
    openai SDK still accepts ``prompt_cache_key`` on
    ``Responses.stream()``. Older or trimmed SDK builds drop it from the
    signature and would otherwise raise ``TypeError`` before the request
    reaches api.x.ai.

    The ``x-grok-conv-id`` header is kept as a fallback for clients and
    proxies that route on headers.
    """
    agent = _build_xai_oauth_agent(monkeypatch)
    conversation = [
        {"role": "system", "content": "You are Hermes."},
        {"role": "user", "content": "Ping"},
    ]
    api_kwargs = agent._build_api_kwargs(conversation)

    assert api_kwargs.get("model") == "grok-code-fast-1"

    # A top-level ``prompt_cache_key`` is exactly the openai SDK
    # incompatibility the extra_body indirection exists to avoid.
    assert "prompt_cache_key" not in api_kwargs

    body_extras = api_kwargs.get("extra_body") or {}
    assert body_extras.get("prompt_cache_key"), (
        "xAI prompt-cache routing must travel via extra_body.prompt_cache_key "
        "for /v1/responses — body field is the documented surface."
    )

    header_extras = api_kwargs.get("extra_headers") or {}
    assert "x-grok-conv-id" in header_extras, (
        "x-grok-conv-id header kept as belt-and-braces fallback for clients "
        "that route on headers."
    )
|
||||
|
||||
|
||||
def test_run_conversation_xai_oauth_refreshes_after_401_and_retries(monkeypatch):
    """A 401 in the middle of an xai-oauth call must refresh once and retry.

    xai-oauth speaks the Responses API just like codex, so the
    refresh-and-retry handler that fires for openai-codex has to fire for
    xai-oauth as well. The regression this guards against: the gating
    condition checked only ``provider == "openai-codex"``, so an xai-oauth
    401 went straight to the non-retryable abort path without a chance to
    swap in a freshly refreshed access token.

    The first fake API call raises a 401-style error and the second one
    succeeds; the test expects exactly two API calls and one forced
    refresh.
    """
    agent = _build_xai_oauth_agent(monkeypatch)
    counters = {"api": 0, "refresh": 0}

    class _UnauthorizedError(RuntimeError):
        """Stand-in for a 401 failure carrying a ``status_code`` attribute."""

        def __init__(self):
            super().__init__("Error code: 401 - unauthorized")
            self.status_code = 401

    def _fake_api_call(api_kwargs):
        # Fail the first attempt only; every later attempt succeeds.
        counters["api"] += 1
        attempt = counters["api"]
        if attempt != 1:
            return _codex_message_response("Recovered after xAI refresh")
        raise _UnauthorizedError()

    def _fake_refresh(*, force=True):
        counters["refresh"] += 1
        assert force is True
        return True

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(
        agent, "_try_refresh_codex_client_credentials", _fake_refresh
    )

    outcome = agent.run_conversation("Say OK")

    assert counters["api"] == 2
    assert counters["refresh"] == 1
    assert outcome["completed"] is True
    assert outcome["final_response"] == "Recovered after xAI refresh"
|
||||
|
||||
|
||||
def test_try_refresh_codex_client_credentials_handles_xai_oauth(monkeypatch):
    """The shared refresh helper must rebuild the client for xai-oauth too.

    ``_try_refresh_codex_client_credentials`` is used by both codex and
    xai-oauth (both speak codex_responses). With xai-oauth as the active
    provider it must close the existing client and construct a new OpenAI
    client from freshly resolved xAI OAuth credentials. Covering both
    providers prevents a silent regression where the helper gets gated to
    a single provider.
    """
    agent = _build_xai_oauth_agent(monkeypatch)
    observed = {"closed": False, "openai_kwargs": None}

    class _ExistingClient:
        """Client in place before the refresh; records that it was closed."""

        def close(self):
            observed["closed"] = True

    class _RebuiltClient:
        """Marker type for the client created by the refresh."""

    def _fake_openai(**kwargs):
        observed["openai_kwargs"] = kwargs
        return _RebuiltClient()

    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
        # The pre-refresh guard reads the singleton with
        # refresh_if_expiring=False to confirm the agent's active key still
        # matches; the real refresh afterwards passes force_refresh=True.
        # Both calls have to succeed.
        if force_refresh:
            token = "fresh-xai-token"
        else:
            token = agent.api_key
        return {"api_key": token, "base_url": "https://api.x.ai/v1"}

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
        _fake_resolve,
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)

    agent.client = _ExistingClient()
    refreshed = agent._try_refresh_codex_client_credentials(force=True)

    assert refreshed is True
    assert observed["closed"] is True
    assert observed["openai_kwargs"]["api_key"] == "fresh-xai-token"
    assert observed["openai_kwargs"]["base_url"] == "https://api.x.ai/v1"
    assert isinstance(agent.client, _RebuiltClient)
    assert agent.api_key == "fresh-xai-token"
|
||||
|
||||
|
||||
def test_try_refresh_codex_client_credentials_skips_xai_oauth_when_singleton_differs(monkeypatch):
    """A reactive refresh must not adopt a different account's tokens.

    An xai-oauth agent may be built with a non-singleton credential: a
    manual pool entry whose tokens belong to a different account than the
    loopback_pkce singleton, or an explicit ``api_key=`` argument. On a
    401 reactive refresh such an agent MUST NOT silently pick up the
    singleton's tokens, because the rest of the conversation would then
    run on a different account with no user feedback.

    Pool-managed credentials are recovered through the credential pool's
    own reactive recovery. This fallback path covers the singleton-only
    case and has to short-circuit when the active key differs.
    """
    agent = _build_xai_oauth_agent(monkeypatch)

    # The builder gives the agent "xai-oauth-token", while the singleton
    # reports a *different* account's token. No forced refresh may happen.
    forced_refreshes = []

    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
        if force_refresh:
            forced_refreshes.append(True)
        # Forced refresh and pre-refresh guard read alike report the
        # singleton's own token, which is NOT the one the agent is using.
        return {
            "api_key": "singleton-account-token",
            "base_url": "https://api.x.ai/v1",
        }

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
        _fake_resolve,
    )

    key_before = agent.api_key
    refreshed = agent._try_refresh_codex_client_credentials(force=True)

    assert refreshed is False, (
        "must not refresh when the active credential isn't the singleton; "
        "otherwise the conversation silently swaps accounts mid-flight."
    )
    assert len(forced_refreshes) == 0, (
        "force_refresh must not run — that would mutate the singleton's "
        "tokens on disk and consume its single-use refresh_token for an "
        "agent that wasn't even using the singleton."
    )
    assert agent.api_key == key_before
|
||||
|
||||
|
||||
def test_run_conversation_copilot_refreshes_after_401_and_retries(monkeypatch):
|
||||
agent = _build_copilot_agent(monkeypatch)
|
||||
calls = {"api": 0, "refresh": 0}
|
||||
|
|
@ -624,12 +815,18 @@ def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
|
|||
rebuilt["kwargs"] = kwargs
|
||||
return _RebuiltClient()
|
||||
|
||||
def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
|
||||
# Pre-refresh guard reads the singleton (refresh_if_expiring=False).
|
||||
# It must report the agent's current api_key so the equality check
|
||||
# passes; only then does the actual force_refresh run.
|
||||
return {
|
||||
"api_key": "new-codex-token" if force_refresh else agent.api_key,
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
}
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth.resolve_codex_runtime_credentials",
|
||||
lambda force_refresh=True: {
|
||||
"api_key": "new-codex-token",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
_fake_resolve,
|
||||
)
|
||||
monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue