feat(xai-oauth): add xAI Grok OAuth (SuperGrok Subscription) provider

Adds a new authentication provider that lets SuperGrok subscribers sign
in to Hermes with their xAI account via the standard OAuth 2.0 PKCE
loopback flow, instead of pasting a raw API key from console.x.ai.

Highlights
----------
* OAuth 2.0 PKCE loopback login against accounts.x.ai with discovery,
  state/nonce, and a strict CORS-origin allowlist on the callback.
* Authorize URL carries `plan=generic` (required for non-allowlisted
  loopback clients) and `referrer=hermes-agent` for best-effort
  attribution in xAI's OAuth server logs.
* Token storage in `auth.json` with file-locked atomic writes; JWT
  `exp`-based expiry detection with skew; refresh-token rotation
  synced both ways between the singleton store and the credential
  pool so multi-process / multi-profile setups don't invalidate each
  other's refresh tokens.
* Reactive 401 retry: on a 401 from the xAI Responses API, the agent
  refreshes the token, swaps it back into `self.api_key`, and retries
  the call once. Guarded against silent account swaps when the active
  key was sourced from a different (manual) pool entry.
* Auxiliary tasks (curator, vision, embeddings, etc.) route through a
  dedicated xAI Responses-mode auxiliary client instead of falling back
  to OpenRouter billing.
* Direct HTTP tools (`tools/xai_http.py`, transcription, TTS, image-gen
  plugin) resolve credentials through a unified runtime → singleton →
  env-var fallback chain so xai-oauth users get them for free.
* `hermes auth add xai-oauth` and `hermes auth remove xai-oauth N` are
  wired through the standard auth-commands surface; remove cleans up
  the singleton loopback_pkce entry so it doesn't silently reinstate.
* `hermes model` provider picker shows
  "xAI Grok OAuth (SuperGrok Subscription)" and the model-flow falls
  back to pool credentials when the singleton is missing.

Hardening
---------
* The `token_endpoint` host returned in discovery and refresh responses
  is validated against the same `*.x.ai` allowlist as the authorization
  endpoint, blocking MITM persistence of a hostile endpoint.
* Discovery / refresh / token-exchange `response.json()` calls are
  wrapped to raise typed `AuthError` on malformed bodies (captive
  portals, proxy error pages) instead of leaking JSONDecodeError
  tracebacks.
* `prompt_cache_key` is routed through `extra_body` on the codex
  transport (sending it as a top-level kwarg trips the openai SDK with
  a TypeError on builds whose `Responses.stream()` signature lacks it).
* Credential-pool sync-back preserves `active_provider` so refreshing
  an OAuth entry doesn't silently flip the active provider out from
  under the running agent.

Testing
-------
* New `tests/hermes_cli/test_auth_xai_oauth_provider.py` (~63 tests)
  covers JWT expiry, OAuth URL params (plan + referrer), CORS origins,
  redirect URI validation, singleton↔pool sync, concurrency races,
  refresh error paths, runtime resolution, and malformed-JSON guards.
* Extended `test_credential_pool.py`, `test_codex_transport.py`, and
  `test_run_agent_codex_responses.py` cover the pool sync-back,
  `extra_body` routing, and 401 reactive refresh paths.
* 165 tests passing on this branch via `scripts/run_tests.sh`.
This commit is contained in:
Jaaneek 2026-05-15 16:10:38 +01:00 committed by Teknium
parent 9fb40e6a3d
commit b62c997973
27 changed files with 3843 additions and 131 deletions

View file

@ -578,6 +578,197 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
assert result["final_response"] == "Recovered after refresh"
def _build_xai_oauth_agent(monkeypatch):
    """Build a quiet ``AIAgent`` configured for the xai-oauth provider.

    The agent targets ``https://api.x.ai/v1`` in ``codex_responses`` mode
    with the placeholder key ``"xai-oauth-token"``. Its cleanup and
    persistence hooks are replaced with no-op lambdas before it is
    returned.
    """
    _patch_agent_bootstrap(monkeypatch)

    agent_options = {
        "model": "grok-code-fast-1",
        "provider": "xai-oauth",
        "api_mode": "codex_responses",
        "base_url": "https://api.x.ai/v1",
        "api_key": "xai-oauth-token",
        "quiet_mode": True,
        "max_iterations": 4,
        "skip_context_files": True,
        "skip_memory": True,
    }
    xai_agent = run_agent.AIAgent(**agent_options)

    # Swap the cleanup/persistence hooks for no-ops, keeping each hook's
    # original parameter names so keyword calls still bind.
    noop_hooks = {
        "_cleanup_task_resources": lambda task_id: None,
        "_persist_session": lambda messages, history=None: None,
        "_save_trajectory": lambda messages, user_message, completed: None,
        "_save_session_log": lambda messages: None,
    }
    for hook_name, noop in noop_hooks.items():
        setattr(xai_agent, hook_name, noop)

    return xai_agent
def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch):
    """xai-oauth + codex_responses must send ``prompt_cache_key`` in the body.

    ``prompt_cache_key`` is the body field on /v1/responses that xAI
    documents as its Responses-API cache key (see docs.x.ai,
    prompt-caching / maximizing-cache-hits).

    The key travels through ``extra_body`` rather than as a top-level
    kwarg so it is serialized into the JSON body regardless of whether
    the installed openai SDK build still accepts ``prompt_cache_key`` on
    ``Responses.stream()``. Older or trimmed SDK builds drop it from the
    signature and would otherwise raise ``TypeError`` before the request
    reaches api.x.ai.

    The ``x-grok-conv-id`` header is retained as a belt-and-braces
    fallback for clients/proxies that route on headers.
    """
    agent = _build_xai_oauth_agent(monkeypatch)
    conversation = [
        {"role": "system", "content": "You are Hermes."},
        {"role": "user", "content": "Ping"},
    ]

    api_kwargs = agent._build_api_kwargs(conversation)

    assert api_kwargs.get("model") == "grok-code-fast-1"

    # The cache key must never appear as a top-level kwarg: avoiding that
    # openai SDK incompatibility is the whole point of the indirection.
    assert "prompt_cache_key" not in api_kwargs

    body_overrides = api_kwargs.get("extra_body") or {}
    assert body_overrides.get("prompt_cache_key"), (
        "xAI prompt-cache routing must travel via extra_body.prompt_cache_key "
        "for /v1/responses — body field is the documented surface."
    )

    request_headers = api_kwargs.get("extra_headers") or {}
    assert "x-grok-conv-id" in request_headers, (
        "x-grok-conv-id header kept as belt-and-braces fallback for clients "
        "that route on headers."
    )
def test_run_conversation_xai_oauth_refreshes_after_401_and_retries(monkeypatch):
    """A 401 on an xai-oauth call must trigger one refresh and one retry.

    xai-oauth speaks the Responses API just like codex, so the reactive
    refresh-and-retry handler that fires for openai-codex must fire for
    xai-oauth as well.

    Regression guarded here: the gating condition checked only
    ``provider == "openai-codex"``, so xai-oauth 401s went straight to
    the non-retryable abort path with no chance to swap in a freshly
    refreshed access token.
    """
    agent = _build_xai_oauth_agent(monkeypatch)
    counters = {"api": 0, "refresh": 0}

    class _UnauthorizedError(RuntimeError):
        """Stand-in for an HTTP 401 raised by the API layer."""

        def __init__(self):
            super().__init__("Error code: 401 - unauthorized")
            self.status_code = 401

    def _fake_api_call(api_kwargs):
        # First attempt is rejected; every later attempt succeeds.
        counters["api"] += 1
        if counters["api"] > 1:
            return _codex_message_response("Recovered after xAI refresh")
        raise _UnauthorizedError()

    def _fake_refresh(*, force=True):
        # The 401 path is expected to request a forced refresh.
        counters["refresh"] += 1
        assert force is True
        return True

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(
        agent, "_try_refresh_codex_client_credentials", _fake_refresh
    )

    outcome = agent.run_conversation("Say OK")

    assert counters["api"] == 2
    assert counters["refresh"] == 1
    assert outcome["completed"] is True
    assert outcome["final_response"] == "Recovered after xAI refresh"
def test_try_refresh_codex_client_credentials_handles_xai_oauth(monkeypatch):
    """Refreshing an xai-oauth agent must rebuild its client with new creds.

    ``_try_refresh_codex_client_credentials`` is shared between codex and
    xai-oauth (both speak codex_responses). When the active provider is
    xai-oauth it must close the existing client and rebuild the OpenAI
    client from freshly resolved xAI OAuth credentials. Covering both
    providers prevents a silent regression where the function gets gated
    to a single provider.
    """
    agent = _build_xai_oauth_agent(monkeypatch)
    observed = {"closed": False, "openai_kwargs": None}

    class _ExistingClient:
        """Client installed before the refresh; records ``close()``."""

        def close(self):
            observed["closed"] = True

    class _RebuiltClient:
        """Marker type for the client created by the refresh."""

    def _fake_openai(**kwargs):
        observed["openai_kwargs"] = kwargs
        return _RebuiltClient()

    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
        # Two calls are expected and both must succeed: the pre-refresh
        # guard reads the singleton with refresh_if_expiring=False to
        # confirm the agent's active key still matches, then the actual
        # refresh passes force_refresh=True.
        token = "fresh-xai-token" if force_refresh else agent.api_key
        return {"api_key": token, "base_url": "https://api.x.ai/v1"}

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
        _fake_resolve,
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
    agent.client = _ExistingClient()

    refreshed = agent._try_refresh_codex_client_credentials(force=True)

    assert refreshed is True
    assert observed["closed"] is True
    assert observed["openai_kwargs"]["api_key"] == "fresh-xai-token"
    assert observed["openai_kwargs"]["base_url"] == "https://api.x.ai/v1"
    assert isinstance(agent.client, _RebuiltClient)
    assert agent.api_key == "fresh-xai-token"
def test_try_refresh_codex_client_credentials_skips_xai_oauth_when_singleton_differs(monkeypatch):
    """A 401 refresh must not adopt the singleton's tokens for a foreign key.

    An xai-oauth agent constructed with a non-singleton credential (e.g.
    a manual pool entry whose tokens belong to a different account than
    the loopback_pkce singleton, or an explicit ``api_key=`` arg) MUST NOT
    silently adopt the singleton's tokens on a 401 reactive refresh.
    Otherwise a 401 mid-conversation would re-route the rest of the
    conversation onto a different account, with no user feedback.

    The credential pool's reactive recovery is the right channel for
    pool-managed credentials; this fallback path is for the
    singleton-only case and must short-circuit when the active key
    differs.
    """
    agent = _build_xai_oauth_agent(monkeypatch)

    # The builder gives the agent "xai-oauth-token", while the singleton
    # reports a *different* account's token, so no forced refresh may run.
    singleton_view = {
        "api_key": "singleton-account-token",
        "base_url": "https://api.x.ai/v1",
    }
    forced_refreshes = {"count": 0}

    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
        # Only forced refreshes are counted. The pre-refresh guard read
        # gets the same payload: the singleton's own token, which is NOT
        # the key the agent is currently using.
        if force_refresh:
            forced_refreshes["count"] += 1
        return dict(singleton_view)

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
        _fake_resolve,
    )

    key_before = agent.api_key
    refreshed = agent._try_refresh_codex_client_credentials(force=True)

    assert refreshed is False, (
        "must not refresh when the active credential isn't the singleton; "
        "otherwise the conversation silently swaps accounts mid-flight."
    )
    assert forced_refreshes["count"] == 0, (
        "force_refresh must not run — that would mutate the singleton's "
        "tokens on disk and consume its single-use refresh_token for an "
        "agent that wasn't even using the singleton."
    )
    assert agent.api_key == key_before
def test_run_conversation_copilot_refreshes_after_401_and_retries(monkeypatch):
agent = _build_copilot_agent(monkeypatch)
calls = {"api": 0, "refresh": 0}
@ -624,12 +815,18 @@ def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
rebuilt["kwargs"] = kwargs
return _RebuiltClient()
def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
# Pre-refresh guard reads the singleton (refresh_if_expiring=False).
# It must report the agent's current api_key so the equality check
# passes; only then does the actual force_refresh run.
return {
"api_key": "new-codex-token" if force_refresh else agent.api_key,
"base_url": "https://chatgpt.com/backend-api/codex",
}
monkeypatch.setattr(
"hermes_cli.auth.resolve_codex_runtime_credentials",
lambda force_refresh=True: {
"api_key": "new-codex-token",
"base_url": "https://chatgpt.com/backend-api/codex",
},
_fake_resolve,
)
monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)