feat(xai-oauth): add xAI Grok OAuth (SuperGrok Subscription) provider

Adds a new authentication provider that lets SuperGrok subscribers sign in to Hermes with their xAI account via the standard OAuth 2.0 PKCE loopback flow, instead of pasting a raw API key from console.x.ai. Highlights ---------- * OAuth 2.0 PKCE loopback login against accounts.x.ai with discovery, state/nonce, and a strict CORS-origin allowlist on the callback. * Authorize URL carries `plan=generic` (required for non-allowlisted loopback clients) and `referrer=hermes-agent` for best-effort attribution in xAI's OAuth server logs. * Token storage in `auth.json` with file-locked atomic writes; JWT `exp`-based expiry detection with skew; refresh-token rotation synced both ways between the singleton store and the credential pool so multi-process / multi-profile setups don't tear each other's refresh tokens. * Reactive 401 retry: on a 401 from the xAI Responses API, the agent refreshes the token, swaps it back into `self.api_key`, and retries the call once. Guarded against silent account swaps when the active key was sourced from a different (manual) pool entry. * Auxiliary tasks (curator, vision, embeddings, etc.) route through a dedicated xAI Responses-mode auxiliary client instead of falling back to OpenRouter billing. * Direct HTTP tools (`tools/xai_http.py`, transcription, TTS, image-gen plugin) resolve credentials through a unified runtime → singleton → env-var fallback chain so xai-oauth users get them for free. * `hermes auth add xai-oauth` and `hermes auth remove xai-oauth N` are wired through the standard auth-commands surface; remove cleans up the singleton loopback_pkce entry so it doesn't silently reinstate. * `hermes model` provider picker shows "xAI Grok OAuth (SuperGrok Subscription)" and the model-flow falls back to pool credentials when the singleton is missing. Hardening --------- * Discovery and refresh responses validate the returned `token_endpoint` host against the same `*.x.ai` allowlist as the authorization endpoint, blocking MITM persistence of a hostile endpoint. * Discovery / refresh / token-exchange `response.json()` calls are wrapped to raise typed `AuthError` on malformed bodies (captive portals, proxy error pages) instead of leaking JSONDecodeError tracebacks. * `prompt_cache_key` is routed through `extra_body` on the codex transport (sending it as a top-level kwarg trips xAI's SDK with a TypeError). * Credential-pool sync-back preserves `active_provider` so refreshing an OAuth entry doesn't silently flip the active provider out from under the running agent. Testing ------- * New `tests/hermes_cli/test_auth_xai_oauth_provider.py` (~63 tests) covers JWT expiry, OAuth URL params (plan + referrer), CORS origins, redirect URI validation, singleton↔pool sync, concurrency races, refresh error paths, runtime resolution, and malformed-JSON guards. * Extended `test_credential_pool.py`, `test_codex_transport.py`, and `test_run_agent_codex_responses.py` cover the pool sync-back, `extra_body` routing, and 401 reactive refresh paths. * 165 tests passing on this branch via `scripts/run_tests.sh`.
2026-05-18 04:41:56 +00:00 · 2026-05-15 16:10:38 +01:00 · 2026-05-15 16:10:38 +01:00 · b62c997973
commit b62c997973
parent 9fb40e6a3d
27 changed files with 3843 additions and 131 deletions
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@ -578,6 +578,197 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
    assert result["final_response"] == "Recovered after refresh"


+def _build_xai_oauth_agent(monkeypatch):
+    _patch_agent_bootstrap(monkeypatch)
+    agent = run_agent.AIAgent(
+        model="grok-code-fast-1",
+        provider="xai-oauth",
+        api_mode="codex_responses",
+        base_url="https://api.x.ai/v1",
+        api_key="xai-oauth-token",
+        quiet_mode=True,
+        max_iterations=4,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    agent._cleanup_task_resources = lambda task_id: None
+    agent._persist_session = lambda messages, history=None: None
+    agent._save_trajectory = lambda messages, user_message, completed: None
+    agent._save_session_log = lambda messages: None
+    return agent
+
+
+def test_build_api_kwargs_xai_oauth_sends_cache_key_via_extra_body(monkeypatch):
+    """xai-oauth + codex_responses must route prompt caching via the
+    ``prompt_cache_key`` body field on /v1/responses (xAI's documented
+    Responses-API cache key — see docs.x.ai prompt-caching/maximizing-
+    cache-hits).
+
+    We pass it through ``extra_body`` rather than as a top-level kwarg so
+    the body field is serialized into JSON regardless of whether the
+    installed openai SDK build still accepts ``prompt_cache_key`` on
+    ``Responses.stream()``. Older or trimmed SDK builds drop it from the
+    signature and would otherwise raise ``TypeError`` before the request
+    reaches api.x.ai. The ``x-grok-conv-id`` header is retained as a
+    belt-and-braces fallback for clients/proxies that route on headers."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    kwargs = agent._build_api_kwargs(
+        [
+            {"role": "system", "content": "You are Hermes."},
+            {"role": "user", "content": "Ping"},
+        ]
+    )
+
+    assert kwargs.get("model") == "grok-code-fast-1"
+    # Top-level kwarg must NOT be set — that's the openai SDK
+    # incompatibility this whole indirection exists to dodge.
+    assert "prompt_cache_key" not in kwargs
+    extra_body = kwargs.get("extra_body") or {}
+    assert extra_body.get("prompt_cache_key"), (
+        "xAI prompt-cache routing must travel via extra_body.prompt_cache_key "
+        "for /v1/responses — body field is the documented surface."
+    )
+    headers = kwargs.get("extra_headers") or {}
+    assert "x-grok-conv-id" in headers, (
+        "x-grok-conv-id header kept as belt-and-braces fallback for clients "
+        "that route on headers."
+    )
+
+
+def test_run_conversation_xai_oauth_refreshes_after_401_and_retries(monkeypatch):
+    """xai-oauth speaks the Responses API just like codex.  When the access
+    token is rejected mid-call (401), the same proactive refresh-and-retry
+    handler that fires for openai-codex must also fire for xai-oauth — the
+    bug it caught: the gating condition checked only ``provider == "openai-codex"``,
+    so xai-oauth 401s leaked straight to non-retryable abort path with no
+    chance to swap in a freshly refreshed access token."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    calls = {"api": 0, "refresh": 0}
+
+    class _UnauthorizedError(RuntimeError):
+        def __init__(self):
+            super().__init__("Error code: 401 - unauthorized")
+            self.status_code = 401
+
+    def _fake_api_call(api_kwargs):
+        calls["api"] += 1
+        if calls["api"] == 1:
+            raise _UnauthorizedError()
+        return _codex_message_response("Recovered after xAI refresh")
+
+    def _fake_refresh(*, force=True):
+        calls["refresh"] += 1
+        assert force is True
+        return True
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
+    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)
+
+    result = agent.run_conversation("Say OK")
+
+    assert calls["api"] == 2
+    assert calls["refresh"] == 1
+    assert result["completed"] is True
+    assert result["final_response"] == "Recovered after xAI refresh"
+
+
+def test_try_refresh_codex_client_credentials_handles_xai_oauth(monkeypatch):
+    """``_try_refresh_codex_client_credentials`` must rebuild the OpenAI
+    client with freshly resolved xAI OAuth credentials when the active
+    provider is xai-oauth.  The function name is shared between codex and
+    xai-oauth (both speak codex_responses) — covering both cases prevents
+    silent regressions where the function gets gated to a single provider."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    closed = {"value": False}
+    rebuilt = {"kwargs": None}
+
+    class _ExistingClient:
+        def close(self):
+            closed["value"] = True
+
+    class _RebuiltClient:
+        pass
+
+    def _fake_openai(**kwargs):
+        rebuilt["kwargs"] = kwargs
+        return _RebuiltClient()
+
+    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
+        # The pre-refresh guard reads the singleton with refresh_if_expiring=False
+        # to verify that the agent's active key still matches; the actual
+        # refresh later passes force_refresh=True.  Both calls must succeed.
+        return {
+            "api_key": "fresh-xai-token" if force_refresh else agent.api_key,
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
+        _fake_resolve,
+    )
+    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
+
+    agent.client = _ExistingClient()
+    ok = agent._try_refresh_codex_client_credentials(force=True)
+
+    assert ok is True
+    assert closed["value"] is True
+    assert rebuilt["kwargs"]["api_key"] == "fresh-xai-token"
+    assert rebuilt["kwargs"]["base_url"] == "https://api.x.ai/v1"
+    assert isinstance(agent.client, _RebuiltClient)
+    assert agent.api_key == "fresh-xai-token"
+
+
+def test_try_refresh_codex_client_credentials_skips_xai_oauth_when_singleton_differs(monkeypatch):
+    """An xai-oauth agent constructed with a non-singleton credential
+    (e.g. a manual pool entry whose tokens belong to a different account
+    than the loopback_pkce singleton, or an explicit ``api_key=`` arg)
+    MUST NOT silently adopt the singleton's tokens on a 401 reactive
+    refresh.  Otherwise a 401 mid-conversation would re-route the rest
+    of the conversation onto a different account, with no user feedback.
+
+    The credential pool's reactive recovery is the right channel for
+    pool-managed credentials; this fallback path is for the singleton-
+    only case and must short-circuit when the active key differs."""
+    agent = _build_xai_oauth_agent(monkeypatch)
+    # Agent is using "xai-oauth-token" (per the builder); singleton holds
+    # a *different* account's token.  No force_refresh should fire.
+    refresh_calls = {"count": 0}
+
+    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
+        if force_refresh:
+            refresh_calls["count"] += 1
+            return {
+                "api_key": "singleton-account-token",
+                "base_url": "https://api.x.ai/v1",
+            }
+        # The pre-refresh guard read — return the singleton's view of the
+        # singleton's token, which is NOT what the agent is currently using.
+        return {
+            "api_key": "singleton-account-token",
+            "base_url": "https://api.x.ai/v1",
+        }
+
+    monkeypatch.setattr(
+        "hermes_cli.auth.resolve_xai_oauth_runtime_credentials",
+        _fake_resolve,
+    )
+
+    pre_refresh_key = agent.api_key
+    ok = agent._try_refresh_codex_client_credentials(force=True)
+
+    assert ok is False, (
+        "must not refresh when the active credential isn't the singleton; "
+        "otherwise the conversation silently swaps accounts mid-flight."
+    )
+    assert refresh_calls["count"] == 0, (
+        "force_refresh must not run — that would mutate the singleton's "
+        "tokens on disk and consume its single-use refresh_token for an "
+        "agent that wasn't even using the singleton."
+    )
+    assert agent.api_key == pre_refresh_key
+
+
 def test_run_conversation_copilot_refreshes_after_401_and_retries(monkeypatch):
    agent = _build_copilot_agent(monkeypatch)
    calls = {"api": 0, "refresh": 0}
@ -624,12 +815,18 @@ def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
        rebuilt["kwargs"] = kwargs
        return _RebuiltClient()

+    def _fake_resolve(force_refresh=False, refresh_if_expiring=True, **_):
+        # Pre-refresh guard reads the singleton (refresh_if_expiring=False).
+        # It must report the agent's current api_key so the equality check
+        # passes; only then does the actual force_refresh run.
+        return {
+            "api_key": "new-codex-token" if force_refresh else agent.api_key,
+            "base_url": "https://chatgpt.com/backend-api/codex",
+        }
+
    monkeypatch.setattr(
        "hermes_cli.auth.resolve_codex_runtime_credentials",
-        lambda force_refresh=True: {
-            "api_key": "new-codex-token",
-            "base_url": "https://chatgpt.com/backend-api/codex",
-        },
+        _fake_resolve,
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)