feat(xai-oauth): add xAI Grok OAuth (SuperGrok Subscription) provider

Adds a new authentication provider that lets SuperGrok subscribers sign in
to Hermes with their xAI account via the standard OAuth 2.0 PKCE loopback
flow, instead of pasting a raw API key from console.x.ai.

Highlights
----------

* OAuth 2.0 PKCE loopback login against accounts.x.ai with discovery,
  state/nonce, and a strict CORS-origin allowlist on the callback.
* Authorize URL carries `plan=generic` (required for non-allowlisted
  loopback clients) and `referrer=hermes-agent` for best-effort
  attribution in xAI's OAuth server logs.
* Token storage in `auth.json` with file-locked atomic writes; JWT
  `exp`-based expiry detection with skew; refresh-token rotation synced
  both ways between the singleton store and the credential pool so
  multi-process / multi-profile setups don't invalidate each other's
  single-use refresh tokens.
* Reactive 401 retry: on a 401 from the xAI Responses API, the agent
  refreshes the token, swaps it back into `self.api_key`, and retries the
  call once. Guarded against silent account swaps when the active key was
  sourced from a different (manual) pool entry.
* Auxiliary tasks (curator, vision, embeddings, etc.) route through a
  dedicated xAI Responses-mode auxiliary client instead of falling back to
  OpenRouter billing.
* Direct HTTP tools (`tools/xai_http.py`, transcription, TTS, image-gen
  plugin) resolve credentials through a unified runtime → singleton →
  env-var fallback chain so xai-oauth users get them for free.
* `hermes auth add xai-oauth` and `hermes auth remove xai-oauth N` are
  wired through the standard auth-commands surface; remove also clears the
  singleton `loopback_pkce` entry so the removed credential is not silently
  re-seeded into the pool on the next load.
* `hermes model` provider picker shows "xAI Grok OAuth (SuperGrok
  Subscription)" and the model-flow falls back to pool credentials when
  the singleton is missing.

Hardening
---------

* Discovery and refresh responses validate the returned `token_endpoint`
  host against the same `*.x.ai` allowlist as the authorization endpoint,
  blocking MITM persistence of a hostile endpoint.
* Discovery / refresh / token-exchange `response.json()` calls are wrapped
  to raise typed `AuthError` on malformed bodies (captive portals, proxy
  error pages) instead of leaking JSONDecodeError tracebacks.
* `prompt_cache_key` is routed through `extra_body` on the codex transport
  for xAI (sending it as a top-level kwarg raises a TypeError on openai SDK
  builds whose `Responses.stream()` signature lacks it).
* Credential-pool sync-back preserves `active_provider` so refreshing an
  OAuth entry doesn't silently flip the active provider out from under the
  running agent.

Testing
-------

* New `tests/hermes_cli/test_auth_xai_oauth_provider.py` (~63 tests) covers
  JWT expiry, OAuth URL params (plan + referrer), CORS origins, redirect
  URI validation, singleton↔pool sync, concurrency races, refresh error
  paths, runtime resolution, and malformed-JSON guards.
* Extended `test_credential_pool.py`, `test_codex_transport.py`, and
  `test_run_agent_codex_responses.py` cover the pool sync-back,
  `extra_body` routing, and 401 reactive refresh paths.
* 165 tests passing on this branch via `scripts/run_tests.sh`.
2026-05-18 04:41:56 +00:00 · 2026-05-15 16:10:38 +01:00 · 2026-05-15 16:10:38 +01:00 · b62c997973
commit b62c997973
parent 9fb40e6a3d
27 changed files with 3843 additions and 131 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -1254,6 +1254,30 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
    return api_key, base_url


+def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]:
+    """Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients.
+
+    Routes through ``hermes_cli.auth``'s runtime resolver so the auto-refresh
+    path is shared with the main agent, instead of relying on whatever raw
+    tokens happen to be sitting in auth.json or the credential pool.  Returns
+    ``None`` if the user is not authenticated with xAI Grok OAuth (so
+    ``_resolve_auto`` Step 1 falls through to the next provider in the chain).
+    """
+    try:
+        from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
+
+        creds = resolve_xai_oauth_runtime_credentials()
+    except Exception as exc:
+        logger.debug("Auxiliary xAI OAuth runtime credential resolution failed: %s", exc)
+        return None
+
+    api_key = str(creds.get("api_key") or "").strip()
+    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+    if not api_key or not base_url:
+        return None
+    return api_key, base_url
+
+
 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store.

@ -1744,6 +1768,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    return _fallback_client, model


+def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
+    """Build a CodexAuxiliaryClient for an xAI Grok OAuth-authenticated session.
+
+    xAI's ``/v1/responses`` endpoint speaks the OpenAI Responses API, so we
+    wrap a plain ``OpenAI`` client in ``CodexAuxiliaryClient`` to translate
+    ``chat.completions.create()`` calls into ``responses.stream()`` requests.
+
+    The caller must pass an explicit model — pinning a default for Grok
+    would silently rot when xAI's allowlist drifts.  Returns ``(None, None)``
+    when the user has not authenticated with xAI Grok OAuth.
+    """
+    if not model:
+        logger.warning(
+            "Auxiliary client: xai-oauth requested without a model; "
+            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
+        )
+        return None, None
+    resolved = _resolve_xai_oauth_for_aux()
+    if resolved is None:
+        return None, None
+    api_key, base_url = resolved
+    logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
+    real_client = OpenAI(api_key=api_key, base_url=base_url)
+    return CodexAuxiliaryClient(real_client, model), model
+
+
 def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
    """Build a CodexAuxiliaryClient for an explicitly-requested model.

@ -2851,6 +2901,26 @@ def resolve_provider_client(
        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

+    # ── xAI Grok OAuth (loopback PKCE → Responses API) ───────────────
+    # Without this branch, an xai-oauth main provider falls through to the
+    # generic ``oauth_external`` arm below and returns ``(None, None)``,
+    # silently re-routing every auxiliary task (compression, web extract,
+    # session search, curator, etc.) to whatever Step-2 fallback the user
+    # has configured.  Users on xAI Grok OAuth would then see surprise
+    # OpenRouter / Nous bills for side tasks they thought were running on
+    # their xAI subscription.
+    if provider == "xai-oauth":
+        client, default = _build_xai_oauth_aux_client(model)
+        if client is None:
+            logger.warning(
+                "resolve_provider_client: xai-oauth requested but no xAI "
+                "OAuth token found (run: hermes model -> xAI Grok OAuth — SuperGrok Subscription)"
+            )
+            return None, None
+        final_model = _normalize_resolved_model(model or default, provider)
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                else (client, final_model))
+
    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
@ -3201,6 +3271,8 @@ def resolve_provider_client(
            return resolve_provider_client("nous", model, async_mode)
        if provider == "openai-codex":
            return resolve_provider_client("openai-codex", model, async_mode)
+        if provider == "xai-oauth":
+            return resolve_provider_client("xai-oauth", model, async_mode)
        # Other OAuth providers not directly supported
        logger.warning("resolve_provider_client: OAuth provider %s not "
                       "directly supported, try 'auto'", provider)
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@ -726,7 +726,7 @@ def _preflight_codex_api_kwargs(
        "model", "instructions", "input", "tools", "store",
        "reasoning", "include", "max_output_tokens", "temperature",
        "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
-        "extra_headers",
+        "extra_headers", "extra_body",
    }
    normalized: Dict[str, Any] = {
        "model": model,
@ -776,6 +776,19 @@ def _preflight_codex_api_kwargs(
        if normalized_headers:
            normalized["extra_headers"] = normalized_headers

+    extra_body = api_kwargs.get("extra_body")
+    if extra_body is not None:
+        if not isinstance(extra_body, dict):
+            raise ValueError("Codex Responses request 'extra_body' must be an object.")
+        # Pass extra_body through verbatim — used by xAI Responses to
+        # carry `prompt_cache_key` as a body-level field (the documented
+        # cache-routing surface on /v1/responses). The openai SDK
+        # serializes extra_body into the JSON body without per-field
+        # type checks, so it survives Responses.stream() kwarg-signature
+        # changes that would otherwise raise TypeError before the wire.
+        if extra_body:
+            normalized["extra_body"] = dict(extra_body)
+
    if allow_stream:
        stream = api_kwargs.get("stream")
        if stream is not None and stream is not True:
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@ -29,6 +29,7 @@ from hermes_cli.auth import (
    _resolve_zai_base_url,
    _save_auth_store,
    _save_provider_state,
+    _store_provider_state,
    read_credential_pool,
    write_credential_pool,
 )
@ -539,6 +540,64 @@ class CredentialPool:
            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
        return entry

+    def _sync_xai_oauth_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync an xAI OAuth pool entry from auth.json if tokens differ.
+
+        xAI OAuth refresh tokens are single-use.  When another Hermes process
+        (or another profile sharing the same auth.json) refreshes the token,
+        it writes the new pair to ``providers["xai-oauth"]["tokens"]`` under
+        ``_auth_store_lock``.  Without this resync, our in-memory pool entry
+        keeps the consumed refresh_token and the next ``_refresh_entry`` call
+        would replay it and get a ``refresh_token_reused``-style 4xx.
+
+        Only applies to entries seeded from the singleton (``loopback_pkce``);
+        manually added entries (``manual:xai_pkce``) are independent
+        credentials with their own refresh-token lifecycle.
+        """
+        if self.provider != "xai-oauth" or entry.source != "loopback_pkce":
+            return entry
+        try:
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "xai-oauth")
+            if not isinstance(state, dict):
+                return entry
+            tokens = state.get("tokens")
+            if not isinstance(tokens, dict):
+                return entry
+            store_access = tokens.get("access_token", "")
+            store_refresh = tokens.get("refresh_token", "")
+            entry_access = entry.access_token or ""
+            entry_refresh = entry.refresh_token or ""
+            if store_access and (
+                store_access != entry_access
+                or (store_refresh and store_refresh != entry_refresh)
+            ):
+                logger.debug(
+                    "Pool entry %s: syncing xAI OAuth tokens from auth.json "
+                    "(refreshed by another process)",
+                    entry.id,
+                )
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh or entry.refresh_token,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                    "last_error_reason": None,
+                    "last_error_message": None,
+                    "last_error_reset_at": None,
+                }
+                if state.get("last_refresh"):
+                    field_updates["last_refresh"] = state["last_refresh"]
+                updated = replace(entry, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            logger.debug("Failed to sync xAI OAuth entry from auth.json: %s", exc)
+        return entry
+
    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Nous pool entry from auth.json if tokens differ.

@ -604,9 +663,22 @@ class CredentialPool:
        re-seeding a consumed single-use refresh token.

        Applies to any OAuth provider whose singleton lives in auth.json
-        (currently Nous and OpenAI Codex).
+        (currently Nous, OpenAI Codex, and xAI Grok OAuth).
+
+        ``set_active=False`` on every write: a pool sync-back is a
+        token-rotation side effect, not the user choosing a provider.
+        Using ``_save_provider_state`` (which sets ``active_provider``)
+        here would mean every Nous/Codex/xAI refresh in a multi-provider
+        setup silently flips the ``active_provider`` flag — the next
+        ``hermes`` invocation that defaults to the active provider
+        (e.g. setup wizard, ``hermes auth status``) would land on
+        whatever provider happened to refresh last, not whatever the
+        user actually chose.
        """
-        if entry.source != "device_code":
+        # Only sync entries that were seeded *from* a singleton.  Manually
+        # added pool entries (source="manual:*") are independent credentials
+        # and must not write back to the singleton.
+        if entry.source not in {"device_code", "loopback_pkce"}:
            return
        try:
            with _auth_store_lock():
@ -632,7 +704,7 @@ class CredentialPool:
                            state[extra_key] = val
                    if entry.inference_base_url:
                        state["inference_base_url"] = entry.inference_base_url
-                    _save_provider_state(auth_store, "nous", state)
+                    _store_provider_state(auth_store, "nous", state, set_active=False)

                elif self.provider == "openai-codex":
                    state = _load_provider_state(auth_store, "openai-codex")
@ -646,7 +718,21 @@ class CredentialPool:
                        tokens["refresh_token"] = entry.refresh_token
                    if entry.last_refresh:
                        state["last_refresh"] = entry.last_refresh
-                    _save_provider_state(auth_store, "openai-codex", state)
+                    _store_provider_state(auth_store, "openai-codex", state, set_active=False)
+
+                elif self.provider == "xai-oauth":
+                    state = _load_provider_state(auth_store, "xai-oauth")
+                    if not isinstance(state, dict):
+                        return
+                    tokens = state.get("tokens")
+                    if not isinstance(tokens, dict):
+                        return
+                    tokens["access_token"] = entry.access_token
+                    if entry.refresh_token:
+                        tokens["refresh_token"] = entry.refresh_token
+                    if entry.last_refresh:
+                        state["last_refresh"] = entry.last_refresh
+                    _store_provider_state(auth_store, "xai-oauth", state, set_active=False)

                else:
                    return
@ -699,6 +785,25 @@ class CredentialPool:
                    refresh_token=refreshed["refresh_token"],
                    last_refresh=refreshed.get("last_refresh"),
                )
+            elif self.provider == "xai-oauth":
+                # Adopt fresher tokens from auth.json before spending the
+                # refresh_token — single-use tokens consumed by another
+                # process (or another profile sharing the singleton) would
+                # otherwise trigger ``refresh_token_reused`` on the next
+                # POST.  Only meaningful for singleton-seeded entries.
+                synced = self._sync_xai_oauth_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                refreshed = auth_mod.refresh_xai_oauth_pure(
+                    entry.access_token,
+                    entry.refresh_token,
+                )
+                updated = replace(
+                    entry,
+                    access_token=refreshed["access_token"],
+                    refresh_token=refreshed["refresh_token"],
+                    last_refresh=refreshed.get("last_refresh"),
+                )
            elif self.provider == "nous":
                synced = self._sync_nous_entry_from_auth_store(entry)
                if synced is not entry:
@ -777,6 +882,30 @@ class CredentialPool:
                    # Credentials file had a valid (non-expired) token — use it directly
                    logger.debug("Credentials file has valid token, using without refresh")
                    return synced
+            # For xai-oauth: same race as nous — another process may have
+            # consumed the refresh token between our proactive sync and the
+            # HTTP call.  Re-check auth.json and adopt the fresh tokens if
+            # they have rotated since.  Only meaningful for singleton-seeded
+            # (loopback_pkce) entries; manual entries don't share state with
+            # the singleton.
+            if self.provider == "xai-oauth":
+                synced = self._sync_xai_oauth_entry_from_auth_store(entry)
+                if synced.refresh_token != entry.refresh_token:
+                    logger.debug(
+                        "xAI OAuth refresh failed but auth.json has newer tokens — adopting"
+                    )
+                    updated = replace(
+                        synced,
+                        last_status=STATUS_OK,
+                        last_status_at=None,
+                        last_error_code=None,
+                        last_error_reason=None,
+                        last_error_message=None,
+                        last_error_reset_at=None,
+                    )
+                    self._replace_entry(synced, updated)
+                    self._persist()
+                    return updated
            # For nous: another process may have consumed the refresh token
            # between our proactive sync and the HTTP call.  Re-sync from
            # auth.json and adopt the fresh tokens if available.
@ -829,6 +958,11 @@ class CredentialPool:
                entry.access_token,
                CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
            )
+        if self.provider == "xai-oauth":
+            return auth_mod._xai_access_token_is_expiring(
+                entry.access_token,
+                auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+            )
        if self.provider == "nous":
            # Nous refresh/mint can require network access and should happen when
            # runtime credentials are actually resolved, not merely when the pool
@ -883,6 +1017,17 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For xai-oauth singleton-seeded entries, identical pattern:
+            # an entry frozen as exhausted may simply be holding stale
+            # tokens that another process (or a fresh `hermes model` ->
+            # xAI Grok OAuth login) has since rotated in auth.json.
+            if (self.provider == "xai-oauth"
+                    and entry.source == "loopback_pkce"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_xai_oauth_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@ -1394,6 +1539,37 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                },
            )

+    elif provider == "xai-oauth":
+        # When the user logs in via ``hermes model`` -> xAI Grok OAuth,
+        # tokens are written to the auth.json singleton
+        # (``providers["xai-oauth"]``).  Surface them in the pool too so
+        # ``hermes auth list`` reflects the logged-in state and so the pool
+        # is the single source of truth for refresh during runtime resolution.
+        if _is_suppressed(provider, "loopback_pkce"):
+            return changed, active_sources
+
+        state = _load_provider_state(auth_store, "xai-oauth")
+        tokens = state.get("tokens") if isinstance(state, dict) else None
+        if isinstance(tokens, dict) and tokens.get("access_token"):
+            active_sources.add("loopback_pkce")
+            from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
+
+            base_url = DEFAULT_XAI_OAUTH_BASE_URL
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                "loopback_pkce",
+                {
+                    "source": "loopback_pkce",
+                    "auth_type": AUTH_TYPE_OAUTH,
+                    "access_token": tokens.get("access_token", ""),
+                    "refresh_token": tokens.get("refresh_token"),
+                    "base_url": base_url,
+                    "last_refresh": state.get("last_refresh"),
+                    "label": label_from_token(tokens.get("access_token", ""), "loopback_pkce"),
+                },
+            )
+
    return changed, active_sources


--- a/agent/credential_sources.py
+++ b/agent/credential_sources.py
@ -265,6 +265,31 @@ def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
    return result


+def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
+    """xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
+
+    Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
+    itself: the central dispatcher only removes the in-memory pool entry,
+    leaves ``providers.xai-oauth`` in auth.json intact, and on the next
+    ``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
+    entry from the still-present singleton — credentials reappear with no
+    user feedback. Clearing the singleton in step with the suppression set
+    by the central dispatcher makes the removal stick.
+
+    Belt-and-braces against the manual entry path: ``hermes auth add
+    xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
+    falls through to "unregistered → nothing to clean up" (correct —
+    manual entries are pool-only).
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    result.hints.append(
+        "Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed."
+    )
+    return result
+
+
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.

@ -397,6 +422,11 @@ def _register_all_sources() -> None:
        remove_fn=_remove_codex_device_code,
        description="auth.json providers.openai-codex + ~/.codex/auth.json",
    ))
+    register(RemovalStep(
+        provider="xai-oauth", source_id="loopback_pkce",
+        remove_fn=_remove_xai_oauth_loopback_pkce,
+        description="auth.json providers.xai-oauth",
+    ))
    register(RemovalStep(
        provider="qwen-oauth", source_id="qwen-cli",
        remove_fn=_remove_qwen_cli,
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@ -89,18 +89,25 @@ class ResponsesApiTransport(ProviderTransport):
        _effort_clamp = {"minimal": "low"}
        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)

+        response_tools = _responses_tools(tools)
        kwargs = {
            "model": model,
            "instructions": instructions,
            "input": _chat_messages_to_responses_input(payload_messages),
-            "tools": _responses_tools(tools),
-            "tool_choice": "auto",
-            "parallel_tool_calls": True,
+            "tools": response_tools,
            "store": False,
        }
+        if response_tools:
+            kwargs["tool_choice"] = "auto"
+            kwargs["parallel_tool_calls"] = True

        session_id = params.get("session_id")
-        if not is_github_responses and session_id:
+        # xAI's Responses API uses `prompt_cache_key` (body-level) as the
+        # cache-routing key, not a top-level kwarg — the body-field
+        # injection below survives openai SDK builds whose
+        # Responses.stream() signature drops the kwarg. Everything else
+        # that ISN'T github/xAI keeps using the typed kwarg.
+        if not is_github_responses and not is_xai_responses and session_id:
            kwargs["prompt_cache_key"] = session_id

        if reasoning_enabled and is_xai_responses:
@ -165,6 +172,22 @@ class ResponsesApiTransport(ProviderTransport):
            merged_extra_headers["x-grok-conv-id"] = session_id
            kwargs["extra_headers"] = merged_extra_headers

+            # xAI Responses cache-routing field. Lives in the request body
+            # (per https://docs.x.ai/.../prompt-caching/maximizing-cache-hits),
+            # so we ship it via extra_body — the openai SDK serializes
+            # extra_body fields into the JSON body without per-field type
+            # validation, sidestepping the TypeError that fires on
+            # Responses.stream() builds whose `prompt_cache_key` kwarg has
+            # been dropped. Setdefault preserves a caller-supplied value
+            # (e.g. request_overrides.extra_body.prompt_cache_key) over
+            # the auto-derived session_id.
+            existing_extra_body = kwargs.get("extra_body")
+            merged_extra_body: Dict[str, Any] = {}
+            if isinstance(existing_extra_body, dict):
+                merged_extra_body.update(existing_extra_body)
+            merged_extra_body.setdefault("prompt_cache_key", session_id)
+            kwargs["extra_body"] = merged_extra_body
+
        return kwargs

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: