From b0135c741d2ed1ab413399759679682eaaac34e6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 24 May 2026 23:05:25 -0700 Subject: [PATCH] diag(xai-oauth): log loopback callback hits + wait-timeout outcome (#27385) (#31894) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #27385 reports that on macOS the browser sees the xAI 'authorization received' success page but Hermes still raises xai_callback_timeout. The loopback HTTP handler was silent — no log line on receipt, no log line on wait timeout — so triaging the gap between 'browser saw success' and 'CLI saw timeout' required either a code change or guesswork. Adds two INFO log lines: - Per callback hit (handler): path, has_code, has_state, has_error, truncated User-Agent. Booleans / fingerprints only — no actual code/state strings leak. - On wait timeout: report whether result.code or result.error was populated at deadline. Distinguishes three failure modes: 1. No hit log + timeout log w/ has_code=False has_error=False → xAI's IDP never reached the loopback (firewall, port-binding, IPv6/IPv4 mismatch, browser blocked private-network access). 2. Hit log w/ has_code=False has_error=False + timeout log → xAI hit the loopback without OAuth params (the bare-URL case the handler already 400s on). 3. Hit log w/ has_code=True + timeout log w/ has_code=False → result_lock contention or race; would indicate a real bug. 133/133 in tests/hermes_cli/test_auth_xai_oauth_provider.py, tests/hermes_cli/test_xai_oauth_pkce_token_exchange.py, and tests/run_agent/test_codex_xai_oauth_recovery.py. --- hermes_cli/auth.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 4cc8d9df589..57bed0c9e26 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2470,6 +2470,32 @@ def _make_xai_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequest "error_description": params.get("error_description", [None])[0], } + # Diagnostic logging — emits at INFO so reporters of loopback bugs + # (#27385 — "callback received but Hermes times out") can produce + # actionable evidence without a code change. Logged values are + # fingerprints / booleans only; no actual code/state strings leak + # into the log file. Run with ``HERMES_LOG_LEVEL=INFO`` (or check + # ``~/.hermes/logs/agent.log`` which captures INFO+ unconditionally). + try: + logger.info( + "xAI loopback callback received: path=%s has_code=%s has_state=%s has_error=%s " + "ua=%s", + parsed.path, + incoming["code"] is not None, + incoming["state"] is not None, + incoming["error"] is not None, + (self.headers.get("User-Agent") or "")[:80], + ) + if incoming["error"]: + logger.info( + "xAI loopback callback carries error=%s error_description=%s", + incoming["error"], + (incoming["error_description"] or "")[:200], + ) + except Exception: + # Logging must never break the OAuth flow. + pass + # Treat a hit on the callback path with neither `code` nor `error` # as a missing OAuth callback (e.g. xAI's auth backend failed to # redirect and the user navigated to the bare loopback URL by hand). @@ -2574,6 +2600,17 @@ def _xai_wait_for_callback( server.shutdown() server.server_close() thread.join(timeout=1.0) + # Diagnostic: distinguish "no callback ever arrived" from "callback + # arrived but result wasn't populated" (#27385). The per-hit handler + # also logs at INFO; if neither line appears, xAI's IDP never reached + # the loopback at all (firewall, port-binding, IPv6/IPv4 mismatch). + logger.info( + "xAI loopback wait timed out after %.0fs with no usable callback " + "(result.code=%s result.error=%s)", + max(5.0, timeout_seconds), + result["code"] is not None, + result["error"] is not None, + ) raise AuthError( "xAI authorization timed out waiting for the local callback.", provider="xai-oauth",