fix(desktop): address second Copilot pass on xAI loopback flow

- onboarding: openSignInUrl now falls back to window.open when the desktop
  bridge's openExternal throws/rejects (OS handler missing, user denied),
  not just when the bridge is absent
- web_server: cancelling a loopback session shuts down the 127.0.0.1
  callback server + joins its thread immediately, freeing the port instead
  of holding it until the wait times out (+ regression test)
- web_server: document the new "loopback" flow in the /api/providers/oauth
  enum, the poll-endpoint docstring, and the Phase 2 flow comment block
This commit is contained in:
Brooklyn Nicholson 2026-06-02 18:14:00 -05:00
parent 3be9fb7317
commit d963ad56c1
3 changed files with 90 additions and 5 deletions

View file

@ -417,10 +417,18 @@ export async function refreshOnboarding(ctx: OnboardingContext) {
// apps/desktop/src/app/artifacts/index.tsx.
async function openSignInUrl(url: string) {
if (window.hermesDesktop?.openExternal) {
await window.hermesDesktop.openExternal(url)
} else {
window.open(url, '_blank', 'noopener,noreferrer')
try {
await window.hermesDesktop.openExternal(url)
return
} catch {
// Bridge present but failed (no OS handler, user denied, etc.). Fall
// through to window.open so the sign-in URL still opens and the flow
// doesn't strand a pending OAuth session in a waiting state.
}
}
window.open(url, '_blank', 'noopener,noreferrer')
}
export async function startProviderOAuth(provider: OAuthProvider, ctx: OnboardingContext) {

View file

@ -3022,7 +3022,7 @@ async def list_oauth_providers():
Response shape (per provider):
id stable identifier (used in DELETE path)
name human label
flow "pkce" | "device_code" | "external"
flow "pkce" | "device_code" | "external" | "loopback"
cli_command fallback CLI command for users to run manually
docs_url external docs/portal link for the "Learn more" link
status:
@ -3122,6 +3122,19 @@ async def disconnect_oauth_provider(provider_id: str, request: Request):
# 4. On "approved" the background thread has already saved creds; UI
# refreshes the providers list.
#
# Loopback PKCE (xAI Grok):
# 1. POST /api/providers/oauth/xai-oauth/start
# → server binds a 127.0.0.1 callback listener, builds the xAI
# authorize URL, spawns a background worker waiting on the redirect
# → returns { session_id, flow: "loopback", auth_url, expires_in }
# 2. UI opens auth_url in the browser. There is NO user_code/code to
# paste — the redirect lands back on the loopback listener.
# 3. UI polls GET /api/providers/oauth/{provider}/poll/{session_id}
# (same endpoint as device_code) until status != "pending".
# 4. The worker exchanges the code, persists creds, sets "approved".
# DELETE /sessions/{id} cancels: the worker bails before persisting
# and the callback server is shut down to free the port immediately.
#
# Sessions are kept in-memory only (single-process FastAPI) and time out
# after 15 minutes. A periodic cleanup runs on each /start call to GC
# expired sessions so the dict doesn't grow without bound.
@ -4038,7 +4051,13 @@ async def submit_oauth_code(provider_id: str, body: OAuthSubmitBody, request: Re
@app.get("/api/providers/oauth/{provider_id}/poll/{session_id}")
async def poll_oauth_session(provider_id: str, session_id: str):
"""Poll a device-code session's status (no auth — read-only state)."""
"""Poll a session's status (no auth — read-only state).
Shared by the device-code flows (Nous, OpenAI Codex, MiniMax) and the
loopback flow (xAI Grok). Both surface progress through the same
background-worker-updated ``status`` field, so a single poll endpoint
serves them all.
"""
with _oauth_sessions_lock:
sess = _oauth_sessions.get(session_id)
if not sess:
@ -4061,6 +4080,24 @@ async def cancel_oauth_session(session_id: str, request: Request):
sess = _oauth_sessions.pop(session_id, None)
if sess is None:
return {"ok": False, "message": "session not found"}
# Loopback sessions own a bound 127.0.0.1 callback server. Without an
# explicit shutdown the worker would keep that port held until
# _xai_wait_for_callback times out (up to 5 min). Free it immediately so
# an orphaned listener can't block a subsequent sign-in attempt.
if sess.get("flow") == "loopback":
server = sess.get("server")
thread = sess.get("thread")
try:
if server is not None:
server.shutdown()
server.server_close()
except Exception:
pass
try:
if thread is not None:
thread.join(timeout=1.0)
except Exception:
pass
return {"ok": True, "session_id": session_id}

View file

@ -533,6 +533,46 @@ def test_xai_loopback_worker_skips_persist_when_cancelled(monkeypatch):
assert session_id not in ws._oauth_sessions
def test_cancel_loopback_session_shuts_down_callback_server():
"""Cancelling a loopback session must free the bound callback port now."""
from hermes_cli import web_server as ws
shutdown_calls = {"shutdown": 0, "close": 0, "join": 0}
class _FakeServer:
def shutdown(self):
shutdown_calls["shutdown"] += 1
def server_close(self):
shutdown_calls["close"] += 1
class _FakeThread:
def join(self, timeout=None):
shutdown_calls["join"] += 1
session_id = "xai-loopback-cancel-shutdown-test"
ws._oauth_sessions[session_id] = {
"session_id": session_id,
"provider": "xai-oauth",
"flow": "loopback",
"created_at": time.time(),
"status": "pending",
"server": _FakeServer(),
"thread": _FakeThread(),
}
try:
resp = client.delete(
f"/api/providers/oauth/sessions/{session_id}", headers=HEADERS
)
assert resp.status_code == 200, resp.text
assert resp.json()["ok"] is True
assert shutdown_calls == {"shutdown": 1, "close": 1, "join": 1}
assert session_id not in ws._oauth_sessions
finally:
ws._oauth_sessions.pop(session_id, None)
def test_unknown_pkce_provider_rejected_cleanly():
"""A future PKCE provider without an explicit branch must NOT silently route to Anthropic.