feat(gemini): add Google Gemini (OAuth) inference provider

Adds 'google-gemini-cli' as a first-class inference provider using
Authorization Code + PKCE (S256) OAuth against Google's accounts.google.com,
hitting the OpenAI-compatible Gemini endpoint (v1beta/openai) with a Bearer
access token. Users sign in with their Google account — no API-key copy-paste.

Synthesized from three competing PRs per multi-PR design analysis:
- Clean PKCE module structure shaped after #10176 (thanks @sliverp)
- Cross-process file lock (fcntl POSIX / msvcrt Windows) with thread-local
  re-entrancy counter from #10779 (thanks @newarthur)
- Rejects #6745's subprocess approach entirely (different paradigm)

Improvements over the competing PRs:
- Port fallback: if 8085 is taken, bind ephemeral port instead of failing
- Preserves refresh_token when Google omits one (correct per Google spec)
- Accepts both full redirect URL and bare code in paste fallback
- doctor.py health check (neither PR had this)
- No regression in _OAUTH_CAPABLE_PROVIDERS (#10779 dropped anthropic/nous)
- No bundled unrelated features (#10779 mixed in persona/personality routing)

Storage:
- ~/.hermes/auth/google_oauth.json (0o600, atomic write via fsync+replace)
- Cross-process fcntl/msvcrt lock with 30s timeout
- Refresh 5 min before expiry on every request via get_valid_access_token

Provider registration (9-point checklist):
- auth.py: PROVIDER_REGISTRY entry, aliases (gemini-cli, gemini-oauth),
  resolve_gemini_oauth_runtime_credentials, get_gemini_oauth_auth_status,
  get_auth_status() dispatch
- models.py: _PROVIDER_MODELS catalog, CANONICAL_PROVIDERS entry, aliases
- providers.py: HermesOverlay, ALIASES entries
- runtime_provider.py: resolve_runtime_provider() dispatch branch
- config.py: OPTIONAL_ENV_VARS for HERMES_GEMINI_CLIENT_ID/_SECRET/_BASE_URL
- main.py: _model_flow_google_gemini_cli, select_provider_and_model dispatch
- auth_commands.py: add-to-pool handler, _OAUTH_CAPABLE_PROVIDERS
- doctor.py: 'Google Gemini OAuth' status line

Client ID: Not shipped. Users register a Desktop OAuth client in Google Cloud
Console (Generative Language API) and set HERMES_GEMINI_CLIENT_ID in
~/.hermes/.env. Documented in website/docs/integrations/providers.md.

Tests: 44 new unit tests covering PKCE S256 roundtrip, credential I/O
(permissions + atomic write), cross-process lock, port fallback, paste
fallback (URL + bare code), token exchange/refresh, rotation handling,
get_valid_access_token refresh semantics, runtime provider dispatch,
alias resolution, and regression guards for _OAUTH_CAPABLE_PROVIDERS.

Docs: new 'Google Gemini via OAuth' section in providers.md with full
walkthrough including GCP Desktop OAuth client registration, and env var
table updated in environment-variables.md.

Closes partial work in #6745, #10176, #10779 (to be closed with credit
once this merges).
This commit is contained in:
Teknium 2026-04-16 15:08:49 -07:00
parent 387aa9afc9
commit 1e5ee33f68
No known key found for this signature in database
12 changed files with 1693 additions and 4 deletions

View file

@ -78,6 +78,12 @@ QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
# Google Gemini OAuth (google-gemini-cli provider)
# Targets the OpenAI-compatible Gemini endpoint (v1beta/openai). Client id is
# sourced from HERMES_GEMINI_CLIENT_ID at runtime via agent.google_oauth.
DEFAULT_GEMINI_OAUTH_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai"
GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 300 # refresh 5 min before expiry
# =============================================================================
# Provider Registry
@ -122,6 +128,13 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="oauth_external",
inference_base_url=DEFAULT_QWEN_BASE_URL,
),
"google-gemini-cli": ProviderConfig(
id="google-gemini-cli",
name="Google Gemini (OAuth)",
auth_type="oauth_external",
inference_base_url=DEFAULT_GEMINI_OAUTH_BASE_URL,
base_url_env_var="HERMES_GEMINI_BASE_URL",
),
"copilot": ProviderConfig(
id="copilot",
name="GitHub Copilot",
@ -939,7 +952,7 @@ def resolve_provider(
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
"opencode": "opencode-zen", "zen": "opencode-zen",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli", "gemini-cli": "google-gemini-cli",
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
@ -1251,6 +1264,88 @@ def get_qwen_auth_status() -> Dict[str, Any]:
}
# =============================================================================
# Google Gemini OAuth (google-gemini-cli) — browser-based PKCE flow.
#
# Unlike qwen-oauth / openai-codex which read tokens produced by external CLI
# tools, Hermes runs the PKCE flow itself via agent.google_oauth. Tokens live in
# ~/.hermes/auth/google_oauth.json, and runtime credential resolution refreshes
# the access token 5 min before expiry on every request.
# =============================================================================
def resolve_gemini_oauth_runtime_credentials(
*,
force_refresh: bool = False,
) -> Dict[str, Any]:
"""Resolve runtime OAuth creds for google-gemini-cli.
Raises AuthError if the user has not completed the OAuth login flow or if
refresh fails irrecoverably.
"""
try:
from agent.google_oauth import (
GoogleOAuthError,
_credentials_path,
get_valid_access_token,
load_credentials,
)
except ImportError as exc: # pragma: no cover — only hit if module is missing
raise AuthError(
f"agent.google_oauth is not importable: {exc}",
provider="google-gemini-cli",
code="google_oauth_module_missing",
) from exc
try:
access_token = get_valid_access_token(force_refresh=force_refresh)
except GoogleOAuthError as exc:
raise AuthError(
str(exc),
provider="google-gemini-cli",
code=exc.code,
) from exc
creds = load_credentials()
base_url = (
os.getenv("HERMES_GEMINI_BASE_URL", "").strip().rstrip("/")
or DEFAULT_GEMINI_OAUTH_BASE_URL
)
return {
"provider": "google-gemini-cli",
"base_url": base_url,
"api_key": access_token,
"source": "google-oauth",
"expires_at_ms": int((creds.expires_at if creds else 0) * 1000) or None,
"auth_file": str(_credentials_path()),
"email": (creds.email if creds else "") or "",
}
def get_gemini_oauth_auth_status() -> Dict[str, Any]:
"""Return a status dict for `hermes auth list` / `hermes status`."""
try:
from agent.google_oauth import _credentials_path, load_credentials
except ImportError:
return {"logged_in": False, "error": "agent.google_oauth unavailable"}
auth_path = _credentials_path()
creds = load_credentials()
if creds is None or not creds.access_token:
return {
"logged_in": False,
"auth_file": str(auth_path),
"error": "not logged in",
}
return {
"logged_in": True,
"auth_file": str(auth_path),
"source": "google-oauth",
"api_key": creds.access_token,
"expires_at_ms": int(creds.expires_at * 1000),
"email": creds.email,
}
# =============================================================================
# SSH / remote session detection
# =============================================================================
@ -2469,6 +2564,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_codex_auth_status()
if target == "qwen-oauth":
return get_qwen_auth_status()
if target == "google-gemini-cli":
return get_gemini_oauth_auth_status()
if target == "copilot-acp":
return get_external_process_provider_status(target)
# API-key providers