feat(gemini): block free-tier keys at setup + surface guidance on 429 (#15100)

Google AI Studio's free tier (<= 250 req/day for gemini-2.5-flash) is exhausted in a handful of agent turns, so the setup wizard now refuses to wire up Gemini when the supplied key is on the free tier, and the runtime 429 handler appends actionable billing guidance. Setup-time probe (hermes_cli/main.py): - `_model_flow_api_key_provider` fires one minimal generateContent call when provider_id == 'gemini' and classifies the response as free/paid/unknown via x-ratelimit-limit-requests-per-day header or 429 body containing 'free_tier'. - Free -> print block message, refuse to save the provider, return. - Paid -> 'Tier check: paid' and proceed. - Unknown (network/auth error) -> 'could not verify', proceed anyway. Runtime 429 handler (agent/gemini_native_adapter.py): - `gemini_http_error` appends billing guidance when the 429 error body mentions 'free_tier', catching users who bypass setup by putting GOOGLE_API_KEY directly in .env. Tests: 21 unit tests for the probe + error path, 4 tests for the setup-flow block. All 67 existing gemini tests still pass.
2026-04-25 00:51:20 +00:00 · 2026-04-24 04:46:17 -07:00 · 2026-04-24 04:46:17 -07:00 · 3aa1a41e88
commit 3aa1a41e88
parent 346601ca8d
4 changed files with 463 additions and 0 deletions
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool:
    return not normalized.endswith("/openai")


+def probe_gemini_tier(
+    api_key: str,
+    base_url: str = DEFAULT_GEMINI_BASE_URL,
+    *,
+    model: str = "gemini-2.5-flash",
+    timeout: float = 10.0,
+) -> str:
+    """Probe a Google AI Studio API key and return its tier.
+
+    Returns one of:
+
+    - ``"free"``    -- key is on the free tier (unusable with Hermes)
+    - ``"paid"``    -- key is on a paid tier
+    - ``"unknown"`` -- probe failed; callers should proceed without blocking.
+    """
+    key = (api_key or "").strip()
+    if not key:
+        return "unknown"
+
+    normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
+    if not normalized_base:
+        normalized_base = DEFAULT_GEMINI_BASE_URL
+    if normalized_base.lower().endswith("/openai"):
+        normalized_base = normalized_base[: -len("/openai")]
+
+    url = f"{normalized_base}/models/{model}:generateContent"
+    payload = {
+        "contents": [{"role": "user", "parts": [{"text": "hi"}]}],
+        "generationConfig": {"maxOutputTokens": 1},
+    }
+
+    try:
+        with httpx.Client(timeout=timeout) as client:
+            resp = client.post(
+                url,
+                params={"key": key},
+                json=payload,
+                headers={"Content-Type": "application/json"},
+            )
+    except Exception as exc:
+        logger.debug("probe_gemini_tier: network error: %s", exc)
+        return "unknown"
+
+    headers_lower = {k.lower(): v for k, v in resp.headers.items()}
+    rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
+    if rpd_header:
+        try:
+            rpd_val = int(rpd_header)
+        except (TypeError, ValueError):
+            rpd_val = None
+        # Published free-tier daily caps (Dec 2025):
+        #   gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
+        # Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
+        if rpd_val is not None and rpd_val <= 1000:
+            return "free"
+        if rpd_val is not None and rpd_val > 1000:
+            return "paid"
+
+    if resp.status_code == 429:
+        body_text = ""
+        try:
+            body_text = resp.text or ""
+        except Exception:
+            body_text = ""
+        if "free_tier" in body_text.lower():
+            return "free"
+        return "paid"
+
+    if 200 <= resp.status_code < 300:
+        return "paid"
+
+    return "unknown"
+
+
+def is_free_tier_quota_error(error_message: str) -> bool:
+    """Return True when a Gemini 429 message indicates free-tier exhaustion."""
+    if not error_message:
+        return False
+    return "free_tier" in error_message.lower()
+
+
+_FREE_TIER_GUIDANCE = (
+    "\n\nYour Google API key is on the free tier (<= 250 requests/day for "
+    "gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
+    "so the free tier is exhausted in a handful of messages and cannot sustain "
+    "an agent session. Enable billing on your Google Cloud project and "
+    "regenerate the key in a billing-enabled project: "
+    "https://aistudio.google.com/apikey"
+)
+
+
 class GeminiAPIError(Exception):
    """Error shape compatible with Hermes retry/error classification."""

@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
    else:
        message = f"Gemini returned HTTP {status}: {body_text[:500]}"

+    # Free-tier quota exhaustion -> append actionable guidance so users who
+    # bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
+    # that the free tier cannot sustain an agent session.
+    if status == 429 and is_free_tier_quota_error(err_message or body_text):
+        message = message + _FREE_TIER_GUIDANCE
+
    return GeminiAPIError(
        message,
        code=code,