remove Vercel AI Gateway and Vercel Sandbox (#33067)

* remove Vercel AI Gateway provider and Vercel Sandbox terminal backend Both Vercel-hosted integrations are removed end-to-end. Users on the AI Gateway should switch to OpenRouter or one of the other aggregators (Nous Portal, Kilo Code). Users on the Vercel Sandbox backend should switch to Docker, Modal, Daytona, or SSH. What's removed: - `plugins/model-providers/ai-gateway/` provider plugin - `hermes_cli/vercel_auth.py` Vercel-Sandbox auth helper - `tools/environments/vercel_sandbox.py` terminal backend - `ai-gateway` provider wiring across auth, doctor, setup, models, config, status, providers, main, web_server, model_normalize, dump - `vercel_sandbox` backend wiring across terminal_tool, file_tools, code_execution_tool, file_operations, approval, skills_tool, environments/local, credential_files, lazy_deps, prompt_builder, cli, gateway/run - `AI_GATEWAY_BASE_URL` constant, `_AI_GATEWAY_HEADERS` auxiliary-client header set, run_agent base-URL header/reasoning special-cases - `[vercel]` pyproject extra and `vercel`/`vercel-workers` from uv.lock - env vars: `AI_GATEWAY_API_KEY`, `AI_GATEWAY_BASE_URL`, `VERCEL_TOKEN`, `VERCEL_PROJECT_ID`, `VERCEL_TEAM_ID`, `VERCEL_OIDC_TOKEN`, `TERMINAL_VERCEL_RUNTIME` - Tests: deletes test_ai_gateway_models.py and test_vercel_sandbox_environment.py; scrubs references across 23 surviving test files (no entire tests deleted unless they were dedicated to AI Gateway / Sandbox) - Docs: provider tables, env-var reference, setup guides, security notes, tool config, terminal-backend tables — English plus zh-Hans i18n parity - `hermes-agent` skill: provider table entry and remote-backend list What stays (intentional): - `popular-web-designs/templates/vercel.md` — CSS design reference, unrelated to Vercel-the-AI-product - `x-vercel-id` in `stream_diag.py` headers — generic Vercel CDN response header, useful diag signal on any Vercel-hosted endpoint - `vercel-labs/agent-browser` URL in browser config — lightpanda browser project, different OSS effort - `userStories.json` historical contributor entry mentioning Vercel Sandbox — archive, not active docs Validation: - 1153 tests in the 22 targeted files pass (`scripts/run_tests.sh`) - Full repo `py_compile` clean - Live import of every touched module + invariant check (no `ai-gateway` in `PROVIDER_REGISTRY`, no `_AI_GATEWAY_HEADERS`, no `vercel_sandbox` in `_REMOTE_TERMINAL_BACKENDS`) * test: convert profile-count check from change-detector to invariant The hardcoded "== 34" assertion broke when ai-gateway was removed. Per AGENTS.md change-detector-test guidance, assert the relationship (registry count >= number of plugin dirs) instead of a literal count. Counts shift when providers are added/removed; that's expected.
2026-06-05 07:41:39 +00:00 · 2026-05-27 00:43:32 -07:00 · 2026-05-27 00:43:32 -07:00 · febc4cfec0
commit febc4cfec0
parent cb38ce28cb
95 changed files with 111 additions and 3088 deletions
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -69,29 +69,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _openrouter_catalog_cache: list[tuple[str, str]] | None = None


-# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
-# OSS / open-weight models prioritized first, then closed-source by family.
-# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
-# zai/ and xai/ without hyphens).
-VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
-    ("moonshotai/kimi-k2.6",                 "recommended"),
-    ("alibaba/qwen3.6-plus",                 ""),
-    ("zai/glm-5.1",                          ""),
-    ("minimax/minimax-m2.7",                 ""),
-    ("anthropic/claude-sonnet-4.6",          ""),
-    ("anthropic/claude-opus-4.7",            ""),
-    ("anthropic/claude-opus-4.6",            ""),
-    ("anthropic/claude-haiku-4.5",           ""),
-    ("openai/gpt-5.4",                       ""),
-    ("openai/gpt-5.4-mini",                  ""),
-    ("openai/gpt-5.3-codex",                 ""),
-    ("google/gemini-3.1-pro-preview",        ""),
-    ("google/gemini-3-flash",                ""),
-    ("google/gemini-3.1-flash-lite-preview", ""),
-    ("xai/grok-4.20-reasoning",              ""),
-]
-
-_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None


 def _codex_curated_models() -> list[str]:
@ -479,12 +456,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    ],
 }

-# Vercel AI Gateway: derive the bare-model-id catalog from the curated
-# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
-# and the static fallback catalog (bare ids) stay in sync from a single
-# source of truth.
-_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
-
 # ---------------------------------------------------------------------------
 # Nous Portal free-model helper
 # ---------------------------------------------------------------------------
@ -969,7 +940,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
    ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
-    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
 ]

@ -1033,9 +1003,6 @@ _PROVIDER_ALIASES = {
    "zen": "opencode-zen",
    "go": "opencode-go",
    "opencode-go-sub": "opencode-go",
-    "aigateway": "ai-gateway",
-    "vercel": "ai-gateway",
-    "vercel-ai-gateway": "ai-gateway",
    "kilo": "kilocode",
    "kilo-code": "kilocode",
    "kilo-gateway": "kilocode",
@ -1220,95 +1187,6 @@ def get_curated_nous_model_ids() -> list[str]:
    return list(_PROVIDER_MODELS.get("nous", []))


-def _ai_gateway_model_is_free(pricing: Any) -> bool:
-    """Return True if an AI Gateway model has $0 input AND output pricing."""
-    if not isinstance(pricing, dict):
-        return False
-    try:
-        return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
-    except (TypeError, ValueError):
-        return False
-
-
-def fetch_ai_gateway_models(
-    timeout: float = 8.0,
-    *,
-    force_refresh: bool = False,
-) -> list[tuple[str, str]]:
-    """Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
-    global _ai_gateway_catalog_cache
-
-    if _ai_gateway_catalog_cache is not None and not force_refresh:
-        return list(_ai_gateway_catalog_cache)
-
-    from hermes_constants import AI_GATEWAY_BASE_URL
-
-    fallback = list(VERCEL_AI_GATEWAY_MODELS)
-    preferred_ids = [mid for mid, _ in fallback]
-
-    try:
-        req = urllib.request.Request(
-            f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
-            headers={"Accept": "application/json"},
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            payload = json.loads(resp.read().decode())
-    except Exception:
-        return list(_ai_gateway_catalog_cache or fallback)
-
-    live_items = payload.get("data", [])
-    if not isinstance(live_items, list):
-        return list(_ai_gateway_catalog_cache or fallback)
-
-    live_by_id: dict[str, dict[str, Any]] = {}
-    for item in live_items:
-        if not isinstance(item, dict):
-            continue
-        mid = str(item.get("id") or "").strip()
-        if not mid:
-            continue
-        live_by_id[mid] = item
-
-    curated: list[tuple[str, str]] = []
-    for preferred_id in preferred_ids:
-        live_item = live_by_id.get(preferred_id)
-        if live_item is None:
-            continue
-        desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
-        curated.append((preferred_id, desc))
-
-    if not curated:
-        return list(_ai_gateway_catalog_cache or fallback)
-
-    # If the live catalog offers a free Moonshot model, auto-promote it to
-    # position #1 as "recommended" — dynamic discovery without a PR.
-    free_moonshot = next(
-        (
-            mid
-            for mid, item in live_by_id.items()
-            if mid.startswith("moonshotai/")
-            and _ai_gateway_model_is_free(item.get("pricing"))
-        ),
-        None,
-    )
-    if free_moonshot:
-        curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
-        curated.insert(0, (free_moonshot, "recommended"))
-    else:
-        first_id, _ = curated[0]
-        curated[0] = (first_id, "recommended")
-
-    _ai_gateway_catalog_cache = curated
-    return list(curated)
-
-
-def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
-    """Return just the AI Gateway model-id strings."""
-    return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
-
-
-
-
 # ---------------------------------------------------------------------------
 # Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models
 # ---------------------------------------------------------------------------
@ -1454,56 +1332,6 @@ def fetch_models_with_pricing(
    return result


-def fetch_ai_gateway_pricing(
-    timeout: float = 8.0,
-    *,
-    force_refresh: bool = False,
-) -> dict[str, dict[str, str]]:
-    """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
-
-    Vercel uses ``input`` / ``output`` field names; hermes's picker expects
-    ``prompt`` / ``completion``. This translates. Cache read/write field names
-    already match.
-    """
-    from hermes_constants import AI_GATEWAY_BASE_URL
-
-    cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
-    if not force_refresh and cache_key in _pricing_cache:
-        return _pricing_cache[cache_key]
-
-    try:
-        req = urllib.request.Request(
-            f"{cache_key}/models",
-            headers={"Accept": "application/json"},
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            payload = json.loads(resp.read().decode())
-    except Exception:
-        _pricing_cache[cache_key] = {}
-        return {}
-
-    result: dict[str, dict[str, str]] = {}
-    for item in payload.get("data", []):
-        if not isinstance(item, dict):
-            continue
-        mid = item.get("id")
-        pricing = item.get("pricing")
-        if not (mid and isinstance(pricing, dict)):
-            continue
-        entry: dict[str, str] = {
-            "prompt": str(pricing.get("input", "")),
-            "completion": str(pricing.get("output", "")),
-        }
-        if pricing.get("input_cache_read"):
-            entry["input_cache_read"] = str(pricing["input_cache_read"])
-        if pricing.get("input_cache_write"):
-            entry["input_cache_write"] = str(pricing["input_cache_write"])
-        result[mid] = entry
-
-    _pricing_cache[cache_key] = result
-    return result
-
-
 def _resolve_openrouter_api_key() -> str:
    """Best-effort OpenRouter API key for pricing fetch."""
    return os.getenv("OPENROUTER_API_KEY", "").strip()
@ -1535,7 +1363,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:


 def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
-    """Return live pricing for providers that support it (openrouter, nous, ai-gateway, novita)."""
+    """Return live pricing for providers that support it (openrouter, nous, novita)."""
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
        return fetch_models_with_pricing(
@ -1543,8 +1371,6 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
            base_url="https://openrouter.ai/api",
            force_refresh=force_refresh,
        )
-    if normalized == "ai-gateway":
-        return fetch_ai_gateway_pricing(force_refresh=force_refresh)
    if normalized == "novita":
        return _fetch_novita_pricing(force_refresh=force_refresh)
    if normalized == "nous":
@ -1574,9 +1400,8 @@ def _fetch_novita_pricing(
    0.0001 USD. Convert them to the per-token strings used by the shared
    pricing formatter.

-    Results are cached in ``_pricing_cache`` keyed on the resolved base URL,
-    matching the pattern used by ``fetch_ai_gateway_pricing`` — without this,
-    every menu render or pricing lookup re-hits the network.
+    Results are cached in ``_pricing_cache`` keyed on the resolved base URL —
+    without this, every menu render or pricing lookup re-hits the network.
    """
    api_key = os.getenv("NOVITA_API_KEY", "").strip()
    if not api_key:
@ -1763,7 +1588,7 @@ def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:


 _AGGREGATOR_PROVIDERS = frozenset(
-    {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
+    {"nous", "openrouter", "copilot", "kilocode"}
 )


@ -2110,7 +1935,7 @@ def _resolve_copilot_catalog_api_key() -> str:
 #   - "nous": curated list and Portal /models endpoint are the source of
 #     truth for the subscription tier.
 # Also excluded: providers that already have dedicated live-endpoint
-# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
+# branches below (copilot, anthropic, ollama-cloud, custom,
 # stepfun, openai-codex) — those paths handle freshness themselves.
 _MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
    "opencode-go",
@ -2235,10 +2060,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
        live = _fetch_anthropic_models()
        if live:
            return live
-    if normalized == "ai-gateway":
-        live = _fetch_ai_gateway_models()
-        if live:
-            return live
    if normalized == "ollama-cloud":
        live = fetch_ollama_cloud_models(force_refresh=force_refresh)
        if live:
@ -3152,36 +2973,6 @@ def probe_api_models(
    }


-def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
-    """Fetch available language models with tool-use from AI Gateway."""
-    api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip()
-    if not api_key:
-        return None
-    base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
-    if not base_url:
-        from hermes_constants import AI_GATEWAY_BASE_URL
-        base_url = AI_GATEWAY_BASE_URL
-
-    url = base_url.rstrip("/") + "/models"
-    headers: dict[str, str] = {
-        "Authorization": f"Bearer {api_key}",
-        "User-Agent": _HERMES_USER_AGENT,
-    }
-    req = urllib.request.Request(url, headers=headers)
-    try:
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-            return [
-                m["id"]
-                for m in data.get("data", [])
-                if m.get("id")
-                and m.get("type") == "language"
-                and "tool-use" in (m.get("tags") or [])
-            ]
-    except Exception:
-        return None
-
-
 def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],