feat: curated picker with live pricing for ai-gateway provider

- Curated AI_GATEWAY_MODELS list in hermes_cli/models.py (OSS first, kimi-k2.5 as recommended default).
- fetch_ai_gateway_models() filters the curated list against the live /v1/models catalog; falls back to the last cached list, or the static snapshot, on network failure.
- fetch_ai_gateway_pricing() translates Vercel's input/output field names to the prompt/completion shape the shared picker expects; carries input_cache_read / input_cache_write through unchanged.
- get_pricing_for_provider() now handles ai-gateway.
- _model_flow_ai_gateway() prints the dashboard URL and prompts for an API key when none is set, then shows a picker with a pricing column; select_provider_and_model() routes ai-gateway to it instead of the generic api-key flow.
2026-06-10 08:32:09 +00:00 · 2026-04-19 23:05:14 -07:00 · 2026-04-19 23:05:14 -07:00 · 7004374404
commit 7004374404
parent b117538798
3 changed files with 335 additions and 3 deletions
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -193,7 +193,7 @@ import time as _time
 from datetime import datetime

 from hermes_cli import __version__, __release_date__
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

@ -1528,6 +1528,8 @@ def select_provider_and_model(args=None):
    # Step 2: Provider-specific setup + model selection
    if selected_provider == "openrouter":
        _model_flow_openrouter(config, current_model)
+    elif selected_provider == "ai-gateway":
+        _model_flow_ai_gateway(config, current_model)
    elif selected_provider == "nous":
        _model_flow_nous(config, current_model, args=args)
    elif selected_provider == "openai-codex":
@ -1573,7 +1575,6 @@ def select_provider_and_model(args=None):
        "kilocode",
        "opencode-zen",
        "opencode-go",
-        "ai-gateway",
        "alibaba",
        "huggingface",
        "xiaomi",
@ -2045,6 +2046,62 @@ def _model_flow_openrouter(config, current_model=""):
        print("No change.")


+def _model_flow_ai_gateway(config, current_model=""):
+    """Set up Vercel AI Gateway as the provider and pick a default model.
+
+    If ``AI_GATEWAY_API_KEY`` is not configured, the user is asked for one;
+    an empty entry, Ctrl-C or EOF returns without changing anything. The
+    model picker is then shown with pricing, and on a selection the choice
+    is saved via ``_save_model_choice`` and the config's ``model`` section is
+    pointed at the gateway (provider, base_url, api_mode).
+    """
+    from hermes_cli.auth import (
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value
+
+    if not get_env_value("AI_GATEWAY_API_KEY"):
+        print("No Vercel AI Gateway API key configured.")
+        print("Get one at: https://vercel.com/dashboard/ai-gateway")
+        print()
+
+        import getpass
+
+        try:
+            entered = getpass.getpass("AI Gateway API key (or Enter to cancel): ")
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return
+        entered = entered.strip()
+        if not entered:
+            print("Cancelled.")
+            return
+        save_env_value("AI_GATEWAY_API_KEY", entered)
+        print("API key saved.")
+        print()
+
+    from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
+
+    # Both lookups bypass their caches so the picker shows current data.
+    choices = ai_gateway_model_ids(force_refresh=True)
+    price_table = get_pricing_for_provider("ai-gateway", force_refresh=True)
+
+    selected = _prompt_model_selection(
+        choices, current_model=current_model, pricing=price_table
+    )
+    if not selected:
+        print("No change.")
+        return
+
+    _save_model_choice(selected)
+
+    from hermes_cli.config import load_config, save_config
+
+    cfg = load_config()
+    model_cfg = cfg.get("model")
+    if not isinstance(model_cfg, dict):
+        # cfg["model"] may hold a bare value instead of a mapping; keep it
+        # under "default" (or start empty) so the keys below can be set.
+        model_cfg = {"default": model_cfg} if model_cfg else {}
+        cfg["model"] = model_cfg
+    model_cfg.update(
+        provider="ai-gateway",
+        base_url=AI_GATEWAY_BASE_URL,
+        api_mode="chat_completions",
+    )
+    save_config(cfg)
+    deactivate_provider()
+    print(f"Default model set to: {selected} (via Vercel AI Gateway)")
+
+
 def _model_flow_nous(config, current_model="", args=None):
    """Nous Portal provider: ensure logged in, then pick model."""
    from hermes_cli.auth import (
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -68,6 +68,29 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _openrouter_catalog_cache: list[tuple[str, str]] | None = None


+# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
+# OSS / open-weight models prioritized first, then closed-source by family.
+# Each entry is (model id, picker description). fetch_ai_gateway_models() also
+# uses these ids, in this order, as the allow-list it filters the live catalog
+# against.
+AI_GATEWAY_MODELS: list[tuple[str, str]] = [
+    ("moonshotai/kimi-k2.5",                 "recommended"),
+    ("qwen/qwen3.6-plus",                    ""),
+    ("z-ai/glm-5.1",                         ""),
+    ("minimax/minimax-m2.7",                 ""),
+    ("anthropic/claude-sonnet-4.6",          ""),
+    ("anthropic/claude-opus-4.7",            ""),
+    ("anthropic/claude-opus-4.6",            ""),
+    ("anthropic/claude-haiku-4.5",           ""),
+    ("openai/gpt-5.4",                       ""),
+    ("openai/gpt-5.4-mini",                  ""),
+    ("openai/gpt-5.3-codex",                 ""),
+    ("google/gemini-3.1-pro-preview",        ""),
+    ("google/gemini-3-flash-preview",        ""),
+    ("google/gemini-3.1-flash-lite-preview", ""),
+    ("x-ai/grok-4.20",                       ""),
+]
+
+# Curated list produced by the last successful live fetch in
+# fetch_ai_gateway_models(); stays None until one succeeds.
+_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
+
+
 def _codex_curated_models() -> list[str]:
    """Derive the openai-codex curated list from codex_models.py.

@ -729,6 +752,77 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


+def _ai_gateway_model_is_free(pricing: Any) -> bool:
+    """Return True only if *pricing* states a $0 rate for both input and output.
+
+    Args:
+        pricing: The ``pricing`` object of a catalog entry. Anything that is
+            not a dict is treated as "not free".
+
+    Returns:
+        True when both ``input`` and ``output`` are present and parse to 0.
+        A missing, ``None`` or non-numeric rate returns False, so an entry
+        with incomplete pricing is never labelled free.
+    """
+    if not isinstance(pricing, dict):
+        return False
+    try:
+        # Index instead of .get(..., "0"): an absent rate must not be read as
+        # $0, so a missing key raises KeyError and falls through to False.
+        return float(pricing["input"]) == 0 and float(pricing["output"]) == 0
+    except (KeyError, TypeError, ValueError):
+        return False
+
+
+def fetch_ai_gateway_models(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> list[tuple[str, str]]:
+    """Return the curated AI Gateway picker list, refreshed from the live catalog when possible.
+
+    The ids in ``AI_GATEWAY_MODELS`` are filtered down to those present in the
+    gateway's live ``/models`` response, keeping curated order. A kept model is
+    tagged ``"free"`` when its live pricing is $0, and the first kept model is
+    always tagged ``"recommended"`` (replacing a ``"free"`` tag if it had one).
+
+    Args:
+        timeout: Seconds to wait for the catalog request.
+        force_refresh: Ignore the in-process cache and query the gateway again.
+
+    Returns:
+        A new list of ``(model_id, description)`` tuples. If the request fails,
+        the response is not shaped as expected, or none of the curated ids are
+        live, the last successfully cached list is returned, or the static
+        ``AI_GATEWAY_MODELS`` snapshot when nothing has been cached.
+    """
+    global _ai_gateway_catalog_cache
+
+    if _ai_gateway_catalog_cache is not None and not force_refresh:
+        return list(_ai_gateway_catalog_cache)
+
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    fallback = list(AI_GATEWAY_MODELS)
+    preferred_ids = [mid for mid, _ in fallback]
+
+    try:
+        req = urllib.request.Request(
+            f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        # Best-effort lookup: any network or decode problem means "use what we have".
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    # Valid JSON is not necessarily an object (it could be a list or a string);
+    # treat any unexpected shape like a failed fetch instead of raising.
+    live_items = payload.get("data") if isinstance(payload, dict) else None
+    if not isinstance(live_items, list):
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_by_id: dict[str, dict[str, Any]] = {}
+    for item in live_items:
+        if not isinstance(item, dict):
+            continue
+        mid = str(item.get("id") or "").strip()
+        if not mid:
+            continue
+        live_by_id[mid] = item
+
+    curated: list[tuple[str, str]] = []
+    for preferred_id in preferred_ids:
+        live_item = live_by_id.get(preferred_id)
+        if live_item is None:
+            continue
+        desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
+        curated.append((preferred_id, desc))
+
+    if not curated:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    # The first surviving curated model is the recommended default.
+    first_id, _ = curated[0]
+    curated[0] = (first_id, "recommended")
+    _ai_gateway_catalog_cache = curated
+    return list(curated)
+
+
+def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
+    """Return only the model ids from the AI Gateway picker list, in picker order."""
+    catalog = fetch_ai_gateway_models(force_refresh=force_refresh)
+    return [model_id for model_id, _description in catalog]
+
+


 # ---------------------------------------------------------------------------
@ -873,6 +967,56 @@ def fetch_models_with_pricing(
    return result


+def fetch_ai_gateway_pricing(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, dict[str, str]]:
+    """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
+
+    Vercel uses ``input`` / ``output`` field names; hermes's picker expects
+    ``prompt`` / ``completion``. This translates. Cache read/write field names
+    already match and are copied only when the catalog gives a truthy value.
+
+    Args:
+        timeout: Seconds to wait for the catalog request.
+        force_refresh: Ignore the cached pricing and query the gateway again.
+
+    Returns:
+        A mapping of model id to pricing strings. If the request fails or the
+        response is not shaped as expected, pricing from an earlier successful
+        fetch is returned when there is one; otherwise an empty dict is cached
+        and returned.
+    """
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
+    if not force_refresh and cache_key in _pricing_cache:
+        return _pricing_cache[cache_key]
+
+    try:
+        req = urllib.request.Request(
+            f"{cache_key}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        # Best-effort lookup: handled below together with malformed payloads.
+        payload = None
+
+    items = payload.get("data") if isinstance(payload, dict) else None
+    if not isinstance(items, list):
+        # Do not clobber pricing from an earlier successful fetch with an empty
+        # table; only record the miss when nothing is cached yet.
+        if cache_key not in _pricing_cache:
+            _pricing_cache[cache_key] = {}
+        return _pricing_cache[cache_key]
+
+    result: dict[str, dict[str, str]] = {}
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        # Normalize ids the same way fetch_ai_gateway_models() does so picker
+        # ids and pricing keys line up.
+        mid = str(item.get("id") or "").strip()
+        pricing = item.get("pricing")
+        if not mid or not isinstance(pricing, dict):
+            continue
+        entry: dict[str, str] = {
+            "prompt": str(pricing.get("input", "")),
+            "completion": str(pricing.get("output", "")),
+        }
+        for cache_field in ("input_cache_read", "input_cache_write"):
+            if pricing.get(cache_field):
+                entry[cache_field] = str(pricing[cache_field])
+        result[mid] = entry
+
+    _pricing_cache[cache_key] = result
+    return result
+
+
 def _resolve_openrouter_api_key() -> str:
    """Best-effort OpenRouter API key for pricing fetch."""
    return os.getenv("OPENROUTER_API_KEY", "").strip()
@ -891,7 +1035,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:


 def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
-    """Return live pricing for providers that support it (openrouter, nous)."""
+    """Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
        return fetch_models_with_pricing(
@ -899,6 +1043,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
            base_url="https://openrouter.ai/api",
            force_refresh=force_refresh,
        )
+    if normalized == "ai-gateway":
+        return fetch_ai_gateway_pricing(force_refresh=force_refresh)
    if normalized == "nous":
        api_key, base_url = _resolve_nous_pricing_credentials()
        if base_url:
--- a/tests/hermes_cli/test_ai_gateway_models.py
+++ b/tests/hermes_cli/test_ai_gateway_models.py
@ -0,0 +1,129 @@
+"""AI Gateway model list and pricing translation.
+
+Vercel AI Gateway exposes ``/v1/models`` with a richer shape than OpenAI's
+spec (type, tags, pricing). The pricing object uses ``input`` / ``output``
+where hermes's shared picker expects ``prompt`` / ``completion``; these tests
+pin the translation and the curated-list filtering.
+"""
+import json
+from unittest.mock import patch, MagicMock
+
+from hermes_cli import models as models_module
+from hermes_cli.models import (
+    AI_GATEWAY_MODELS,
+    _ai_gateway_model_is_free,
+    fetch_ai_gateway_models,
+    fetch_ai_gateway_pricing,
+)
+
+
+def _mock_urlopen(payload):
+    """Return a mock usable as ``with urlopen(...) as resp`` whose read() yields *payload* as JSON bytes."""
+    body = json.dumps(payload).encode()
+    response = MagicMock()
+    response.read.return_value = body
+    manager = MagicMock()
+    manager.__enter__.return_value = response
+    manager.__exit__.return_value = False
+    return manager
+
+
+def _reset_caches():
+    """Empty the pricing cache and drop the cached AI Gateway catalog."""
+    models_module._pricing_cache.clear()
+    models_module._ai_gateway_catalog_cache = None
+
+
+def test_ai_gateway_pricing_translates_input_output_to_prompt_completion():
+    """input/output rates come back as prompt/completion; cache rates keep their names."""
+    _reset_caches()
+    vercel_pricing = {
+        "input": "0.0000006",
+        "output": "0.0000025",
+        "input_cache_read": "0.00000015",
+        "input_cache_write": "0.0000006",
+    }
+    catalog = {
+        "data": [
+            {
+                "id": "moonshotai/kimi-k2.5",
+                "type": "language",
+                "pricing": vercel_pricing,
+            }
+        ]
+    }
+    with patch("urllib.request.urlopen", return_value=_mock_urlopen(catalog)):
+        pricing = fetch_ai_gateway_pricing(force_refresh=True)
+
+    entry = pricing["moonshotai/kimi-k2.5"]
+    expected = {
+        "prompt": "0.0000006",
+        "completion": "0.0000025",
+        "input_cache_read": "0.00000015",
+        "input_cache_write": "0.0000006",
+    }
+    for field, value in expected.items():
+        assert entry[field] == value
+
+
+def test_ai_gateway_pricing_returns_empty_on_fetch_failure():
+    """With cold caches, a failing urlopen yields an empty pricing table instead of raising."""
+    _reset_caches()
+    failure = OSError("network down")
+    with patch("urllib.request.urlopen", side_effect=failure):
+        pricing = fetch_ai_gateway_pricing(force_refresh=True)
+    assert pricing == {}
+
+
+def test_ai_gateway_pricing_skips_entries_without_pricing_dict():
+    """Catalog entries whose pricing is not a dict are left out of the result."""
+    _reset_caches()
+    unpriced = {"id": "x/y", "pricing": None}
+    priced = {"id": "a/b", "pricing": {"input": "0", "output": "0"}}
+    catalog = {"data": [unpriced, priced]}
+    with patch("urllib.request.urlopen", return_value=_mock_urlopen(catalog)):
+        pricing = fetch_ai_gateway_pricing(force_refresh=True)
+    assert "x/y" not in pricing
+    assert pricing["a/b"] == {"prompt": "0", "completion": "0"}
+
+
+def test_ai_gateway_free_detector():
+    """Check _ai_gateway_model_is_free on zero, non-zero, non-dict and unparseable pricing."""
+    cases = [
+        ({"input": "0", "output": "0"}, True),
+        ({"input": "0", "output": "0.01"}, False),
+        ({"input": "0.01", "output": "0"}, False),
+        (None, False),
+        ({"input": "not a number"}, False),
+    ]
+    for pricing, expected in cases:
+        assert _ai_gateway_model_is_free(pricing) is expected
+
+
+def test_fetch_ai_gateway_models_filters_against_live_catalog():
+    """Only curated ids present in the live catalog are returned, in curated order."""
+    _reset_caches()
+    # Pretend only the first three curated models exist in the live catalog.
+    live_ids = [mid for mid, _ in AI_GATEWAY_MODELS][:3]
+    catalog = {
+        "data": [
+            {"id": mid, "pricing": {"input": "0.001", "output": "0.002"}}
+            for mid in live_ids
+        ]
+    }
+    with patch("urllib.request.urlopen", return_value=_mock_urlopen(catalog)):
+        models = fetch_ai_gateway_models(force_refresh=True)
+
+    returned_ids = [mid for mid, _ in models]
+    assert returned_ids == live_ids
+    assert models[0][1] == "recommended"
+
+
+def test_fetch_ai_gateway_models_tags_free_models():
+    """A $0 model is tagged "free" while the first entry is tagged "recommended"."""
+    _reset_caches()
+    (paid_id, _), (free_id, _) = AI_GATEWAY_MODELS[:2]
+    catalog = {
+        "data": [
+            {"id": paid_id, "pricing": {"input": "0.001", "output": "0.002"}},
+            {"id": free_id, "pricing": {"input": "0", "output": "0"}},
+        ]
+    }
+    with patch("urllib.request.urlopen", return_value=_mock_urlopen(catalog)):
+        models = fetch_ai_gateway_models(force_refresh=True)
+
+    tags = dict(models)
+    assert tags[paid_id] == "recommended"
+    assert tags[free_id] == "free"
+
+
+def test_fetch_ai_gateway_models_falls_back_on_error():
+    """With cold caches and a failing urlopen, the static snapshot is returned."""
+    _reset_caches()
+    snapshot = list(AI_GATEWAY_MODELS)
+    with patch("urllib.request.urlopen", side_effect=OSError("network")):
+        models = fetch_ai_gateway_models(force_refresh=True)
+    assert models == snapshot