diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3da8424a76..256b0d46f4 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -193,7 +193,7 @@
 import time as _time
 from datetime import datetime
 
 from hermes_cli import __version__, __release_date__
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
 
 logger = logging.getLogger(__name__)
@@ -1528,6 +1528,8 @@ def select_provider_and_model(args=None):
     # Step 2: Provider-specific setup + model selection
     if selected_provider == "openrouter":
         _model_flow_openrouter(config, current_model)
+    elif selected_provider == "ai-gateway":
+        _model_flow_ai_gateway(config, current_model)
     elif selected_provider == "nous":
         _model_flow_nous(config, current_model, args=args)
     elif selected_provider == "openai-codex":
@@ -1573,7 +1575,6 @@ def select_provider_and_model(args=None):
         "kilocode",
         "opencode-zen",
         "opencode-go",
-        "ai-gateway",
         "alibaba",
         "huggingface",
         "xiaomi",
@@ -2045,6 +2046,62 @@ def _model_flow_openrouter(config, current_model=""):
         print("No change.")
 
 
+def _model_flow_ai_gateway(config, current_model=""):
+    """Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
+    from hermes_cli.auth import (
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value
+
+    api_key = get_env_value("AI_GATEWAY_API_KEY")
+    if not api_key:
+        print("No Vercel AI Gateway API key configured.")
+        print("Get one at: https://vercel.com/dashboard/ai-gateway")
+        print()
+        try:
+            import getpass
+
+            key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return
+        if not key:
+            print("Cancelled.")
+            return
+        save_env_value("AI_GATEWAY_API_KEY", key)
+        print("API key saved.")
+        print()
+
+    from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
+
+    models_list = ai_gateway_model_ids(force_refresh=True)
+    pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
+
+    selected = _prompt_model_selection(
+        models_list, current_model=current_model, pricing=pricing
+    )
+    if selected:
+        _save_model_choice(selected)
+
+        from hermes_cli.config import load_config, save_config
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+        cfg["model"] = model
+        model["provider"] = "ai-gateway"
+        model["base_url"] = AI_GATEWAY_BASE_URL
+        model["api_mode"] = "chat_completions"
+        save_config(cfg)
+        deactivate_provider()
+        print(f"Default model set to: {selected} (via Vercel AI Gateway)")
+    else:
+        print("No change.")
+
+
 def _model_flow_nous(config, current_model="", args=None):
     """Nous Portal provider: ensure logged in, then pick model."""
     from hermes_cli.auth import (
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 34e467adab..3995b27894 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -68,6 +68,29 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 
 _openrouter_catalog_cache: list[tuple[str, str]] | None = None
 
 
+# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
+# OSS / open-weight models prioritized first, then closed-source by family.
+AI_GATEWAY_MODELS: list[tuple[str, str]] = [
+    ("moonshotai/kimi-k2.5", "recommended"),
+    ("qwen/qwen3.6-plus", ""),
+    ("z-ai/glm-5.1", ""),
+    ("minimax/minimax-m2.7", ""),
+    ("anthropic/claude-sonnet-4.6", ""),
+    ("anthropic/claude-opus-4.7", ""),
+    ("anthropic/claude-opus-4.6", ""),
+    ("anthropic/claude-haiku-4.5", ""),
+    ("openai/gpt-5.4", ""),
+    ("openai/gpt-5.4-mini", ""),
+    ("openai/gpt-5.3-codex", ""),
+    ("google/gemini-3.1-pro-preview", ""),
+    ("google/gemini-3-flash-preview", ""),
+    ("google/gemini-3.1-flash-lite-preview", ""),
+    ("x-ai/grok-4.20", ""),
+]
+
+_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
+
+
 def _codex_curated_models() -> list[str]:
     """Derive the openai-codex curated list from codex_models.py.
@@ -729,6 +752,77 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
     return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
 
 
+def _ai_gateway_model_is_free(pricing: Any) -> bool:
+    """Return True if an AI Gateway model has $0 input AND output pricing."""
+    if not isinstance(pricing, dict):
+        return False
+    try:
+        return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
+    except (TypeError, ValueError):
+        return False
+
+
+def fetch_ai_gateway_models(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> list[tuple[str, str]]:
+    """Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
+    global _ai_gateway_catalog_cache
+
+    if _ai_gateway_catalog_cache is not None and not force_refresh:
+        return list(_ai_gateway_catalog_cache)
+
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    fallback = list(AI_GATEWAY_MODELS)
+    preferred_ids = [mid for mid, _ in fallback]
+
+    try:
+        req = urllib.request.Request(
+            f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_items = payload.get("data", [])
+    if not isinstance(live_items, list):
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_by_id: dict[str, dict[str, Any]] = {}
+    for item in live_items:
+        if not isinstance(item, dict):
+            continue
+        mid = str(item.get("id") or "").strip()
+        if not mid:
+            continue
+        live_by_id[mid] = item
+
+    curated: list[tuple[str, str]] = []
+    for preferred_id in preferred_ids:
+        live_item = live_by_id.get(preferred_id)
+        if live_item is None:
+            continue
+        desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
+        curated.append((preferred_id, desc))
+
+    if not curated:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    first_id, _ = curated[0]
+    curated[0] = (first_id, "recommended")
+    _ai_gateway_catalog_cache = curated
+    return list(curated)
+
+
+def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
+    """Return just the AI Gateway model-id strings."""
+    return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
+
+
 # ---------------------------------------------------------------------------
 
 
@@ -873,6 +967,56 @@ def fetch_models_with_pricing(
     return result
 
 
+def fetch_ai_gateway_pricing(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, dict[str, str]]:
+    """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
+
+    Vercel uses ``input`` / ``output`` field names; hermes's picker expects
+    ``prompt`` / ``completion``, so this helper translates between them. Cache
+    read/write field names already match.
+    """
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
+    if not force_refresh and cache_key in _pricing_cache:
+        return _pricing_cache[cache_key]
+
+    try:
+        req = urllib.request.Request(
+            f"{cache_key}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        _pricing_cache[cache_key] = {}
+        return {}
+
+    result: dict[str, dict[str, str]] = {}
+    for item in payload.get("data", []):
+        if not isinstance(item, dict):
+            continue
+        mid = item.get("id")
+        pricing = item.get("pricing")
+        if not (mid and isinstance(pricing, dict)):
+            continue
+        entry: dict[str, str] = {
+            "prompt": str(pricing.get("input", "")),
+            "completion": str(pricing.get("output", "")),
+        }
+        if pricing.get("input_cache_read"):
+            entry["input_cache_read"] = str(pricing["input_cache_read"])
+        if pricing.get("input_cache_write"):
+            entry["input_cache_write"] = str(pricing["input_cache_write"])
+        result[mid] = entry
+
+    _pricing_cache[cache_key] = result
+    return result
+
+
 def _resolve_openrouter_api_key() -> str:
     """Best-effort OpenRouter API key for pricing fetch."""
     return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -891,7 +1035,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
 
 
 def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
-    """Return live pricing for providers that support it (openrouter, nous)."""
+    """Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
     normalized = normalize_provider(provider)
     if normalized == "openrouter":
         return fetch_models_with_pricing(
@@ -899,6 +1043,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
         base_url="https://openrouter.ai/api",
         force_refresh=force_refresh,
     )
+    if normalized == "ai-gateway":
+        return fetch_ai_gateway_pricing(force_refresh=force_refresh)
     if normalized == "nous":
         api_key, base_url = _resolve_nous_pricing_credentials()
         if base_url:
diff --git a/tests/hermes_cli/test_ai_gateway_models.py b/tests/hermes_cli/test_ai_gateway_models.py
new file mode 100644
index 0000000000..0a175b8344
--- /dev/null
+++ b/tests/hermes_cli/test_ai_gateway_models.py
@@ -0,0 +1,129 @@
+"""AI Gateway model list and pricing translation.
+
+Vercel AI Gateway exposes ``/v1/models`` with a richer shape than OpenAI's
+spec (type, tags, pricing). The pricing object uses ``input`` / ``output``
+where hermes's shared picker expects ``prompt`` / ``completion``; these tests
+pin the translation and the curated-list filtering.
+""" +import json +from unittest.mock import patch, MagicMock + +from hermes_cli import models as models_module +from hermes_cli.models import ( + AI_GATEWAY_MODELS, + _ai_gateway_model_is_free, + fetch_ai_gateway_models, + fetch_ai_gateway_pricing, +) + + +def _mock_urlopen(payload): + """Build a urlopen() context manager mock returning the given payload.""" + resp = MagicMock() + resp.read.return_value = json.dumps(payload).encode() + ctx = MagicMock() + ctx.__enter__.return_value = resp + ctx.__exit__.return_value = False + return ctx + + +def _reset_caches(): + models_module._ai_gateway_catalog_cache = None + models_module._pricing_cache.clear() + + +def test_ai_gateway_pricing_translates_input_output_to_prompt_completion(): + _reset_caches() + payload = { + "data": [ + { + "id": "moonshotai/kimi-k2.5", + "type": "language", + "pricing": { + "input": "0.0000006", + "output": "0.0000025", + "input_cache_read": "0.00000015", + "input_cache_write": "0.0000006", + }, + } + ] + } + with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)): + result = fetch_ai_gateway_pricing(force_refresh=True) + + entry = result["moonshotai/kimi-k2.5"] + assert entry["prompt"] == "0.0000006" + assert entry["completion"] == "0.0000025" + assert entry["input_cache_read"] == "0.00000015" + assert entry["input_cache_write"] == "0.0000006" + + +def test_ai_gateway_pricing_returns_empty_on_fetch_failure(): + _reset_caches() + with patch("urllib.request.urlopen", side_effect=OSError("network down")): + result = fetch_ai_gateway_pricing(force_refresh=True) + assert result == {} + + +def test_ai_gateway_pricing_skips_entries_without_pricing_dict(): + _reset_caches() + payload = { + "data": [ + {"id": "x/y", "pricing": None}, + {"id": "a/b", "pricing": {"input": "0", "output": "0"}}, + ] + } + with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)): + result = fetch_ai_gateway_pricing(force_refresh=True) + assert "x/y" not in result + assert result["a/b"] == {"prompt": "0", "completion": "0"} + + +def test_ai_gateway_free_detector(): + assert _ai_gateway_model_is_free({"input": "0", "output": "0"}) is True + assert _ai_gateway_model_is_free({"input": "0", "output": "0.01"}) is False + assert _ai_gateway_model_is_free({"input": "0.01", "output": "0"}) is False + assert _ai_gateway_model_is_free(None) is False + assert _ai_gateway_model_is_free({"input": "not a number"}) is False + + +def test_fetch_ai_gateway_models_filters_against_live_catalog(): + _reset_caches() + preferred = [mid for mid, _ in AI_GATEWAY_MODELS] + live_ids = preferred[:3] # only first three exist live + payload = { + "data": [ + {"id": mid, "pricing": {"input": "0.001", "output": "0.002"}} + for mid in live_ids + ] + } + with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)): + result = fetch_ai_gateway_models(force_refresh=True) + + assert [mid for mid, _ in result] == live_ids + assert result[0][1] == "recommended" + + +def test_fetch_ai_gateway_models_tags_free_models(): + _reset_caches() + first_id = AI_GATEWAY_MODELS[0][0] + second_id = AI_GATEWAY_MODELS[1][0] + payload = { + "data": [ + {"id": first_id, "pricing": {"input": "0.001", "output": "0.002"}}, + {"id": second_id, "pricing": {"input": "0", "output": "0"}}, + ] + } + with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)): + result = fetch_ai_gateway_models(force_refresh=True) + + by_id = dict(result) + assert by_id[first_id] == "recommended" + assert by_id[second_id] == "free" + + +def 
test_fetch_ai_gateway_models_falls_back_on_error(): + _reset_caches() + with patch("urllib.request.urlopen", side_effect=OSError("network")): + result = fetch_ai_gateway_models(force_refresh=True) + assert result == list(AI_GATEWAY_MODELS)
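
For quick reference, a minimal standalone sketch of the pricing-shape mapping that fetch_ai_gateway_pricing performs in the patch above; the catalog entry and prices here are made up for illustration and are not real Vercel data.

# Sketch only: the "input"/"output" fields reported by Vercel AI Gateway's
# /v1/models are mapped onto the "prompt"/"completion" keys hermes's shared
# picker consumes; optional cache fields carry over under the same names.
sample_item = {  # hypothetical catalog entry; prices are illustrative
    "id": "moonshotai/kimi-k2.5",
    "pricing": {
        "input": "0.0000006",
        "output": "0.0000025",
        "input_cache_read": "0.00000015",
    },
}

def translate_pricing(item: dict) -> dict[str, str]:
    pricing = item.get("pricing") or {}
    entry = {
        "prompt": str(pricing.get("input", "")),
        "completion": str(pricing.get("output", "")),
    }
    for field in ("input_cache_read", "input_cache_write"):
        if pricing.get(field):
            entry[field] = str(pricing[field])
    return entry

assert translate_pricing(sample_item) == {
    "prompt": "0.0000006",
    "completion": "0.0000025",
    "input_cache_read": "0.00000015",
}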