From 1b61ec470b1b0b8a318a57fd7a9f5925143652e8 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:32:05 -0700 Subject: [PATCH] feat: add Ollama Cloud as built-in provider Add ollama-cloud as a first-class provider with full parity to existing API-key providers (gemini, zai, minimax, etc.): - PROVIDER_REGISTRY entry with OLLAMA_API_KEY env var - Provider aliases: ollama -> custom (local), ollama_cloud -> ollama-cloud - models.dev integration for accurate context lengths - URL-to-provider mapping (ollama.com -> ollama-cloud) - Passthrough model normalization (preserves Ollama model:tag format) - Default auxiliary model (nemotron-3-nano:30b) - HermesOverlay in providers.py - CLI --provider choices, CANONICAL_PROVIDERS entry - Dynamic model discovery with disk caching (1hr TTL) - 37 provider-specific tests Cherry-picked from PR #6038 by kshitijk4poor. Closes #3926 --- .env.example | 9 + agent/auxiliary_client.py | 1 + agent/model_metadata.py | 4 +- agent/models_dev.py | 1 + cli-config.yaml.example | 8 +- hermes_cli/auth.py | 12 +- hermes_cli/config.py | 16 + hermes_cli/main.py | 65 ++-- hermes_cli/model_normalize.py | 1 + hermes_cli/models.py | 125 +++++++ hermes_cli/providers.py | 7 +- .../hermes_cli/test_ollama_cloud_provider.py | 351 ++++++++++++++++++ 12 files changed, 563 insertions(+), 37 deletions(-) create mode 100644 tests/hermes_cli/test_ollama_cloud_provider.py diff --git a/.env.example b/.env.example index 76be6ce26..066e93f7c 100644 --- a/.env.example +++ b/.env.example @@ -24,6 +24,15 @@ # Optional base URL override (default: Google's OpenAI-compatible endpoint) # GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai +# ============================================================================= +# LLM PROVIDER (Ollama Cloud) +# ============================================================================= +# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint. +# Get your key at: https://ollama.com/settings +# OLLAMA_API_KEY=your_ollama_key_here +# Optional base URL override (default: https://ollama.com/v1) +# OLLAMA_BASE_URL=https://ollama.com/v1 + # ============================================================================= # LLM PROVIDER (z.ai / GLM) # ============================================================================= diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 9702da941..34d7d4250 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -104,6 +104,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "opencode-zen": "gemini-3-flash", "opencode-go": "glm-5", "kilocode": "google/gemini-3-flash-preview", + "ollama-cloud": "nemotron-3-nano:30b", } # Vision-specific model overrides for direct providers. diff --git a/agent/model_metadata.py b/agent/model_metadata.py index a0e3bea8c..db3048941 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) # are preserved so the full model name reaches cache lookups and server queries. _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", - "gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", + "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "qwen-oauth", "xiaomi", @@ -33,6 +33,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "google", "google-gemini", "google-ai-studio", "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek", + "ollama", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", @@ -239,6 +240,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.x.ai": "xai", "api.xiaomimimo.com": "xiaomi", "xiaomimimo.com": "xiaomi", + "ollama.com": "ollama-cloud", } diff --git a/agent/models_dev.py b/agent/models_dev.py index 373daafc3..42c8925ff 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "togetherai": "togetherai", "perplexity": "perplexity", "cohere": "cohere", + "ollama-cloud": "ollama-cloud", } # Reverse mapping: models.dev → Hermes (built lazily) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 962b554b4..8c0484abd 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -26,6 +26,7 @@ model: # "huggingface" - Hugging Face Inference (requires: HF_TOKEN) # "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY) # "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY) + # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings) # "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY) # "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY) # @@ -37,12 +38,6 @@ model: # base_url: "http://localhost:1234/v1" # No API key needed — local servers typically ignore auth. # - # For Ollama Cloud (https://ollama.com/pricing): - # provider: "custom" - # base_url: "https://ollama.com/v1" - # Set OLLAMA_API_KEY in .env — automatically picked up when base_url - # points to ollama.com. - # # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var. provider: "auto" @@ -337,6 +332,7 @@ compression: # "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY) # "nous" - Force Nous Portal (requires: hermes login) # "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY) +# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY) # "codex" - Force Codex OAuth (requires: hermes model → Codex). # Uses gpt-5.3-codex which supports vision. # "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY). diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index b75b6b757..966082787 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -70,6 +70,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1" DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com" DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot" +DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1" CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 @@ -274,6 +275,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("XIAOMI_API_KEY",), base_url_env_var="XIAOMI_BASE_URL", ), + "ollama-cloud": ProviderConfig( + id="ollama-cloud", + name="Ollama Cloud", + auth_type="api_key", + inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL, + api_key_env_vars=("OLLAMA_API_KEY",), + base_url_env_var="OLLAMA_BASE_URL", + ), "bedrock": ProviderConfig( id="bedrock", name="AWS Bedrock", @@ -937,7 +946,8 @@ def resolve_provider( "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", # Local server aliases — route through the generic custom provider "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom", - "ollama": "custom", "vllm": "custom", "llamacpp": "custom", + "ollama": "custom", "ollama_cloud": "ollama-cloud", + "vllm": "custom", "llamacpp": "custom", "llama.cpp": "custom", "llama-cpp": "custom", } normalized = _PROVIDER_ALIASES.get(normalized, normalized) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6a646d0df..7f639726f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1024,6 +1024,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "OLLAMA_API_KEY": { + "description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)", + "prompt": "Ollama Cloud API key", + "url": "https://ollama.com/settings", + "password": True, + "category": "provider", + "advanced": True, + }, + "OLLAMA_BASE_URL": { + "description": "Ollama Cloud base URL override (default: https://ollama.com/v1)", + "prompt": "Ollama base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "XIAOMI_API_KEY": { "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)", "prompt": "Xiaomi MiMo API Key", diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 5c6db4e90..9d0615d53 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1141,7 +1141,7 @@ def select_provider_and_model(args=None): _model_flow_kimi(config, current_model) elif selected_provider == "bedrock": _model_flow_bedrock(config, current_model) - elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"): + elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"): _model_flow_api_key_provider(config, selected_provider, current_model) # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ────────────── @@ -2734,34 +2734,43 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): # 1. models.dev registry (cached, filtered for agentic/tool-capable models) # 2. Curated static fallback list (offline insurance) # 3. Live /models endpoint probe (small providers without models.dev data) - curated = _PROVIDER_MODELS.get(provider_id, []) - - # Try models.dev first — returns tool-capable models, filtered for noise - mdev_models: list = [] - try: - from agent.models_dev import list_agentic_models - mdev_models = list_agentic_models(provider_id) - except Exception: - pass - - if mdev_models: - model_list = mdev_models - print(f" Found {len(model_list)} model(s) from models.dev registry") - elif curated and len(curated) >= 8: - # Curated list is substantial — use it directly, skip live probe - model_list = curated - print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") - else: + # + # Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache) + if provider_id == "ollama-cloud": + from hermes_cli.models import fetch_ollama_cloud_models api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") - live_models = fetch_api_models(api_key_for_probe, effective_base) - if live_models and len(live_models) >= len(curated): - model_list = live_models - print(f" Found {len(model_list)} model(s) from {pconfig.name} API") - else: + model_list = fetch_ollama_cloud_models(api_key=api_key_for_probe, base_url=effective_base) + if model_list: + print(f" Found {len(model_list)} model(s) from Ollama Cloud") + else: + curated = _PROVIDER_MODELS.get(provider_id, []) + + # Try models.dev first — returns tool-capable models, filtered for noise + mdev_models: list = [] + try: + from agent.models_dev import list_agentic_models + mdev_models = list_agentic_models(provider_id) + except Exception: + pass + + if mdev_models: + model_list = mdev_models + print(f" Found {len(model_list)} model(s) from models.dev registry") + elif curated and len(curated) >= 8: + # Curated list is substantial — use it directly, skip live probe model_list = curated - if model_list: - print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") - # else: no defaults either, will fall through to raw input + print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") + else: + api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") + live_models = fetch_api_models(api_key_for_probe, effective_base) + if live_models and len(live_models) >= len(curated): + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + model_list = curated + if model_list: + print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") + # else: no defaults either, will fall through to raw input if provider_id in {"opencode-zen", "opencode-go"}: model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list] @@ -4860,7 +4869,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"], + choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"], default=None, help="Inference provider (default: auto)" ) diff --git a/hermes_cli/model_normalize.py b/hermes_cli/model_normalize.py index 40afe003b..22ab0fa3f 100644 --- a/hermes_cli/model_normalize.py +++ b/hermes_cli/model_normalize.py @@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({ "qwen-oauth", "xiaomi", "arcee", + "ollama-cloud", "custom", }) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 9fc68933e..9812fc97e 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -11,7 +11,9 @@ import json import os import urllib.request import urllib.error +import time from difflib import get_close_matches +from pathlib import Path from typing import Any, NamedTuple, Optional COPILOT_BASE_URL = "https://api.githubcopilot.com" @@ -547,6 +549,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"), ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"), ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), + ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"), ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"), ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), @@ -559,6 +562,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS} _PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider + _PROVIDER_ALIASES = { "glm": "zai", "z-ai": "zai", @@ -611,6 +615,8 @@ _PROVIDER_ALIASES = { "grok": "xai", "x-ai": "xai", "x.ai": "xai", + "ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud + "ollama_cloud": "ollama-cloud", } @@ -1786,6 +1792,125 @@ def fetch_api_models( return probe_api_models(api_key, base_url, timeout=timeout).get("models") +# --------------------------------------------------------------------------- +# Ollama Cloud — merged model discovery with disk cache +# --------------------------------------------------------------------------- + + + +_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour + + +def _ollama_cloud_cache_path() -> Path: + """Return the path for the Ollama Cloud model cache.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "ollama_cloud_models_cache.json" + + +def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]: + """Load cached Ollama Cloud models from disk. + + Args: + ignore_ttl: If True, return data even if the TTL has expired (stale fallback). + """ + try: + cache_path = _ollama_cloud_cache_path() + if not cache_path.exists(): + return None + with open(cache_path, encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + return None + models = data.get("models") + if not (isinstance(models, list) and models): + return None + if not ignore_ttl: + cached_at = data.get("cached_at", 0) + if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL: + return None # stale + return data + except Exception: + pass + return None + + +def _save_ollama_cloud_cache(models: list[str]) -> None: + """Persist the merged Ollama Cloud model list to disk.""" + try: + from utils import atomic_json_write + cache_path = _ollama_cloud_cache_path() + cache_path.parent.mkdir(parents=True, exist_ok=True) + atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None) + except Exception: + pass + + +def fetch_ollama_cloud_models( + api_key: Optional[str] = None, + base_url: Optional[str] = None, + *, + force_refresh: bool = False, +) -> list[str]: + """Fetch Ollama Cloud models by merging live API + models.dev, with disk cache. + + Resolution order: + 1. Disk cache (if fresh, < 1 hour, and not force_refresh) + 2. Live ``/v1/models`` endpoint (primary — freshest source) + 3. models.dev registry (secondary — fills gaps for unlisted models) + 4. Merge: live models first, then models.dev additions (deduped) + + Returns a list of model IDs (never None — empty list on total failure). + """ + # 1. Check disk cache + if not force_refresh: + cached = _load_ollama_cloud_cache() + if cached is not None: + return cached["models"] + + # 2. Live API probe + if not api_key: + api_key = os.getenv("OLLAMA_API_KEY", "") + if not base_url: + base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1" + + live_models: list[str] = [] + if api_key: + result = fetch_api_models(api_key, base_url, timeout=8.0) + if result: + live_models = result + + # 3. models.dev registry + mdev_models: list[str] = [] + try: + from agent.models_dev import list_agentic_models + mdev_models = list_agentic_models("ollama-cloud") + except Exception: + pass + + # 4. Merge: live first, then models.dev additions (deduped, order-preserving) + if live_models or mdev_models: + seen: set[str] = set() + merged: list[str] = [] + for m in live_models: + if m and m not in seen: + seen.add(m) + merged.append(m) + for m in mdev_models: + if m and m not in seen: + seen.add(m) + merged.append(m) + if merged: + _save_ollama_cloud_cache(merged) + return merged + + # Total failure — return stale cache if available (ignore TTL) + stale = _load_ollama_cloud_cache(ignore_ttl=True) + if stale is not None: + return stale["models"] + + return [] + + def validate_requested_model( model_name: str, provider: Optional[str], diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 8311e3652..eae832055 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -141,6 +141,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_override="https://api.arcee.ai/api/v1", base_url_env_var="ARCEE_BASE_URL", ), + "ollama-cloud": HermesOverlay( + transport="openai_chat", + base_url_env_var="OLLAMA_BASE_URL", + ), } @@ -250,7 +254,7 @@ ALIASES: Dict[str, str] = { "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio", - "ollama": "ollama-cloud", + "ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud "vllm": "local", "llamacpp": "local", "llama.cpp": "local", @@ -269,6 +273,7 @@ _LABEL_OVERRIDES: Dict[str, str] = { "xiaomi": "Xiaomi MiMo", "local": "Local endpoint", "bedrock": "AWS Bedrock", + "ollama-cloud": "Ollama Cloud", } diff --git a/tests/hermes_cli/test_ollama_cloud_provider.py b/tests/hermes_cli/test_ollama_cloud_provider.py new file mode 100644 index 000000000..9dad26092 --- /dev/null +++ b/tests/hermes_cli/test_ollama_cloud_provider.py @@ -0,0 +1,351 @@ +"""Tests for Ollama Cloud provider integration.""" + +import os +import pytest +from unittest.mock import patch, MagicMock + +from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key_provider_credentials +from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider +from hermes_cli.model_normalize import normalize_model_for_provider +from agent.model_metadata import _URL_TO_PROVIDER, _PROVIDER_PREFIXES +from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models + + +# ── Provider Registry ── + +class TestOllamaCloudProviderRegistry: + def test_ollama_cloud_in_registry(self): + assert "ollama-cloud" in PROVIDER_REGISTRY + + def test_ollama_cloud_config(self): + pconfig = PROVIDER_REGISTRY["ollama-cloud"] + assert pconfig.id == "ollama-cloud" + assert pconfig.name == "Ollama Cloud" + assert pconfig.auth_type == "api_key" + assert pconfig.inference_base_url == "https://ollama.com/v1" + + def test_ollama_cloud_env_vars(self): + pconfig = PROVIDER_REGISTRY["ollama-cloud"] + assert pconfig.api_key_env_vars == ("OLLAMA_API_KEY",) + assert pconfig.base_url_env_var == "OLLAMA_BASE_URL" + + def test_ollama_cloud_base_url(self): + assert "ollama.com" in PROVIDER_REGISTRY["ollama-cloud"].inference_base_url + + +# ── Provider Aliases ── + +PROVIDER_ENV_VARS = ( + "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "GOOGLE_API_KEY", "GEMINI_API_KEY", "OLLAMA_API_KEY", + "GLM_API_KEY", "ZAI_API_KEY", "KIMI_API_KEY", + "MINIMAX_API_KEY", "DEEPSEEK_API_KEY", +) + +@pytest.fixture(autouse=True) +def _clean_provider_env(monkeypatch): + for var in PROVIDER_ENV_VARS: + monkeypatch.delenv(var, raising=False) + + +class TestOllamaCloudAliases: + def test_explicit_ollama_cloud(self): + assert resolve_provider("ollama-cloud") == "ollama-cloud" + + def test_alias_ollama_underscore(self): + """ollama_cloud (underscore) is the unambiguous cloud alias.""" + assert resolve_provider("ollama_cloud") == "ollama-cloud" + + def test_bare_ollama_stays_local(self): + """Bare 'ollama' alias routes to 'custom' (local) — not cloud.""" + assert resolve_provider("ollama") == "custom" + + def test_models_py_aliases(self): + assert _PROVIDER_ALIASES.get("ollama_cloud") == "ollama-cloud" + # bare "ollama" stays local + assert _PROVIDER_ALIASES.get("ollama") == "custom" + + def test_normalize_provider(self): + assert normalize_provider("ollama-cloud") == "ollama-cloud" + + +# ── Auto-detection ── + +class TestOllamaCloudAutoDetection: + def test_auto_detects_ollama_api_key(self, monkeypatch): + monkeypatch.setenv("OLLAMA_API_KEY", "test-ollama-key") + assert resolve_provider("auto") == "ollama-cloud" + + +# ── Credential Resolution ── + +class TestOllamaCloudCredentials: + def test_resolve_with_ollama_api_key(self, monkeypatch): + monkeypatch.setenv("OLLAMA_API_KEY", "ollama-secret") + creds = resolve_api_key_provider_credentials("ollama-cloud") + assert creds["provider"] == "ollama-cloud" + assert creds["api_key"] == "ollama-secret" + assert creds["base_url"] == "https://ollama.com/v1" + + def test_resolve_with_custom_base_url(self, monkeypatch): + monkeypatch.setenv("OLLAMA_API_KEY", "key") + monkeypatch.setenv("OLLAMA_BASE_URL", "https://custom.ollama/v1") + creds = resolve_api_key_provider_credentials("ollama-cloud") + assert creds["base_url"] == "https://custom.ollama/v1" + + def test_runtime_ollama_cloud(self, monkeypatch): + monkeypatch.setenv("OLLAMA_API_KEY", "ollama-key") + from hermes_cli.runtime_provider import resolve_runtime_provider + result = resolve_runtime_provider(requested="ollama-cloud") + assert result["provider"] == "ollama-cloud" + assert result["api_mode"] == "chat_completions" + assert result["api_key"] == "ollama-key" + assert result["base_url"] == "https://ollama.com/v1" + + +# ── Model Catalog (dynamic — no static list) ── + +class TestOllamaCloudModelCatalog: + def test_no_static_model_list(self): + """Ollama Cloud models are fetched dynamically — no static list to maintain.""" + assert "ollama-cloud" not in _PROVIDER_MODELS + + def test_provider_label(self): + assert "ollama-cloud" in _PROVIDER_LABELS + assert _PROVIDER_LABELS["ollama-cloud"] == "Ollama Cloud" + + +# ── Merged Model Discovery ── + +class TestOllamaCloudMergedDiscovery: + def test_merges_live_and_models_dev(self, tmp_path, monkeypatch): + """Live API models appear first, models.dev additions fill gaps.""" + from hermes_cli.models import fetch_ollama_cloud_models + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + + mock_mdev = { + "ollama-cloud": { + "models": { + "glm-5": {"tool_call": True}, + "kimi-k2.5": {"tool_call": True}, + "nemotron-3-super": {"tool_call": True}, + } + } + } + with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b", "glm-5"]), \ + patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev): + result = fetch_ollama_cloud_models(force_refresh=True) + + # Live models first, then models.dev additions (deduped) + assert result[0] == "qwen3.5:397b" # from live API + assert result[1] == "glm-5" # from live API (also in models.dev) + assert "kimi-k2.5" in result # from models.dev only + assert "nemotron-3-super" in result # from models.dev only + assert result.count("glm-5") == 1 # no duplicates + + def test_falls_back_to_models_dev_without_api_key(self, tmp_path, monkeypatch): + """Without API key, only models.dev results are returned.""" + from hermes_cli.models import fetch_ollama_cloud_models + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("OLLAMA_API_KEY", raising=False) + + mock_mdev = { + "ollama-cloud": { + "models": { + "glm-5": {"tool_call": True}, + } + } + } + with patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev): + result = fetch_ollama_cloud_models(force_refresh=True) + + assert result == ["glm-5"] + + def test_uses_disk_cache(self, tmp_path, monkeypatch): + """Second call returns cached results without hitting APIs.""" + from hermes_cli.models import fetch_ollama_cloud_models + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + + with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as mock_api, \ + patch("agent.models_dev.fetch_models_dev", return_value={}): + first = fetch_ollama_cloud_models(force_refresh=True) + assert first == ["model-a"] + assert mock_api.call_count == 1 + + # Second call — should use disk cache, not call API + second = fetch_ollama_cloud_models() + assert second == ["model-a"] + assert mock_api.call_count == 1 # no extra API call + + def test_force_refresh_bypasses_cache(self, tmp_path, monkeypatch): + """force_refresh=True always hits the API even with fresh cache.""" + from hermes_cli.models import fetch_ollama_cloud_models + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + + with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as mock_api, \ + patch("agent.models_dev.fetch_models_dev", return_value={}): + fetch_ollama_cloud_models(force_refresh=True) + fetch_ollama_cloud_models(force_refresh=True) + assert mock_api.call_count == 2 + + def test_stale_cache_used_on_total_failure(self, tmp_path, monkeypatch): + """If both API and models.dev fail, stale cache is returned.""" + from hermes_cli.models import fetch_ollama_cloud_models, _save_ollama_cloud_cache + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + + # Pre-populate a stale cache + _save_ollama_cloud_cache(["stale-model"]) + + # Make the cache appear stale by backdating it + import json + cache_path = tmp_path / "ollama_cloud_models_cache.json" + with open(cache_path) as f: + data = json.load(f) + data["cached_at"] = 0 # epoch = very stale + with open(cache_path, "w") as f: + json.dump(data, f) + + with patch("hermes_cli.models.fetch_api_models", return_value=None), \ + patch("agent.models_dev.fetch_models_dev", return_value={}): + result = fetch_ollama_cloud_models(force_refresh=True) + + assert result == ["stale-model"] + + def test_empty_on_total_failure_no_cache(self, tmp_path, monkeypatch): + """Returns empty list when everything fails and no cache exists.""" + from hermes_cli.models import fetch_ollama_cloud_models + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("OLLAMA_API_KEY", raising=False) + + with patch("agent.models_dev.fetch_models_dev", return_value={}): + result = fetch_ollama_cloud_models(force_refresh=True) + + assert result == [] + + +# ── Model Normalization ── + +class TestOllamaCloudModelNormalization: + def test_passthrough_bare_name(self): + """Ollama Cloud is a passthrough provider — model names used as-is.""" + assert normalize_model_for_provider("qwen3.5:397b", "ollama-cloud") == "qwen3.5:397b" + + def test_passthrough_with_tag(self): + assert normalize_model_for_provider("cogito-2.1:671b", "ollama-cloud") == "cogito-2.1:671b" + + def test_passthrough_no_tag(self): + assert normalize_model_for_provider("glm-5", "ollama-cloud") == "glm-5" + + +# ── URL-to-Provider Mapping ── + +class TestOllamaCloudUrlMapping: + def test_url_to_provider(self): + assert _URL_TO_PROVIDER.get("ollama.com") == "ollama-cloud" + + def test_provider_prefix_canonical(self): + assert "ollama-cloud" in _PROVIDER_PREFIXES + + def test_provider_prefix_alias(self): + assert "ollama" in _PROVIDER_PREFIXES + + +# ── models.dev Integration ── + +class TestOllamaCloudModelsDev: + def test_ollama_cloud_mapped(self): + assert PROVIDER_TO_MODELS_DEV.get("ollama-cloud") == "ollama-cloud" + + def test_list_agentic_models_with_mock_data(self): + """list_agentic_models filters correctly from mock models.dev data.""" + mock_data = { + "ollama-cloud": { + "models": { + "qwen3.5:397b": {"tool_call": True}, + "glm-5": {"tool_call": True}, + "nemotron-3-nano:30b": {"tool_call": True}, + "some-embedding:latest": {"tool_call": False}, + } + } + } + with patch("agent.models_dev.fetch_models_dev", return_value=mock_data): + result = list_agentic_models("ollama-cloud") + assert "qwen3.5:397b" in result + assert "glm-5" in result + assert "nemotron-3-nano:30b" in result + assert "some-embedding:latest" not in result # no tool_call + + +# ── Agent Init (no SyntaxError) ── + +class TestOllamaCloudAgentInit: + def test_agent_imports_without_error(self): + """Verify run_agent.py has no SyntaxError.""" + import importlib + import run_agent + importlib.reload(run_agent) + + def test_ollama_cloud_agent_uses_chat_completions(self, monkeypatch): + """Ollama Cloud falls through to chat_completions — no special elif needed.""" + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + with patch("run_agent.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + from run_agent import AIAgent + agent = AIAgent( + model="qwen3.5:397b", + provider="ollama-cloud", + api_key="test-key", + base_url="https://ollama.com/v1", + ) + assert agent.api_mode == "chat_completions" + assert agent.provider == "ollama-cloud" + + +# ── providers.py New System ── + +class TestOllamaCloudProvidersNew: + def test_overlay_exists(self): + from hermes_cli.providers import HERMES_OVERLAYS + assert "ollama-cloud" in HERMES_OVERLAYS + overlay = HERMES_OVERLAYS["ollama-cloud"] + assert overlay.transport == "openai_chat" + assert overlay.base_url_env_var == "OLLAMA_BASE_URL" + + def test_alias_resolves(self): + from hermes_cli.providers import normalize_provider as np + assert np("ollama") == "custom" # bare "ollama" = local + assert np("ollama-cloud") == "ollama-cloud" + + def test_label_override(self): + from hermes_cli.providers import _LABEL_OVERRIDES + assert _LABEL_OVERRIDES.get("ollama-cloud") == "Ollama Cloud" + + def test_get_label(self): + from hermes_cli.providers import get_label + assert get_label("ollama-cloud") == "Ollama Cloud" + + def test_get_provider(self): + from hermes_cli.providers import get_provider + pdef = get_provider("ollama-cloud") + assert pdef is not None + assert pdef.id == "ollama-cloud" + assert pdef.transport == "openai_chat" + + +# ── Auxiliary Model ── + +class TestOllamaCloudAuxiliary: + def test_aux_model_defined(self): + from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS + assert _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"] == "nemotron-3-nano:30b"