feat: add Ollama Cloud as built-in provider

Add ollama-cloud as a first-class provider with full parity to existing
API-key providers (gemini, zai, minimax, etc.):

- PROVIDER_REGISTRY entry with OLLAMA_API_KEY env var
- Provider aliases: ollama -> custom (local), ollama_cloud -> ollama-cloud
- models.dev integration for accurate context lengths
- URL-to-provider mapping (ollama.com -> ollama-cloud)
- Passthrough model normalization (preserves Ollama model:tag format)
- Default auxiliary model (nemotron-3-nano:30b)
- HermesOverlay in providers.py
- CLI --provider choices, CANONICAL_PROVIDERS entry
- Dynamic model discovery with disk caching (1hr TTL)
- 37 provider-specific tests

Cherry-picked from PR #6038 by kshitijk4poor. Closes #3926
This commit is contained in:
kshitijk4poor 2026-04-15 22:32:05 -07:00 committed by Teknium
parent 8021a735c2
commit 1b61ec470b
12 changed files with 563 additions and 37 deletions

View file

@ -1141,7 +1141,7 @@ def select_provider_and_model(args=None):
_model_flow_kimi(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"):
_model_flow_api_key_provider(config, selected_provider, current_model)
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@ -2734,34 +2734,43 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
# 2. Curated static fallback list (offline insurance)
# 3. Live /models endpoint probe (small providers without models.dev data)
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
else:
#
# Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
if provider_id == "ollama-cloud":
from hermes_cli.models import fetch_ollama_cloud_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = fetch_ollama_cloud_models(api_key=api_key_for_probe, base_url=effective_base)
if model_list:
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
else:
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
if model_list:
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
else:
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = curated
if model_list:
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
if provider_id in {"opencode-zen", "opencode-go"}:
model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
@ -4860,7 +4869,7 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
default=None,
help="Inference provider (default: auto)"
)