mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add Ollama Cloud as built-in provider
Add ollama-cloud as a first-class provider with full parity to existing API-key providers (gemini, zai, minimax, etc.): - PROVIDER_REGISTRY entry with OLLAMA_API_KEY env var - Provider aliases: ollama -> custom (local), ollama_cloud -> ollama-cloud - models.dev integration for accurate context lengths - URL-to-provider mapping (ollama.com -> ollama-cloud) - Passthrough model normalization (preserves Ollama model:tag format) - Default auxiliary model (nemotron-3-nano:30b) - HermesOverlay in providers.py - CLI --provider choices, CANONICAL_PROVIDERS entry - Dynamic model discovery with disk caching (1hr TTL) - 37 provider-specific tests Cherry-picked from PR #6038 by kshitijk4poor. Closes #3926
This commit is contained in:
parent
8021a735c2
commit
1b61ec470b
12 changed files with 563 additions and 37 deletions
|
|
@ -24,6 +24,15 @@
|
||||||
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
|
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
|
||||||
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
|
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# LLM PROVIDER (Ollama Cloud)
|
||||||
|
# =============================================================================
|
||||||
|
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
|
||||||
|
# Get your key at: https://ollama.com/settings
|
||||||
|
# OLLAMA_API_KEY=your_ollama_key_here
|
||||||
|
# Optional base URL override (default: https://ollama.com/v1)
|
||||||
|
# OLLAMA_BASE_URL=https://ollama.com/v1
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# LLM PROVIDER (z.ai / GLM)
|
# LLM PROVIDER (z.ai / GLM)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||||
"opencode-zen": "gemini-3-flash",
|
"opencode-zen": "gemini-3-flash",
|
||||||
"opencode-go": "glm-5",
|
"opencode-go": "glm-5",
|
||||||
"kilocode": "google/gemini-3-flash-preview",
|
"kilocode": "google/gemini-3-flash-preview",
|
||||||
|
"ollama-cloud": "nemotron-3-nano:30b",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Vision-specific model overrides for direct providers.
|
# Vision-specific model overrides for direct providers.
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
|
||||||
# are preserved so the full model name reaches cache lookups and server queries.
|
# are preserved so the full model name reaches cache lookups and server queries.
|
||||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||||
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
|
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||||
"qwen-oauth",
|
"qwen-oauth",
|
||||||
"xiaomi",
|
"xiaomi",
|
||||||
|
|
@ -33,6 +33,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||||
"google", "google-gemini", "google-ai-studio",
|
"google", "google-gemini", "google-ai-studio",
|
||||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||||
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
|
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
|
||||||
|
"ollama",
|
||||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||||
"mimo", "xiaomi-mimo",
|
"mimo", "xiaomi-mimo",
|
||||||
"arcee-ai", "arceeai",
|
"arcee-ai", "arceeai",
|
||||||
|
|
@ -239,6 +240,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||||
"api.x.ai": "xai",
|
"api.x.ai": "xai",
|
||||||
"api.xiaomimimo.com": "xiaomi",
|
"api.xiaomimimo.com": "xiaomi",
|
||||||
"xiaomimimo.com": "xiaomi",
|
"xiaomimimo.com": "xiaomi",
|
||||||
|
"ollama.com": "ollama-cloud",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||||
"togetherai": "togetherai",
|
"togetherai": "togetherai",
|
||||||
"perplexity": "perplexity",
|
"perplexity": "perplexity",
|
||||||
"cohere": "cohere",
|
"cohere": "cohere",
|
||||||
|
"ollama-cloud": "ollama-cloud",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Reverse mapping: models.dev → Hermes (built lazily)
|
# Reverse mapping: models.dev → Hermes (built lazily)
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ model:
|
||||||
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
|
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
|
||||||
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
|
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
|
||||||
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
|
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
|
||||||
|
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
|
||||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||||
#
|
#
|
||||||
|
|
@ -37,12 +38,6 @@ model:
|
||||||
# base_url: "http://localhost:1234/v1"
|
# base_url: "http://localhost:1234/v1"
|
||||||
# No API key needed — local servers typically ignore auth.
|
# No API key needed — local servers typically ignore auth.
|
||||||
#
|
#
|
||||||
# For Ollama Cloud (https://ollama.com/pricing):
|
|
||||||
# provider: "custom"
|
|
||||||
# base_url: "https://ollama.com/v1"
|
|
||||||
# Set OLLAMA_API_KEY in .env — automatically picked up when base_url
|
|
||||||
# points to ollama.com.
|
|
||||||
#
|
|
||||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||||
provider: "auto"
|
provider: "auto"
|
||||||
|
|
||||||
|
|
@ -337,6 +332,7 @@ compression:
|
||||||
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
|
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
|
||||||
# "nous" - Force Nous Portal (requires: hermes login)
|
# "nous" - Force Nous Portal (requires: hermes login)
|
||||||
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
|
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
|
||||||
|
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
|
||||||
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
|
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
|
||||||
# Uses gpt-5.3-codex which supports vision.
|
# Uses gpt-5.3-codex which supports vision.
|
||||||
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
|
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||||
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
||||||
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
||||||
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
||||||
|
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
|
||||||
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
||||||
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
||||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||||
|
|
@ -274,6 +275,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||||
api_key_env_vars=("XIAOMI_API_KEY",),
|
api_key_env_vars=("XIAOMI_API_KEY",),
|
||||||
base_url_env_var="XIAOMI_BASE_URL",
|
base_url_env_var="XIAOMI_BASE_URL",
|
||||||
),
|
),
|
||||||
|
"ollama-cloud": ProviderConfig(
|
||||||
|
id="ollama-cloud",
|
||||||
|
name="Ollama Cloud",
|
||||||
|
auth_type="api_key",
|
||||||
|
inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL,
|
||||||
|
api_key_env_vars=("OLLAMA_API_KEY",),
|
||||||
|
base_url_env_var="OLLAMA_BASE_URL",
|
||||||
|
),
|
||||||
"bedrock": ProviderConfig(
|
"bedrock": ProviderConfig(
|
||||||
id="bedrock",
|
id="bedrock",
|
||||||
name="AWS Bedrock",
|
name="AWS Bedrock",
|
||||||
|
|
@ -937,7 +946,8 @@ def resolve_provider(
|
||||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||||
# Local server aliases — route through the generic custom provider
|
# Local server aliases — route through the generic custom provider
|
||||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||||
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
|
"ollama": "custom", "ollama_cloud": "ollama-cloud",
|
||||||
|
"vllm": "custom", "llamacpp": "custom",
|
||||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||||
}
|
}
|
||||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||||
|
|
|
||||||
|
|
@ -1024,6 +1024,22 @@ OPTIONAL_ENV_VARS = {
|
||||||
"category": "provider",
|
"category": "provider",
|
||||||
"advanced": True,
|
"advanced": True,
|
||||||
},
|
},
|
||||||
|
"OLLAMA_API_KEY": {
|
||||||
|
"description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)",
|
||||||
|
"prompt": "Ollama Cloud API key",
|
||||||
|
"url": "https://ollama.com/settings",
|
||||||
|
"password": True,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
|
"OLLAMA_BASE_URL": {
|
||||||
|
"description": "Ollama Cloud base URL override (default: https://ollama.com/v1)",
|
||||||
|
"prompt": "Ollama base URL (leave empty for default)",
|
||||||
|
"url": None,
|
||||||
|
"password": False,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
"XIAOMI_API_KEY": {
|
"XIAOMI_API_KEY": {
|
||||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||||
"prompt": "Xiaomi MiMo API Key",
|
"prompt": "Xiaomi MiMo API Key",
|
||||||
|
|
|
||||||
|
|
@ -1141,7 +1141,7 @@ def select_provider_and_model(args=None):
|
||||||
_model_flow_kimi(config, current_model)
|
_model_flow_kimi(config, current_model)
|
||||||
elif selected_provider == "bedrock":
|
elif selected_provider == "bedrock":
|
||||||
_model_flow_bedrock(config, current_model)
|
_model_flow_bedrock(config, current_model)
|
||||||
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
|
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"):
|
||||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||||
|
|
||||||
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
||||||
|
|
@ -2734,34 +2734,43 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||||
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
|
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
|
||||||
# 2. Curated static fallback list (offline insurance)
|
# 2. Curated static fallback list (offline insurance)
|
||||||
# 3. Live /models endpoint probe (small providers without models.dev data)
|
# 3. Live /models endpoint probe (small providers without models.dev data)
|
||||||
curated = _PROVIDER_MODELS.get(provider_id, [])
|
#
|
||||||
|
# Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
|
||||||
# Try models.dev first — returns tool-capable models, filtered for noise
|
if provider_id == "ollama-cloud":
|
||||||
mdev_models: list = []
|
from hermes_cli.models import fetch_ollama_cloud_models
|
||||||
try:
|
|
||||||
from agent.models_dev import list_agentic_models
|
|
||||||
mdev_models = list_agentic_models(provider_id)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if mdev_models:
|
|
||||||
model_list = mdev_models
|
|
||||||
print(f" Found {len(model_list)} model(s) from models.dev registry")
|
|
||||||
elif curated and len(curated) >= 8:
|
|
||||||
# Curated list is substantial — use it directly, skip live probe
|
|
||||||
model_list = curated
|
|
||||||
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
|
||||||
else:
|
|
||||||
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||||
live_models = fetch_api_models(api_key_for_probe, effective_base)
|
model_list = fetch_ollama_cloud_models(api_key=api_key_for_probe, base_url=effective_base)
|
||||||
if live_models and len(live_models) >= len(curated):
|
if model_list:
|
||||||
model_list = live_models
|
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
|
||||||
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
|
else:
|
||||||
else:
|
curated = _PROVIDER_MODELS.get(provider_id, [])
|
||||||
|
|
||||||
|
# Try models.dev first — returns tool-capable models, filtered for noise
|
||||||
|
mdev_models: list = []
|
||||||
|
try:
|
||||||
|
from agent.models_dev import list_agentic_models
|
||||||
|
mdev_models = list_agentic_models(provider_id)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if mdev_models:
|
||||||
|
model_list = mdev_models
|
||||||
|
print(f" Found {len(model_list)} model(s) from models.dev registry")
|
||||||
|
elif curated and len(curated) >= 8:
|
||||||
|
# Curated list is substantial — use it directly, skip live probe
|
||||||
model_list = curated
|
model_list = curated
|
||||||
if model_list:
|
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
||||||
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
else:
|
||||||
# else: no defaults either, will fall through to raw input
|
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||||
|
live_models = fetch_api_models(api_key_for_probe, effective_base)
|
||||||
|
if live_models and len(live_models) >= len(curated):
|
||||||
|
model_list = live_models
|
||||||
|
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
|
||||||
|
else:
|
||||||
|
model_list = curated
|
||||||
|
if model_list:
|
||||||
|
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
||||||
|
# else: no defaults either, will fall through to raw input
|
||||||
|
|
||||||
if provider_id in {"opencode-zen", "opencode-go"}:
|
if provider_id in {"opencode-zen", "opencode-go"}:
|
||||||
model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
|
model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
|
||||||
|
|
@ -4860,7 +4869,7 @@ For more help on a command:
|
||||||
)
|
)
|
||||||
chat_parser.add_argument(
|
chat_parser.add_argument(
|
||||||
"--provider",
|
"--provider",
|
||||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
|
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
|
||||||
default=None,
|
default=None,
|
||||||
help="Inference provider (default: auto)"
|
help="Inference provider (default: auto)"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||||
"qwen-oauth",
|
"qwen-oauth",
|
||||||
"xiaomi",
|
"xiaomi",
|
||||||
"arcee",
|
"arcee",
|
||||||
|
"ollama-cloud",
|
||||||
"custom",
|
"custom",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,9 @@ import json
|
||||||
import os
|
import os
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
import time
|
||||||
from difflib import get_close_matches
|
from difflib import get_close_matches
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, NamedTuple, Optional
|
from typing import Any, NamedTuple, Optional
|
||||||
|
|
||||||
COPILOT_BASE_URL = "https://api.githubcopilot.com"
|
COPILOT_BASE_URL = "https://api.githubcopilot.com"
|
||||||
|
|
@ -547,6 +549,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||||
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
|
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
|
||||||
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
|
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
|
||||||
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||||
|
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
|
||||||
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
|
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
|
||||||
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
|
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
|
||||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||||
|
|
@ -559,6 +562,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||||
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
|
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
|
||||||
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
|
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
|
||||||
|
|
||||||
|
|
||||||
_PROVIDER_ALIASES = {
|
_PROVIDER_ALIASES = {
|
||||||
"glm": "zai",
|
"glm": "zai",
|
||||||
"z-ai": "zai",
|
"z-ai": "zai",
|
||||||
|
|
@ -611,6 +615,8 @@ _PROVIDER_ALIASES = {
|
||||||
"grok": "xai",
|
"grok": "xai",
|
||||||
"x-ai": "xai",
|
"x-ai": "xai",
|
||||||
"x.ai": "xai",
|
"x.ai": "xai",
|
||||||
|
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
|
||||||
|
"ollama_cloud": "ollama-cloud",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1786,6 +1792,125 @@ def fetch_api_models(
|
||||||
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
|
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Ollama Cloud — merged model discovery with disk cache
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
|
||||||
|
|
||||||
|
|
||||||
|
def _ollama_cloud_cache_path() -> Path:
|
||||||
|
"""Return the path for the Ollama Cloud model cache."""
|
||||||
|
from hermes_constants import get_hermes_home
|
||||||
|
return get_hermes_home() / "ollama_cloud_models_cache.json"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
|
||||||
|
"""Load cached Ollama Cloud models from disk.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
cache_path = _ollama_cloud_cache_path()
|
||||||
|
if not cache_path.exists():
|
||||||
|
return None
|
||||||
|
with open(cache_path, encoding="utf-8") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return None
|
||||||
|
models = data.get("models")
|
||||||
|
if not (isinstance(models, list) and models):
|
||||||
|
return None
|
||||||
|
if not ignore_ttl:
|
||||||
|
cached_at = data.get("cached_at", 0)
|
||||||
|
if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
|
||||||
|
return None # stale
|
||||||
|
return data
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _save_ollama_cloud_cache(models: list[str]) -> None:
|
||||||
|
"""Persist the merged Ollama Cloud model list to disk."""
|
||||||
|
try:
|
||||||
|
from utils import atomic_json_write
|
||||||
|
cache_path = _ollama_cloud_cache_path()
|
||||||
|
cache_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_ollama_cloud_models(
|
||||||
|
api_key: Optional[str] = None,
|
||||||
|
base_url: Optional[str] = None,
|
||||||
|
*,
|
||||||
|
force_refresh: bool = False,
|
||||||
|
) -> list[str]:
|
||||||
|
"""Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.
|
||||||
|
|
||||||
|
Resolution order:
|
||||||
|
1. Disk cache (if fresh, < 1 hour, and not force_refresh)
|
||||||
|
2. Live ``/v1/models`` endpoint (primary — freshest source)
|
||||||
|
3. models.dev registry (secondary — fills gaps for unlisted models)
|
||||||
|
4. Merge: live models first, then models.dev additions (deduped)
|
||||||
|
|
||||||
|
Returns a list of model IDs (never None — empty list on total failure).
|
||||||
|
"""
|
||||||
|
# 1. Check disk cache
|
||||||
|
if not force_refresh:
|
||||||
|
cached = _load_ollama_cloud_cache()
|
||||||
|
if cached is not None:
|
||||||
|
return cached["models"]
|
||||||
|
|
||||||
|
# 2. Live API probe
|
||||||
|
if not api_key:
|
||||||
|
api_key = os.getenv("OLLAMA_API_KEY", "")
|
||||||
|
if not base_url:
|
||||||
|
base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"
|
||||||
|
|
||||||
|
live_models: list[str] = []
|
||||||
|
if api_key:
|
||||||
|
result = fetch_api_models(api_key, base_url, timeout=8.0)
|
||||||
|
if result:
|
||||||
|
live_models = result
|
||||||
|
|
||||||
|
# 3. models.dev registry
|
||||||
|
mdev_models: list[str] = []
|
||||||
|
try:
|
||||||
|
from agent.models_dev import list_agentic_models
|
||||||
|
mdev_models = list_agentic_models("ollama-cloud")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 4. Merge: live first, then models.dev additions (deduped, order-preserving)
|
||||||
|
if live_models or mdev_models:
|
||||||
|
seen: set[str] = set()
|
||||||
|
merged: list[str] = []
|
||||||
|
for m in live_models:
|
||||||
|
if m and m not in seen:
|
||||||
|
seen.add(m)
|
||||||
|
merged.append(m)
|
||||||
|
for m in mdev_models:
|
||||||
|
if m and m not in seen:
|
||||||
|
seen.add(m)
|
||||||
|
merged.append(m)
|
||||||
|
if merged:
|
||||||
|
_save_ollama_cloud_cache(merged)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
# Total failure — return stale cache if available (ignore TTL)
|
||||||
|
stale = _load_ollama_cloud_cache(ignore_ttl=True)
|
||||||
|
if stale is not None:
|
||||||
|
return stale["models"]
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def validate_requested_model(
|
def validate_requested_model(
|
||||||
model_name: str,
|
model_name: str,
|
||||||
provider: Optional[str],
|
provider: Optional[str],
|
||||||
|
|
|
||||||
|
|
@ -141,6 +141,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||||
base_url_override="https://api.arcee.ai/api/v1",
|
base_url_override="https://api.arcee.ai/api/v1",
|
||||||
base_url_env_var="ARCEE_BASE_URL",
|
base_url_env_var="ARCEE_BASE_URL",
|
||||||
),
|
),
|
||||||
|
"ollama-cloud": HermesOverlay(
|
||||||
|
transport="openai_chat",
|
||||||
|
base_url_env_var="OLLAMA_BASE_URL",
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -250,7 +254,7 @@ ALIASES: Dict[str, str] = {
|
||||||
"lmstudio": "lmstudio",
|
"lmstudio": "lmstudio",
|
||||||
"lm-studio": "lmstudio",
|
"lm-studio": "lmstudio",
|
||||||
"lm_studio": "lmstudio",
|
"lm_studio": "lmstudio",
|
||||||
"ollama": "ollama-cloud",
|
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
|
||||||
"vllm": "local",
|
"vllm": "local",
|
||||||
"llamacpp": "local",
|
"llamacpp": "local",
|
||||||
"llama.cpp": "local",
|
"llama.cpp": "local",
|
||||||
|
|
@ -269,6 +273,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
|
||||||
"xiaomi": "Xiaomi MiMo",
|
"xiaomi": "Xiaomi MiMo",
|
||||||
"local": "Local endpoint",
|
"local": "Local endpoint",
|
||||||
"bedrock": "AWS Bedrock",
|
"bedrock": "AWS Bedrock",
|
||||||
|
"ollama-cloud": "Ollama Cloud",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
351
tests/hermes_cli/test_ollama_cloud_provider.py
Normal file
351
tests/hermes_cli/test_ollama_cloud_provider.py
Normal file
|
|
@ -0,0 +1,351 @@
|
||||||
|
"""Tests for Ollama Cloud provider integration."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key_provider_credentials
|
||||||
|
from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider
|
||||||
|
from hermes_cli.model_normalize import normalize_model_for_provider
|
||||||
|
from agent.model_metadata import _URL_TO_PROVIDER, _PROVIDER_PREFIXES
|
||||||
|
from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models
|
||||||
|
|
||||||
|
|
||||||
|
# ── Provider Registry ──
|
||||||
|
|
||||||
|
class TestOllamaCloudProviderRegistry:
|
||||||
|
def test_ollama_cloud_in_registry(self):
|
||||||
|
assert "ollama-cloud" in PROVIDER_REGISTRY
|
||||||
|
|
||||||
|
def test_ollama_cloud_config(self):
|
||||||
|
pconfig = PROVIDER_REGISTRY["ollama-cloud"]
|
||||||
|
assert pconfig.id == "ollama-cloud"
|
||||||
|
assert pconfig.name == "Ollama Cloud"
|
||||||
|
assert pconfig.auth_type == "api_key"
|
||||||
|
assert pconfig.inference_base_url == "https://ollama.com/v1"
|
||||||
|
|
||||||
|
def test_ollama_cloud_env_vars(self):
|
||||||
|
pconfig = PROVIDER_REGISTRY["ollama-cloud"]
|
||||||
|
assert pconfig.api_key_env_vars == ("OLLAMA_API_KEY",)
|
||||||
|
assert pconfig.base_url_env_var == "OLLAMA_BASE_URL"
|
||||||
|
|
||||||
|
def test_ollama_cloud_base_url(self):
|
||||||
|
assert "ollama.com" in PROVIDER_REGISTRY["ollama-cloud"].inference_base_url
|
||||||
|
|
||||||
|
|
||||||
|
# ── Provider Aliases ──
|
||||||
|
|
||||||
|
PROVIDER_ENV_VARS = (
|
||||||
|
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||||
|
"GOOGLE_API_KEY", "GEMINI_API_KEY", "OLLAMA_API_KEY",
|
||||||
|
"GLM_API_KEY", "ZAI_API_KEY", "KIMI_API_KEY",
|
||||||
|
"MINIMAX_API_KEY", "DEEPSEEK_API_KEY",
|
||||||
|
)
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _clean_provider_env(monkeypatch):
|
||||||
|
for var in PROVIDER_ENV_VARS:
|
||||||
|
monkeypatch.delenv(var, raising=False)
|
||||||
|
|
||||||
|
|
||||||
|
class TestOllamaCloudAliases:
|
||||||
|
def test_explicit_ollama_cloud(self):
|
||||||
|
assert resolve_provider("ollama-cloud") == "ollama-cloud"
|
||||||
|
|
||||||
|
def test_alias_ollama_underscore(self):
|
||||||
|
"""ollama_cloud (underscore) is the unambiguous cloud alias."""
|
||||||
|
assert resolve_provider("ollama_cloud") == "ollama-cloud"
|
||||||
|
|
||||||
|
def test_bare_ollama_stays_local(self):
|
||||||
|
"""Bare 'ollama' alias routes to 'custom' (local) — not cloud."""
|
||||||
|
assert resolve_provider("ollama") == "custom"
|
||||||
|
|
||||||
|
def test_models_py_aliases(self):
|
||||||
|
assert _PROVIDER_ALIASES.get("ollama_cloud") == "ollama-cloud"
|
||||||
|
# bare "ollama" stays local
|
||||||
|
assert _PROVIDER_ALIASES.get("ollama") == "custom"
|
||||||
|
|
||||||
|
def test_normalize_provider(self):
|
||||||
|
assert normalize_provider("ollama-cloud") == "ollama-cloud"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Auto-detection ──
|
||||||
|
|
||||||
|
class TestOllamaCloudAutoDetection:
|
||||||
|
def test_auto_detects_ollama_api_key(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OLLAMA_API_KEY", "test-ollama-key")
|
||||||
|
assert resolve_provider("auto") == "ollama-cloud"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Credential Resolution ──
|
||||||
|
|
||||||
|
class TestOllamaCloudCredentials:
|
||||||
|
def test_resolve_with_ollama_api_key(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OLLAMA_API_KEY", "ollama-secret")
|
||||||
|
creds = resolve_api_key_provider_credentials("ollama-cloud")
|
||||||
|
assert creds["provider"] == "ollama-cloud"
|
||||||
|
assert creds["api_key"] == "ollama-secret"
|
||||||
|
assert creds["base_url"] == "https://ollama.com/v1"
|
||||||
|
|
||||||
|
def test_resolve_with_custom_base_url(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OLLAMA_API_KEY", "key")
|
||||||
|
monkeypatch.setenv("OLLAMA_BASE_URL", "https://custom.ollama/v1")
|
||||||
|
creds = resolve_api_key_provider_credentials("ollama-cloud")
|
||||||
|
assert creds["base_url"] == "https://custom.ollama/v1"
|
||||||
|
|
||||||
|
def test_runtime_ollama_cloud(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OLLAMA_API_KEY", "ollama-key")
|
||||||
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||||
|
result = resolve_runtime_provider(requested="ollama-cloud")
|
||||||
|
assert result["provider"] == "ollama-cloud"
|
||||||
|
assert result["api_mode"] == "chat_completions"
|
||||||
|
assert result["api_key"] == "ollama-key"
|
||||||
|
assert result["base_url"] == "https://ollama.com/v1"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Model Catalog (dynamic — no static list) ──
|
||||||
|
|
||||||
|
class TestOllamaCloudModelCatalog:
    """Static catalog expectations for the ollama-cloud provider."""

    def test_no_static_model_list(self):
        """Ollama Cloud models are fetched dynamically — no static list to maintain."""
        has_static_list = "ollama-cloud" in _PROVIDER_MODELS
        assert not has_static_list

    def test_provider_label(self):
        """The human-readable label for the provider is registered."""
        label = _PROVIDER_LABELS.get("ollama-cloud")
        assert label is not None
        assert label == "Ollama Cloud"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Merged Model Discovery ──
|
||||||
|
|
||||||
|
class TestOllamaCloudMergedDiscovery:
    # Exercises fetch_ollama_cloud_models end-to-end behavior: merge order
    # (live API first, models.dev fills gaps), disk caching under HERMES_HOME,
    # force_refresh bypass, and fallbacks on total failure.

    def test_merges_live_and_models_dev(self, tmp_path, monkeypatch):
        """Live API models appear first, models.dev additions fill gaps."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))  # isolate the on-disk cache
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")

        # Minimal models.dev payload: "glm-5" overlaps with the live API list
        # below so dedup can be verified.
        mock_mdev = {
            "ollama-cloud": {
                "models": {
                    "glm-5": {"tool_call": True},
                    "kimi-k2.5": {"tool_call": True},
                    "nemotron-3-super": {"tool_call": True},
                }
            }
        }
        with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b", "glm-5"]), \
                patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
            result = fetch_ollama_cloud_models(force_refresh=True)

        # Live models first, then models.dev additions (deduped)
        assert result[0] == "qwen3.5:397b"  # from live API
        assert result[1] == "glm-5"  # from live API (also in models.dev)
        assert "kimi-k2.5" in result  # from models.dev only
        assert "nemotron-3-super" in result  # from models.dev only
        assert result.count("glm-5") == 1  # no duplicates

    def test_falls_back_to_models_dev_without_api_key(self, tmp_path, monkeypatch):
        """Without API key, only models.dev results are returned."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)  # no live API access

        mock_mdev = {
            "ollama-cloud": {
                "models": {
                    "glm-5": {"tool_call": True},
                }
            }
        }
        with patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
            result = fetch_ollama_cloud_models(force_refresh=True)

        assert result == ["glm-5"]

    def test_uses_disk_cache(self, tmp_path, monkeypatch):
        """Second call returns cached results without hitting APIs."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")

        with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as mock_api, \
                patch("agent.models_dev.fetch_models_dev", return_value={}):
            first = fetch_ollama_cloud_models(force_refresh=True)
            assert first == ["model-a"]
            assert mock_api.call_count == 1

            # Second call — should use disk cache, not call API
            second = fetch_ollama_cloud_models()
            assert second == ["model-a"]
            assert mock_api.call_count == 1  # no extra API call

    def test_force_refresh_bypasses_cache(self, tmp_path, monkeypatch):
        """force_refresh=True always hits the API even with fresh cache."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")

        with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as mock_api, \
                patch("agent.models_dev.fetch_models_dev", return_value={}):
            fetch_ollama_cloud_models(force_refresh=True)
            fetch_ollama_cloud_models(force_refresh=True)
            # Both calls hit the live API despite the fresh cache written by the first.
            assert mock_api.call_count == 2

    def test_stale_cache_used_on_total_failure(self, tmp_path, monkeypatch):
        """If both API and models.dev fail, stale cache is returned."""
        from hermes_cli.models import fetch_ollama_cloud_models, _save_ollama_cloud_cache

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")

        # Pre-populate a stale cache
        _save_ollama_cloud_cache(["stale-model"])

        # Make the cache appear stale by backdating it
        import json
        # NOTE(review): cache filename assumed to live directly under HERMES_HOME —
        # matches _save_ollama_cloud_cache above; confirm if that helper changes.
        cache_path = tmp_path / "ollama_cloud_models_cache.json"
        with open(cache_path) as f:
            data = json.load(f)
        data["cached_at"] = 0  # epoch = very stale
        with open(cache_path, "w") as f:
            json.dump(data, f)

        # fetch_api_models returning None models a live-API failure here.
        with patch("hermes_cli.models.fetch_api_models", return_value=None), \
                patch("agent.models_dev.fetch_models_dev", return_value={}):
            result = fetch_ollama_cloud_models(force_refresh=True)

        assert result == ["stale-model"]

    def test_empty_on_total_failure_no_cache(self, tmp_path, monkeypatch):
        """Returns empty list when everything fails and no cache exists."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        with patch("agent.models_dev.fetch_models_dev", return_value={}):
            result = fetch_ollama_cloud_models(force_refresh=True)

        assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
# ── Model Normalization ──
|
||||||
|
|
||||||
|
class TestOllamaCloudModelNormalization:
    """Ollama Cloud is a passthrough provider — model ids are never rewritten."""

    def test_passthrough_bare_name(self):
        """Ollama Cloud is a passthrough provider — model names used as-is."""
        normalized = normalize_model_for_provider("qwen3.5:397b", "ollama-cloud")
        assert normalized == "qwen3.5:397b"

    def test_passthrough_with_tag(self):
        """A model:tag id survives normalization byte-for-byte."""
        normalized = normalize_model_for_provider("cogito-2.1:671b", "ollama-cloud")
        assert normalized == "cogito-2.1:671b"

    def test_passthrough_no_tag(self):
        """A bare model name with no tag is also left untouched."""
        normalized = normalize_model_for_provider("glm-5", "ollama-cloud")
        assert normalized == "glm-5"
|
||||||
|
|
||||||
|
|
||||||
|
# ── URL-to-Provider Mapping ──
|
||||||
|
|
||||||
|
class TestOllamaCloudUrlMapping:
    """URL-to-provider mapping and provider-prefix registration."""

    def test_url_to_provider(self):
        """The ollama.com host maps back to the ollama-cloud provider id."""
        mapped = _URL_TO_PROVIDER.get("ollama.com")
        assert mapped == "ollama-cloud"

    def test_provider_prefix_canonical(self):
        """The canonical id is a recognized model prefix."""
        assert "ollama-cloud" in _PROVIDER_PREFIXES

    def test_provider_prefix_alias(self):
        """The short "ollama" alias is also a recognized model prefix."""
        assert "ollama" in _PROVIDER_PREFIXES
|
||||||
|
|
||||||
|
|
||||||
|
# ── models.dev Integration ──
|
||||||
|
|
||||||
|
class TestOllamaCloudModelsDev:
    """Integration with the models.dev catalog for ollama-cloud."""

    def test_ollama_cloud_mapped(self):
        """The provider id maps to the same id in the models.dev namespace."""
        mapped = PROVIDER_TO_MODELS_DEV.get("ollama-cloud")
        assert mapped == "ollama-cloud"

    def test_list_agentic_models_with_mock_data(self):
        """list_agentic_models filters correctly from mock models.dev data."""
        catalog = {
            "ollama-cloud": {
                "models": {
                    "qwen3.5:397b": {"tool_call": True},
                    "glm-5": {"tool_call": True},
                    "nemotron-3-nano:30b": {"tool_call": True},
                    "some-embedding:latest": {"tool_call": False},
                }
            }
        }
        with patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            result = list_agentic_models("ollama-cloud")

        # Every tool-calling model is kept...
        for model_id in ("qwen3.5:397b", "glm-5", "nemotron-3-nano:30b"):
            assert model_id in result
        # ...and the non-tool-calling one is filtered out.
        assert "some-embedding:latest" not in result  # no tool_call
|
||||||
|
|
||||||
|
|
||||||
|
# ── Agent Init (no SyntaxError) ──
|
||||||
|
|
||||||
|
class TestOllamaCloudAgentInit:
    """Agent initialization for the ollama-cloud provider."""

    def test_agent_imports_without_error(self):
        """Verify run_agent.py has no SyntaxError."""
        import importlib

        import run_agent

        # Reloading forces a fresh parse even if another test imported it first.
        importlib.reload(run_agent)

    def test_ollama_cloud_agent_uses_chat_completions(self, monkeypatch):
        """Ollama Cloud falls through to chat_completions — no special elif needed."""
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            from run_agent import AIAgent

            agent_kwargs = {
                "model": "qwen3.5:397b",
                "provider": "ollama-cloud",
                "api_key": "test-key",
                "base_url": "https://ollama.com/v1",
            }
            ai = AIAgent(**agent_kwargs)
            assert ai.api_mode == "chat_completions"
            assert ai.provider == "ollama-cloud"
|
||||||
|
|
||||||
|
|
||||||
|
# ── providers.py New System ──
|
||||||
|
|
||||||
|
class TestOllamaCloudProvidersNew:
    """Registration of ollama-cloud in the new providers.py system."""

    def test_overlay_exists(self):
        """The HermesOverlay entry exists with the expected transport and env var."""
        from hermes_cli.providers import HERMES_OVERLAYS

        assert "ollama-cloud" in HERMES_OVERLAYS
        ollama_overlay = HERMES_OVERLAYS["ollama-cloud"]
        assert ollama_overlay.transport == "openai_chat"
        assert ollama_overlay.base_url_env_var == "OLLAMA_BASE_URL"

    def test_alias_resolves(self):
        """Bare "ollama" stays local (custom); "ollama-cloud" is canonical."""
        from hermes_cli.providers import normalize_provider as np

        assert np("ollama") == "custom"  # bare "ollama" = local
        assert np("ollama-cloud") == "ollama-cloud"

    def test_label_override(self):
        """The label-override table carries the display name."""
        from hermes_cli.providers import _LABEL_OVERRIDES

        assert _LABEL_OVERRIDES.get("ollama-cloud") == "Ollama Cloud"

    def test_get_label(self):
        """get_label surfaces the display name for the provider id."""
        from hermes_cli.providers import get_label

        assert get_label("ollama-cloud") == "Ollama Cloud"

    def test_get_provider(self):
        """get_provider returns a definition with matching id and transport."""
        from hermes_cli.providers import get_provider

        definition = get_provider("ollama-cloud")
        assert definition is not None
        assert definition.id == "ollama-cloud"
        assert definition.transport == "openai_chat"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Auxiliary Model ──
|
||||||
|
|
||||||
|
class TestOllamaCloudAuxiliary:
    """Default auxiliary model selection for ollama-cloud."""

    def test_aux_model_defined(self):
        """An auxiliary model is registered and points at the nano Nemotron."""
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS

        assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS
        aux_model = _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"]
        assert aux_model == "nemotron-3-nano:30b"
|
||||||
Loading…
Add table
Add a link
Reference in a new issue