fix(models): strip :cloud/-cloud suffix from models.dev Ollama Cloud IDs

models.dev appends :cloud and -cloud suffixes to Ollama Cloud model IDs
(e.g. kimi-k2.6:cloud, qwen3-coder:480b-cloud) that the live Ollama Cloud
API does not use. Without normalisation, these suffixed IDs bypass the
dedup check and appear alongside the correct clean IDs, causing 400/404
errors when users select them in /model or hermes model.

Add _strip_ollama_cloud_suffix() and apply it to mdev entries before the
dedup merge in fetch_ollama_cloud_models() so all model IDs stored in the
disk cache use the canonical form the API accepts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
briandevans 2026-04-26 12:17:22 -07:00 committed by Teknium
parent c050ee6573
commit eadf34633e
2 changed files with 114 additions and 3 deletions

View file

@ -2906,6 +2906,19 @@ def fetch_api_models(
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
def _strip_ollama_cloud_suffix(model_id: str) -> str:
    """Return *model_id* without a trailing ':cloud' or '-cloud' marker.

    models.dev sometimes lists Ollama Cloud models with one of these
    suffixes (for example 'kimi-k2.6:cloud'), whereas the live API uses
    the bare ID ('kimi-k2.6').  Normalising the models.dev IDs before they
    are merged with the live list keeps the same model from appearing twice.

    At most one suffix is removed per call; an ID that carries neither
    suffix (including the empty string) is returned unchanged.
    """
    # Pick the first suffix the ID actually ends with, if any.
    trailing = next(
        (marker for marker in (":cloud", "-cloud") if model_id.endswith(marker)),
        None,
    )
    if trailing is None:
        return model_id
    return model_id[: len(model_id) - len(trailing)]
def _ollama_cloud_cache_path() -> Path:
"""Return the path for the Ollama Cloud model cache."""
from hermes_constants import get_hermes_home
@ -3001,9 +3014,10 @@ def fetch_ollama_cloud_models(
seen.add(m)
merged.append(m)
for m in mdev_models:
if m and m not in seen:
seen.add(m)
merged.append(m)
normalized = _strip_ollama_cloud_suffix(m)
if normalized and normalized not in seen:
seen.add(normalized)
merged.append(normalized)
if merged:
_save_ollama_cloud_cache(merged)
return merged

View file

@ -401,6 +401,103 @@ class TestOllamaCloudProvidersNew:
assert pdef.transport == "openai_chat"
# ── Cloud Suffix Stripping ──
class TestOllamaCloudSuffixStripping:
    """Check that ':cloud' / '-cloud' suffixes from models.dev are normalised.

    models.dev lists some Ollama Cloud models with a suffix the live API
    does not use.  fetch_ollama_cloud_models() is expected to strip it before
    merging, so IDs such as 'kimi-k2.6:cloud' never reach the model picker.
    """

    @staticmethod
    def _mdev_catalog(*model_ids):
        """Build a models.dev-shaped payload listing *model_ids* under 'ollama-cloud'."""
        return {
            "ollama-cloud": {
                "models": {model_id: {"tool_call": True} for model_id in model_ids}
            }
        }

    def test_strips_colon_cloud_suffix(self, tmp_path, monkeypatch):
        """A models.dev ID ending in ':cloud' is returned without the suffix."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        catalog = self._mdev_catalog("kimi-k2.6:cloud")
        with patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            models = fetch_ollama_cloud_models(force_refresh=True)

        assert "kimi-k2.6" in models
        assert "kimi-k2.6:cloud" not in models

    def test_strips_dash_cloud_suffix(self, tmp_path, monkeypatch):
        """A models.dev ID ending in '-cloud' is returned without the suffix."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        catalog = self._mdev_catalog("qwen3-coder:480b-cloud")
        with patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            models = fetch_ollama_cloud_models(force_refresh=True)

        assert "qwen3-coder:480b" in models
        assert "qwen3-coder:480b-cloud" not in models

    def test_no_duplicate_when_live_clean_and_mdev_suffixed(self, tmp_path, monkeypatch):
        """Clean live IDs plus ':cloud' models.dev variants yield one entry each."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")

        catalog = self._mdev_catalog("kimi-k2.6:cloud", "glm-5.1:cloud")
        live_ids = ["kimi-k2.6", "glm-5.1"]
        with patch("hermes_cli.models.fetch_api_models", return_value=live_ids), \
                patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            models = fetch_ollama_cloud_models(force_refresh=True)

        assert models.count("kimi-k2.6") == 1
        assert models.count("glm-5.1") == 1
        assert "kimi-k2.6:cloud" not in models
        assert "glm-5.1:cloud" not in models

    def test_unsuffixed_model_id_unchanged(self, tmp_path, monkeypatch):
        """An ID with neither suffix still appears in the result as-is."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        catalog = self._mdev_catalog("nemotron-3-nano:30b")
        with patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            models = fetch_ollama_cloud_models(force_refresh=True)

        assert "nemotron-3-nano:30b" in models

    def test_strip_suffix_helper(self):
        """Exercise _strip_ollama_cloud_suffix directly on representative IDs."""
        from hermes_cli.models import _strip_ollama_cloud_suffix

        # (input, expected) pairs covering both suffixes, a clean ID, and "".
        expectations = (
            ("kimi-k2.6:cloud", "kimi-k2.6"),
            ("glm-5.1:cloud", "glm-5.1"),
            ("qwen3-coder:480b-cloud", "qwen3-coder:480b"),
            ("nemotron-3-nano:30b", "nemotron-3-nano:30b"),
            ("", ""),
        )
        for raw_id, expected in expectations:
            assert _strip_ollama_cloud_suffix(raw_id) == expected
# ── Auxiliary Model ──
class TestOllamaCloudAuxiliary: