import re

# Model-ID fragments that mark a registry entry as noise for agentic use.
# Compiled once at import time; matching is case-insensitive.  Written in
# verbose mode so every alternative carries its own explanation.
_NOISE_PATTERNS: re.Pattern = re.compile(
    r"""
      -tts\b                         # text-to-speech variants
    | embedding                      # embedding models
    | (?<![a-z0-9])live-             # live/streaming-only; must start a token
                                     # so IDs such as "olive-..." are kept
    | -(preview|exp)-\d{2,4}[-_]     # dated preview / experimental snapshots
    | -image\b                       # image-only models
    | -image-preview\b               # image-only preview models
    | -customtools\b                 # custom-tools variants
    """,
    re.IGNORECASE | re.VERBOSE,
)


def list_agentic_models(provider: str) -> List[str]:
    """Return model IDs suitable for agentic use from models.dev.

    An ID is kept only when its registry entry is a dict with a truthy
    ``tool_call`` flag and the ID does not match ``_NOISE_PATTERNS``
    (TTS, embedding, dated preview snapshots, live/streaming, image-only,
    custom-tools variants).  The registry's iteration order is preserved.

    Args:
        provider: Provider identifier handed to ``_get_provider_models``.

    Returns:
        The filtered list of model IDs.  An empty list is returned when the
        lookup yields ``None`` or anything that is not a dict.
    """
    models = _get_provider_models(provider)
    # Treat a missing or malformed registry payload as "no data" instead of
    # raising, so callers can fall through to their own fallback lists.
    if not isinstance(models, dict):
        return []

    return [
        model_id
        for model_id, entry in models.items()
        if isinstance(model_id, str)
        and isinstance(entry, dict)
        and entry.get("tool_call", False)
        and not _NOISE_PATTERNS.search(model_id)
    ]
Live /models endpoint probe (small providers without models.dev data) curated = _PROVIDER_MODELS.get(provider_id, []) - if curated and len(curated) >= 8: + + # Try models.dev first — returns tool-capable models, filtered for noise + mdev_models: list = [] + try: + from agent.models_dev import list_agentic_models + mdev_models = list_agentic_models(provider_id) + except Exception: + pass + + if mdev_models: + model_list = mdev_models + print(f" Found {len(model_list)} model(s) from models.dev registry") + elif curated and len(curated) >= 8: # Curated list is substantial — use it directly, skip live probe - live_models = None + model_list = curated + print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") else: api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") live_models = fetch_api_models(api_key_for_probe, effective_base) - - if live_models and len(live_models) >= len(curated): - model_list = live_models - print(f" Found {len(model_list)} model(s) from {pconfig.name} API") - else: - model_list = curated - if model_list: - print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") + if live_models and len(live_models) >= len(curated): + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + model_list = curated + if model_list: + print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") # else: no defaults either, will fall through to raw input if provider_id in {"opencode-zen", "opencode-go"}: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a3145595a2..a5b1c2b2f4 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -112,15 +112,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "grok-code-fast-1", ], "gemini": [ + "gemini-3.1-pro-preview", + "gemini-3-flash-preview", + "gemini-3.1-flash-lite-preview", "gemini-2.5-pro", "gemini-2.5-flash", - 
"gemini-2.0-flash", - "gemini-2.0-flash-lite", + "gemini-2.5-flash-lite", # Gemma open models (also served via AI Studio) "gemma-4-31b-it", - "gemma-4-26b-a4b-it", - "gemma-4-e4b-it", - "gemma-4-e2b-it", + "gemma-4-26b-it", ], "zai": [ "glm-5", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index cfc1a756c6..82a30b3caf 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -112,8 +112,9 @@ _DEFAULT_PROVIDER_MODELS = { "grok-code-fast-1", ], "gemini": [ - "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash", "gemini-2.0-flash-lite", - "gemma-4-31b-it", "gemma-4-26b-a4b-it", "gemma-4-e4b-it", "gemma-4-e2b-it", + "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", + "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", + "gemma-4-31b-it", "gemma-4-26b-it", ], "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], diff --git a/tests/test_gemini_provider.py b/tests/test_gemini_provider.py index 0fee6ff7fb..d0cba5d63c 100644 --- a/tests/test_gemini_provider.py +++ b/tests/test_gemini_provider.py @@ -8,6 +8,7 @@ from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider from hermes_cli.model_normalize import normalize_model_for_provider, detect_vendor from agent.model_metadata import get_model_context_length +from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models, _NOISE_PATTERNS # ── Provider Registry ── @@ -131,6 +132,12 @@ class TestGeminiModelCatalog: assert "gemini-2.5-flash" in models assert "gemma-4-31b-it" in models + def test_provider_models_has_3x(self): + models = _PROVIDER_MODELS["gemini"] + assert "gemini-3.1-pro-preview" in models + assert "gemini-3-flash-preview" in models + assert "gemini-3.1-flash-lite-preview" in models + def test_provider_label(self): assert "gemini" in 
# ── models.dev Integration ──

class TestGeminiModelsDev:
    """Checks for the Gemini ↔ models.dev wiring and the noise filter."""

    def test_gemini_mapped_to_google(self):
        # The "gemini" provider must resolve to the "google" registry key.
        mapped = PROVIDER_TO_MODELS_DEV.get("gemini")
        assert mapped == "google"

    def test_noise_filter_excludes_tts(self):
        hit = _NOISE_PATTERNS.search("gemini-2.5-pro-preview-tts")
        assert hit is not None

    def test_noise_filter_excludes_dated_preview(self):
        hit = _NOISE_PATTERNS.search("gemini-2.5-flash-preview-04-17")
        assert hit is not None

    def test_noise_filter_excludes_embedding(self):
        hit = _NOISE_PATTERNS.search("gemini-embedding-001")
        assert hit is not None

    def test_noise_filter_excludes_live(self):
        hit = _NOISE_PATTERNS.search("gemini-live-2.5-flash")
        assert hit is not None

    def test_noise_filter_excludes_image(self):
        hit = _NOISE_PATTERNS.search("gemini-2.5-flash-image")
        assert hit is not None

    def test_noise_filter_excludes_customtools(self):
        hit = _NOISE_PATTERNS.search("gemini-3.1-pro-preview-customtools")
        assert hit is not None

    def test_noise_filter_passes_stable(self):
        hit = _NOISE_PATTERNS.search("gemini-2.5-flash")
        assert hit is None

    def test_noise_filter_passes_preview(self):
        # A preview ID without a date suffix is a regular model and must
        # not be treated as noise.
        hit = _NOISE_PATTERNS.search("gemini-3-flash-preview")
        assert hit is None

    def test_noise_filter_passes_gemma(self):
        hit = _NOISE_PATTERNS.search("gemma-4-31b-it")
        assert hit is None

    def test_list_agentic_models_with_mock_data(self):
        """list_agentic_models keeps tool-capable, non-noise IDs only."""
        tool_call_by_id = (
            ("gemini-3-flash-preview", True),
            ("gemini-2.5-pro", True),
            ("gemini-embedding-001", False),
            ("gemini-2.5-flash-preview-tts", False),
            ("gemini-live-2.5-flash", True),
            ("gemini-2.5-flash-preview-04-17", True),
            ("gemma-4-31b-it", True),
        )
        mock_data = {
            "google": {
                "models": {
                    model_id: {"tool_call": flag}
                    for model_id, flag in tool_call_by_id
                }
            }
        }
        with patch("agent.models_dev.fetch_models_dev", return_value=mock_data):
            result = list_agentic_models("gemini")

        kept = (
            "gemini-3-flash-preview",
            "gemini-2.5-pro",
            "gemma-4-31b-it",
        )
        dropped = (
            "gemini-embedding-001",            # no tool_call
            "gemini-2.5-flash-preview-tts",    # no tool_call
            "gemini-live-2.5-flash",           # noise: live-
            "gemini-2.5-flash-preview-04-17",  # noise: dated preview
        )
        for model_id in kept:
            assert model_id in result
        for model_id in dropped:
            assert model_id not in result