diff --git a/agent/models_dev.py b/agent/models_dev.py index 42c8925ffe..cc4dbf0be4 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]: models = _get_provider_models(provider) if models is None: return [] - return list(models.keys()) + return [ + mid for mid in models.keys() + if not _should_hide_from_provider_catalog(provider, mid) + ] # Patterns that indicate non-agentic or noise models (TTS, embedding, @@ -432,6 +435,29 @@ _NOISE_PATTERNS: re.Pattern = re.compile( re.IGNORECASE, ) +# Google-hosted Gemma models currently have very low TPM quotas for agent-style +# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as +# normal Hermes picks even though they advertise large context windows. Keep the +# capability metadata available for direct/manual use, but hide them from the +# Gemini model catalogs we surface in setup and model selection. +_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({ + "gemma-4-31b-it", + "gemma-4-26b-a4b-it", + "gemma-3-1b", + "gemma-3-2b", + "gemma-3-4b", + "gemma-3-12b", + "gemma-3-27b", +}) + + +def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool: + provider_lower = (provider or "").strip().lower() + model_lower = (model_id or "").strip().lower() + if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_GEMMA_HIDDEN_MODELS: + return True + return False + def list_agentic_models(provider: str) -> List[str]: """Return model IDs suitable for agentic use from models.dev. 
def test_list_provider_models_hides_low_tpm_google_gemmas(self):
    """list_provider_models("gemini") must drop the hidden Gemma IDs.

    ``agent.models_dev.fetch_models_dev`` is patched to return a registry
    whose "google" entry holds one Gemini model and two Gemma models; the
    Gemini model must be listed and both Gemma models must not.
    """
    hidden_ids = ("gemma-4-31b-it", "gemma-3-1b")
    # Same keys, same insertion order as a literal dict would give.
    catalog = {mid: {} for mid in ("gemini-2.5-pro",) + hidden_ids}
    fake_registry = {"google": {"models": catalog}}

    with patch("agent.models_dev.fetch_models_dev", return_value=fake_registry):
        # Imported while the patch is active, as in the sibling tests.
        from agent.models_dev import list_provider_models

        listed = list_provider_models("gemini")

    assert "gemini-2.5-pro" in listed
    for mid in hidden_ids:
        assert mid not in listed