fix(gemini): hide low-TPM Gemma models from exposed lists

2026-04-25 00:51:20 +00:00 · 2026-04-18 13:00:04 -06:00 · 2026-04-18 13:00:04 -06:00 · 2eab7ee15f
commit 2eab7ee15f
parent f7af90e2da
4 changed files with 50 additions and 8 deletions
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]:
    models = _get_provider_models(provider)
    if models is None:
        return []
-    return list(models.keys())
+    return [
+        mid for mid in models.keys()
+        if not _should_hide_from_provider_catalog(provider, mid)
+    ]


 # Patterns that indicate non-agentic or noise models (TTS, embedding,
@@ -432,6 +435,29 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
    re.IGNORECASE,
 )

+# Google-hosted Gemma models currently have very low TPM quotas for agent-style
+# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as
+# normal Hermes picks even though they advertise large context windows. Keep the
+# capability metadata available for direct/manual use, but hide them from the
+# Gemini model catalogs we surface in setup and model selection.
+_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({
+    "gemma-4-31b-it",
+    "gemma-4-26b-a4b-it",
+    "gemma-3-1b",
+    "gemma-3-2b",
+    "gemma-3-4b",
+    "gemma-3-12b",
+    "gemma-3-27b",
+})
+
+
+def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
+    provider_lower = (provider or "").strip().lower()
+    model_lower = (model_id or "").strip().lower()
+    if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_GEMMA_HIDDEN_MODELS:
+        return True
+    return False
+

 def list_agentic_models(provider: str) -> List[str]:
    """Return model IDs suitable for agentic use from models.dev.
@@ -448,6 +474,8 @@ def list_agentic_models(provider: str) -> List[str]:
    for mid, entry in models.items():
        if not isinstance(entry, dict):
            continue
+        if _should_hide_from_provider_catalog(provider, mid):
+            continue
        if not entry.get("tool_call", False):
            continue
        if _NOISE_PATTERNS.search(mid):
@@ -582,5 +610,3 @@ def get_model_info(
            return _parse_model_info(mid, mdata, mdev_id)

    return None
-
-