mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gemini): hide low-TPM Gemma models from exposed lists
This commit is contained in:
parent
f7af90e2da
commit
2eab7ee15f
4 changed files with 50 additions and 8 deletions
|
|
@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]:
|
|||
models = _get_provider_models(provider)
|
||||
if models is None:
|
||||
return []
|
||||
return list(models.keys())
|
||||
return [
|
||||
mid for mid in models.keys()
|
||||
if not _should_hide_from_provider_catalog(provider, mid)
|
||||
]
|
||||
|
||||
|
||||
# Patterns that indicate non-agentic or noise models (TTS, embedding,
|
||||
|
|
@ -432,6 +435,29 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
|
|||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Google-hosted Gemma models currently have very low TPM quotas for agent-style
|
||||
# traffic (for example 15K/16K TPM tiers in AI Studio) and are not practical as
|
||||
# normal Hermes picks even though they advertise large context windows. Keep the
|
||||
# capability metadata available for direct/manual use, but hide them from the
|
||||
# Gemini model catalogs we surface in setup and model selection.
|
||||
_GOOGLE_GEMMA_HIDDEN_MODELS = frozenset({
|
||||
"gemma-4-31b-it",
|
||||
"gemma-4-26b-a4b-it",
|
||||
"gemma-3-1b",
|
||||
"gemma-3-2b",
|
||||
"gemma-3-4b",
|
||||
"gemma-3-12b",
|
||||
"gemma-3-27b",
|
||||
})
|
||||
|
||||
|
||||
def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
    """Return True when *model_id* must be left out of *provider*'s catalog.

    Both arguments are normalised the same way before comparison: a falsy
    value becomes ``""``, then surrounding whitespace is stripped and the
    text is lower-cased. A model is hidden only when the provider is
    ``gemini`` or ``google`` and the normalised model ID is a member of
    ``_GOOGLE_GEMMA_HIDDEN_MODELS``.
    """
    provider_key = (provider or "").strip().lower()
    model_key = (model_id or "").strip().lower()
    is_google_hosted = provider_key in ("gemini", "google")
    return is_google_hosted and model_key in _GOOGLE_GEMMA_HIDDEN_MODELS
|
||||
|
||||
|
||||
def list_agentic_models(provider: str) -> List[str]:
|
||||
"""Return model IDs suitable for agentic use from models.dev.
|
||||
|
|
@ -448,6 +474,8 @@ def list_agentic_models(provider: str) -> List[str]:
|
|||
for mid, entry in models.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if _should_hide_from_provider_catalog(provider, mid):
|
||||
continue
|
||||
if not entry.get("tool_call", False):
|
||||
continue
|
||||
if _NOISE_PATTERNS.search(mid):
|
||||
|
|
@ -582,5 +610,3 @@ def get_model_info(
|
|||
return _parse_model_info(mid, mdata, mdev_id)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -133,8 +133,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"gemini-2.5-pro",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.5-flash-lite",
|
||||
# Gemma open models (also served via AI Studio)
|
||||
"gemma-4-31b-it",
|
||||
],
|
||||
"google-gemini-cli": [
|
||||
"gemini-2.5-pro",
|
||||
|
|
|
|||
|
|
@ -91,7 +91,6 @@ _DEFAULT_PROVIDER_MODELS = {
|
|||
"gemini": [
|
||||
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
|
||||
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
|
||||
"gemma-4-31b-it",
|
||||
],
|
||||
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class TestGeminiModelCatalog:
|
|||
models = _PROVIDER_MODELS["gemini"]
|
||||
assert "gemini-2.5-pro" in models
|
||||
assert "gemini-2.5-flash" in models
|
||||
assert "gemma-4-31b-it" in models
|
||||
assert "gemma-4-31b-it" not in models
|
||||
|
||||
def test_provider_models_has_3x(self):
|
||||
models = _PROVIDER_MODELS["gemini"]
|
||||
|
|
@ -313,9 +313,28 @@ class TestGeminiModelsDev:
|
|||
result = list_agentic_models("gemini")
|
||||
assert "gemini-3-flash-preview" in result
|
||||
assert "gemini-2.5-pro" in result
|
||||
assert "gemma-4-31b-it" in result
|
||||
assert "gemma-4-31b-it" not in result
|
||||
# Filtered out:
|
||||
assert "gemini-embedding-001" not in result # no tool_call
|
||||
assert "gemini-2.5-flash-preview-tts" not in result # no tool_call
|
||||
assert "gemini-live-2.5-flash" not in result # noise: live-
|
||||
assert "gemini-2.5-flash-preview-04-17" not in result # noise: dated preview
|
||||
|
||||
def test_list_provider_models_hides_low_tpm_google_gemmas(self):
    """Hidden Google Gemma IDs are dropped from ``list_provider_models``."""
    google_models = {
        "gemini-2.5-pro": {},
        "gemma-4-31b-it": {},
        "gemma-3-1b": {},
    }
    fake_catalog = {"google": {"models": google_models}}

    with patch("agent.models_dev.fetch_models_dev", return_value=fake_catalog):
        from agent.models_dev import list_provider_models

        visible = list_provider_models("gemini")

    # The regular Gemini model is still listed ...
    assert "gemini-2.5-pro" in visible
    # ... while both Gemma entries are filtered out.
    assert "gemma-4-31b-it" not in visible
    assert "gemma-3-1b" not in visible
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue