mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gemini-cli): surface MODEL_CAPACITY_EXHAUSTED cleanly + drop retired gemma-4-26b (#11833)
Google-side 429 Code Assist errors now flow through Hermes' normal rate-limit
path (status_code on the exception, Retry-After preserved via error.response)
instead of being opaque RuntimeErrors. User sees a one-line capacity message
instead of a 500-char JSON dump.
Changes
- CodeAssistError grows status_code / response / retry_after / details attrs.
_extract_status_code in error_classifier picks up status_code and classifies
429 as FailoverReason.rate_limit, so fallback_providers triggers the same
way it does for SDK errors. run_agent.py line ~10428 already walks
error.response.headers for Retry-After — preserving the response means that
path just works.
- _gemini_http_error parses the Google error envelope (error.status +
error.details[].reason from google.rpc.ErrorInfo, retryDelay from
google.rpc.RetryInfo). MODEL_CAPACITY_EXHAUSTED / RESOURCE_EXHAUSTED / 404
model-not-found each produce a human-readable message; unknown shapes fall
back to the previous raw-body format.
- Drop gemma-4-26b-it from hermes_cli/models.py, hermes_cli/setup.py, and
agent/model_metadata.py — Google returned 404 for it today in local repro.
Kept gemma-4-31b-it (capacity-constrained but not retired).
Validation
| | Before | After |
|---------------------------|--------------------------------|-------------------------------------------|
| Error message | 'Code Assist returned HTTP 429: {500 chars JSON}' | 'Gemini capacity exhausted for gemini-2.5-pro (Google-side throttle...)' |
| status_code on error | None (opaque RuntimeError) | 429 |
| Classifier reason | unknown (string-match fallback) | FailoverReason.rate_limit |
| Retry-After honored | ignored | extracted from RetryInfo or header |
| gemma-4-26b-it picker | advertised (404s on Google) | removed |
Unit + E2E tests cover non-streaming 429, streaming 429, 404 model-not-found,
Retry-After header fallback, malformed body, and classifier integration.
Targeted suites: tests/agent/test_gemini_cloudcode.py (81 tests), full
tests/hermes_cli (2203 tests) green.
Co-authored-by: teknium1 <teknium@nousresearch.com>
This commit is contained in:
parent
d2206c69cc
commit
c6fd2619f7
7 changed files with 327 additions and 12 deletions
|
|
@ -826,6 +826,160 @@ class TestGeminiCloudCodeClient:
|
|||
finally:
|
||||
client.close()
|
||||
|
||||
|
||||
class TestGeminiHttpErrorParsing:
    """Regression tests for ``_gemini_http_error`` parsing of Google error envelopes.

    The cases mirror what users ran into during Google-side throttling
    (April 2026: MODEL_CAPACITY_EXHAUSTED on gemini-2.5-pro, and
    gemma-4-26b-it answering 404).  The produced error has to expose
    ``status_code`` and ``response`` so that the main loop's
    error_classifier and Retry-After handling can do their job.
    """

    @staticmethod
    def _fake_response(status: int, body: dict | str = "", headers=None):
        """Return a duck-typed stand-in for ``httpx.Response``.

        The object exposes ``status_code``, ``text`` and ``headers``.  A dict
        ``body`` is serialised with ``json.dumps``; a string is used verbatim.
        A falsy ``headers`` value is replaced by an empty dict.
        """
        payload_text = json.dumps(body) if isinstance(body, dict) else body
        header_map = headers or {}

        class _FakeResponse:
            def __init__(self):
                self.status_code = status
                self.text = payload_text
                self.headers = header_map

        return _FakeResponse()

    def test_model_capacity_exhausted_produces_friendly_message(self):
        """A 429 carrying ErrorInfo + RetryInfo yields a readable capacity error."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        error_info = {
            "@type": "type.googleapis.com/google.rpc.ErrorInfo",
            "reason": "MODEL_CAPACITY_EXHAUSTED",
            "domain": "googleapis.com",
            "metadata": {"model": "gemini-2.5-pro"},
        }
        retry_info = {
            "@type": "type.googleapis.com/google.rpc.RetryInfo",
            "retryDelay": "30s",
        }
        envelope = {
            "error": {
                "code": 429,
                "message": "Resource has been exhausted (e.g. check quota).",
                "status": "RESOURCE_EXHAUSTED",
                "details": [error_info, retry_info],
            }
        }
        response = self._fake_response(429, envelope)
        err = _gemini_http_error(response)

        # Structured fields consumed by the classifier / retry logic.
        assert err.status_code == 429
        assert err.code == "code_assist_capacity_exhausted"
        assert err.retry_after == 30.0
        assert err.details["reason"] == "MODEL_CAPACITY_EXHAUSTED"

        # The rendered text has to be human-readable rather than raw JSON.
        message = str(err)
        assert "gemini-2.5-pro" in message
        assert "capacity exhausted" in message.lower()
        assert "30s" in message

        # run_agent's Retry-After header path relies on the response attr.
        assert err.response is not None

    def test_resource_exhausted_without_reason(self):
        """RESOURCE_EXHAUSTED with no ErrorInfo detail maps to the rate-limited code."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        envelope = {
            "error": {
                "code": 429,
                "message": "Quota exceeded for requests per minute.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        response = self._fake_response(429, envelope)
        err = _gemini_http_error(response)

        assert err.status_code == 429
        assert err.code == "code_assist_rate_limited"
        rendered = str(err)
        assert "quota" in rendered.lower()

    def test_404_model_not_found_produces_model_retired_message(self):
        """A NOT_FOUND model response is reported as unavailable/retired."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        envelope = {
            "error": {
                "code": 404,
                "message": "models/gemma-4-26b-it is not found for API version v1internal",
                "status": "NOT_FOUND",
            }
        }
        response = self._fake_response(404, envelope)
        err = _gemini_http_error(response)

        assert err.status_code == 404
        message = str(err)
        lowered = message.lower()
        assert any(marker in lowered for marker in ("not available", "retired"))
        # The model name Google reported must survive into the message.
        assert "gemma-4-26b-it" in message

    def test_unauthorized_preserves_status_code(self):
        """A 401 keeps its status code and gets the unauthorized error code."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        envelope = {
            "error": {"code": 401, "message": "Invalid token", "status": "UNAUTHENTICATED"},
        }
        response = self._fake_response(401, envelope)
        err = _gemini_http_error(response)

        assert err.status_code == 401
        assert err.code == "code_assist_unauthorized"

    def test_retry_after_header_fallback(self):
        """Without a RetryInfo detail in the body, the Retry-After header is used."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        envelope = {
            "error": {"code": 429, "message": "Rate limited", "status": "RESOURCE_EXHAUSTED"},
        }
        resp = self._fake_response(429, envelope, headers={"Retry-After": "45"})

        err = _gemini_http_error(resp)
        assert err.retry_after == 45.0

    def test_malformed_body_still_produces_structured_error(self):
        """A non-JSON body must still carry status_code for the classifier path."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        response = self._fake_response(500, "<html>internal error</html>")
        err = _gemini_http_error(response)

        assert err.status_code == 500
        # Only the status code is asserted here; the message is expected to
        # mention it so the failure stays debuggable.
        assert "500" in str(err)

    def test_status_code_flows_through_error_classifier(self):
        """End-to-end: a 429 CodeAssistError has to classify as rate_limit.

        Carrying status_code on CodeAssistError exists for exactly this:
        _extract_status_code needs to pick it up so that
        FailoverReason.rate_limit fires and the main loop triggers
        fallback_providers.
        """
        from agent.gemini_cloudcode_adapter import _gemini_http_error
        from agent.error_classifier import classify_api_error, FailoverReason

        error_info = {
            "@type": "type.googleapis.com/google.rpc.ErrorInfo",
            "reason": "MODEL_CAPACITY_EXHAUSTED",
            "metadata": {"model": "gemini-2.5-pro"},
        }
        envelope = {
            "error": {
                "code": 429,
                "message": "Resource has been exhausted",
                "status": "RESOURCE_EXHAUSTED",
                "details": [error_info],
            }
        }
        err = _gemini_http_error(self._fake_response(429, envelope))

        classified = classify_api_error(
            err,
            provider="google-gemini-cli",
            model="gemini-2.5-pro",
        )
        assert classified.status_code == 429
        assert classified.reason == FailoverReason.rate_limit
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Provider registration
|
||||
# =============================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue