mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gemini-cli): surface MODEL_CAPACITY_EXHAUSTED cleanly + drop retired gemma-4-26b (#11833)
Google-side 429 Code Assist errors now flow through Hermes' normal rate-limit
path (status_code on the exception, Retry-After preserved via error.response)
instead of being opaque RuntimeErrors. User sees a one-line capacity message
instead of a 500-char JSON dump.
Changes
- CodeAssistError grows status_code / response / retry_after / details attrs.
_extract_status_code in error_classifier picks up status_code and classifies
429 as FailoverReason.rate_limit, so fallback_providers triggers the same
way it does for SDK errors. run_agent.py line ~10428 already walks
error.response.headers for Retry-After — preserving the response means that
path just works.
- _gemini_http_error parses the Google error envelope (error.status +
error.details[].reason from google.rpc.ErrorInfo, retryDelay from
google.rpc.RetryInfo). MODEL_CAPACITY_EXHAUSTED / RESOURCE_EXHAUSTED / 404
model-not-found each produce a human-readable message; unknown shapes fall
back to the previous raw-body format.
- Drop gemma-4-26b-it from hermes_cli/models.py, hermes_cli/setup.py, and
agent/model_metadata.py — Google returned 404 for it today in local repro.
Kept gemma-4-31b-it (capacity-constrained but not retired).
Validation
| | Before | After |
|---------------------------|--------------------------------|-------------------------------------------|
| Error message | 'Code Assist returned HTTP 429: {500 chars JSON}' | 'Gemini capacity exhausted for gemini-2.5-pro (Google-side throttle...)' |
| status_code on error | None (opaque RuntimeError) | 429 |
| Classifier reason | unknown (string-match fallback) | FailoverReason.rate_limit |
| Retry-After honored | ignored | extracted from RetryInfo or header |
| gemma-4-26b-it picker | advertised (404s on Google) | removed |
Unit and E2E tests cover non-streaming 429, streaming 429, 404 model-not-found,
the Retry-After header fallback, malformed bodies, and classifier integration.
Both the targeted suite tests/agent/test_gemini_cloudcode.py (81 tests) and the
full tests/hermes_cli suite (2203 tests) are green.
Co-authored-by: teknium1 <teknium@nousresearch.com>
This commit is contained in:
parent
d2206c69cc
commit
c6fd2619f7
7 changed files with 327 additions and 12 deletions
|
|
@ -747,18 +747,149 @@ class GeminiCloudCodeClient:
|
||||||
|
|
||||||
|
|
||||||
def _coerce_retry_seconds(raw: Any) -> Optional[float]:
    """Return *raw* as a finite, non-negative number of seconds, else ``None``.

    Retry hints come from the network (error body or ``Retry-After`` header),
    so values such as ``"inf"``, ``"nan"``, negative numbers or JSON booleans
    are rejected instead of being handed to the caller as a delay.
    """
    import math  # local import keeps this helper self-contained

    if isinstance(raw, bool):
        # bool is an int subclass; ``True`` is not a meaningful delay.
        return None
    try:
        seconds = float(raw)
    except (TypeError, ValueError, OverflowError):
        return None
    if not math.isfinite(seconds) or seconds < 0:
        return None
    return seconds


def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
    """Translate an HTTP error response into a ``CodeAssistError``.

    Parses Google's error envelope (``{"error": {"code", "message", "status",
    "details": [...]}}``) and attaches ``status_code``, ``response``,
    ``retry_after`` and ``details`` to the returned exception.

    Recognized conditions get a human-readable message instead of a raw
    body dump:

        429 + reason MODEL_CAPACITY_EXHAUSTED  -> "Gemini capacity exhausted for <model> ..."
        429 + status RESOURCE_EXHAUSTED        -> "Gemini quota exhausted (...)"
        404                                    -> "Code Assist 404: <model> is not available ..."
        any other parsed ``error.message``     -> "Code Assist HTTP <status> (<status name>): <message>"
        otherwise                              -> "Code Assist returned HTTP <status>: <first 500 chars of body>"

    Args:
        response: The failed response. Only ``status_code``, ``text`` and
            ``headers`` are read, so a duck-typed stand-in works.

    Returns:
        A ``CodeAssistError`` (not raised here). ``retry_after`` is a finite,
        non-negative float from the first usable ``google.rpc.RetryInfo``
        detail, else from a numeric ``Retry-After`` header, else ``None``.
    """
    status = response.status_code

    # Parse the body once, surviving any weird encodings.
    try:
        body_text = response.text
    except Exception:
        body_text = ""

    body_json: Dict[str, Any] = {}
    if body_text:
        try:
            parsed = json.loads(body_text)
        except (ValueError, TypeError):
            parsed = None
        if isinstance(parsed, dict):
            body_json = parsed

    # Dig into Google's error envelope. Shape is:
    #   {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
    #              "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
    #                           "metadata": {...}},
    #                          {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
    err_obj = body_json.get("error")
    if not isinstance(err_obj, dict):
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
    err_details_list = err_obj.get("details")
    if not isinstance(err_details_list, list):
        err_details_list = []

    # Take the first ErrorInfo that carries a reason (with its metadata) and
    # the first RetryInfo whose delay is usable.
    error_reason = ""
    error_metadata: Dict[str, Any] = {}
    retry_delay_seconds: Optional[float] = None
    for detail in err_details_list:
        if not isinstance(detail, dict):
            continue
        type_url = str(detail.get("@type") or "")
        if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
            reason = detail.get("reason")
            if isinstance(reason, str) and reason:
                error_reason = reason
                md = detail.get("metadata")
                if isinstance(md, dict):
                    error_metadata = md
        elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
            # retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
            delay_raw = detail.get("retryDelay")
            if isinstance(delay_raw, str):
                if delay_raw.endswith("s"):
                    retry_delay_seconds = _coerce_retry_seconds(delay_raw[:-1])
            elif isinstance(delay_raw, (int, float)):
                retry_delay_seconds = _coerce_retry_seconds(delay_raw)

    # Fall back to the Retry-After header if the body didn't include RetryInfo.
    # Only the delta-seconds form is understood; an HTTP-date value fails the
    # float conversion and is ignored.
    if retry_delay_seconds is None:
        try:
            header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
        except Exception:
            header_val = None
        if header_val:
            retry_delay_seconds = _coerce_retry_seconds(header_val)

    # Classify the error code. ``code_assist_rate_limited`` stays the default
    # for 429s; the more specific capacity tag is for logs and tests.
    code = f"code_assist_http_{status}"
    if status == 401:
        code = "code_assist_unauthorized"
    elif status == 429:
        code = "code_assist_rate_limited"
        if error_reason == "MODEL_CAPACITY_EXHAUSTED":
            code = "code_assist_capacity_exhausted"

    # Build a human-readable message, leading with a friendlier summary when
    # the Google signal is recognized.
    model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()

    if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
        target = model_hint or "this Gemini model"
        message = (
            f"Gemini capacity exhausted for {target} (Google-side throttle, "
            "not a Hermes issue). Try a different Gemini model or set a "
            "fallback_providers entry to a non-Gemini provider."
        )
        if retry_delay_seconds is not None:
            message += f" Google suggests retrying in {retry_delay_seconds:g}s."
    elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
        message = (
            f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
            "Check /gquota for remaining daily requests."
        )
        if retry_delay_seconds is not None:
            message += f" Retry suggested in {retry_delay_seconds:g}s."
    elif status == 404:
        # Google returns 404 when a model has been retired or renamed.
        target = model_hint or (err_message or "model")
        message = (
            f"Code Assist 404: {target} is not available at "
            "cloudcode-pa.googleapis.com. It may have been renamed or "
            "retired. Check hermes_cli/models.py for the current list."
        )
    elif err_message:
        # Generic fallback with the parsed message.
        message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
    else:
        # Last-ditch fallback: raw body snippet.
        message = f"Code Assist returned HTTP {status}: {body_text[:500]}"

    return CodeAssistError(
        message,
        code=code,
        status_code=status,
        response=response,
        retry_after=retry_delay_seconds,
        details={
            "status": err_status,
            "reason": error_reason,
            "metadata": error_metadata,
            "message": err_message,
        },
    )
|
||||||
|
|
|
||||||
|
|
@ -68,9 +68,45 @@ _ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
|
||||||
|
|
||||||
|
|
||||||
class CodeAssistError(RuntimeError):
    """Exception raised by the Code Assist (``cloudcode-pa``) integration.

    Besides the message it carries a stable ``code`` tag plus optional HTTP
    metadata (``status_code``, ``response``, ``retry_after``, ``details``) so
    callers can inspect the failure without parsing the message text.
    """

    def __init__(
        self,
        message: str,
        *,
        code: str = "code_assist_error",
        status_code: Optional[int] = None,
        response: Any = None,
        retry_after: Optional[float] = None,
        details: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Build the error.

        Args:
            message: Human-readable description; becomes ``str(error)``.
            code: Machine-readable tag for the failure category.
            status_code: HTTP status of the failed request, if any.
            response: The underlying response object (or a shim exposing
                ``.headers``), kept so callers can read response headers.
            retry_after: Suggested wait in seconds before retrying, if known.
            details: Parsed structured error details; copied on assignment.
        """
        super().__init__(message)
        self.code = code
        # HTTP status of the failed call; ``None`` when not HTTP-derived.
        self.status_code = status_code
        # Original response object, stored untouched.
        self.response = response
        # Retry hint in seconds, already parsed by whoever built the error.
        self.retry_after = retry_after
        # Shallow copy so later mutation of the caller's dict cannot change
        # what this exception reports.
        self.details: Dict[str, Any] = dict(details) if details else {}
|
||||||
|
|
||||||
|
|
||||||
class ProjectIdRequiredError(CodeAssistError):
|
class ProjectIdRequiredError(CodeAssistError):
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||||
"gemini": 1048576,
|
"gemini": 1048576,
|
||||||
# Gemma (open models served via AI Studio)
|
# Gemma (open models served via AI Studio)
|
||||||
"gemma-4-31b": 256000,
|
"gemma-4-31b": 256000,
|
||||||
"gemma-4-26b": 256000,
|
|
||||||
"gemma-3": 131072,
|
"gemma-3": 131072,
|
||||||
"gemma": 8192, # fallback for older gemma models
|
"gemma": 8192, # fallback for older gemma models
|
||||||
# DeepSeek
|
# DeepSeek
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
"gemini-2.5-flash-lite",
|
"gemini-2.5-flash-lite",
|
||||||
# Gemma open models (also served via AI Studio)
|
# Gemma open models (also served via AI Studio)
|
||||||
"gemma-4-31b-it",
|
"gemma-4-31b-it",
|
||||||
"gemma-4-26b-it",
|
|
||||||
],
|
],
|
||||||
"google-gemini-cli": [
|
"google-gemini-cli": [
|
||||||
"gemini-2.5-pro",
|
"gemini-2.5-pro",
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
||||||
"gemini": [
|
"gemini": [
|
||||||
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
|
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
|
||||||
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
|
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
|
||||||
"gemma-4-31b-it", "gemma-4-26b-it",
|
"gemma-4-31b-it",
|
||||||
],
|
],
|
||||||
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||||
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||||
|
|
|
||||||
|
|
@ -826,6 +826,160 @@ class TestGeminiCloudCodeClient:
|
||||||
finally:
|
finally:
|
||||||
client.close()
|
client.close()
|
||||||
|
|
||||||
|
|
||||||
|
class TestGeminiHttpErrorParsing:
    """Regression coverage for _gemini_http_error Google-envelope parsing.

    These are the paths that users actually hit during Google-side throttling
    (April 2026: gemini-2.5-pro MODEL_CAPACITY_EXHAUSTED, gemma-4-26b-it
    returning 404). The error needs to carry status_code + response so the
    main loop's error_classifier and Retry-After logic work.
    """

    @staticmethod
    def _fake_response(status: int, body: dict | str = "", headers=None):
        """Minimal httpx.Response stand-in (duck-typed for _gemini_http_error).

        A dict ``body`` is serialized to JSON; a string is used verbatim.
        """
        class _FakeResponse:
            def __init__(self):
                self.status_code = status
                if isinstance(body, dict):
                    self.text = json.dumps(body)
                else:
                    self.text = body
                self.headers = headers or {}
        return _FakeResponse()

    def test_model_capacity_exhausted_produces_friendly_message(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        body = {
            "error": {
                "code": 429,
                "message": "Resource has been exhausted (e.g. check quota).",
                "status": "RESOURCE_EXHAUSTED",
                "details": [
                    {
                        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
                        "reason": "MODEL_CAPACITY_EXHAUSTED",
                        "domain": "googleapis.com",
                        "metadata": {"model": "gemini-2.5-pro"},
                    },
                    {
                        "@type": "type.googleapis.com/google.rpc.RetryInfo",
                        "retryDelay": "30s",
                    },
                ],
            }
        }
        err = _gemini_http_error(self._fake_response(429, body))
        assert err.status_code == 429
        assert err.code == "code_assist_capacity_exhausted"
        assert err.retry_after == 30.0
        assert err.details["reason"] == "MODEL_CAPACITY_EXHAUSTED"
        # Message must be user-friendly, not a raw JSON dump.
        message = str(err)
        assert "gemini-2.5-pro" in message
        assert "capacity exhausted" in message.lower()
        assert "30s" in message
        # response attr is preserved for run_agent's Retry-After header path.
        assert err.response is not None

    def test_resource_exhausted_without_reason(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        body = {
            "error": {
                "code": 429,
                "message": "Quota exceeded for requests per minute.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        err = _gemini_http_error(self._fake_response(429, body))
        assert err.status_code == 429
        assert err.code == "code_assist_rate_limited"
        message = str(err)
        assert "quota" in message.lower()

    def test_404_model_not_found_produces_model_retired_message(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        body = {
            "error": {
                "code": 404,
                "message": "models/gemma-4-26b-it is not found for API version v1internal",
                "status": "NOT_FOUND",
            }
        }
        err = _gemini_http_error(self._fake_response(404, body))
        assert err.status_code == 404
        message = str(err)
        assert "not available" in message.lower() or "retired" in message.lower()
        # Error message should reference the actual model text from Google.
        assert "gemma-4-26b-it" in message

    def test_unauthorized_preserves_status_code(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        err = _gemini_http_error(self._fake_response(
            401, {"error": {"code": 401, "message": "Invalid token", "status": "UNAUTHENTICATED"}},
        ))
        assert err.status_code == 401
        assert err.code == "code_assist_unauthorized"

    def test_retry_after_header_fallback(self):
        """If the body has no RetryInfo detail, fall back to Retry-After header."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        resp = self._fake_response(
            429,
            {"error": {"code": 429, "message": "Rate limited", "status": "RESOURCE_EXHAUSTED"}},
            headers={"Retry-After": "45"},
        )
        err = _gemini_http_error(resp)
        assert err.retry_after == 45.0

    def test_malformed_body_still_produces_structured_error(self):
        """Non-JSON body must not swallow status_code — we still want the classifier path."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        err = _gemini_http_error(self._fake_response(500, "<html>internal error</html>"))
        assert err.status_code == 500
        message = str(err)
        assert "500" in message
        # Raw body snippet must still be there for debugging; checking only
        # for "500" would pass on the status code alone.
        assert "internal error" in message

    def test_status_code_flows_through_error_classifier(self):
        """End-to-end: CodeAssistError from a 429 must classify as rate_limit.

        This is the whole point of adding status_code to CodeAssistError —
        _extract_status_code must see it and FailoverReason.rate_limit must
        fire, so the main loop triggers fallback_providers.
        """
        from agent.gemini_cloudcode_adapter import _gemini_http_error
        from agent.error_classifier import classify_api_error, FailoverReason

        body = {
            "error": {
                "code": 429,
                "message": "Resource has been exhausted",
                "status": "RESOURCE_EXHAUSTED",
                "details": [
                    {
                        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
                        "reason": "MODEL_CAPACITY_EXHAUSTED",
                        "metadata": {"model": "gemini-2.5-pro"},
                    }
                ],
            }
        }
        err = _gemini_http_error(self._fake_response(429, body))

        classified = classify_api_error(
            err, provider="google-gemini-cli", model="gemini-2.5-pro",
        )
        assert classified.status_code == 429
        assert classified.reason == FailoverReason.rate_limit
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Provider registration
|
# Provider registration
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
|
||||||
|
|
@ -178,10 +178,6 @@ class TestGeminiContextLength:
|
||||||
ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
|
ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
|
||||||
assert ctx == 256000
|
assert ctx == 256000
|
||||||
|
|
||||||
def test_gemma_4_26b_context(self):
|
|
||||||
ctx = get_model_context_length("gemma-4-26b-it", provider="gemini")
|
|
||||||
assert ctx == 256000
|
|
||||||
|
|
||||||
def test_gemini_3_context(self):
|
def test_gemini_3_context(self):
|
||||||
ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini")
|
ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini")
|
||||||
assert ctx == 1048576
|
assert ctx == 1048576
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue