mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(aux): self-heal Nous-routed calls when a pinned model leaves the catalog (#37732)
A long-lived process (gateway, watcher) caches the Nous Portal's recommended-models payload and can pin a model for its whole lifetime. When that model is later dropped from the Nous -> OpenRouter catalog, every auxiliary call 404s with 'model does not exist in our configuration or OpenRouter catalog' until the process restarts.

Now such a 404 force-refreshes the Portal recommendation and retries once with the current pick (or the gemini-3-flash-preview default). Scoped to Nous-routed calls only.

- _is_model_not_found_error(): 404/400 'not found / does not exist / not a valid model' predicate; excludes billing keywords so it never overlaps _is_payment_error.
- _refresh_nous_recommended_model(): force-refresh fetch; returns a model distinct from the one that failed, else the known-good default.
- Wired into both call_llm and async_call_llm error chains.
This commit is contained in:
parent
bb1c8b6f1a
commit
ab2472e692
2 changed files with 236 additions and 0 deletions
|
|
@ -1621,6 +1621,47 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
)
|
||||
|
||||
|
||||
def _refresh_nous_recommended_model(
    *, vision: bool, stale_model: Optional[str]
) -> Optional[str]:
    """Re-fetch the Nous Portal's recommended model after a stale-model 404.

    Long-lived processes (gateway, watchers) cache the Portal's
    ``recommended-models`` payload for 10 minutes and, in practice, can pin a
    model for the whole process lifetime. When that model is later dropped from
    the Nous → OpenRouter catalog, every auxiliary call 404s with
    "model does not exist". This forces a fresh Portal fetch and returns a
    model name to retry with:

    * the Portal's current recommendation for the task, if it differs from
      the model that just failed; otherwise
    * ``_NOUS_MODEL`` (google/gemini-3-flash-preview), the known-good default,
      if it too differs from the failed model.

    Args:
        vision: Passed through to ``get_nous_recommended_aux_model`` to select
            the recommendation for vision tasks.
        stale_model: Name of the model that just failed. ``None`` or an empty
            string means "nothing to avoid". Comparison is case-insensitive
            and ignores surrounding whitespace.

    Returns:
        A model name to retry with (surrounding whitespace removed), or
        ``None`` when no usable alternative is available (e.g. the Portal
        still recommends the exact model that just 404'd and the default also
        matches it) — callers should then let the original error propagate.
    """
    stale = (stale_model or "").strip().lower()
    fresh: Optional[str] = None
    try:
        # Imported lazily, inside the guarded region, so an import failure is
        # handled exactly like a failed fetch.
        from hermes_cli.models import get_nous_recommended_aux_model

        fresh = get_nous_recommended_aux_model(vision=vision, force_refresh=True)
    except Exception as exc:
        logger.debug(
            "Nous recommended-model refresh failed (%s); using default %s",
            exc, _NOUS_MODEL,
        )

    # Normalise before comparing: this runs inside an error-recovery path, so
    # a malformed payload (non-string, or whitespace only) must neither raise
    # and mask the original 404 nor be handed back as a "model name".
    candidate = fresh.strip() if isinstance(fresh, str) else ""
    if candidate and candidate.lower() != stale:
        return candidate

    # Portal recommendation unchanged or unavailable — fall back to the
    # hardcoded known-good default, but only if it's actually different.
    if _NOUS_MODEL.strip().lower() != stale:
        return _NOUS_MODEL
    return None
|
||||
|
||||
|
||||
def _read_main_model() -> str:
|
||||
"""Read the user's configured main model from config.yaml.
|
||||
|
||||
|
|
@ -2451,6 +2492,46 @@ def _is_unsupported_temperature_error(exc: Exception) -> bool:
|
|||
return _is_unsupported_parameter_error(exc, "temperature")
|
||||
|
||||
|
||||
def _is_model_not_found_error(exc: Exception) -> bool:
|
||||
"""Detect "the requested model doesn't exist" errors (404 / invalid model).
|
||||
|
||||
This fires when a resolved model name is no longer served by the endpoint
|
||||
— most commonly when a long-lived process pinned a Portal-recommended model
|
||||
that has since been dropped from the Nous → OpenRouter catalog. The Nous
|
||||
proxy returns 404 with a body like::
|
||||
|
||||
Model 'gpt-5.4-mini' not found. The requested model does not exist
|
||||
in our configuration or OpenRouter catalog.
|
||||
|
||||
Distinct from :func:`_is_payment_error` (which also matches some 404s for
|
||||
free-tier/credit language) — this one keys on "does not exist / not found /
|
||||
not a valid model" phrasing, and explicitly excludes the billing keywords
|
||||
that the payment path already owns so the two predicates don't overlap.
|
||||
"""
|
||||
status = getattr(exc, "status_code", None)
|
||||
err_lower = str(exc).lower()
|
||||
# Billing/quota 404s belong to _is_payment_error — don't claim them here.
|
||||
if any(kw in err_lower for kw in (
|
||||
"credits", "insufficient funds", "billing", "out of funds",
|
||||
"balance_depleted", "no usable credits", "free tier", "free-tier",
|
||||
"not available on the free tier",
|
||||
)):
|
||||
return False
|
||||
if status not in {404, 400, None}:
|
||||
return False
|
||||
return any(kw in err_lower for kw in (
|
||||
"model does not exist",
|
||||
"does not exist in our configuration",
|
||||
"openrouter catalog",
|
||||
"is not a valid model",
|
||||
"no such model",
|
||||
"model not found",
|
||||
"the model `", # OpenAI-style: "The model `X` does not exist"
|
||||
"model_not_found",
|
||||
"unknown model",
|
||||
))
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
|
|
@ -5027,6 +5108,32 @@ def call_llm(
|
|||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Stale-model self-heal (Nous Portal recommendation drift) ───
|
||||
# A long-lived process can pin a Portal-recommended model that has
|
||||
# since been dropped from the Nous → OpenRouter catalog, so every
|
||||
# auxiliary call 404s with "model does not exist". Force a fresh
|
||||
# Portal fetch and retry once with the current recommendation (or the
|
||||
# known-good default). Only applies to Nous-routed calls.
|
||||
_heal_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_model_not_found_error(first_err) and _heal_is_nous:
|
||||
healed_model = _refresh_nous_recommended_model(
|
||||
vision=(task == "vision"), stale_model=kwargs.get("model"))
|
||||
if healed_model and healed_model != kwargs.get("model"):
|
||||
logger.warning(
|
||||
"Auxiliary %s: model %r no longer in Nous catalog; "
|
||||
"retrying with refreshed recommendation %r",
|
||||
task or "call", kwargs.get("model"), healed_model,
|
||||
)
|
||||
kwargs["model"] = healed_model
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
|
|
@ -5464,6 +5571,31 @@ async def async_call_llm(
|
|||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Stale-model self-heal (Nous Portal recommendation drift) ───
|
||||
# See the sync call_llm() path for the rationale: a long-lived process
|
||||
# can pin a Portal-recommended model that has since been dropped from
|
||||
# the Nous → OpenRouter catalog, 404'ing every auxiliary call. Force a
|
||||
# fresh Portal fetch and retry once with the current recommendation.
|
||||
_heal_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_model_not_found_error(first_err) and _heal_is_nous:
|
||||
healed_model = _refresh_nous_recommended_model(
|
||||
vision=(task == "vision"), stale_model=kwargs.get("model"))
|
||||
if healed_model and healed_model != kwargs.get("model"):
|
||||
logger.warning(
|
||||
"Auxiliary %s (async): model %r no longer in Nous catalog; "
|
||||
"retrying with refreshed recommendation %r",
|
||||
task or "call", kwargs.get("model"), healed_model,
|
||||
)
|
||||
kwargs["model"] = healed_model
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ from agent.auxiliary_client import (
|
|||
_get_provider_chain,
|
||||
_is_payment_error,
|
||||
_is_rate_limit_error,
|
||||
_is_model_not_found_error,
|
||||
_refresh_nous_recommended_model,
|
||||
_normalize_aux_provider,
|
||||
_try_payment_fallback,
|
||||
_resolve_auto,
|
||||
|
|
@ -1298,6 +1300,108 @@ class TestIsPaymentError:
|
|||
assert _is_payment_error(exc) is False
|
||||
|
||||
|
||||
class TestIsModelNotFoundError:
    """Checks that _is_model_not_found_error recognises stale/invalid model
    responses and leaves payment, rate-limit and server errors alone."""

    @staticmethod
    def _error(message, status):
        """Build a plain Exception carrying an HTTP-style ``status_code``."""
        err = Exception(message)
        err.status_code = status
        return err

    def test_nous_openrouter_catalog_404(self):
        """The message from the original incident: a Portal-recommended
        model that was dropped from the Nous → OpenRouter catalog."""
        err = self._error(
            "Model 'gpt-5.4-mini' not found. The requested model does not "
            "exist in our configuration or OpenRouter catalog.",
            404,
        )
        assert _is_model_not_found_error(err) is True

    def test_openai_style_model_does_not_exist(self):
        err = self._error("The model `gpt-9-turbo` does not exist", 404)
        assert _is_model_not_found_error(err) is True

    def test_invalid_model_id_400(self):
        err = self._error("openrouter/foo/bar is not a valid model ID", 400)
        assert _is_model_not_found_error(err) is True

    def test_no_such_model(self):
        err = self._error("no such model: phantom-v1", 400)
        assert _is_model_not_found_error(err) is True

    def test_billing_404_is_not_model_not_found(self):
        """A free-tier / credit 404 is owned by _is_payment_error; the two
        predicates must never both claim the same error."""
        err = self._error(
            "Model 'gpt-5' is not available on the free tier. Upgrade.",
            404,
        )
        assert _is_model_not_found_error(err) is False
        assert _is_payment_error(err) is True

    def test_out_of_funds_404_is_not_model_not_found(self):
        err = self._error(
            "Your API key is blocked or out of funds. model_not_found",
            404,
        )
        # The billing keyword takes precedence over the model_not_found token.
        assert _is_model_not_found_error(err) is False

    def test_rate_limit_is_not_model_not_found(self):
        err = self._error("rate limit exceeded, retry after 5s", 429)
        assert _is_model_not_found_error(err) is False

    def test_500_is_not_model_not_found(self):
        # Matching phrase, but the status is outside the accepted set.
        err = self._error("model does not exist", 500)
        assert _is_model_not_found_error(err) is False
|
||||
|
||||
|
||||
class TestRefreshNousRecommendedModel:
    """Checks which model _refresh_nous_recommended_model hands back after a
    stale-model 404."""

    # Dotted path of the Portal lookup that each test replaces.
    _PORTAL_LOOKUP = "hermes_cli.models.get_nous_recommended_aux_model"

    def test_returns_fresh_portal_recommendation(self, monkeypatch):
        monkeypatch.setattr(
            self._PORTAL_LOOKUP,
            lambda **kw: "stepfun/step-3.7-flash:free",
        )
        picked = _refresh_nous_recommended_model(
            vision=True, stale_model="openai/gpt-5.4-mini")
        assert picked == "stepfun/step-3.7-flash:free"

    def test_falls_back_to_default_when_portal_matches_stale(self, monkeypatch):
        """When the Portal keeps recommending the model that just failed,
        the known-good default is returned instead."""
        monkeypatch.setattr(
            self._PORTAL_LOOKUP,
            lambda **kw: "openai/gpt-5.4-mini",
        )
        picked = _refresh_nous_recommended_model(
            vision=True, stale_model="openai/gpt-5.4-mini")
        assert picked == "google/gemini-3-flash-preview"

    def test_falls_back_to_default_when_portal_unavailable(self, monkeypatch):
        def _boom(**kw):
            raise RuntimeError("portal down")

        monkeypatch.setattr(self._PORTAL_LOOKUP, _boom)
        picked = _refresh_nous_recommended_model(
            vision=False, stale_model="some/dead-model")
        assert picked == "google/gemini-3-flash-preview"

    def test_returns_none_when_no_distinct_alternative(self, monkeypatch):
        """If the failed model is itself the default and the Portal offers
        nothing different, no alternative exists."""
        monkeypatch.setattr(
            self._PORTAL_LOOKUP,
            lambda **kw: "google/gemini-3-flash-preview",
        )
        picked = _refresh_nous_recommended_model(
            vision=False, stale_model="google/gemini-3-flash-preview")
        assert picked is None
|
||||
|
||||
|
||||
class TestIsRateLimitError:
|
||||
"""_is_rate_limit_error detects 429 rate-limit errors warranting fallback."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue