fix(aux): self-heal Nous-routed calls when a pinned model leaves the catalog (#37732)

A long-lived process (gateway, watcher) caches the Nous Portal's
recommended-models payload and can pin a model for its whole lifetime.
When that model is later dropped from the Nous -> OpenRouter catalog,
every auxiliary call 404s with 'model does not exist in our
configuration or OpenRouter catalog' until the process restarts.

Now such a 404 force-refreshes the Portal recommendation and retries
once with the current pick (or the gemini-3-flash-preview default).
Scoped to Nous-routed calls only.

- _is_model_not_found_error(): 404/400 'not found / does not exist /
  not a valid model' predicate, excludes billing keywords so it never
  overlaps _is_payment_error.
- _refresh_nous_recommended_model(): force-refresh fetch, returns a
  model distinct from the one that failed, else the known-good default.
- Wired into both call_llm and async_call_llm error chains.
This commit is contained in:
Teknium 2026-06-02 17:14:36 -07:00 committed by GitHub
parent bb1c8b6f1a
commit ab2472e692
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 236 additions and 0 deletions

View file

@ -1621,6 +1621,47 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
)
def _refresh_nous_recommended_model(
    *, vision: bool, stale_model: Optional[str]
) -> Optional[str]:
    """Choose a replacement model after a Nous-routed call hit a stale-model 404.

    Asks ``get_nous_recommended_aux_model`` for the Portal's recommendation
    with ``force_refresh=True`` and picks the first candidate that is not the
    model that just failed:

    1. the freshly fetched Portal recommendation for the task; otherwise
    2. ``_NOUS_MODEL``, the hardcoded known-good default.

    Candidates are compared against ``stale_model`` case-insensitively and
    with surrounding whitespace ignored.

    Args:
        vision: Forwarded to the Portal lookup to select the vision
            recommendation instead of the text one.
        stale_model: Name of the model that was just rejected; ``None`` or
            an empty string means "nothing to exclude".

    Returns:
        A model name to retry with, or ``None`` when neither candidate
        differs from ``stale_model`` — callers should then let the original
        error propagate.
    """
    failed_key = (stale_model or "").strip().lower()

    def _differs_from_failed(candidate: str) -> bool:
        # Same normalisation as failed_key so "Foo/Bar " and "foo/bar" match.
        return candidate.strip().lower() != failed_key

    recommended: Optional[str] = None
    try:
        # Imported lazily, at call time, rather than at module import.
        from hermes_cli.models import get_nous_recommended_aux_model

        recommended = get_nous_recommended_aux_model(
            vision=vision, force_refresh=True
        )
    except Exception as exc:
        # Best-effort: a failed refresh must not mask the original error,
        # so just note it and carry on to the default below.
        logger.debug(
            "Nous recommended-model refresh failed (%s); using default %s",
            exc, _NOUS_MODEL,
        )

    if recommended and _differs_from_failed(recommended):
        return recommended

    # Portal recommendation unchanged or unavailable — fall back to the
    # hardcoded known-good default, but only if it's actually different.
    return _NOUS_MODEL if _differs_from_failed(_NOUS_MODEL) else None
def _read_main_model() -> str:
"""Read the user's configured main model from config.yaml.
@ -2451,6 +2492,46 @@ def _is_unsupported_temperature_error(exc: Exception) -> bool:
return _is_unsupported_parameter_error(exc, "temperature")
def _is_model_not_found_error(exc: Exception) -> bool:
"""Detect "the requested model doesn't exist" errors (404 / invalid model).
This fires when a resolved model name is no longer served by the endpoint
most commonly when a long-lived process pinned a Portal-recommended model
that has since been dropped from the Nous OpenRouter catalog. The Nous
proxy returns 404 with a body like::
Model 'gpt-5.4-mini' not found. The requested model does not exist
in our configuration or OpenRouter catalog.
Distinct from :func:`_is_payment_error` (which also matches some 404s for
free-tier/credit language) this one keys on "does not exist / not found /
not a valid model" phrasing, and explicitly excludes the billing keywords
that the payment path already owns so the two predicates don't overlap.
"""
status = getattr(exc, "status_code", None)
err_lower = str(exc).lower()
# Billing/quota 404s belong to _is_payment_error — don't claim them here.
if any(kw in err_lower for kw in (
"credits", "insufficient funds", "billing", "out of funds",
"balance_depleted", "no usable credits", "free tier", "free-tier",
"not available on the free tier",
)):
return False
if status not in {404, 400, None}:
return False
return any(kw in err_lower for kw in (
"model does not exist",
"does not exist in our configuration",
"openrouter catalog",
"is not a valid model",
"no such model",
"model not found",
"the model `", # OpenAI-style: "The model `X` does not exist"
"model_not_found",
"unknown model",
))
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@ -5027,6 +5108,32 @@ def call_llm(
raise
first_err = retry_err
# ── Stale-model self-heal (Nous Portal recommendation drift) ───
# A long-lived process can pin a Portal-recommended model that has
# since been dropped from the Nous → OpenRouter catalog, so every
# auxiliary call 404s with "model does not exist". Force a fresh
# Portal fetch and retry once with the current recommendation (or the
# known-good default). Only applies to Nous-routed calls.
_heal_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
)
if _is_model_not_found_error(first_err) and _heal_is_nous:
healed_model = _refresh_nous_recommended_model(
vision=(task == "vision"), stale_model=kwargs.get("model"))
if healed_model and healed_model != kwargs.get("model"):
logger.warning(
"Auxiliary %s: model %r no longer in Nous catalog; "
"retrying with refreshed recommendation %r",
task or "call", kwargs.get("model"), healed_model,
)
kwargs["model"] = healed_model
try:
return _validate_llm_response(
client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
@ -5464,6 +5571,31 @@ async def async_call_llm(
raise
first_err = retry_err
# ── Stale-model self-heal (Nous Portal recommendation drift) ───
# See the sync call_llm() path for the rationale: a long-lived process
# can pin a Portal-recommended model that has since been dropped from
# the Nous → OpenRouter catalog, 404'ing every auxiliary call. Force a
# fresh Portal fetch and retry once with the current recommendation.
_heal_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
)
if _is_model_not_found_error(first_err) and _heal_is_nous:
healed_model = _refresh_nous_recommended_model(
vision=(task == "vision"), stale_model=kwargs.get("model"))
if healed_model and healed_model != kwargs.get("model"):
logger.warning(
"Auxiliary %s (async): model %r no longer in Nous catalog; "
"retrying with refreshed recommendation %r",
task or "call", kwargs.get("model"), healed_model,
)
kwargs["model"] = healed_model
try:
return _validate_llm_response(
await client.chat.completions.create(**kwargs), task)
except Exception as retry_err:
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"

View file

@ -22,6 +22,8 @@ from agent.auxiliary_client import (
_get_provider_chain,
_is_payment_error,
_is_rate_limit_error,
_is_model_not_found_error,
_refresh_nous_recommended_model,
_normalize_aux_provider,
_try_payment_fallback,
_resolve_auto,
@ -1298,6 +1300,108 @@ class TestIsPaymentError:
assert _is_payment_error(exc) is False
class TestIsModelNotFoundError:
    """Checks that _is_model_not_found_error flags stale/invalid-model
    responses and leaves billing, rate-limit and server errors alone."""

    @staticmethod
    def _error(message, status):
        """Build a plain Exception carrying an HTTP-like ``status_code``."""
        err = Exception(message)
        err.status_code = status
        return err

    def test_nous_openrouter_catalog_404(self):
        """The incident message: a Portal-recommended model that was dropped
        from the Nous → OpenRouter catalog."""
        err = self._error(
            "Model 'gpt-5.4-mini' not found. The requested model does not "
            "exist in our configuration or OpenRouter catalog.",
            404,
        )
        assert _is_model_not_found_error(err) is True

    def test_openai_style_model_does_not_exist(self):
        err = self._error("The model `gpt-9-turbo` does not exist", 404)
        assert _is_model_not_found_error(err) is True

    def test_invalid_model_id_400(self):
        err = self._error("openrouter/foo/bar is not a valid model ID", 400)
        assert _is_model_not_found_error(err) is True

    def test_no_such_model(self):
        err = self._error("no such model: phantom-v1", 400)
        assert _is_model_not_found_error(err) is True

    def test_billing_404_is_not_model_not_found(self):
        """Free-tier / credit 404s are owned by _is_payment_error; the two
        predicates must stay disjoint."""
        err = self._error(
            "Model 'gpt-5' is not available on the free tier. Upgrade.",
            404,
        )
        assert _is_model_not_found_error(err) is False
        assert _is_payment_error(err) is True

    def test_out_of_funds_404_is_not_model_not_found(self):
        err = self._error(
            "Your API key is blocked or out of funds. model_not_found",
            404,
        )
        # A billing keyword takes precedence over the not-found marker.
        assert _is_model_not_found_error(err) is False

    def test_rate_limit_is_not_model_not_found(self):
        err = self._error("rate limit exceeded, retry after 5s", 429)
        assert _is_model_not_found_error(err) is False

    def test_500_is_not_model_not_found(self):
        # Matching phrase, but a server-error status must not qualify.
        err = self._error("model does not exist", 500)
        assert _is_model_not_found_error(err) is False
class TestRefreshNousRecommendedModel:
    """Checks which model _refresh_nous_recommended_model hands back after a
    stale-model 404."""

    # Dotted path of the Portal lookup that every test replaces with a stub.
    _PORTAL_LOOKUP = "hermes_cli.models.get_nous_recommended_aux_model"

    def test_returns_fresh_portal_recommendation(self, monkeypatch):
        def _portal(**kw):
            return "stepfun/step-3.7-flash:free"

        monkeypatch.setattr(self._PORTAL_LOOKUP, _portal)
        picked = _refresh_nous_recommended_model(
            vision=True, stale_model="openai/gpt-5.4-mini"
        )
        assert picked == "stepfun/step-3.7-flash:free"

    def test_falls_back_to_default_when_portal_matches_stale(self, monkeypatch):
        """A Portal that still recommends the model that just 404'd yields
        the known-good default instead."""

        def _portal(**kw):
            return "openai/gpt-5.4-mini"

        monkeypatch.setattr(self._PORTAL_LOOKUP, _portal)
        picked = _refresh_nous_recommended_model(
            vision=True, stale_model="openai/gpt-5.4-mini"
        )
        assert picked == "google/gemini-3-flash-preview"

    def test_falls_back_to_default_when_portal_unavailable(self, monkeypatch):
        def _boom(**kw):
            raise RuntimeError("portal down")

        monkeypatch.setattr(self._PORTAL_LOOKUP, _boom)
        picked = _refresh_nous_recommended_model(
            vision=False, stale_model="some/dead-model"
        )
        assert picked == "google/gemini-3-flash-preview"

    def test_returns_none_when_no_distinct_alternative(self, monkeypatch):
        """If the failed model IS the default and the Portal offers nothing
        else, no usable alternative exists."""

        def _portal(**kw):
            return "google/gemini-3-flash-preview"

        monkeypatch.setattr(self._PORTAL_LOOKUP, _portal)
        picked = _refresh_nous_recommended_model(
            vision=False, stale_model="google/gemini-3-flash-preview"
        )
        assert picked is None
class TestIsRateLimitError:
"""_is_rate_limit_error detects 429 rate-limit errors warranting fallback."""