From e687292eb4fdc08a3ea3354d6ca3015ada84fd95 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 9 Jun 2026 00:03:43 -0700
Subject: [PATCH] feat(models): persist Nous recommended-models to disk; fall
back on Portal failure (#42628)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Portal's /api/nous/recommended-models endpoint is the source of truth for
which models are free/paid right now, but its result was cached in-process
only. When the live fetch failed (network, parse, non-2xx), the function
returned {} and the model picker silently dropped the free/paid
recommendations — free models would vanish with no indication anything went
wrong.
Add a per-base disk cache at $HERMES_HOME/cache/nous_recommended_cache.json:
a successful live fetch is persisted as last-known-good, and a failed fetch
with an empty in-process cache falls back to the disk copy instead of {}.
Self-heals on the next successful fetch. With no disk copy, still degrades to
{} (callers already handle that). Keyed by portal base URL so staging/prod
don't collide.
E2E: live fetch writes disk; simulated Portal failure returns the cached free
models from disk; no-disk + failure returns {}.
---
hermes_cli/models.py | 88 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 9ddcf3f24b3..57132a97c66 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -766,6 +766,64 @@ _NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes)
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
+def _nous_recommended_disk_path() -> "Path":
+ """Disk path for the persisted recommended-models cache."""
+ from hermes_constants import get_hermes_home
+ return get_hermes_home() / "cache" / "nous_recommended_cache.json"
+
+
+def _read_nous_recommended_disk(base: str) -> dict[str, Any] | None:
+ """Return the last-known-good payload for ``base`` from disk, or None.
+
+ The disk file is a JSON object keyed by portal base URL so staging and
+ prod don't collide:
+ ``{"": {"data": {...}, "ts": }}``.
+ """
+ try:
+ with open(_nous_recommended_disk_path(), encoding="utf-8") as fh:
+ blob = json.load(fh)
+ except (OSError, json.JSONDecodeError):
+ return None
+ if not isinstance(blob, dict):
+ return None
+ entry = blob.get(base)
+ if not isinstance(entry, dict):
+ return None
+ data = entry.get("data")
+ return data if isinstance(data, dict) and data else None
+
+
+def _write_nous_recommended_disk(base: str, data: dict[str, Any]) -> None:
+ """Persist ``data`` as the last-known-good payload for ``base``.
+
+ Merges into any existing per-base map, then writes atomically. Failures
+ are non-fatal (logged at debug) — the in-process cache still works.
+ """
+ if not data:
+ return
+ path = _nous_recommended_disk_path()
+ try:
+ try:
+ with open(path, encoding="utf-8") as fh:
+ blob = json.load(fh)
+ if not isinstance(blob, dict):
+ blob = {}
+ except (OSError, json.JSONDecodeError):
+ blob = {}
+ blob[base] = {"data": data, "ts": time.time()}
+ path.parent.mkdir(parents=True, exist_ok=True)
+ tmp = path.with_suffix(path.suffix + ".tmp")
+ with open(tmp, "w", encoding="utf-8") as fh:
+ json.dump(blob, fh, indent=2)
+ fh.write("\n")
+ os.replace(tmp, path)
+ except OSError as exc:
+ import logging
+ logging.getLogger(__name__).debug(
+ "nous recommended-models disk cache write failed: %s", exc
+ )
+
+
def fetch_nous_recommended_models(
portal_base_url: str = "",
timeout: float = 5.0,
@@ -776,12 +834,19 @@ def fetch_nous_recommended_models(
Hits ``/api/nous/recommended-models``. The endpoint is public —
no auth is required. Results are cached per portal URL for
- ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
- bypass the cache.
+ ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds in process; pass
+ ``force_refresh=True`` to bypass the in-process cache.
- Returns the parsed JSON dict on success, or ``{}`` on any failure
- (network, parse, non-2xx). Callers must treat missing/null fields as
- "no recommendation" and fall back to their own default.
+ A successful live fetch is also persisted to a per-base disk cache
+ (``$HERMES_HOME/cache/nous_recommended_cache.json``) as last-known-good.
+ When the live fetch fails (network, parse, non-2xx) and the in-process
+ cache is empty, the disk copy is returned instead of ``{}`` — so a
+ transient Portal hiccup no longer silently drops the free/paid model
+ recommendations from the picker. Self-heals on the next successful fetch.
+
+ Returns the parsed JSON dict, or ``{}`` only when neither the network nor
+ any cache layer can supply data. Callers must treat missing/null fields
+ as "no recommendation" and fall back to their own default.
"""
base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
now = time.monotonic()
@@ -804,6 +869,19 @@ def fetch_nous_recommended_models(
except Exception:
data = {}
+ if data:
+ # Live fetch succeeded — refresh both cache layers.
+ _nous_recommended_cache[base] = (data, now)
+ _write_nous_recommended_disk(base, data)
+ return data
+
+ # Live fetch failed. Fall back to the last-known-good disk copy so a
+ # transient Portal hiccup doesn't drop the recommendations entirely.
+ disk = _read_nous_recommended_disk(base)
+ if disk:
+ _nous_recommended_cache[base] = (disk, now)
+ return disk
+
_nous_recommended_cache[base] = (data, now)
return data