mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
perf(models_dev): cache-first lookup, skip network when disk cache is fresh (#22808)
`fetch_models_dev()` is on the hot path of every `AIAgent.__init__`
(via `context_compressor → get_model_context_length`). The previous
policy was "always try network first, only fall back to disk if
network fails," so every fresh `hermes chat` / `hermes gateway` /
batch / cron process paid 250-500 ms re-fetching a 2 MB JSON registry
that was already on disk from earlier runs.
Add a stage 2 between in-mem and network: if
`models_dev_cache.json` exists and its mtime is younger than the
existing `_MODELS_DEV_CACHE_TTL` (1 hour, same TTL the in-mem cache
already uses), load from disk and skip the network call.
The in-mem TTL is anchored to the disk file's age, so a 50-min-old
cache stays in-memory for only 10 more minutes — no surprise
extension of staleness window.
Invariants preserved:
- `force_refresh=True` still always hits the network and only falls
back to disk on failure (`hermes config refresh` semantics).
- Missing disk cache → fall through to network (first-ever run).
- Stale disk cache (mtime > TTL) → fall through to network.
- Negative file age (clock skew) → fall through to network.
- Network failure → existing stage-4 stale-disk fallback unchanged.
Measured impact (3-run medians, 9950X3D, fresh process per run):
fetch_models_dev cold: 256 → 17 ms (-93%)
hermes chat -q wall: 4.00 → 3.73 s (-7% median)
3.99 → 3.60 s (-10% min)
The chat-end-to-end win is bounded below by API latency variance, but
the fetch_models_dev microbenchmark is the cleanest signal: 239 ms
shaved off every fresh-process agent construction.
Win compounds with the previous perf PRs:
#22681 google_chat lazy-load
#22766 doctor parallel + IMDS off
#22790 gateway.platforms PEP 562
Tests: all 30 `tests/agent/test_models_dev.py` pass (added 4 new ones
covering the new disk-cache-first path, force_refresh override, stale
disk fallback, and missing-disk-cache fall-through). Full `tests/agent/`
suite: 2560 passed, 0 failed.
This commit is contained in:
parent
cd712b176a
commit
775c0e22cf
2 changed files with 164 additions and 5 deletions
|
|
@ -197,6 +197,32 @@ def _load_disk_cache() -> Dict[str, Any]:
|
|||
return {}
|
||||
|
||||
|
||||
def _disk_cache_age_seconds() -> Optional[float]:
|
||||
"""Return age (in seconds) of the disk cache file, or None if missing.
|
||||
|
||||
Used by ``fetch_models_dev`` to short-circuit the network probe when
|
||||
a recent on-disk cache exists. Errors (missing file, permission
|
||||
denied, weird filesystem) all return None — callers fall through
|
||||
to the network fetch path.
|
||||
"""
|
||||
try:
|
||||
cache_path = _get_cache_path()
|
||||
if not cache_path.exists():
|
||||
return None
|
||||
mtime = cache_path.stat().st_mtime
|
||||
age = time.time() - mtime
|
||||
# Negative age means the file's mtime is in the future (clock skew
|
||||
# or system clock reset). Treat as "unknown freshness" → fall
|
||||
# through to network so we don't serve potentially-bad data
|
||||
# forever.
|
||||
if age < 0:
|
||||
return None
|
||||
return age
|
||||
except Exception as e:
|
||||
logger.debug("Failed to stat models.dev disk cache: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
def _save_disk_cache(data: Dict[str, Any]) -> None:
|
||||
"""Save models.dev data to disk cache atomically."""
|
||||
try:
|
||||
|
|
@ -207,13 +233,29 @@ def _save_disk_cache(data: Dict[str, Any]) -> None:
|
|||
|
||||
|
||||
def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
|
||||
"""Fetch models.dev registry. In-memory cache (1hr) + disk fallback.
|
||||
"""Fetch models.dev registry. Cache hierarchy: in-mem → disk → network.
|
||||
|
||||
Returns the full registry dict keyed by provider ID, or empty dict on failure.
|
||||
|
||||
Cache hierarchy (when ``force_refresh=False``):
|
||||
1. In-memory cache, populated and < TTL old → return immediately.
|
||||
2. **Disk cache file < TTL old by mtime → load, populate in-mem, return.**
|
||||
No network call. Saves ~500 ms per cold-start agent construction;
|
||||
``models.dev`` only changes when providers add new models, so a
|
||||
1 hour staleness window is acceptable (same TTL as in-mem cache).
|
||||
3. Network fetch → on success, save to disk + in-mem and return.
|
||||
4. Network fails → fall back to ANY available disk cache (even stale)
|
||||
with a short 5 min in-mem grace period before retrying network.
|
||||
|
||||
When ``force_refresh=True`` (used by ``hermes config refresh``, the
|
||||
\"refresh model catalog\" code path), stages 1 and 2 are skipped. The
|
||||
function always hits the network and only falls back to disk if the
|
||||
network call fails.
|
||||
"""
|
||||
global _models_dev_cache, _models_dev_cache_time
|
||||
|
||||
# Check in-memory cache
|
||||
# Stage 1: fresh in-memory cache wins. This is the hot path on
|
||||
# long-lived processes — no I/O, no system calls.
|
||||
if (
|
||||
not force_refresh
|
||||
and _models_dev_cache
|
||||
|
|
@ -221,7 +263,27 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
|
|||
):
|
||||
return _models_dev_cache
|
||||
|
||||
# Try network fetch
|
||||
# Stage 2: fresh-by-mtime disk cache short-circuits the network call.
|
||||
# Only kicks in on cold-start processes (in-mem cache is empty or
|
||||
# expired) and only when the user hasn't asked for a forced refresh.
|
||||
# Skipped if the disk cache file is missing, unreadable, or older
|
||||
# than _MODELS_DEV_CACHE_TTL.
|
||||
if not force_refresh:
|
||||
disk_age = _disk_cache_age_seconds()
|
||||
if disk_age is not None and disk_age < _MODELS_DEV_CACHE_TTL:
|
||||
disk_data = _load_disk_cache()
|
||||
if disk_data:
|
||||
_models_dev_cache = disk_data
|
||||
# Anchor in-mem TTL to the disk file's age so we don't
|
||||
# extend an already-aging cache by another full hour.
|
||||
_models_dev_cache_time = time.time() - disk_age
|
||||
logger.debug(
|
||||
"Loaded models.dev from fresh disk cache "
|
||||
"(%d providers, age=%.0fs)", len(disk_data), disk_age,
|
||||
)
|
||||
return _models_dev_cache
|
||||
|
||||
# Stage 3: network fetch.
|
||||
try:
|
||||
response = requests.get(MODELS_DEV_URL, timeout=15)
|
||||
response.raise_for_status()
|
||||
|
|
@ -239,8 +301,9 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
|
|||
except Exception as e:
|
||||
logger.debug("Failed to fetch models.dev: %s", e)
|
||||
|
||||
# Fall back to disk cache — use a short TTL (5 min) so we retry
|
||||
# the network fetch soon instead of serving stale data for a full hour.
|
||||
# Stage 4: network failed — fall back to whatever disk cache exists,
|
||||
# even if it's stale. Give it a short 5 min in-mem TTL so we retry
|
||||
# the network soon instead of serving stale data for a full hour.
|
||||
if not _models_dev_cache:
|
||||
_models_dev_cache = _load_disk_cache()
|
||||
if _models_dev_cache:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue