diff --git a/agent/models_dev.py b/agent/models_dev.py index 0ef18f4ce1f..fbb3153829b 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -197,6 +197,32 @@ def _load_disk_cache() -> Dict[str, Any]: return {} +def _disk_cache_age_seconds() -> Optional[float]: + """Return age (in seconds) of the disk cache file, or None if missing. + + Used by ``fetch_models_dev`` to short-circuit the network probe when + a recent on-disk cache exists. Errors (missing file, permission + denied, weird filesystem) all return None — callers fall through + to the network fetch path. + """ + try: + cache_path = _get_cache_path() + if not cache_path.exists(): + return None + mtime = cache_path.stat().st_mtime + age = time.time() - mtime + # Negative age means the file's mtime is in the future (clock skew + # or system clock reset). Treat as "unknown freshness" → fall + # through to network so we don't serve potentially-bad data + # forever. + if age < 0: + return None + return age + except Exception as e: + logger.debug("Failed to stat models.dev disk cache: %s", e) + return None + + def _save_disk_cache(data: Dict[str, Any]) -> None: """Save models.dev data to disk cache atomically.""" try: @@ -207,13 +233,29 @@ def _save_disk_cache(data: Dict[str, Any]) -> None: def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]: - """Fetch models.dev registry. In-memory cache (1hr) + disk fallback. + """Fetch models.dev registry. Cache hierarchy: in-mem → disk → network. Returns the full registry dict keyed by provider ID, or empty dict on failure. + + Cache hierarchy (when ``force_refresh=False``): + 1. In-memory cache, populated and < TTL old → return immediately. + 2. **Disk cache file < TTL old by mtime → load, populate in-mem, return.** + No network call. Saves ~500 ms per cold-start agent construction; + ``models.dev`` only changes when providers add new models, so a + 1 hour staleness window is acceptable (same TTL as in-mem cache). + 3. Network fetch → on success, save to disk + in-mem and return. + 4. Network fails → fall back to ANY available disk cache (even stale) + with a short 5 min in-mem grace period before retrying network. + + When ``force_refresh=True`` (used by ``hermes config refresh``, the + \"refresh model catalog\" code path), stages 1 and 2 are skipped. The + function always hits the network and only falls back to disk if the + network call fails. """ global _models_dev_cache, _models_dev_cache_time - # Check in-memory cache + # Stage 1: fresh in-memory cache wins. This is the hot path on + # long-lived processes — no I/O, no system calls. if ( not force_refresh and _models_dev_cache @@ -221,7 +263,27 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]: ): return _models_dev_cache - # Try network fetch + # Stage 2: fresh-by-mtime disk cache short-circuits the network call. + # Only kicks in on cold-start processes (in-mem cache is empty or + # expired) and only when the user hasn't asked for a forced refresh. + # Skipped if the disk cache file is missing, unreadable, or older + # than _MODELS_DEV_CACHE_TTL. + if not force_refresh: + disk_age = _disk_cache_age_seconds() + if disk_age is not None and disk_age < _MODELS_DEV_CACHE_TTL: + disk_data = _load_disk_cache() + if disk_data: + _models_dev_cache = disk_data + # Anchor in-mem TTL to the disk file's age so we don't + # extend an already-aging cache by another full hour. + _models_dev_cache_time = time.time() - disk_age + logger.debug( + "Loaded models.dev from fresh disk cache " + "(%d providers, age=%.0fs)", len(disk_data), disk_age, + ) + return _models_dev_cache + + # Stage 3: network fetch. try: response = requests.get(MODELS_DEV_URL, timeout=15) response.raise_for_status() @@ -239,8 +301,9 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]: except Exception as e: logger.debug("Failed to fetch models.dev: %s", e) - # Fall back to disk cache — use a short TTL (5 min) so we retry - # the network fetch soon instead of serving stale data for a full hour. + # Stage 4: network failed — fall back to whatever disk cache exists, + # even if it's stale. Give it a short 5 min in-mem TTL so we retry + # the network soon instead of serving stale data for a full hour. if not _models_dev_cache: _models_dev_cache = _load_disk_cache() if _models_dev_cache: diff --git a/tests/agent/test_models_dev.py b/tests/agent/test_models_dev.py index 4eac2bd5616..2cb9746b223 100644 --- a/tests/agent/test_models_dev.py +++ b/tests/agent/test_models_dev.py @@ -201,6 +201,102 @@ class TestFetchModelsDev: mock_get.assert_not_called() assert result == SAMPLE_REGISTRY + @patch("agent.models_dev.requests.get") + def test_fresh_disk_cache_skips_network(self, mock_get): + """When in-mem cache is empty but disk cache exists and is fresh by + mtime (< TTL), fetch_models_dev returns disk data without ever + making the network call. + + This is the cold-start fast path: every fresh process previously + paid ~500 ms re-fetching a registry that was already on disk + from an earlier run. + """ + import agent.models_dev as md + # Empty in-mem cache so stage 1 doesn't short-circuit. + md._models_dev_cache = {} + md._models_dev_cache_time = 0 + + with patch.object(md, "_disk_cache_age_seconds", return_value=60.0), \ + patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY): + result = fetch_models_dev() + + # The whole point: no network call. + mock_get.assert_not_called() + assert "anthropic" in result + # In-mem cache populated so subsequent calls within the same + # process stay on stage 1. + assert md._models_dev_cache == SAMPLE_REGISTRY + + @patch("agent.models_dev.requests.get") + def test_stale_disk_cache_falls_through_to_network(self, mock_get): + """When the disk cache is OLDER than TTL, we must hit the network + (and only fall back to the stale disk data if network fails).""" + import agent.models_dev as md + md._models_dev_cache = {} + md._models_dev_cache_time = 0 + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = SAMPLE_REGISTRY + mock_resp.raise_for_status = MagicMock() + mock_get.return_value = mock_resp + + # Disk cache exists but is older than the TTL — must NOT short-circuit. + with patch.object(md, "_disk_cache_age_seconds", + return_value=md._MODELS_DEV_CACHE_TTL + 60), \ + patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY), \ + patch.object(md, "_save_disk_cache"): + result = fetch_models_dev() + + mock_get.assert_called_once() + assert "anthropic" in result + + @patch("agent.models_dev.requests.get") + def test_force_refresh_skips_disk_cache(self, mock_get): + """force_refresh=True bypasses BOTH the in-mem cache AND the + disk-cache fast path. Used by ``hermes config refresh`` and + anywhere else the user explicitly asked for fresh data. + """ + import agent.models_dev as md + md._models_dev_cache = {} + md._models_dev_cache_time = 0 + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = SAMPLE_REGISTRY + mock_resp.raise_for_status = MagicMock() + mock_get.return_value = mock_resp + + # Disk cache is fresh, but force_refresh must override it. + with patch.object(md, "_disk_cache_age_seconds", return_value=60.0), \ + patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY), \ + patch.object(md, "_save_disk_cache"): + result = fetch_models_dev(force_refresh=True) + + mock_get.assert_called_once() + assert "anthropic" in result + + @patch("agent.models_dev.requests.get") + def test_missing_disk_cache_falls_through_to_network(self, mock_get): + """If the disk cache file doesn't exist (first-ever run, or it + was deleted), fall through cleanly to network.""" + import agent.models_dev as md + md._models_dev_cache = {} + md._models_dev_cache_time = 0 + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = SAMPLE_REGISTRY + mock_resp.raise_for_status = MagicMock() + mock_get.return_value = mock_resp + + with patch.object(md, "_disk_cache_age_seconds", return_value=None), \ + patch.object(md, "_save_disk_cache"): + result = fetch_models_dev() + + mock_get.assert_called_once() + assert "anthropic" in result + # --------------------------------------------------------------------------- # get_model_capabilities — vision via modalities.input