From 13a1bd0f83c04fc4b2640e24ce2393e1a88dae1e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 14 Jun 2026 04:45:46 -0700 Subject: [PATCH] perf(model-metadata): persist OpenRouter metadata cache (#46114) --- agent/model_metadata.py | 77 +++++++++++++++++++++++++++- tests/agent/test_model_metadata.py | 80 +++++++++++++++++++++++++++++- 2 files changed, 153 insertions(+), 4 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 3a71e974fdb..8cfec23fe1f 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -5,6 +5,7 @@ and run_agent.py for pre-flight context checks. """ import ipaddress +import json import logging import os import re @@ -16,7 +17,7 @@ from urllib.parse import urlparse import requests import yaml -from utils import base_url_host_matches, base_url_hostname +from utils import atomic_json_write, base_url_host_matches, base_url_hostname from hermes_constants import OPENROUTER_MODELS_URL @@ -111,6 +112,57 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {} _endpoint_model_metadata_cache_time: Dict[str, float] = {} _ENDPOINT_MODEL_CACHE_TTL = 300 + +def _get_model_metadata_cache_path() -> Path: + """Return path to the OpenRouter model metadata disk cache.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "cache" / "openrouter_model_metadata.json" + + +def _model_metadata_disk_cache_age_seconds() -> Optional[float]: + """Return disk-cache age in seconds, or None if freshness is unknown.""" + try: + cache_path = _get_model_metadata_cache_path() + if not cache_path.exists(): + return None + age = time.time() - cache_path.stat().st_mtime + if age < 0: + return None + return age + except Exception: + return None + + +def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]: + """Load processed OpenRouter metadata cache from disk.""" + try: + cache_path = _get_model_metadata_cache_path() + with cache_path.open("r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + return {} + return { + str(key): value + for key, value in data.items() + if isinstance(value, dict) + } + except Exception as e: + logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e) + return {} + + +def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None: + """Save processed OpenRouter metadata cache to disk atomically.""" + try: + atomic_json_write( + _get_model_metadata_cache_path(), + data, + indent=0, + separators=(",", ":"), + ) + except Exception as e: + logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e) + # Descending tiers for context length probing when the model is unknown. # We start at 256K (covers GPT-5.x, many current large-context models) and # step down on context-length errors until one works. Tier[0] is also the @@ -627,6 +679,15 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL: return _model_metadata_cache + if not force_refresh: + disk_age = _model_metadata_disk_cache_age_seconds() + if disk_age is not None and disk_age < _MODEL_CACHE_TTL: + disk_cache = _load_model_metadata_disk_cache() + if disk_cache: + _model_metadata_cache = disk_cache + _model_metadata_cache_time = time.time() - disk_age + return _model_metadata_cache + try: response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify()) response.raise_for_status() @@ -648,12 +709,24 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any _model_metadata_cache = cache _model_metadata_cache_time = time.time() + _save_model_metadata_disk_cache(cache) logger.debug("Fetched metadata for %s models from OpenRouter", len(cache)) return cache except Exception as e: logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}") - return _model_metadata_cache or {} + if _model_metadata_cache: + return _model_metadata_cache + disk_cache = _load_model_metadata_disk_cache() + if disk_cache: + _model_metadata_cache = disk_cache + disk_age = _model_metadata_disk_cache_age_seconds() + if disk_age is not None: + _model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL) + else: + _model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1 + return _model_metadata_cache + return {} def fetch_endpoint_model_metadata( diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index ba5fa30886f..35651a00b66 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -1083,6 +1083,78 @@ class TestFetchModelMetadata: mm._model_metadata_cache = {} mm._model_metadata_cache_time = 0 + def _isolate_disk_cache(self, monkeypatch, tmp_path): + import agent.model_metadata as mm + cache_path = tmp_path / "openrouter_model_metadata.json" + monkeypatch.setattr(mm, "_get_model_metadata_cache_path", lambda: cache_path) + return cache_path + + def test_fresh_disk_cache_skips_network(self, tmp_path, monkeypatch): + self._reset_cache() + cache_path = self._isolate_disk_cache(monkeypatch, tmp_path) + cache_path.write_text( + '{"test/model":{"context_length":12345,"name":"Cached","pricing":{}}}', + encoding="utf-8", + ) + + with patch("agent.model_metadata.requests.get") as mock_get: + result = fetch_model_metadata() + + mock_get.assert_not_called() + assert result["test/model"]["context_length"] == 12345 + + def test_force_refresh_bypasses_fresh_disk_cache(self, tmp_path, monkeypatch): + self._reset_cache() + cache_path = self._isolate_disk_cache(monkeypatch, tmp_path) + cache_path.write_text( + '{"test/model":{"context_length":12345,"name":"Cached","pricing":{}}}', + encoding="utf-8", + ) + + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [{"id": "live/model", "context_length": 67890, "name": "Live"}] + } + mock_response.raise_for_status = MagicMock() + + with patch("agent.model_metadata.requests.get", return_value=mock_response) as mock_get: + result = fetch_model_metadata(force_refresh=True) + + assert mock_get.call_count == 1 + assert "live/model" in result + assert "test/model" not in result + + def test_network_success_writes_disk_cache(self, tmp_path, monkeypatch): + self._reset_cache() + cache_path = self._isolate_disk_cache(monkeypatch, tmp_path) + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [{"id": "live/model", "context_length": 67890, "name": "Live"}] + } + mock_response.raise_for_status = MagicMock() + + with patch("agent.model_metadata.requests.get", return_value=mock_response): + fetch_model_metadata(force_refresh=True) + + assert cache_path.exists() + assert "live/model" in cache_path.read_text(encoding="utf-8") + + def test_network_failure_falls_back_to_stale_disk_cache(self, tmp_path, monkeypatch): + self._reset_cache() + cache_path = self._isolate_disk_cache(monkeypatch, tmp_path) + cache_path.write_text( + '{"stale/model":{"context_length":50000,"name":"Stale","pricing":{}}}', + encoding="utf-8", + ) + old = time.time() - _MODEL_CACHE_TTL - 60 + import os + os.utime(cache_path, (old, old)) + + with patch("agent.model_metadata.requests.get", side_effect=Exception("Network error")): + result = fetch_model_metadata(force_refresh=True) + + assert result["stale/model"]["context_length"] == 50000 + @patch("agent.model_metadata.requests.get") def test_caches_result(self, mock_get): self._reset_cache() @@ -1162,10 +1234,11 @@ class TestFetchModelMetadata: assert result["test-model"]["context_length"] == 123456 @patch("agent.model_metadata.requests.get") - def test_ttl_expiry_triggers_refetch(self, mock_get): + def test_ttl_expiry_triggers_refetch(self, mock_get, tmp_path, monkeypatch): """Cache expires after _MODEL_CACHE_TTL seconds.""" import agent.model_metadata as mm self._reset_cache() + cache_path = self._isolate_disk_cache(monkeypatch, tmp_path) mock_response = MagicMock() mock_response.json.return_value = { @@ -1177,8 +1250,11 @@ class TestFetchModelMetadata: fetch_model_metadata(force_refresh=True) assert mock_get.call_count == 1 - # Simulate TTL expiry + # Simulate both memory and disk TTL expiry. mm._model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL - 1 + old = time.time() - _MODEL_CACHE_TTL - 1 + import os + os.utime(cache_path, (old, old)) fetch_model_metadata() assert mock_get.call_count == 2 # refetched