perf(model-metadata): persist OpenRouter metadata cache (#46114)

This commit is contained in:
Teknium 2026-06-14 04:45:46 -07:00 committed by GitHub
parent 0e22bf6439
commit 13a1bd0f83
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 153 additions and 4 deletions

View file

@ -5,6 +5,7 @@ and run_agent.py for pre-flight context checks.
"""
import ipaddress
import json
import logging
import os
import re
@ -16,7 +17,7 @@ from urllib.parse import urlparse
import requests
import yaml
from utils import base_url_host_matches, base_url_hostname
from utils import atomic_json_write, base_url_host_matches, base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
@ -111,6 +112,57 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
_endpoint_model_metadata_cache_time: Dict[str, float] = {}
_ENDPOINT_MODEL_CACHE_TTL = 300
def _get_model_metadata_cache_path() -> Path:
    """Locate the on-disk cache file for processed OpenRouter model metadata.

    Returns:
        ``<hermes home>/cache/openrouter_model_metadata.json``, where the home
        directory is whatever ``hermes_constants.get_hermes_home()`` returns
        at call time.
    """
    # Resolved on every call via a function-scope import rather than at
    # module import time.
    from hermes_constants import get_hermes_home

    cache_dir = get_hermes_home() / "cache"
    return cache_dir / "openrouter_model_metadata.json"
def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
    """Report how old the disk cache file is, based on its modification time.

    Returns:
        Seconds elapsed since the cache file's mtime, or ``None`` when the
        file does not exist, the computed age is negative (mtime lies in the
        future), or any error occurs while inspecting the file.
    """
    try:
        path = _get_model_metadata_cache_path()
        if path.exists():
            elapsed = time.time() - path.stat().st_mtime
            # A negative age means the mtime is ahead of the clock, so the
            # freshness of the file cannot be trusted.
            return None if elapsed < 0 else elapsed
    except Exception:
        # Best effort: any failure is reported as "freshness unknown".
        pass
    return None
def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
    """Read the processed OpenRouter metadata cache back from disk.

    Returns:
        A mapping of stringified top-level keys to their dict values. Entries
        whose value is not a dict are dropped. An empty dict is returned when
        the top-level JSON value is not an object, or when reading/parsing
        fails for any reason (the failure is logged at debug level).
    """
    try:
        with _get_model_metadata_cache_path().open("r", encoding="utf-8") as handle:
            payload = json.load(handle)

        entries: Dict[str, Dict[str, Any]] = {}
        if isinstance(payload, dict):
            for model_id, metadata in payload.items():
                # Keep only well-formed entries; anything else is ignored.
                if isinstance(metadata, dict):
                    entries[str(model_id)] = metadata
        return entries
    except Exception as e:
        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", e)
        return {}
def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
    """Persist the processed OpenRouter metadata cache via ``atomic_json_write``.

    Best effort: any exception raised while resolving the path or writing is
    logged at debug level and swallowed, so a failed save never propagates to
    the caller.

    Args:
        data: Processed metadata mapping to write to the cache file.
    """
    # NOTE(review): presumably these options are forwarded to the JSON
    # encoder, where indent=0 emits newlines without indentation -- confirm
    # against atomic_json_write.
    write_options = {"indent": 0, "separators": (",", ":")}
    try:
        target = _get_model_metadata_cache_path()
        atomic_json_write(target, data, **write_options)
    except Exception as e:
        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", e)
# Descending tiers for context length probing when the model is unknown.
# We start at 256K (covers GPT-5.x, many current large-context models) and
# step down on context-length errors until one works. Tier[0] is also the
@ -627,6 +679,15 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
return _model_metadata_cache
if not force_refresh:
disk_age = _model_metadata_disk_cache_age_seconds()
if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
disk_cache = _load_model_metadata_disk_cache()
if disk_cache:
_model_metadata_cache = disk_cache
_model_metadata_cache_time = time.time() - disk_age
return _model_metadata_cache
try:
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
response.raise_for_status()
@ -648,12 +709,24 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
_model_metadata_cache = cache
_model_metadata_cache_time = time.time()
_save_model_metadata_disk_cache(cache)
logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
return cache
except Exception as e:
logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
return _model_metadata_cache or {}
if _model_metadata_cache:
return _model_metadata_cache
disk_cache = _load_model_metadata_disk_cache()
if disk_cache:
_model_metadata_cache = disk_cache
disk_age = _model_metadata_disk_cache_age_seconds()
if disk_age is not None:
_model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
else:
_model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
return _model_metadata_cache
return {}
def fetch_endpoint_model_metadata(

View file

@ -1083,6 +1083,78 @@ class TestFetchModelMetadata:
mm._model_metadata_cache = {}
mm._model_metadata_cache_time = 0
def _isolate_disk_cache(self, monkeypatch, tmp_path):
    """Point the module's disk-cache path at a file under ``tmp_path``.

    Patches ``agent.model_metadata._get_model_metadata_cache_path`` so it
    returns the temporary file path, and hands that path back to the test.
    """
    import agent.model_metadata as mm

    isolated_path = tmp_path / "openrouter_model_metadata.json"

    def _fake_cache_path():
        return isolated_path

    monkeypatch.setattr(mm, "_get_model_metadata_cache_path", _fake_cache_path)
    return isolated_path
def test_fresh_disk_cache_skips_network(self, tmp_path, monkeypatch):
    """A just-written disk cache is returned without calling ``requests.get``."""
    self._reset_cache()
    cache_file = self._isolate_disk_cache(monkeypatch, tmp_path)
    cached_json = '{"test/model":{"context_length":12345,"name":"Cached","pricing":{}}}'
    cache_file.write_text(cached_json, encoding="utf-8")

    with patch("agent.model_metadata.requests.get") as mock_get:
        result = fetch_model_metadata()

    # The file was written moments ago, so no HTTP call should be made.
    mock_get.assert_not_called()
    assert result["test/model"]["context_length"] == 12345
def test_force_refresh_bypasses_fresh_disk_cache(self, tmp_path, monkeypatch):
    """``force_refresh=True`` fetches live data even when a fresh disk cache exists."""
    self._reset_cache()
    cache_file = self._isolate_disk_cache(monkeypatch, tmp_path)
    cache_file.write_text(
        '{"test/model":{"context_length":12345,"name":"Cached","pricing":{}}}',
        encoding="utf-8",
    )

    live_payload = {
        "data": [{"id": "live/model", "context_length": 67890, "name": "Live"}]
    }
    mock_response = MagicMock()
    mock_response.json.return_value = live_payload
    mock_response.raise_for_status = MagicMock()

    with patch("agent.model_metadata.requests.get", return_value=mock_response) as mock_get:
        result = fetch_model_metadata(force_refresh=True)

    # Exactly one network call, and only the live entry is in the result.
    assert mock_get.call_count == 1
    assert "live/model" in result
    assert "test/model" not in result
def test_network_success_writes_disk_cache(self, tmp_path, monkeypatch):
    """A successful fetch leaves a cache file on disk containing the live model id."""
    self._reset_cache()
    cache_file = self._isolate_disk_cache(monkeypatch, tmp_path)

    live_payload = {
        "data": [{"id": "live/model", "context_length": 67890, "name": "Live"}]
    }
    mock_response = MagicMock()
    mock_response.json.return_value = live_payload
    mock_response.raise_for_status = MagicMock()

    with patch("agent.model_metadata.requests.get", return_value=mock_response):
        fetch_model_metadata(force_refresh=True)

    assert cache_file.exists()
    written_text = cache_file.read_text(encoding="utf-8")
    assert "live/model" in written_text
def test_network_failure_falls_back_to_stale_disk_cache(self, tmp_path, monkeypatch):
    """When the fetch raises, an expired disk cache is still returned."""
    import os

    self._reset_cache()
    cache_file = self._isolate_disk_cache(monkeypatch, tmp_path)
    cache_file.write_text(
        '{"stale/model":{"context_length":50000,"name":"Stale","pricing":{}}}',
        encoding="utf-8",
    )

    # Backdate the file's atime/mtime to 60 seconds past the TTL so the disk
    # cache counts as stale.
    stale_timestamp = time.time() - _MODEL_CACHE_TTL - 60
    os.utime(cache_file, (stale_timestamp, stale_timestamp))

    with patch("agent.model_metadata.requests.get", side_effect=Exception("Network error")):
        result = fetch_model_metadata(force_refresh=True)

    assert result["stale/model"]["context_length"] == 50000
@patch("agent.model_metadata.requests.get")
def test_caches_result(self, mock_get):
self._reset_cache()
@ -1162,10 +1234,11 @@ class TestFetchModelMetadata:
assert result["test-model"]["context_length"] == 123456
@patch("agent.model_metadata.requests.get")
def test_ttl_expiry_triggers_refetch(self, mock_get):
def test_ttl_expiry_triggers_refetch(self, mock_get, tmp_path, monkeypatch):
"""Cache expires after _MODEL_CACHE_TTL seconds."""
import agent.model_metadata as mm
self._reset_cache()
cache_path = self._isolate_disk_cache(monkeypatch, tmp_path)
mock_response = MagicMock()
mock_response.json.return_value = {
@ -1177,8 +1250,11 @@ class TestFetchModelMetadata:
fetch_model_metadata(force_refresh=True)
assert mock_get.call_count == 1
# Simulate TTL expiry
# Simulate both memory and disk TTL expiry.
mm._model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL - 1
old = time.time() - _MODEL_CACHE_TTL - 1
import os
os.utime(cache_path, (old, old))
fetch_model_metadata()
assert mock_get.call_count == 2 # refetched