mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-15 09:21:36 +00:00
perf(model-metadata): persist OpenRouter metadata cache (#46114)
This commit is contained in:
parent
0e22bf6439
commit
13a1bd0f83
2 changed files with 153 additions and 4 deletions
|
|
@ -5,6 +5,7 @@ and run_agent.py for pre-flight context checks.
|
|||
"""
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
|
@ -16,7 +17,7 @@ from urllib.parse import urlparse
|
|||
import requests
|
||||
import yaml
|
||||
|
||||
from utils import base_url_host_matches, base_url_hostname
|
||||
from utils import atomic_json_write, base_url_host_matches, base_url_hostname
|
||||
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
|
||||
|
|
@ -111,6 +112,57 @@ _endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
|
|||
_endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
|
||||
def _get_model_metadata_cache_path() -> Path:
    """Return the on-disk location of the OpenRouter model metadata cache.

    The file is ``cache/openrouter_model_metadata.json`` underneath the
    directory returned by ``get_hermes_home()``.
    """
    # Function-scope import, resolved on each call rather than at module load.
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "cache" / "openrouter_model_metadata.json"
|
||||
|
||||
|
||||
def _model_metadata_disk_cache_age_seconds() -> Optional[float]:
    """Return how old the disk cache file is, in seconds.

    The age is the current time minus the cache file's modification time.

    Returns:
        The age in seconds, or ``None`` when the file does not exist, when
        the computed age is negative (mtime lies in the future), or when any
        exception is raised while inspecting the file.
    """
    try:
        path = _get_model_metadata_cache_path()
        if path.exists():
            elapsed = time.time() - path.stat().st_mtime
            # A negative age means the mtime cannot be trusted for freshness.
            return None if elapsed < 0 else elapsed
        return None
    except Exception:
        # Best-effort probe: any failure is reported as "unknown".
        return None
|
||||
|
||||
|
||||
def _load_model_metadata_disk_cache() -> Dict[str, Dict[str, Any]]:
    """Read the processed OpenRouter metadata cache back from disk.

    Returns:
        A mapping of stringified keys to dict values. Entries whose value is
        not a dict are dropped. An empty dict is returned when the top-level
        JSON payload is not an object or when any exception occurs (the
        exception is logged at debug level).
    """
    try:
        with _get_model_metadata_cache_path().open("r", encoding="utf-8") as handle:
            payload = json.load(handle)

        if not isinstance(payload, dict):
            return {}

        entries: Dict[str, Dict[str, Any]] = {}
        for key, entry in payload.items():
            # Keep only well-formed (dict-valued) entries.
            if isinstance(entry, dict):
                entries[str(key)] = entry
        return entries
    except Exception as exc:
        logger.debug("Failed to load OpenRouter model metadata disk cache: %s", exc)
        return {}
|
||||
|
||||
|
||||
def _save_model_metadata_disk_cache(data: Dict[str, Dict[str, Any]]) -> None:
    """Persist the processed OpenRouter metadata cache via ``atomic_json_write``.

    Args:
        data: The metadata mapping to serialize.

    The write is best-effort: any exception is logged at debug level and
    swallowed so a failed save never propagates to the caller.
    """
    try:
        target = _get_model_metadata_cache_path()
        # indent=0 plus compact separators are forwarded unchanged to the writer.
        atomic_json_write(target, data, indent=0, separators=(",", ":"))
    except Exception as exc:
        logger.debug("Failed to save OpenRouter model metadata disk cache: %s", exc)
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
|
|
@ -627,6 +679,15 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
|||
if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
|
||||
return _model_metadata_cache
|
||||
|
||||
if not force_refresh:
|
||||
disk_age = _model_metadata_disk_cache_age_seconds()
|
||||
if disk_age is not None and disk_age < _MODEL_CACHE_TTL:
|
||||
disk_cache = _load_model_metadata_disk_cache()
|
||||
if disk_cache:
|
||||
_model_metadata_cache = disk_cache
|
||||
_model_metadata_cache_time = time.time() - disk_age
|
||||
return _model_metadata_cache
|
||||
|
||||
try:
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
|
||||
response.raise_for_status()
|
||||
|
|
@ -648,12 +709,24 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
|||
|
||||
_model_metadata_cache = cache
|
||||
_model_metadata_cache_time = time.time()
|
||||
_save_model_metadata_disk_cache(cache)
|
||||
logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
|
||||
return cache
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
|
||||
return _model_metadata_cache or {}
|
||||
if _model_metadata_cache:
|
||||
return _model_metadata_cache
|
||||
disk_cache = _load_model_metadata_disk_cache()
|
||||
if disk_cache:
|
||||
_model_metadata_cache = disk_cache
|
||||
disk_age = _model_metadata_disk_cache_age_seconds()
|
||||
if disk_age is not None:
|
||||
_model_metadata_cache_time = time.time() - min(disk_age, _MODEL_CACHE_TTL)
|
||||
else:
|
||||
_model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL + 1
|
||||
return _model_metadata_cache
|
||||
return {}
|
||||
|
||||
|
||||
def fetch_endpoint_model_metadata(
|
||||
|
|
|
|||
|
|
@ -1083,6 +1083,78 @@ class TestFetchModelMetadata:
|
|||
mm._model_metadata_cache = {}
|
||||
mm._model_metadata_cache_time = 0
|
||||
|
||||
def _isolate_disk_cache(self, monkeypatch, tmp_path):
    """Redirect the module's disk-cache path helper to a file under ``tmp_path``.

    Returns the redirected path so the calling test can seed or inspect it.
    """
    import agent.model_metadata as mm

    isolated_path = tmp_path / "openrouter_model_metadata.json"

    def _fake_cache_path():
        return isolated_path

    monkeypatch.setattr(mm, "_get_model_metadata_cache_path", _fake_cache_path)
    return isolated_path
|
||||
|
||||
def test_fresh_disk_cache_skips_network(self, tmp_path, monkeypatch):
    """A freshly written disk cache is served without calling ``requests.get``."""
    self._reset_cache()
    seeded_path = self._isolate_disk_cache(monkeypatch, tmp_path)
    payload = '{"test/model":{"context_length":12345,"name":"Cached","pricing":{}}}'
    seeded_path.write_text(payload, encoding="utf-8")

    with patch("agent.model_metadata.requests.get") as http_get:
        metadata = fetch_model_metadata()

    # The network must stay untouched and the cached entry must be returned.
    http_get.assert_not_called()
    assert metadata["test/model"]["context_length"] == 12345
|
||||
|
||||
def test_force_refresh_bypasses_fresh_disk_cache(self, tmp_path, monkeypatch):
    """``force_refresh=True`` hits the network even when the disk cache is fresh."""
    self._reset_cache()
    seeded_path = self._isolate_disk_cache(monkeypatch, tmp_path)
    payload = '{"test/model":{"context_length":12345,"name":"Cached","pricing":{}}}'
    seeded_path.write_text(payload, encoding="utf-8")

    live_response = MagicMock()
    live_response.raise_for_status = MagicMock()
    live_response.json.return_value = {
        "data": [{"id": "live/model", "context_length": 67890, "name": "Live"}]
    }

    with patch("agent.model_metadata.requests.get", return_value=live_response) as http_get:
        metadata = fetch_model_metadata(force_refresh=True)

    # Exactly one request, and only the live entry is present in the result.
    assert http_get.call_count == 1
    assert "live/model" in metadata
    assert "test/model" not in metadata
|
||||
|
||||
def test_network_success_writes_disk_cache(self, tmp_path, monkeypatch):
    """A successful fetch leaves a cache file on disk containing the fetched model."""
    self._reset_cache()
    target_path = self._isolate_disk_cache(monkeypatch, tmp_path)

    live_response = MagicMock()
    live_response.raise_for_status = MagicMock()
    live_response.json.return_value = {
        "data": [{"id": "live/model", "context_length": 67890, "name": "Live"}]
    }

    with patch("agent.model_metadata.requests.get", return_value=live_response):
        fetch_model_metadata(force_refresh=True)

    assert target_path.exists()
    written = target_path.read_text(encoding="utf-8")
    assert "live/model" in written
|
||||
|
||||
def test_network_failure_falls_back_to_stale_disk_cache(self, tmp_path, monkeypatch):
    """When the request raises, an expired disk cache is still returned."""
    import os

    self._reset_cache()
    seeded_path = self._isolate_disk_cache(monkeypatch, tmp_path)
    payload = '{"stale/model":{"context_length":50000,"name":"Stale","pricing":{}}}'
    seeded_path.write_text(payload, encoding="utf-8")

    # Backdate the file so it is older than the TTL by a full minute.
    stale_mtime = time.time() - _MODEL_CACHE_TTL - 60
    os.utime(seeded_path, (stale_mtime, stale_mtime))

    with patch("agent.model_metadata.requests.get", side_effect=Exception("Network error")):
        metadata = fetch_model_metadata(force_refresh=True)

    assert metadata["stale/model"]["context_length"] == 50000
|
||||
|
||||
@patch("agent.model_metadata.requests.get")
|
||||
def test_caches_result(self, mock_get):
|
||||
self._reset_cache()
|
||||
|
|
@ -1162,10 +1234,11 @@ class TestFetchModelMetadata:
|
|||
assert result["test-model"]["context_length"] == 123456
|
||||
|
||||
@patch("agent.model_metadata.requests.get")
|
||||
def test_ttl_expiry_triggers_refetch(self, mock_get):
|
||||
def test_ttl_expiry_triggers_refetch(self, mock_get, tmp_path, monkeypatch):
|
||||
"""Cache expires after _MODEL_CACHE_TTL seconds."""
|
||||
import agent.model_metadata as mm
|
||||
self._reset_cache()
|
||||
cache_path = self._isolate_disk_cache(monkeypatch, tmp_path)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.json.return_value = {
|
||||
|
|
@ -1177,8 +1250,11 @@ class TestFetchModelMetadata:
|
|||
fetch_model_metadata(force_refresh=True)
|
||||
assert mock_get.call_count == 1
|
||||
|
||||
# Simulate TTL expiry
|
||||
# Simulate both memory and disk TTL expiry.
|
||||
mm._model_metadata_cache_time = time.time() - _MODEL_CACHE_TTL - 1
|
||||
old = time.time() - _MODEL_CACHE_TTL - 1
|
||||
import os
|
||||
os.utime(cache_path, (old, old))
|
||||
fetch_model_metadata()
|
||||
assert mock_get.call_count == 2 # refetched
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue