mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
perf(models_dev): cache-first lookup, skip network when disk cache is fresh (#22808)
`fetch_models_dev()` is on the hot path of every `AIAgent.__init__`
(via `context_compressor → get_model_context_length`). The previous
policy was "always try network first, only fall back to disk if
network fails," so every fresh `hermes chat` / `hermes gateway` /
batch / cron process paid 250-500 ms re-fetching a 2 MB JSON registry
that was already on disk from earlier runs.
Add a stage 2 between in-mem and network: if
`models_dev_cache.json` exists and its age (time since mtime) is below
the existing `_MODELS_DEV_CACHE_TTL` (1 hour, same TTL the in-mem cache
already uses), load from disk and skip the network call.
The in-mem TTL is anchored to the disk file's age, so a 50-min-old
cache stays in-memory for only 10 more minutes — no surprise
extension of staleness window.
Invariants preserved:
- `force_refresh=True` still always hits the network and only falls
back to disk on failure (`hermes config refresh` semantics).
- Missing disk cache → fall through to network (first-ever run).
- Stale disk cache (file age > TTL) → fall through to network.
- Negative file age (clock skew) → fall through to network.
- Network failure → existing stage-4 stale-disk fallback unchanged.
Measured impact (3-run medians, 9950X3D, fresh process per run):
fetch_models_dev cold: 256 → 17 ms (-93%)
hermes chat -q wall: 4.00 → 3.73 s (-7% median)
3.99 → 3.60 s (-10% min)
The end-to-end chat win is partly masked by API latency variance, so
the fetch_models_dev microbenchmark is the cleanest signal: 239 ms
shaved off every fresh-process agent construction.
Win compounds with the previous perf PRs:
#22681 google_chat lazy-load
#22766 doctor parallel + IMDS off
#22790 gateway.platforms PEP 562
Tests: all 30 tests in `tests/agent/test_models_dev.py` pass (added 4
new ones covering the new disk-cache-first path, force_refresh override,
stale-disk fall-through to network, and missing-disk-cache fall-through).
Full `tests/agent/` suite: 2560 passed, 0 failed.
This commit is contained in:
parent
cd712b176a
commit
775c0e22cf
2 changed files with 164 additions and 5 deletions
|
|
@ -201,6 +201,102 @@ class TestFetchModelsDev:
|
|||
mock_get.assert_not_called()
|
||||
assert result == SAMPLE_REGISTRY
|
||||
|
||||
@patch("agent.models_dev.requests.get")
def test_fresh_disk_cache_skips_network(self, mock_get):
    """Cold start with a fresh disk cache must not touch the network.

    The in-memory cache starts out empty while the disk cache reports an
    age of 60 seconds, so fetch_models_dev is expected to hand back the
    on-disk registry and leave ``requests.get`` uncalled.
    """
    import agent.models_dev as md

    # Clear the in-memory cache; otherwise the lookup would be answered
    # before the disk cache is ever consulted.
    md._models_dev_cache = {}
    md._models_dev_cache_time = 0

    fresh_age = patch.object(md, "_disk_cache_age_seconds", return_value=60.0)
    disk_load = patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY)
    with fresh_age, disk_load:
        registry = fetch_models_dev()

    # No HTTP request may have been issued on this path.
    mock_get.assert_not_called()
    assert "anthropic" in registry
    # The disk data must also land in the in-memory cache so that later
    # calls in the same process are served from memory.
    assert md._models_dev_cache == SAMPLE_REGISTRY
|
||||
|
||||
@patch("agent.models_dev.requests.get")
def test_stale_disk_cache_falls_through_to_network(self, mock_get):
    """A disk cache older than the TTL must not short-circuit the fetch.

    With the reported disk age 60 seconds past ``_MODELS_DEV_CACHE_TTL``,
    fetch_models_dev is expected to issue exactly one network request.
    """
    import agent.models_dev as md

    md._models_dev_cache = {}
    md._models_dev_cache_time = 0

    # Successful HTTP response carrying the sample registry.
    response = MagicMock(status_code=200)
    response.json.return_value = SAMPLE_REGISTRY
    response.raise_for_status = MagicMock()
    mock_get.return_value = response

    # The disk cache exists, but its age is beyond the TTL.
    expired_age = md._MODELS_DEV_CACHE_TTL + 60
    stale_age = patch.object(md, "_disk_cache_age_seconds", return_value=expired_age)
    disk_load = patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY)
    disk_save = patch.object(md, "_save_disk_cache")
    with stale_age, disk_load, disk_save:
        registry = fetch_models_dev()

    mock_get.assert_called_once()
    assert "anthropic" in registry
|
||||
|
||||
@patch("agent.models_dev.requests.get")
def test_force_refresh_skips_disk_cache(self, mock_get):
    """``force_refresh=True`` must reach the network despite a fresh disk cache.

    The in-memory cache is empty and the disk cache reports an age of
    60 seconds, yet an explicit refresh request is still expected to
    issue exactly one network call.
    """
    import agent.models_dev as md

    md._models_dev_cache = {}
    md._models_dev_cache_time = 0

    # Successful HTTP response carrying the sample registry.
    response = MagicMock(status_code=200)
    response.json.return_value = SAMPLE_REGISTRY
    response.raise_for_status = MagicMock()
    mock_get.return_value = response

    # A fresh disk cache is available; force_refresh has to win over it.
    fresh_age = patch.object(md, "_disk_cache_age_seconds", return_value=60.0)
    disk_load = patch.object(md, "_load_disk_cache", return_value=SAMPLE_REGISTRY)
    disk_save = patch.object(md, "_save_disk_cache")
    with fresh_age, disk_load, disk_save:
        registry = fetch_models_dev(force_refresh=True)

    mock_get.assert_called_once()
    assert "anthropic" in registry
|
||||
|
||||
@patch("agent.models_dev.requests.get")
def test_missing_disk_cache_falls_through_to_network(self, mock_get):
    """Without a disk cache file, the lookup must go to the network.

    ``_disk_cache_age_seconds`` is patched to return ``None`` (no cache
    file, e.g. first run or deleted), and fetch_models_dev is expected
    to issue exactly one network request.
    """
    import agent.models_dev as md

    md._models_dev_cache = {}
    md._models_dev_cache_time = 0

    # Successful HTTP response carrying the sample registry.
    response = MagicMock(status_code=200)
    response.json.return_value = SAMPLE_REGISTRY
    response.raise_for_status = MagicMock()
    mock_get.return_value = response

    no_disk_cache = patch.object(md, "_disk_cache_age_seconds", return_value=None)
    disk_save = patch.object(md, "_save_disk_cache")
    with no_disk_cache, disk_save:
        registry = fetch_models_dev()

    mock_get.assert_called_once()
    assert "anthropic" in registry
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_model_capabilities — vision via modalities.input
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue