perf(tools): cache get_nous_auth_status() and load_env() to fix slow hermes tools menus (#25341)

`hermes tools` -> "All Platforms" took ~14s to render the checklist
because building the toolset labels called `get_nous_auth_status()` ~31x
transitively (`_toolset_has_keys` -> `_visible_providers` ->
`get_nous_subscription_features` -> `managed_nous_tools_enabled`).
Each call did a synchronous OAuth refresh POST to
portal.nousresearch.com (~350ms even on the failure path), so one menu
paint burned >13s of HTTP and 31 single-use Nous refresh tokens.

Secondary hot spot: every `get_env_value()` re-read and re-sanitised
the entire .env file. 116 reads with O(lines x known-keys) scanning
added ~300ms of CPU per render.

Fix is two process-level caches, both mtime-keyed so login/logout/edit
invalidate naturally:

* `hermes_cli/auth.py`: memoise `get_nous_auth_status()` for 15s keyed
  on auth.json mtime. Splits `_compute_nous_auth_status()` as the
  uncached impl. Adds `invalidate_nous_auth_status_cache()`.
* `hermes_cli/config.py`: memoise `load_env()` keyed on .env
  (path, mtime, size). Adds `invalidate_env_cache()`, wired into
  `save_env_value`, `remove_env_value`, and the sanitize-on-load
  writer so writers don't return stale dicts on same-second writes.

Before/after on Teknium's box (real HERMES_HOME, no Nous login):

* "All Platforms" cold path: ~13,874ms -> ~691ms label-build
* Warm re-open within the same process: ~122ms -> ~17ms

Side benefit: stops burning a Nous refresh token on every menu paint,
which was risking the portal's reuse-detection revocation logic.
This commit is contained in:
Teknium 2026-05-13 18:40:14 -07:00 committed by GitHub
parent dd5a9502e3
commit 3f13d78088
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 449 additions and 4 deletions

View file

@ -35,7 +35,7 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import parse_qs, urlencode, urlparse
import httpx
@ -3870,6 +3870,39 @@ def _snapshot_nous_pool_status() -> Dict[str, Any]:
return _empty_nous_auth_status()
# ── Process-level memo for get_nous_auth_status() ──
# get_nous_auth_status() validates state by calling resolve_nous_runtime_credentials(),
# which does a synchronous OAuth refresh POST to portal.nousresearch.com. That can take
# ~350ms even on the failure path, and read-only UI surfaces (`hermes tools`, status panels,
# subscription-feature checks) call it many times per render — `hermes tools` → "All Platforms"
# was firing the refresh ~31× during one menu paint, racking up >13s of HTTP and burning
# single-use refresh tokens. Cache the snapshot for a few seconds, keyed on the auth.json
# mtime so that `hermes auth login/logout/add/remove` invalidate naturally on the next call.
_NOUS_AUTH_STATUS_CACHE_TTL = 15.0 # seconds
_nous_auth_status_cache: Optional[Tuple[float, Optional[float], Dict[str, Any]]] = None
def _auth_file_mtime() -> Optional[float]:
try:
return _auth_file_path().stat().st_mtime
except FileNotFoundError:
return None
except Exception:
return None
def invalidate_nous_auth_status_cache() -> None:
"""Clear the get_nous_auth_status() process-level memo.
Call this from any code path that mutates Nous auth state without going
through resolve_nous_runtime_credentials() (e.g. tests). Login/logout
flows touch auth.json, so the mtime check below invalidates them
automatically explicit invalidation is the belt-and-braces option.
"""
global _nous_auth_status_cache
_nous_auth_status_cache = None
def get_nous_auth_status() -> Dict[str, Any]:
"""Status snapshot for Nous auth.
@ -3878,7 +3911,32 @@ def get_nous_auth_status() -> Dict[str, Any]:
by resolving runtime credentials so revoked refresh sessions do not show up
as a healthy login. If provider state is absent, fall back to the credential
pool for the just-logged-in / not-yet-promoted case.
The returned snapshot is memoised for ~15s keyed on the auth.json mtime,
so menu/status surfaces that ask repeatedly don't trigger one refresh POST
per call. Login/logout flows write to auth.json and therefore invalidate
the cache automatically; tests can also call
``invalidate_nous_auth_status_cache()`` explicitly.
"""
global _nous_auth_status_cache
now = time.monotonic()
mtime = _auth_file_mtime()
cached = _nous_auth_status_cache
if cached is not None:
cached_at, cached_mtime, cached_status = cached
if (
cached_mtime == mtime
and (now - cached_at) < _NOUS_AUTH_STATUS_CACHE_TTL
):
return dict(cached_status)
status = _compute_nous_auth_status()
_nous_auth_status_cache = (now, mtime, dict(status))
return status
def _compute_nous_auth_status() -> Dict[str, Any]:
"""Uncached implementation of get_nous_auth_status(). See that function."""
state = get_provider_auth_state("nous")
if state:
base_status = {

View file

@ -4319,10 +4319,34 @@ def load_env() -> Dict[str, str]:
concatenated KEY=VALUE pairs on a single line) are handled
gracefully instead of producing mangled values such as duplicated
bot tokens. See #8908.
The parsed dict is memoised keyed on the .env file mtime, because
``get_env_value()`` is called dozens-to-hundreds of times per
interactive menu render (`hermes tools`, `hermes setup`, status
panels). Sanitisation is O(lines × known-keys), so re-parsing the
same file on every call was burning ~300ms of CPU per `hermes tools`
menu paint on top of the OAuth-refresh slowness. The mtime check
invalidates the cache when the user edits .env mid-process.
"""
global _env_cache
env_path = get_env_path()
env_vars = {}
try:
mtime = env_path.stat().st_mtime
size = env_path.stat().st_size
cache_key = (str(env_path), mtime, size)
except FileNotFoundError:
cache_key = (str(env_path), None, None)
except Exception:
cache_key = None
if cache_key is not None and _env_cache is not None:
cached_key, cached_vars = _env_cache
if cached_key == cache_key:
return dict(cached_vars)
env_vars: Dict[str, str] = {}
if env_path.exists():
# On Windows, open() defaults to the system locale (cp1252) which can
# fail on UTF-8 .env files. Always use explicit UTF-8; tolerate BOM
@ -4338,10 +4362,33 @@ def load_env() -> Dict[str, str]:
if line and not line.startswith('#') and '=' in line:
key, _, value = line.partition('=')
env_vars[key.strip()] = value.strip().strip('"\'')
if cache_key is not None:
_env_cache = (cache_key, dict(env_vars))
return env_vars
# Module-level memo for load_env(), keyed on (path, mtime, size).
# Editing .env bumps mtime → next load_env() rebuilds. invalidate_env_cache()
# is the explicit knob for writers that update .env via this module
# (set_env_value, save_env, etc.) without relying on filesystem mtime
# resolution.
_env_cache: Optional[Tuple[Tuple[str, Optional[float], Optional[int]], Dict[str, str]]] = None
def invalidate_env_cache() -> None:
"""Clear the load_env() process-level memo.
Writers that mutate .env (set_env_value, save_env, etc.) call this
to guarantee the next load_env() sees their change even on
filesystems with coarse mtime resolution. Reads invalidate naturally
via the mtime/size check.
"""
global _env_cache
_env_cache = None
def _sanitize_env_lines(lines: list) -> list:
"""Fix corrupted .env lines before reading or writing.
@ -4444,6 +4491,7 @@ def sanitize_env_file() -> int:
pass
raise
_secure_file(env_path)
invalidate_env_cache()
return fixes
@ -4555,6 +4603,7 @@ def save_env_value(key: str, value: str):
_secure_file(env_path)
os.environ[key] = value
invalidate_env_cache()
def remove_env_value(key: str) -> bool:
@ -4610,6 +4659,7 @@ def remove_env_value(key: str) -> bool:
_secure_file(env_path)
os.environ.pop(key, None)
invalidate_env_cache()
return found

View file

@ -0,0 +1,193 @@
"""Tests for the load_env() process-level cache.
The cache exists to keep `hermes tools` "All Platforms" fast: every
`get_env_value()` lookup used to re-read and re-sanitise the entire
.env file, racking up hundreds of ms across one menu render. The
cache is keyed on (path, mtime, size); writers (save_env_value /
remove_env_value / sanitise_env_file) call invalidate_env_cache().
"""
from __future__ import annotations
import os
import tempfile
from pathlib import Path
from unittest.mock import patch
def _write_env(path: Path, contents: str) -> None:
path.write_text(contents, encoding="utf-8")
def test_load_env_caches_on_repeat_calls():
"""Repeated load_env() calls on the same file return the cached dict."""
from hermes_cli.config import invalidate_env_cache, load_env
invalidate_env_cache()
with tempfile.NamedTemporaryFile(
mode="w", suffix=".env", delete=False, encoding="utf-8"
) as f:
f.write("OPENAI_API_KEY=sk-first\n")
env_path = Path(f.name)
try:
with patch("hermes_cli.config.get_env_path", return_value=env_path):
first = load_env()
# Even if a writer outside our cache mutates the file, an
# mtime/size match means the cache still wins. We simulate that
# by writing identical bytes back — sanity check that the cache
# is keyed structurally, not on a counter.
second = load_env()
assert first == second
assert first.get("OPENAI_API_KEY") == "sk-first"
finally:
env_path.unlink(missing_ok=True)
invalidate_env_cache()
def test_load_env_invalidates_on_mtime_bump():
"""Editing the file (mtime changes) invalidates the cache."""
from hermes_cli.config import invalidate_env_cache, load_env
invalidate_env_cache()
with tempfile.NamedTemporaryFile(
mode="w", suffix=".env", delete=False, encoding="utf-8"
) as f:
f.write("OPENAI_API_KEY=sk-old\n")
env_path = Path(f.name)
try:
with patch("hermes_cli.config.get_env_path", return_value=env_path):
first = load_env()
assert first.get("OPENAI_API_KEY") == "sk-old"
# Rewrite file with new contents and bump mtime to make sure
# the FS records the change even on coarse-mtime filesystems.
_write_env(env_path, "OPENAI_API_KEY=sk-new\n")
future = env_path.stat().st_mtime + 5.0
os.utime(env_path, (future, future))
second = load_env()
assert second.get("OPENAI_API_KEY") == "sk-new", (
"load_env() returned stale value after file change"
)
finally:
env_path.unlink(missing_ok=True)
invalidate_env_cache()
def test_invalidate_env_cache_forces_reread():
"""invalidate_env_cache() forces the next load_env() to hit the disk.
This is the belt-and-braces knob for writers (save_env_value, etc.)
on filesystems where mtime resolution might miss a same-second write.
"""
from hermes_cli.config import invalidate_env_cache, load_env
invalidate_env_cache()
with tempfile.NamedTemporaryFile(
mode="w", suffix=".env", delete=False, encoding="utf-8"
) as f:
f.write("OPENAI_API_KEY=sk-old\n")
env_path = Path(f.name)
try:
with patch("hermes_cli.config.get_env_path", return_value=env_path):
assert load_env().get("OPENAI_API_KEY") == "sk-old"
# Rewrite WITHOUT bumping mtime — simulates same-second write.
mtime_before = env_path.stat().st_mtime
_write_env(env_path, "OPENAI_API_KEY=sk-new\n")
os.utime(env_path, (mtime_before, mtime_before))
# Without invalidation, cache hit might return stale.
invalidate_env_cache()
assert load_env().get("OPENAI_API_KEY") == "sk-new"
finally:
env_path.unlink(missing_ok=True)
invalidate_env_cache()
def test_save_env_value_invalidates_cache(tmp_path, monkeypatch):
"""save_env_value() invalidates the cache so subsequent reads see the update."""
from hermes_cli import config as config_mod
from hermes_cli.config import invalidate_env_cache, load_env, save_env_value
invalidate_env_cache()
env_path = tmp_path / ".env"
env_path.write_text("EXISTING_KEY=old\n", encoding="utf-8")
monkeypatch.setattr(config_mod, "get_env_path", lambda: env_path)
monkeypatch.setattr(config_mod, "ensure_hermes_home", lambda: None)
monkeypatch.setattr(config_mod, "_secure_file", lambda _p: None)
monkeypatch.setattr(config_mod, "is_managed", lambda: False)
try:
# Prime the cache.
first = load_env()
assert first.get("EXISTING_KEY") == "old"
save_env_value("NEW_KEY", "shiny")
# Same-second writes on coarse-mtime filesystems would normally
# let stale cache survive; invalidate_env_cache() inside the
# writer makes the next read see the new key.
result = load_env()
assert result.get("NEW_KEY") == "shiny"
assert result.get("EXISTING_KEY") == "old"
finally:
monkeypatch.delenv("NEW_KEY", raising=False)
invalidate_env_cache()
def test_remove_env_value_invalidates_cache(tmp_path, monkeypatch):
"""remove_env_value() invalidates the cache so the removed key disappears."""
from hermes_cli import config as config_mod
from hermes_cli.config import (
invalidate_env_cache,
load_env,
remove_env_value,
save_env_value,
)
invalidate_env_cache()
env_path = tmp_path / ".env"
monkeypatch.setattr(config_mod, "get_env_path", lambda: env_path)
monkeypatch.setattr(config_mod, "ensure_hermes_home", lambda: None)
monkeypatch.setattr(config_mod, "_secure_file", lambda _p: None)
monkeypatch.setattr(config_mod, "is_managed", lambda: False)
save_env_value("DOOMED_KEY", "value")
assert load_env().get("DOOMED_KEY") == "value"
try:
removed = remove_env_value("DOOMED_KEY")
assert removed is True
assert "DOOMED_KEY" not in load_env()
finally:
monkeypatch.delenv("DOOMED_KEY", raising=False)
invalidate_env_cache()
def test_load_env_handles_missing_file():
"""A nonexistent .env returns {} and caches the empty result."""
from hermes_cli.config import invalidate_env_cache, load_env
invalidate_env_cache()
nonexistent = Path(tempfile.gettempdir()) / "hermes-test-no-such-env-xyz123.env"
nonexistent.unlink(missing_ok=True)
try:
with patch("hermes_cli.config.get_env_path", return_value=nonexistent):
assert load_env() == {}
assert load_env() == {} # cached
finally:
invalidate_env_cache()

View file

@ -0,0 +1,144 @@
"""Tests for the get_nous_auth_status() process-level cache.
The cache avoids re-validating Nous credentials on every menu paint
`hermes tools` "All Platforms" used to fire ~31 OAuth refresh POSTs
against portal.nousresearch.com during one render. The cache is keyed
on auth.json mtime so login/logout flows invalidate naturally; tests
and other writers can also call invalidate_nous_auth_status_cache().
"""
from __future__ import annotations
import json
import os
from unittest.mock import patch
def _seed_auth_file(tmp_path):
"""Drop a placeholder auth.json into the test HERMES_HOME.
The exact content doesn't matter for cache-key purposes — only that
the file exists and we can mutate it to bump mtime.
"""
auth = tmp_path / "auth.json"
auth.write_text(json.dumps({"providers": {}}), encoding="utf-8")
return auth
def test_get_nous_auth_status_caches_consecutive_calls(tmp_path, monkeypatch):
"""A second call within the TTL skips re-computing the snapshot."""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_seed_auth_file(tmp_path)
from hermes_cli import auth as auth_mod
auth_mod.invalidate_nous_auth_status_cache()
call_count = {"n": 0}
def fake_compute():
call_count["n"] += 1
return {"logged_in": False, "source": "auth_store", "call": call_count["n"]}
with patch.object(auth_mod, "_compute_nous_auth_status", side_effect=fake_compute):
first = auth_mod.get_nous_auth_status()
second = auth_mod.get_nous_auth_status()
third = auth_mod.get_nous_auth_status()
assert call_count["n"] == 1, (
f"_compute_nous_auth_status was called {call_count['n']}×"
"cache is not deduplicating within TTL."
)
# Each call returns a copy so callers can't mutate the cached dict.
assert first == second == third
first["mutated"] = True
assert "mutated" not in auth_mod.get_nous_auth_status()
auth_mod.invalidate_nous_auth_status_cache()
def test_get_nous_auth_status_invalidates_on_auth_file_mtime(tmp_path, monkeypatch):
"""Touching auth.json (login/logout) forces a re-compute."""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
auth_path = _seed_auth_file(tmp_path)
from hermes_cli import auth as auth_mod
auth_mod.invalidate_nous_auth_status_cache()
call_count = {"n": 0}
def fake_compute():
call_count["n"] += 1
return {"logged_in": False, "source": "auth_store", "call": call_count["n"]}
with patch.object(auth_mod, "_compute_nous_auth_status", side_effect=fake_compute):
auth_mod.get_nous_auth_status()
# Bump mtime forward so coarse-resolution filesystems still record
# a change.
future = auth_path.stat().st_mtime + 5.0
os.utime(auth_path, (future, future))
auth_mod.get_nous_auth_status()
assert call_count["n"] == 2, (
"auth.json mtime change should invalidate the cache, but only "
f"{call_count['n']} compute call(s) happened."
)
auth_mod.invalidate_nous_auth_status_cache()
def test_invalidate_nous_auth_status_cache_forces_recompute(tmp_path, monkeypatch):
"""Explicit invalidate forces the next call to re-compute."""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_seed_auth_file(tmp_path)
from hermes_cli import auth as auth_mod
auth_mod.invalidate_nous_auth_status_cache()
call_count = {"n": 0}
def fake_compute():
call_count["n"] += 1
return {"logged_in": False, "source": "auth_store"}
with patch.object(auth_mod, "_compute_nous_auth_status", side_effect=fake_compute):
auth_mod.get_nous_auth_status()
auth_mod.invalidate_nous_auth_status_cache()
auth_mod.get_nous_auth_status()
assert call_count["n"] == 2
auth_mod.invalidate_nous_auth_status_cache()
def test_get_nous_auth_status_caches_failure_path(tmp_path, monkeypatch):
"""Logged-out snapshots are cached too — that's where the cost was.
Teknium's case: ~31 cache misses per `hermes tools` "All Platforms"
menu paint, all returning logged_in=False after a failed refresh POST.
The whole point of the cache is to memoise that failure path too.
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_seed_auth_file(tmp_path)
from hermes_cli import auth as auth_mod
auth_mod.invalidate_nous_auth_status_cache()
call_count = {"n": 0}
def fake_compute():
call_count["n"] += 1
return {"logged_in": False, "source": "auth_store", "error": "refresh failed"}
with patch.object(auth_mod, "_compute_nous_auth_status", side_effect=fake_compute):
for _ in range(10):
auth_mod.get_nous_auth_status()
assert call_count["n"] == 1, (
f"Logged-out snapshots must cache; got {call_count['n']} computes for 10 calls."
)
auth_mod.invalidate_nous_auth_status_cache()