perf(tools): cache get_nous_auth_status() and load_env() to fix slow hermes tools menus (#25341)

`hermes tools` -> "All Platforms" took ~14s to render the checklist
because building the toolset labels called `get_nous_auth_status()` ~31x
transitively (`_toolset_has_keys` -> `_visible_providers` ->
`get_nous_subscription_features` -> `managed_nous_tools_enabled`).
Each call did a synchronous OAuth refresh POST to
portal.nousresearch.com (~350ms even on the failure path), so one menu
paint burned >13s of HTTP and 31 single-use Nous refresh tokens.

Secondary hot spot: every `get_env_value()` re-read and re-sanitised
the entire .env file. 116 reads with O(lines x known-keys) scanning
added ~300ms of CPU per render.

Fix is two process-level caches, both mtime-keyed so login/logout/edit
invalidate naturally:

* `hermes_cli/auth.py`: memoise `get_nous_auth_status()` for 15s keyed
  on auth.json mtime. Splits `_compute_nous_auth_status()` as the
  uncached impl. Adds `invalidate_nous_auth_status_cache()`.
* `hermes_cli/config.py`: memoise `load_env()` keyed on .env
  (path, mtime, size). Adds `invalidate_env_cache()`, wired into
  `save_env_value`, `remove_env_value`, and the sanitize-on-load
  writer so reads after a same-second write don't return stale dicts.

Before/after on Teknium's box (real HERMES_HOME, no Nous login):

* "All Platforms" cold path: ~13,874ms -> ~691ms label-build
* Warm re-open within the same process: ~122ms -> ~17ms

Side benefit: stops burning a Nous refresh token on every menu paint,
which was risking the portal's reuse-detection revocation logic.
This commit is contained in:
Teknium 2026-05-13 18:40:14 -07:00 committed by GitHub
parent dd5a9502e3
commit 3f13d78088
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 449 additions and 4 deletions

View file

@ -35,7 +35,7 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import parse_qs, urlencode, urlparse
import httpx
@ -3870,6 +3870,39 @@ def _snapshot_nous_pool_status() -> Dict[str, Any]:
return _empty_nous_auth_status()
# ── Process-level memo for get_nous_auth_status() ──
# get_nous_auth_status() validates state by calling resolve_nous_runtime_credentials(),
# which does a synchronous OAuth refresh POST to portal.nousresearch.com. That can take
# ~350ms even on the failure path, and read-only UI surfaces (`hermes tools`, status panels,
# subscription-feature checks) call it many times per render — `hermes tools` → "All Platforms"
# was firing the refresh ~31× during one menu paint, racking up >13s of HTTP and burning
# single-use refresh tokens. Cache the snapshot for a few seconds, keyed on the auth.json
# mtime so that `hermes auth login/logout/add/remove` invalidate naturally on the next call.
# Maximum age of a cached snapshot; compared against time.monotonic() deltas.
_NOUS_AUTH_STATUS_CACHE_TTL = 15.0 # seconds
# Single-entry cache. None means nothing is cached; otherwise the tuple is
# (time.monotonic() reading taken when the snapshot was requested,
#  auth file mtime associated with the snapshot, or None if it could not be stat'ed,
#  private copy of the status dict).
_nous_auth_status_cache: Optional[Tuple[float, Optional[float], Dict[str, Any]]] = None
def _auth_file_mtime() -> Optional[float]:
    """Return the modification time of the auth file, or None if unavailable.

    The path comes from ``_auth_file_path()``. Any failure while locating or
    stat-ing the file (including the file simply not existing) is reported as
    ``None`` rather than raised, so callers can use the result directly as a
    cache-key component.
    """
    try:
        modified_at = _auth_file_path().stat().st_mtime
    except Exception:
        # Best-effort: a missing file (FileNotFoundError) and every other
        # failure are treated the same way — "no mtime available".
        return None
    return modified_at
def invalidate_nous_auth_status_cache() -> None:
    """Drop the process-level memo used by get_nous_auth_status().

    Intended for code paths that change Nous auth state without going through
    resolve_nous_runtime_credentials() (e.g. tests). Login/logout flows touch
    auth.json, and the mtime comparison in get_nous_auth_status() already
    invalidates the memo for them; calling this explicitly is the
    belt-and-braces option.
    """
    global _nous_auth_status_cache
    # Resetting to None forces the next get_nous_auth_status() call to recompute.
    _nous_auth_status_cache = None
def get_nous_auth_status() -> Dict[str, Any]:
    """Status snapshot for Nous auth.

    Provider state is validated by resolving runtime credentials so revoked
    refresh sessions do not show up as a healthy login. If provider state is
    absent, fall back to the credential pool for the just-logged-in /
    not-yet-promoted case.

    The returned snapshot is memoised for ~15s keyed on the auth.json mtime,
    so menu/status surfaces that ask repeatedly don't trigger one refresh POST
    per call. Login/logout flows write to auth.json and therefore invalidate
    the cache automatically; tests can also call
    ``invalidate_nous_auth_status_cache()`` explicitly.

    Returns:
        The status dict. The memo keeps its own shallow copy, so mutating the
        top level of the returned dict does not affect later calls.
    """
    global _nous_auth_status_cache

    now = time.monotonic()
    cached = _nous_auth_status_cache
    if cached is not None:
        cached_at, cached_mtime, cached_status = cached
        if (
            cached_mtime == _auth_file_mtime()
            and (now - cached_at) < _NOUS_AUTH_STATUS_CACHE_TTL
        ):
            return dict(cached_status)

    status = _compute_nous_auth_status()
    # Sample the mtime *after* computing: validating credentials may itself
    # rewrite auth.json (e.g. a refreshed token being persisted). Keying the
    # entry on the pre-compute mtime would then make the very next call miss
    # and fire another refresh, defeating the memo. The cost is that an
    # unrelated write landing during the compute can be masked, but only
    # until the TTL expires.
    _nous_auth_status_cache = (now, _auth_file_mtime(), dict(status))
    return status
def _compute_nous_auth_status() -> Dict[str, Any]:
"""Uncached implementation of get_nous_auth_status(). See that function."""
state = get_provider_auth_state("nous")
if state:
base_status = {

View file

@ -4319,10 +4319,34 @@ def load_env() -> Dict[str, str]:
concatenated KEY=VALUE pairs on a single line) are handled
gracefully instead of producing mangled values such as duplicated
bot tokens. See #8908.
The parsed dict is memoised keyed on the .env file mtime, because
``get_env_value()`` is called dozens-to-hundreds of times per
interactive menu render (`hermes tools`, `hermes setup`, status
panels). Sanitisation is O(lines × known-keys), so re-parsing the
same file on every call was burning ~300ms of CPU per `hermes tools`
menu paint on top of the OAuth-refresh slowness. The mtime check
invalidates the cache when the user edits .env mid-process.
"""
global _env_cache
env_path = get_env_path()
env_vars = {}
try:
mtime = env_path.stat().st_mtime
size = env_path.stat().st_size
cache_key = (str(env_path), mtime, size)
except FileNotFoundError:
cache_key = (str(env_path), None, None)
except Exception:
cache_key = None
if cache_key is not None and _env_cache is not None:
cached_key, cached_vars = _env_cache
if cached_key == cache_key:
return dict(cached_vars)
env_vars: Dict[str, str] = {}
if env_path.exists():
# On Windows, open() defaults to the system locale (cp1252) which can
# fail on UTF-8 .env files. Always use explicit UTF-8; tolerate BOM
@ -4338,10 +4362,33 @@ def load_env() -> Dict[str, str]:
if line and not line.startswith('#') and '=' in line:
key, _, value = line.partition('=')
env_vars[key.strip()] = value.strip().strip('"\'')
if cache_key is not None:
_env_cache = (cache_key, dict(env_vars))
return env_vars
# Module-level memo for load_env(), keyed on (path, mtime, size).
# Editing .env bumps mtime → next load_env() rebuilds. invalidate_env_cache()
# is the explicit knob for writers that update .env via this module
# (sanitize_env_file, save_env_value, remove_env_value) without relying on
# filesystem mtime resolution.
# None means nothing is cached; otherwise the entry is (cache_key, parsed_vars)
# where cache_key is (str(env_path), mtime, size), with mtime and size both
# None when the .env file does not exist.
_env_cache: Optional[Tuple[Tuple[str, Optional[float], Optional[int]], Dict[str, str]]] = None
def invalidate_env_cache() -> None:
    """Drop the process-level memo used by load_env().

    Code in this module that rewrites .env (sanitize_env_file,
    save_env_value, remove_env_value) calls this after writing, so the next
    load_env() is guaranteed to re-read the file even on filesystems whose
    mtime resolution is too coarse to register the change. Edits made outside
    this module are picked up by load_env()'s own mtime/size comparison.
    """
    global _env_cache
    # Resetting to None forces the next load_env() call to re-parse the file.
    _env_cache = None
def _sanitize_env_lines(lines: list) -> list:
"""Fix corrupted .env lines before reading or writing.
@ -4444,6 +4491,7 @@ def sanitize_env_file() -> int:
pass
raise
_secure_file(env_path)
invalidate_env_cache()
return fixes
@ -4555,6 +4603,7 @@ def save_env_value(key: str, value: str):
_secure_file(env_path)
os.environ[key] = value
invalidate_env_cache()
def remove_env_value(key: str) -> bool:
@ -4610,6 +4659,7 @@ def remove_env_value(key: str) -> bool:
_secure_file(env_path)
os.environ.pop(key, None)
invalidate_env_cache()
return found