hermes-agent/tests/agent/test_external_skills_dirs_cache.py
Teknium 0ec052ca24
perf(cli): cut ~19s from 'hermes' cold start (skills cache + lazy Feishu + no Nous HTTP) (#22138)
Interactive `hermes` launch drops from ~21s to ~2.5s. Three independent
fixes, each targets a distinct hot spot in the banner / tool-registration
path that fires on every CLI invocation.

1. `get_external_skills_dirs()` in-process mtime cache (~10s saved)
   The function re-read + YAML-parsed the full ~/.hermes/config.yaml on
   every call. Banner build invokes it once per skill to resolve the
   category column, which on a 120-skill install meant ~120 reparses of
   a 15 KB config (~85 ms each). Added a
   `(config_path, mtime_ns) -> list[Path]` memo; stat() is ~2 us vs
   ~85 ms for the parse. Edits to config.yaml invalidate the cache on
   the next call via mtime.

2. Feishu availability probe uses `importlib.util.find_spec` (~5.2s saved)
   `tools/feishu_doc_tool.py::_check_feishu` and the identical helper in
   `feishu_drive_tool.py` were calling `import lark_oapi` purely to
   detect whether the SDK was installed. Executing the real import pulls
   in websockets + dispatcher + every v2 API model — ~5 seconds of work
   that fires at every tool-registry bootstrap. `find_spec` answers the
   same question ("is lark_oapi importable?") without executing the
   module. The actual tool handlers still do the real import on invoke,
   so runtime behavior is unchanged.

3. `_web_requires_env` no longer triggers Nous portal refresh (~800ms saved)
   `tools/web_tools.py::_web_requires_env` used
   `managed_nous_tools_enabled()` to gate four gateway env-var names in
   the returned list. The gate called `get_nous_auth_status()` ->
   `resolve_nous_runtime_credentials()` -> live HTTP POST to the portal
   on every tool-registry bootstrap. But the list is pure metadata — if
   the env var is set at runtime, the tool lights up; otherwise it
   doesn't. Including the four names unconditionally is harmless for
   unsubscribed users (vars just aren't set) and eliminates the sync
   HTTP round trip from startup.

Test:
- tests/agent/test_external_skills_dirs_cache.py (new, 6 cases):
  returns config'd dir, caches on second call (yaml_load patched to
  raise — never invoked), invalidates on mtime bump, empty when config
  missing, returned list is a defensive copy, per-HERMES_HOME cache key
  isolation.
- Existing tests/agent/test_external_skills.py and tests/tools/
  continue to pass modulo pre-existing flakes on main (test_delegate,
  test_send_message — unrelated, pass in isolation).

Measured: bare `hermes` (cold → REPL ready) 21,519ms -> 2,618ms on
Teknium's install (119 skills, 15 KB config.yaml, Nous auth logged in,
lark_oapi installed). 8x faster.
2026-05-08 16:39:32 -07:00

149 lines
4.7 KiB
Python

"""Guards for ``get_external_skills_dirs`` mtime-based memo.
``get_external_skills_dirs()`` is called once per skill during banner
construction and tool registration — on a typical install that's 120+
calls. Without caching, each call re-reads + YAML-parses the full
config.yaml (~85ms each, 10+ seconds total). This test pins the
behavior: first call parses, subsequent calls return cached result,
cache invalidates when config.yaml's mtime changes.
"""
from __future__ import annotations
import os
import time
from pathlib import Path
from unittest.mock import patch
import pytest
from agent import skill_utils
from agent.skill_utils import (
_external_dirs_cache_clear,
get_external_skills_dirs,
)
@pytest.fixture
def hermes_home_with_config(tmp_path, monkeypatch):
    """Build a throwaway Hermes home whose config.yaml lists one external dir.

    Creates ``<tmp_path>/.hermes/config.yaml`` pointing at
    ``<tmp_path>/external_skills``, sets ``HERMES_HOME`` to that directory,
    and makes ``Path.home()`` return ``tmp_path``.

    Yields:
        ``(home, external, config)`` — the Hermes home directory, the
        external skills directory, and the config file path.

    ``_external_dirs_cache_clear()`` is called right before the yield and
    again after the test body finishes.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    skills_dir = tmp_path / "external_skills"
    skills_dir.mkdir()

    config_file = hermes_dir / "config.yaml"
    yaml_text = f"skills:\n external_dirs:\n - {skills_dir}\n"
    config_file.write_text(yaml_text, encoding="utf-8")

    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    _external_dirs_cache_clear()
    yield hermes_dir, skills_dir, config_file
    _external_dirs_cache_clear()
def test_returns_configured_external_dir(hermes_home_with_config):
    """The single directory listed in config.yaml comes back, resolved."""
    external_dir = hermes_home_with_config[1]
    expected = [external_dir.resolve()]
    assert get_external_skills_dirs() == expected
def test_cache_reuses_result_without_reparsing(hermes_home_with_config):
    """Subsequent calls hit the cache and skip YAML parsing entirely.

    ``skill_utils.yaml_load`` is replaced with a mock that raises if
    invoked. The test does not rely on that exception reaching us: it also
    checks that the mock was never called and that every call returns the
    primed result.
    """
    # Prime cache
    primed = get_external_skills_dirs()

    # NOTE(review): whether get_external_skills_dirs swallows parser
    # exceptions is not visible from this file. If it does, the raising
    # side_effect alone would let a cache miss pass silently, hence the
    # explicit result comparison and assert_not_called() below.
    with patch.object(
        skill_utils,
        "yaml_load",
        side_effect=AssertionError("yaml_load should not run on cache hit"),
    ) as fake_yaml_load:
        # Many calls, none should trigger the patched yaml_load.
        for _ in range(100):
            assert get_external_skills_dirs() == primed

    fake_yaml_load.assert_not_called()
def test_cache_invalidates_on_mtime_change(hermes_home_with_config):
    """A config.yaml edit invalidates the cache on the next call.

    Primes the cache, rewrites config.yaml to point at a different
    directory, pushes the file's modification time forward, and expects the
    next call to return the new directory.
    """
    _home, external, config = hermes_home_with_config
    other = external.parent / "other_skills"
    other.mkdir()

    # Prime cache with original contents.
    first = get_external_skills_dirs()
    assert first == [external.resolve()]

    # Rewrite config; bump mtime forward explicitly so filesystems with
    # coarse mtime granularity still register the change on fast test
    # systems.
    config.write_text(
        f"skills:\n external_dirs:\n - {other}\n",
        encoding="utf-8",
    )
    # Base the bump on the *modification* time (not atime) and work in
    # integer nanoseconds so no precision is lost to float rounding.
    bumped_ns = config.stat().st_mtime_ns + 10 * 1_000_000_000
    os.utime(config, ns=(bumped_ns, bumped_ns))

    second = get_external_skills_dirs()
    assert second == [other.resolve()]
def test_returns_empty_when_config_missing(tmp_path, monkeypatch):
    """A Hermes home without a config.yaml yields an empty list."""
    bare_home = tmp_path / ".hermes"
    bare_home.mkdir()

    monkeypatch.setenv("HERMES_HOME", str(bare_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    _external_dirs_cache_clear()

    found = get_external_skills_dirs()
    assert found == []
def test_returned_list_is_a_copy(hermes_home_with_config):
    """Mutating a returned list must not leak into later results."""
    intruder = Path("/tmp/should-not-persist")

    mutated = get_external_skills_dirs()
    mutated.append(intruder)

    fresh = get_external_skills_dirs()
    assert intruder not in fresh
def test_cache_key_is_per_config_path(tmp_path, monkeypatch):
    """Two different HERMES_HOMEs keep separate cache entries."""

    def _make_home(label):
        # Create <tmp>/home_<label>/.hermes with a config.yaml that points
        # at a freshly created <tmp>/ext_<label> directory.
        hermes_dir = tmp_path / f"home_{label}" / ".hermes"
        hermes_dir.mkdir(parents=True)
        ext_dir = tmp_path / f"ext_{label}"
        ext_dir.mkdir()
        (hermes_dir / "config.yaml").write_text(
            f"skills:\n external_dirs:\n - {ext_dir}\n", encoding="utf-8"
        )
        return hermes_dir, ext_dir

    home_a, ext_a = _make_home("a")
    home_b, ext_b = _make_home("b")
    _external_dirs_cache_clear()

    # A, then B, then back to A — the final step checks that switching
    # homes did not disturb the result for the first one.
    visits = ((home_a, ext_a), (home_b, ext_b), (home_a, ext_a))
    for hermes_dir, ext_dir in visits:
        monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
        assert get_external_skills_dirs() == [ext_dir.resolve()]