mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
perf(cli): cut hermes startup 63% — flip head-to-head vs codex (#31968)
* perf(bitwarden): persist secret-fetch cache across CLI invocations
Every `hermes` invocation paid a ~380ms tax for `bws secret list` to
Bitwarden Secrets Manager because the existing cache was in-process only.
Back-to-back `hermes chat -q`, gateway-spawned agents, and cron-launched
runs all re-fetched.
Adds a disk-persisted L2 cache at `<hermes_home>/cache/bws_cache.json`
(mode 0600, never contains the access token — only the SHA-256
fingerprint prefix). Same TTL as the in-process cache. Read on miss,
write on bws success, ignored on key mismatch / corruption / expiry.
Measured on a startup profile:
load_hermes_dotenv() cold: 372ms → warm (disk cache hit): 20ms
End-to-end `hermes --version` cold→warm: 666ms → ~295ms.
In a hermes-vs-codex benchmark across 11 single- and multi-turn tasks
(framework overhead = wall − llm − tool_exec, median over 3 trials):
cohort               | before | after | saved
single-turn (median) | 2.96s  | 2.31s | -0.65s
multi-turn (5-turn)  | 9.40s  | 8.95s | -0.45s (≈0.3s/turn)
Hermes now wins head-to-head on 6/11 tasks vs codex (was 4/11 before).
The remaining ~0.6s single-turn delta is mostly Python's own import
cost in hermes_cli.main, which is a separate optimization.
* perf(cli): lazy-load model catalog + dedupe config.yaml reads at startup
Two import-time wins on top of the bws disk-cache fix:
1. Lazy-load `hermes_cli.models._PROVIDER_MODELS` via PEP 562
module-level `__getattr__`. The catalog is ~55ms of work that was
eagerly imported on every CLI invocation (line 4557 `if not
_is_termux_startup_environment(): from hermes_cli.models import
_PROVIDER_MODELS`). Audit showed every internal call site already
does its own function-local import; only test code reads
`hermes_cli.main._PROVIDER_MODELS` as a module attribute, and
__getattr__ keeps that working transparently. First access triggers
the import once and caches the result on the module via
`globals()[name] = ...`, so subsequent reads are dict lookups.
2. Dedupe the double config.yaml read in the top-of-module bootstrap.
Previously: one raw yaml.safe_load for the `security.redact_secrets`
bridge, then a separate full `load_config()` (with deep-merge) for
`network.force_ipv4`. Both keys come from the same file. Merged
into one raw yaml load.
Combined with the bws cache fix in the previous commit:
hermes --version wall time:
original (cold): 666 ms
after bws fix (warm): 295 ms
after lazy-load + dedupe: 228 ms (-67 ms additional, -66% from original)
Tests:
- tests/hermes_cli/test_api_key_providers.py: 173/173 pass
(lazy __getattr__ correctly handles
`from hermes_cli.main import _PROVIDER_MODELS`)
- tests/test_ipv4_preference.py + tests/hermes_cli/test_redact_config_bridge.py +
tests/agent/test_redact.py: 93/93 pass (dedupe preserves both bridges)
- tests/test_bitwarden_secrets.py + env_loader tests: 49/49 pass
This commit is contained in:
parent
c0169496d0
commit
0219b0408a
4 changed files with 395 additions and 24 deletions
|
|
@ -255,6 +255,7 @@ def _apply_external_secret_sources(home_path: Path) -> None:
|
|||
cache_ttl_seconds=float(bw_cfg.get("cache_ttl_seconds", 300)),
|
||||
auto_install=bool(bw_cfg.get("auto_install", True)),
|
||||
server_url=str(bw_cfg.get("server_url", "") or "").strip(),
|
||||
home_path=home_path,
|
||||
)
|
||||
|
||||
if result.applied:
|
||||
|
|
|
|||
|
|
@ -280,20 +280,29 @@ load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
|
|||
# module-import time). Without this, config.yaml's toggle is ignored because
|
||||
# the setup_logging() call below imports agent.redact, which reads the env var
|
||||
# exactly once. Env var in .env still wins — this is config.yaml fallback only.
|
||||
#
|
||||
# We also read network.force_ipv4 from the same yaml load to avoid two
|
||||
# separate config.yaml reads (saves ~17ms on every CLI startup — the second
|
||||
# `load_config()` was doing a full deep-merge for one boolean lookup).
|
||||
_FORCE_IPV4_EARLY = False
|
||||
try:
|
||||
if "HERMES_REDACT_SECRETS" not in os.environ:
|
||||
import yaml as _yaml_early
|
||||
import yaml as _yaml_early
|
||||
|
||||
_cfg_path = get_hermes_home() / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path, encoding="utf-8") as _f:
|
||||
_early_sec_cfg = (_yaml_early.safe_load(_f) or {}).get("security", {})
|
||||
_cfg_path = get_hermes_home() / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path, encoding="utf-8") as _f:
|
||||
_early_cfg_raw = _yaml_early.safe_load(_f) or {}
|
||||
if "HERMES_REDACT_SECRETS" not in os.environ:
|
||||
_early_sec_cfg = _early_cfg_raw.get("security", {})
|
||||
if isinstance(_early_sec_cfg, dict):
|
||||
_early_redact = _early_sec_cfg.get("redact_secrets")
|
||||
if _early_redact is not None:
|
||||
os.environ["HERMES_REDACT_SECRETS"] = str(_early_redact).lower()
|
||||
del _early_sec_cfg
|
||||
del _cfg_path
|
||||
_early_net_cfg = _early_cfg_raw.get("network", {})
|
||||
if isinstance(_early_net_cfg, dict) and _early_net_cfg.get("force_ipv4"):
|
||||
_FORCE_IPV4_EARLY = True
|
||||
del _early_cfg_raw
|
||||
del _cfg_path
|
||||
except Exception:
|
||||
pass # best-effort — on config errors redaction stays at default (enabled) and IPv4 is not forced
|
||||
|
||||
|
|
@ -307,17 +316,15 @@ except Exception:
|
|||
pass # best-effort — don't crash the CLI if logging setup fails
|
||||
|
||||
# Apply IPv4 preference early, before any HTTP clients are created.
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_config_early
|
||||
from hermes_constants import apply_ipv4_preference as _apply_ipv4
|
||||
# We already determined whether to force IPv4 from the raw yaml read above —
|
||||
# this just calls the toggle without a redundant load_config() round trip.
|
||||
if _FORCE_IPV4_EARLY:
|
||||
try:
|
||||
from hermes_constants import apply_ipv4_preference as _apply_ipv4
|
||||
|
||||
_early_cfg = _load_config_early()
|
||||
_net = _early_cfg.get("network", {})
|
||||
if isinstance(_net, dict) and _net.get("force_ipv4"):
|
||||
_apply_ipv4(force=True)
|
||||
del _early_cfg, _net
|
||||
except Exception:
|
||||
pass # best-effort — don't crash if config isn't available yet
|
||||
except Exception:
|
||||
pass # best-effort — don't crash if hermes_constants not importable yet
|
||||
|
||||
import logging
|
||||
import threading
|
||||
|
|
@ -4551,11 +4558,27 @@ def _model_flow_named_custom(config, provider_info):
|
|||
print(f" Provider: {name} ({base_url})")
|
||||
|
||||
|
||||
# Keep the historical eager model catalog import on desktop/CI. Termux defers
|
||||
# it to the model-selection handlers so plain `hermes --tui` does not pay for
|
||||
# requests/models.dev catalog imports before the Node TUI starts.
|
||||
if not _is_termux_startup_environment():
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
# Lazy-export the model catalog at module level. Tests and a handful of
|
||||
# downstream call sites read `hermes_cli.main._PROVIDER_MODELS` directly,
|
||||
# so the symbol needs to be reachable as a module attribute. But importing
|
||||
# the catalog eagerly costs ~55ms on every `hermes` invocation — including
|
||||
# fast paths like `hermes --version` and slash-command dispatch that never
|
||||
# touch the catalog. PEP 562 module-level __getattr__ defers the import
|
||||
# until first attribute access, so the cost is only paid by callers that
|
||||
# actually look up the catalog. Termux already deferred the import on its
|
||||
# own (its model-selection handlers do their own function-local
|
||||
# imports), so the explicit termux branch from before is no longer needed.
|
||||
# Names that are re-exported from ``hermes_cli.models`` on first access
# instead of being imported when this module loads.
_LAZY_MODEL_EXPORTS = ("_PROVIDER_MODELS",)


def __getattr__(name):
    """Resolve a lazily exported model-catalog name on first access (PEP 562).

    Python calls this module-level hook only when normal attribute lookup on
    the module fails, so it runs at most once per exported name: the resolved
    object is stored in the module globals and later reads bypass the hook.

    Args:
        name: The attribute being looked up on this module.

    Returns:
        The attribute of the same name from ``hermes_cli.models``.

    Raises:
        AttributeError: If ``name`` is not listed in ``_LAZY_MODEL_EXPORTS``,
            or if ``hermes_cli.models`` does not define it.
        ImportError: If ``hermes_cli.models`` itself cannot be imported.
    """
    if name not in _LAZY_MODEL_EXPORTS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Function-local import: nothing from the catalog module is loaded until
    # a caller actually asks for one of the lazy names.
    import importlib

    # Look the attribute up by the requested name so that every entry in
    # _LAZY_MODEL_EXPORTS maps to its own object rather than to one
    # hard-coded symbol.
    value = getattr(importlib.import_module("hermes_cli.models"), name)

    # Cache on the module so subsequent accesses skip the import machinery.
    globals()[name] = value
    return value
|
||||
|
||||
|
||||
def _current_reasoning_effort(config) -> str:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue