hermes-agent/tests/conftest.py
Teknium 70111eea24 feat(plugins): make all plugins opt-in by default
Plugins now require explicit consent to load. Discovery still finds every
plugin — user-installed, bundled, and pip — so they all show up in
`hermes plugins` and `/plugins`, but the loader only instantiates
plugins whose name appears in `plugins.enabled` in config.yaml. This
removes the previous ambient-execution risk where a newly-installed or
bundled plugin could register hooks, tools, and commands on first run
without the user opting in.

The three-state model is now explicit:
  enabled     — in plugins.enabled, loads on next session
  disabled    — in plugins.disabled, never loads (wins over enabled)
  not enabled — discovered but never opted in (default for new installs)

`hermes plugins install <repo>` prompts "Enable 'name' now? [y/N]"
(defaults to no). New `--enable` / `--no-enable` flags skip the prompt
for scripted installs. `hermes plugins enable/disable` manage both lists
so a disabled plugin stays explicitly off even if something later adds
it to enabled.

Config migration (schema v20 → v21): existing user plugins already
installed under ~/.hermes/plugins/ (minus anything in plugins.disabled)
are auto-grandfathered into plugins.enabled so upgrades don't silently
break working setups. Bundled plugins are NOT grandfathered — even
existing users have to opt in explicitly.

Also: HERMES_DISABLE_BUNDLED_PLUGINS env var removed (redundant with
opt-in default), cmd_list now shows bundled + user plugins together with
their three-state status, interactive UI tags bundled entries
[bundled], docs updated across plugins.md and built-in-plugins.md.

Validation: 442 plugin/config tests pass. E2E: fresh install discovers
disk-cleanup but does not load it; `hermes plugins enable disk-cleanup`
activates hooks; migration grandfathers existing user plugins correctly
while leaving bundled plugins off.
2026-04-20 04:46:45 -07:00

337 lines
11 KiB
Python

"""Shared fixtures for the hermes-agent test suite.
Hermetic-test invariants enforced here (see AGENTS.md for rationale):
1. **No credential env vars.** All provider/credential-shaped env vars
(ending in _API_KEY, _TOKEN, _SECRET, _PASSWORD, _CREDENTIALS, etc.)
are unset before every test. Local developer keys cannot leak in.
2. **Isolated HERMES_HOME.** HERMES_HOME points to a per-test tempdir so
code reading ``~/.hermes/*`` via ``get_hermes_home()`` can't see the
real one. (We do NOT also redirect HOME — that broke subprocesses in
CI. Code using ``Path.home() / ".hermes"`` instead of the canonical
``get_hermes_home()`` is a bug to fix at the callsite.)
3. **Deterministic runtime.** TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0.
4. **No HERMES_SESSION_* inheritance** — the agent's current gateway
session must not leak into tests.
These invariants make the local test run match CI closely. Gaps that
remain (CPU count, xdist worker count) are addressed by the canonical
test runner at ``scripts/run_tests.sh``.
"""
import asyncio
import os
import re
import signal
import sys
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
# Ensure project root is importable
PROJECT_ROOT = Path(__file__).parent.parent
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
# ── Credential env-var filter ──────────────────────────────────────────────
#
# Any env var in the current process matching ONE of these patterns is
# unset for every test. Developers' local keys cannot leak into assertions
# about "auto-detect provider when key present".
_CREDENTIAL_SUFFIXES = (
"_API_KEY",
"_TOKEN",
"_SECRET",
"_PASSWORD",
"_CREDENTIALS",
"_ACCESS_KEY",
"_SECRET_ACCESS_KEY",
"_PRIVATE_KEY",
"_OAUTH_TOKEN",
"_WEBHOOK_SECRET",
"_ENCRYPT_KEY",
"_APP_SECRET",
"_CLIENT_SECRET",
"_CORP_SECRET",
"_AES_KEY",
)
# Explicit names (for ones that don't fit the suffix pattern)
_CREDENTIAL_NAMES = frozenset({
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN",
"ANTHROPIC_TOKEN",
"FAL_KEY",
"GH_TOKEN",
"GITHUB_TOKEN",
"OPENAI_API_KEY",
"OPENROUTER_API_KEY",
"NOUS_API_KEY",
"GEMINI_API_KEY",
"GOOGLE_API_KEY",
"GROQ_API_KEY",
"XAI_API_KEY",
"MISTRAL_API_KEY",
"DEEPSEEK_API_KEY",
"KIMI_API_KEY",
"MOONSHOT_API_KEY",
"GLM_API_KEY",
"ZAI_API_KEY",
"MINIMAX_API_KEY",
"OLLAMA_API_KEY",
"OPENVIKING_API_KEY",
"COPILOT_API_KEY",
"CLAUDE_CODE_OAUTH_TOKEN",
"BROWSERBASE_API_KEY",
"FIRECRAWL_API_KEY",
"PARALLEL_API_KEY",
"EXA_API_KEY",
"TAVILY_API_KEY",
"WANDB_API_KEY",
"ELEVENLABS_API_KEY",
"HONCHO_API_KEY",
"MEM0_API_KEY",
"SUPERMEMORY_API_KEY",
"RETAINDB_API_KEY",
"HINDSIGHT_API_KEY",
"HINDSIGHT_LLM_API_KEY",
"TINKER_API_KEY",
"DAYTONA_API_KEY",
"TWILIO_AUTH_TOKEN",
"TELEGRAM_BOT_TOKEN",
"DISCORD_BOT_TOKEN",
"SLACK_BOT_TOKEN",
"SLACK_APP_TOKEN",
"MATTERMOST_TOKEN",
"MATRIX_ACCESS_TOKEN",
"MATRIX_PASSWORD",
"MATRIX_RECOVERY_KEY",
"HASS_TOKEN",
"EMAIL_PASSWORD",
"BLUEBUBBLES_PASSWORD",
"FEISHU_APP_SECRET",
"FEISHU_ENCRYPT_KEY",
"FEISHU_VERIFICATION_TOKEN",
"DINGTALK_CLIENT_SECRET",
"QQ_CLIENT_SECRET",
"QQ_STT_API_KEY",
"WECOM_SECRET",
"WECOM_CALLBACK_CORP_SECRET",
"WECOM_CALLBACK_TOKEN",
"WECOM_CALLBACK_ENCODING_AES_KEY",
"WEIXIN_TOKEN",
"MODAL_TOKEN_ID",
"MODAL_TOKEN_SECRET",
"TERMINAL_SSH_KEY",
"SUDO_PASSWORD",
"GATEWAY_PROXY_KEY",
"API_SERVER_KEY",
"TOOL_GATEWAY_USER_TOKEN",
"TELEGRAM_WEBHOOK_SECRET",
"WEBHOOK_SECRET",
"AI_GATEWAY_API_KEY",
"VOICE_TOOLS_OPENAI_KEY",
"BROWSER_USE_API_KEY",
"CUSTOM_API_KEY",
"GATEWAY_PROXY_URL",
"GEMINI_BASE_URL",
"OPENAI_BASE_URL",
"OPENROUTER_BASE_URL",
"OLLAMA_BASE_URL",
"GROQ_BASE_URL",
"XAI_BASE_URL",
"AI_GATEWAY_BASE_URL",
"ANTHROPIC_BASE_URL",
})
def _looks_like_credential(name: str) -> bool:
"""True if env var name matches a credential-shaped pattern."""
if name in _CREDENTIAL_NAMES:
return True
return any(name.endswith(suf) for suf in _CREDENTIAL_SUFFIXES)
# HERMES_* vars that change test behavior by being set. Unset all of these
# unconditionally — individual tests that need them set do so explicitly.
_HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_YOLO_MODE",
"HERMES_INTERACTIVE",
"HERMES_QUIET",
"HERMES_TOOL_PROGRESS",
"HERMES_TOOL_PROGRESS_MODE",
"HERMES_MAX_ITERATIONS",
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_SESSION_THREAD_ID",
"HERMES_SESSION_SOURCE",
"HERMES_SESSION_KEY",
"HERMES_GATEWAY_SESSION",
"HERMES_PLATFORM",
"HERMES_INFERENCE_PROVIDER",
"HERMES_MANAGED",
"HERMES_DEV",
"HERMES_CONTAINER",
"HERMES_EPHEMERAL_SYSTEM_PROMPT",
"HERMES_TIMEZONE",
"HERMES_REDACT_SECRETS",
"HERMES_BACKGROUND_NOTIFICATIONS",
"HERMES_EXEC_ASK",
"HERMES_HOME_MODE",
"BROWSER_CDP_URL",
"CAMOFOX_URL",
})
@pytest.fixture(autouse=True)
def _hermetic_environment(tmp_path, monkeypatch):
"""Blank out all credential/behavioral env vars so local and CI match.
Also redirects HOME and HERMES_HOME to per-test tempdirs so code that
reads ``~/.hermes/*`` can't touch the real one, and pins TZ/LANG so
datetime/locale-sensitive tests are deterministic.
"""
# 1. Blank every credential-shaped env var that's currently set.
for name in list(os.environ.keys()):
if _looks_like_credential(name):
monkeypatch.delenv(name, raising=False)
# 2. Blank behavioral HERMES_* vars that could change test semantics.
for name in _HERMES_BEHAVIORAL_VARS:
monkeypatch.delenv(name, raising=False)
# 3. Redirect HERMES_HOME to a per-test tempdir. Code that reads
# ``~/.hermes/*`` via ``get_hermes_home()`` now gets the tempdir.
#
# NOTE: We do NOT also redirect HOME. Doing so broke CI because
# some tests (and their transitive deps) spawn subprocesses that
# inherit HOME and expect it to be stable. If a test genuinely
# needs HOME isolated, it should set it explicitly in its own
# fixture. Any code in the codebase reading ``~/.hermes/*`` via
# ``Path.home() / ".hermes"`` instead of ``get_hermes_home()``
# is a bug to fix at the callsite.
fake_hermes_home = tmp_path / "hermes_test"
fake_hermes_home.mkdir()
(fake_hermes_home / "sessions").mkdir()
(fake_hermes_home / "cron").mkdir()
(fake_hermes_home / "memories").mkdir()
(fake_hermes_home / "skills").mkdir()
monkeypatch.setenv("HERMES_HOME", str(fake_hermes_home))
# 4. Deterministic locale / timezone / hashseed. CI runs in UTC with
# C.UTF-8 locale; local dev often doesn't. Pin everything.
monkeypatch.setenv("TZ", "UTC")
monkeypatch.setenv("LANG", "C.UTF-8")
monkeypatch.setenv("LC_ALL", "C.UTF-8")
monkeypatch.setenv("PYTHONHASHSEED", "0")
# 4b. Disable AWS IMDS lookups. Without this, any test that ends up
# calling has_aws_credentials() / resolve_aws_auth_env_var()
# (e.g. provider auto-detect, status command, cron run_job) burns
# ~2s waiting for the metadata service at 169.254.169.254 to time
# out. Tests don't run on EC2 — IMDS is always unreachable here.
monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1")
monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1")
# 5. Reset plugin singleton so tests don't leak plugins from
# ~/.hermes/plugins/ (which, per step 3, is now empty — but the
# singleton might still be cached from a previous test).
try:
import hermes_cli.plugins as _plugins_mod
monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
except Exception:
pass
# Backward-compat alias — old tests reference this fixture name. Keep it
# as a no-op wrapper so imports don't break.
@pytest.fixture(autouse=True)
def _isolate_hermes_home(_hermetic_environment):
"""Alias preserved for any test that yields this name explicitly."""
return None
@pytest.fixture()
def tmp_dir(tmp_path):
"""Provide a temporary directory that is cleaned up automatically."""
return tmp_path
@pytest.fixture()
def mock_config():
"""Return a minimal hermes config dict suitable for unit tests."""
return {
"model": "test/mock-model",
"toolsets": ["terminal", "file"],
"max_turns": 10,
"terminal": {
"backend": "local",
"cwd": "/tmp",
"timeout": 30,
},
"compression": {"enabled": False},
"memory": {"memory_enabled": False, "user_profile_enabled": False},
"command_allowlist": [],
}
# ── Global test timeout ─────────────────────────────────────────────────────
# Kill any individual test that takes longer than 30 seconds.
# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
# entire test suite.
def _timeout_handler(signum, frame):
raise TimeoutError("Test exceeded 30 second timeout")
@pytest.fixture(autouse=True)
def _ensure_current_event_loop(request):
"""Provide a default event loop for sync tests that call get_event_loop().
Python 3.11+ no longer guarantees a current loop for plain synchronous tests.
A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...).
Ensure they always have a usable loop without interfering with pytest-asyncio's
own loop management for @pytest.mark.asyncio tests.
"""
if request.node.get_closest_marker("asyncio") is not None:
yield
return
try:
loop = asyncio.get_event_loop_policy().get_event_loop()
except RuntimeError:
loop = None
created = loop is None or loop.is_closed()
if created:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
yield
finally:
if created and loop is not None:
try:
loop.close()
finally:
asyncio.set_event_loop(None)
@pytest.fixture(autouse=True)
def _enforce_test_timeout():
"""Kill any individual test that takes longer than 30 seconds.
SIGALRM is Unix-only; skip on Windows."""
if sys.platform == "win32":
yield
return
old = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(30)
yield
signal.alarm(0)
signal.signal(signal.SIGALRM, old)