"""Shared fixtures for the hermes-agent test suite. Hermetic-test invariants enforced here (see AGENTS.md for rationale): 1. **No credential env vars.** All provider/credential-shaped env vars (ending in _API_KEY, _TOKEN, _SECRET, _PASSWORD, _CREDENTIALS, etc.) are unset before every test. Local developer keys cannot leak in. 2. **Isolated HERMES_HOME.** HERMES_HOME points to a per-test tempdir so code reading ``~/.hermes/*`` via ``get_hermes_home()`` can't see the real one. (We do NOT also redirect HOME — that broke subprocesses in CI. Code using ``Path.home() / ".hermes"`` instead of the canonical ``get_hermes_home()`` is a bug to fix at the callsite.) 3. **Deterministic runtime.** TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0. 4. **No HERMES_SESSION_* inheritance** — the agent's current gateway session must not leak into tests. These invariants make the local test run match CI closely. Gaps that remain (CPU count, xdist worker count) are addressed by the canonical test runner at ``scripts/run_tests.sh``. """ import asyncio import os import re import signal import sys import tempfile from pathlib import Path from unittest.mock import patch import pytest # Ensure project root is importable PROJECT_ROOT = Path(__file__).parent.parent if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) # ── Credential env-var filter ────────────────────────────────────────────── # # Any env var in the current process matching ONE of these patterns is # unset for every test. Developers' local keys cannot leak into assertions # about "auto-detect provider when key present". _CREDENTIAL_SUFFIXES = ( "_API_KEY", "_TOKEN", "_SECRET", "_PASSWORD", "_CREDENTIALS", "_ACCESS_KEY", "_SECRET_ACCESS_KEY", "_PRIVATE_KEY", "_OAUTH_TOKEN", "_WEBHOOK_SECRET", "_ENCRYPT_KEY", "_APP_SECRET", "_CLIENT_SECRET", "_CORP_SECRET", "_AES_KEY", ) # Explicit names (for ones that don't fit the suffix pattern) _CREDENTIAL_NAMES = frozenset({ "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN", "ANTHROPIC_TOKEN", "FAL_KEY", "GH_TOKEN", "GITHUB_TOKEN", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "NOUS_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY", "GROQ_API_KEY", "XAI_API_KEY", "MISTRAL_API_KEY", "DEEPSEEK_API_KEY", "KIMI_API_KEY", "MOONSHOT_API_KEY", "GLM_API_KEY", "ZAI_API_KEY", "MINIMAX_API_KEY", "OLLAMA_API_KEY", "OPENVIKING_API_KEY", "COPILOT_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN", "BROWSERBASE_API_KEY", "FIRECRAWL_API_KEY", "PARALLEL_API_KEY", "EXA_API_KEY", "TAVILY_API_KEY", "WANDB_API_KEY", "ELEVENLABS_API_KEY", "HONCHO_API_KEY", "MEM0_API_KEY", "SUPERMEMORY_API_KEY", "RETAINDB_API_KEY", "HINDSIGHT_API_KEY", "HINDSIGHT_LLM_API_KEY", "TINKER_API_KEY", "DAYTONA_API_KEY", "TWILIO_AUTH_TOKEN", "TELEGRAM_BOT_TOKEN", "DISCORD_BOT_TOKEN", "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "MATTERMOST_TOKEN", "MATRIX_ACCESS_TOKEN", "MATRIX_PASSWORD", "MATRIX_RECOVERY_KEY", "HASS_TOKEN", "EMAIL_PASSWORD", "BLUEBUBBLES_PASSWORD", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN", "DINGTALK_CLIENT_SECRET", "QQ_CLIENT_SECRET", "QQ_STT_API_KEY", "WECOM_SECRET", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY", "WEIXIN_TOKEN", "MODAL_TOKEN_ID", "MODAL_TOKEN_SECRET", "TERMINAL_SSH_KEY", "SUDO_PASSWORD", "GATEWAY_PROXY_KEY", "API_SERVER_KEY", "TOOL_GATEWAY_USER_TOKEN", "TELEGRAM_WEBHOOK_SECRET", "WEBHOOK_SECRET", "AI_GATEWAY_API_KEY", "VOICE_TOOLS_OPENAI_KEY", "BROWSER_USE_API_KEY", "CUSTOM_API_KEY", "GATEWAY_PROXY_URL", "GEMINI_BASE_URL", "OPENAI_BASE_URL", "OPENROUTER_BASE_URL", "OLLAMA_BASE_URL", "GROQ_BASE_URL", "XAI_BASE_URL", "AI_GATEWAY_BASE_URL", "ANTHROPIC_BASE_URL", }) def _looks_like_credential(name: str) -> bool: """True if env var name matches a credential-shaped pattern.""" if name in _CREDENTIAL_NAMES: return True return any(name.endswith(suf) for suf in _CREDENTIAL_SUFFIXES) # HERMES_* vars that change test behavior by being set. Unset all of these # unconditionally — individual tests that need them set do so explicitly. _HERMES_BEHAVIORAL_VARS = frozenset({ "HERMES_YOLO_MODE", "HERMES_INTERACTIVE", "HERMES_QUIET", "HERMES_TOOL_PROGRESS", "HERMES_TOOL_PROGRESS_MODE", "HERMES_MAX_ITERATIONS", "HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME", "HERMES_SESSION_THREAD_ID", "HERMES_SESSION_SOURCE", "HERMES_SESSION_KEY", "HERMES_GATEWAY_SESSION", "HERMES_PLATFORM", "HERMES_INFERENCE_PROVIDER", "HERMES_MANAGED", "HERMES_DEV", "HERMES_CONTAINER", "HERMES_EPHEMERAL_SYSTEM_PROMPT", "HERMES_TIMEZONE", "HERMES_REDACT_SECRETS", "HERMES_BACKGROUND_NOTIFICATIONS", "HERMES_EXEC_ASK", "HERMES_HOME_MODE", "BROWSER_CDP_URL", "CAMOFOX_URL", # Platform allowlists — not credentials, but if set from any source # (user shell, earlier leaky test, CI env), they change gateway auth # behavior and flake button-authorization tests. "TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS", "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS", "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS", "EMAIL_ALLOWED_USERS", "SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS", "MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS", "FEISHU_ALLOWED_USERS", "WECOM_ALLOWED_USERS", "GATEWAY_ALLOWED_USERS", "GATEWAY_ALLOW_ALL_USERS", "TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS", "WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS", "SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS", "SMS_ALLOW_ALL_USERS", }) @pytest.fixture(autouse=True) def _hermetic_environment(tmp_path, monkeypatch): """Blank out all credential/behavioral env vars so local and CI match. Also redirects HOME and HERMES_HOME to per-test tempdirs so code that reads ``~/.hermes/*`` can't touch the real one, and pins TZ/LANG so datetime/locale-sensitive tests are deterministic. """ # 1. Blank every credential-shaped env var that's currently set. for name in list(os.environ.keys()): if _looks_like_credential(name): monkeypatch.delenv(name, raising=False) # 2. Blank behavioral HERMES_* vars that could change test semantics. for name in _HERMES_BEHAVIORAL_VARS: monkeypatch.delenv(name, raising=False) # 3. Redirect HERMES_HOME to a per-test tempdir. Code that reads # ``~/.hermes/*`` via ``get_hermes_home()`` now gets the tempdir. # # NOTE: We do NOT also redirect HOME. Doing so broke CI because # some tests (and their transitive deps) spawn subprocesses that # inherit HOME and expect it to be stable. If a test genuinely # needs HOME isolated, it should set it explicitly in its own # fixture. Any code in the codebase reading ``~/.hermes/*`` via # ``Path.home() / ".hermes"`` instead of ``get_hermes_home()`` # is a bug to fix at the callsite. fake_hermes_home = tmp_path / "hermes_test" fake_hermes_home.mkdir() (fake_hermes_home / "sessions").mkdir() (fake_hermes_home / "cron").mkdir() (fake_hermes_home / "memories").mkdir() (fake_hermes_home / "skills").mkdir() monkeypatch.setenv("HERMES_HOME", str(fake_hermes_home)) # 4. Deterministic locale / timezone / hashseed. CI runs in UTC with # C.UTF-8 locale; local dev often doesn't. Pin everything. monkeypatch.setenv("TZ", "UTC") monkeypatch.setenv("LANG", "C.UTF-8") monkeypatch.setenv("LC_ALL", "C.UTF-8") monkeypatch.setenv("PYTHONHASHSEED", "0") # 4b. Disable AWS IMDS lookups. Without this, any test that ends up # calling has_aws_credentials() / resolve_aws_auth_env_var() # (e.g. provider auto-detect, status command, cron run_job) burns # ~2s waiting for the metadata service at 169.254.169.254 to time # out. Tests don't run on EC2 — IMDS is always unreachable here. monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true") monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1") monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1") # 5. Reset plugin singleton so tests don't leak plugins from # ~/.hermes/plugins/ (which, per step 3, is now empty — but the # singleton might still be cached from a previous test). try: import hermes_cli.plugins as _plugins_mod monkeypatch.setattr(_plugins_mod, "_plugin_manager", None) except Exception: pass # Backward-compat alias — old tests reference this fixture name. Keep it # as a no-op wrapper so imports don't break. @pytest.fixture(autouse=True) def _isolate_hermes_home(_hermetic_environment): """Alias preserved for any test that yields this name explicitly.""" return None # ── Module-level state reset ─────────────────────────────────────────────── # # Python modules are singletons per process, and pytest-xdist workers are # long-lived. Module-level dicts/sets (tool registries, approval state, # interrupt flags) and ContextVars persist across tests in the same worker, # causing tests that pass alone to fail when run with siblings. # # Each entry in this fixture clears state that belongs to a specific module. # New state buckets go here too — this is the single gate that prevents # "works alone, flakes in CI" bugs from state leakage. # # The skill `test-suite-cascade-diagnosis` documents the concrete patterns # this closes; the running example was `test_command_guards` failing 12/15 # CI runs because ``tools.approval._session_approved`` carried approvals # from one test's session into another's. @pytest.fixture(autouse=True) def _reset_module_state(): """Clear module-level mutable state and ContextVars between tests. Keeps state from leaking across tests on the same xdist worker. Modules that don't exist yet (test collection before production import) are skipped silently — production import later creates fresh empty state. """ # --- tools.approval — the single biggest source of cross-test pollution --- try: from tools import approval as _approval_mod _approval_mod._session_approved.clear() _approval_mod._session_yolo.clear() _approval_mod._permanent_approved.clear() _approval_mod._pending.clear() _approval_mod._gateway_queues.clear() _approval_mod._gateway_notify_cbs.clear() # ContextVar: reset to empty string so get_current_session_key() # falls through to the env var / default path, matching a fresh # process. _approval_mod._approval_session_key.set("") except Exception: pass # --- tools.interrupt — per-thread interrupt flag set --- try: from tools import interrupt as _interrupt_mod with _interrupt_mod._lock: _interrupt_mod._interrupted_threads.clear() except Exception: pass # --- gateway.session_context — 9 ContextVars that represent # the active gateway session. If set in one test and not reset, # the next test's get_session_env() reads stale values. try: from gateway import session_context as _sc_mod for _cv in ( _sc_mod._SESSION_PLATFORM, _sc_mod._SESSION_CHAT_ID, _sc_mod._SESSION_CHAT_NAME, _sc_mod._SESSION_THREAD_ID, _sc_mod._SESSION_USER_ID, _sc_mod._SESSION_USER_NAME, _sc_mod._SESSION_KEY, _sc_mod._CRON_AUTO_DELIVER_PLATFORM, _sc_mod._CRON_AUTO_DELIVER_CHAT_ID, _sc_mod._CRON_AUTO_DELIVER_THREAD_ID, ): _cv.set(_sc_mod._UNSET) except Exception: pass # --- tools.env_passthrough — ContextVar with no default --- # LookupError is normal if the test never set it. Setting it to an # empty set unconditionally normalizes the starting state. try: from tools import env_passthrough as _envp_mod _envp_mod._allowed_env_vars_var.set(set()) except Exception: pass # --- tools.credential_files — ContextVar --- try: from tools import credential_files as _credf_mod _credf_mod._registered_files_var.set({}) except Exception: pass # --- tools.file_tools — per-task read history + file-ops cache --- # _read_tracker accumulates per-task_id read history for loop detection, # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the # cap is hit faster than expected and capacity-related tests flake. try: from tools import file_tools as _ft_mod with _ft_mod._read_tracker_lock: _ft_mod._read_tracker.clear() with _ft_mod._file_ops_lock: _ft_mod._file_ops_cache.clear() except Exception: pass yield @pytest.fixture() def tmp_dir(tmp_path): """Provide a temporary directory that is cleaned up automatically.""" return tmp_path @pytest.fixture() def mock_config(): """Return a minimal hermes config dict suitable for unit tests.""" return { "model": "test/mock-model", "toolsets": ["terminal", "file"], "max_turns": 10, "terminal": { "backend": "local", "cwd": "/tmp", "timeout": 30, }, "compression": {"enabled": False}, "memory": {"memory_enabled": False, "user_profile_enabled": False}, "command_allowlist": [], } # ── Global test timeout ───────────────────────────────────────────────────── # Kill any individual test that takes longer than 30 seconds. # Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the # entire test suite. def _timeout_handler(signum, frame): raise TimeoutError("Test exceeded 30 second timeout") @pytest.fixture(autouse=True) def _ensure_current_event_loop(request): """Provide a default event loop for sync tests that call get_event_loop(). Python 3.11+ no longer guarantees a current loop for plain synchronous tests. A number of gateway tests still use asyncio.get_event_loop().run_until_complete(...). Ensure they always have a usable loop without interfering with pytest-asyncio's own loop management for @pytest.mark.asyncio tests. """ if request.node.get_closest_marker("asyncio") is not None: yield return try: loop = asyncio.get_event_loop_policy().get_event_loop() except RuntimeError: loop = None created = loop is None or loop.is_closed() if created: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: yield finally: if created and loop is not None: try: loop.close() finally: asyncio.set_event_loop(None) @pytest.fixture(autouse=True) def _enforce_test_timeout(): """Kill any individual test that takes longer than 30 seconds. SIGALRM is Unix-only; skip on Windows.""" if sys.platform == "win32": yield return old = signal.signal(signal.SIGALRM, _timeout_handler) signal.alarm(30) yield signal.alarm(0) signal.signal(signal.SIGALRM, old)