mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
test(conftest): reset module-level state + unset platform allowlists (#13400)
Three fixes that close the remaining structural sources of CI flakes after PR #13363. ## 1. Per-test reset of module-level singletons and ContextVars Python modules are singletons per process, and pytest-xdist workers are long-lived. Module-level dicts/sets and ContextVars persist across tests on the same worker. A test that sets state in `tools.approval._session_approved` and doesn't explicitly clear it leaks that state to every subsequent test on the same worker. New `_reset_module_state` autouse fixture in `tests/conftest.py` clears: - tools.approval: _session_approved, _session_yolo, _permanent_approved, _pending, _gateway_queues, _gateway_notify_cbs, _approval_session_key - tools.interrupt: _interrupted_threads - gateway.session_context: 10 session/cron ContextVars (reset to _UNSET) - tools.env_passthrough: _allowed_env_vars_var (reset to empty set) - tools.credential_files: _registered_files_var (reset to empty dict) - tools.file_tools: _read_tracker, _file_ops_cache This was the single biggest remaining class of CI flakes. `test_command_guards::test_warn_session_approved` and `test_combined_cli_session_approves_both` were failing 12/15 recent main runs specifically because `_session_approved` carried approvals from a prior test's session into these tests' `"default"` session lookup. ## 2. Unset platform allowlist env vars in hermetic fixture `TELEGRAM_ALLOWED_USERS`, `DISCORD_ALLOWED_USERS`, and 20 other `*_ALLOWED_USERS` / `*_ALLOW_ALL_USERS` vars are now unset per-test in the same place credential env vars already are. These aren't credentials but they change gateway auth behavior; if set from any source (user shell, leaky test, CI env) they flake button-authorization tests. Fixes three `test_telegram_approval_buttons` tests that were failing across recent runs of the full gateway directory. ## 3. Two specific tests with module-level captured state - `test_signal::TestSignalPhoneRedaction`: `agent.redact._REDACT_ENABLED` is captured at module import from `HERMES_REDACT_SECRETS`, not read per-call. `monkeypatch.delenv` at test time is too late. Added `monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)` per skill xdist-cross-test-pollution Pattern 5. - `test_internal_event_bypass_pairing::test_non_internal_event_without_user_triggers_pairing`: `gateway.pairing.PAIRING_DIR` is captured at module import from HERMES_HOME, so per-test HERMES_HOME redirection in conftest doesn't retroactively move it. Test now monkeypatches PAIRING_DIR directly to its tmp_path, preventing rate-limit state from prior xdist workers from letting the pairing send-call be suppressed. ## Validation - tests/tools/: 3494 pass (0 fail) including test_command_guards - tests/gateway/: 3504 pass (0 fail) across repeat runs - tests/agent/ + tests/hermes_cli/ + tests/run_agent/ + tests/tools/: 8371 pass, 37 skipped, 0 fail — full suite across directories No production code changed.
This commit is contained in:
parent
71668559be
commit
26abac5afd
3 changed files with 141 additions and 0 deletions
|
|
@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
||||||
"HERMES_HOME_MODE",
|
"HERMES_HOME_MODE",
|
||||||
"BROWSER_CDP_URL",
|
"BROWSER_CDP_URL",
|
||||||
"CAMOFOX_URL",
|
"CAMOFOX_URL",
|
||||||
|
# Platform allowlists — not credentials, but if set from any source
|
||||||
|
# (user shell, earlier leaky test, CI env), they change gateway auth
|
||||||
|
# behavior and flake button-authorization tests.
|
||||||
|
"TELEGRAM_ALLOWED_USERS",
|
||||||
|
"DISCORD_ALLOWED_USERS",
|
||||||
|
"WHATSAPP_ALLOWED_USERS",
|
||||||
|
"SLACK_ALLOWED_USERS",
|
||||||
|
"SIGNAL_ALLOWED_USERS",
|
||||||
|
"SIGNAL_GROUP_ALLOWED_USERS",
|
||||||
|
"EMAIL_ALLOWED_USERS",
|
||||||
|
"SMS_ALLOWED_USERS",
|
||||||
|
"MATTERMOST_ALLOWED_USERS",
|
||||||
|
"MATRIX_ALLOWED_USERS",
|
||||||
|
"DINGTALK_ALLOWED_USERS",
|
||||||
|
"FEISHU_ALLOWED_USERS",
|
||||||
|
"WECOM_ALLOWED_USERS",
|
||||||
|
"GATEWAY_ALLOWED_USERS",
|
||||||
|
"GATEWAY_ALLOW_ALL_USERS",
|
||||||
|
"TELEGRAM_ALLOW_ALL_USERS",
|
||||||
|
"DISCORD_ALLOW_ALL_USERS",
|
||||||
|
"WHATSAPP_ALLOW_ALL_USERS",
|
||||||
|
"SLACK_ALLOW_ALL_USERS",
|
||||||
|
"SIGNAL_ALLOW_ALL_USERS",
|
||||||
|
"EMAIL_ALLOW_ALL_USERS",
|
||||||
|
"SMS_ALLOW_ALL_USERS",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Module-level state reset ───────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# Python modules are singletons per process, and pytest-xdist workers are
|
||||||
|
# long-lived. Module-level dicts/sets (tool registries, approval state,
|
||||||
|
# interrupt flags) and ContextVars persist across tests in the same worker,
|
||||||
|
# causing tests that pass alone to fail when run with siblings.
|
||||||
|
#
|
||||||
|
# Each entry in this fixture clears state that belongs to a specific module.
|
||||||
|
# New state buckets go here too — this is the single gate that prevents
|
||||||
|
# "works alone, flakes in CI" bugs from state leakage.
|
||||||
|
#
|
||||||
|
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
|
||||||
|
# this closes; the running example was `test_command_guards` failing 12/15
|
||||||
|
# CI runs because ``tools.approval._session_approved`` carried approvals
|
||||||
|
# from one test's session into another's.
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _reset_module_state():
|
||||||
|
"""Clear module-level mutable state and ContextVars between tests.
|
||||||
|
|
||||||
|
Keeps state from leaking across tests on the same xdist worker. Modules
|
||||||
|
that don't exist yet (test collection before production import) are
|
||||||
|
skipped silently — production import later creates fresh empty state.
|
||||||
|
"""
|
||||||
|
# --- tools.approval — the single biggest source of cross-test pollution ---
|
||||||
|
try:
|
||||||
|
from tools import approval as _approval_mod
|
||||||
|
_approval_mod._session_approved.clear()
|
||||||
|
_approval_mod._session_yolo.clear()
|
||||||
|
_approval_mod._permanent_approved.clear()
|
||||||
|
_approval_mod._pending.clear()
|
||||||
|
_approval_mod._gateway_queues.clear()
|
||||||
|
_approval_mod._gateway_notify_cbs.clear()
|
||||||
|
# ContextVar: reset to empty string so get_current_session_key()
|
||||||
|
# falls through to the env var / default path, matching a fresh
|
||||||
|
# process.
|
||||||
|
_approval_mod._approval_session_key.set("")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# --- tools.interrupt — per-thread interrupt flag set ---
|
||||||
|
try:
|
||||||
|
from tools import interrupt as _interrupt_mod
|
||||||
|
with _interrupt_mod._lock:
|
||||||
|
_interrupt_mod._interrupted_threads.clear()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# --- gateway.session_context — 9 ContextVars that represent
|
||||||
|
# the active gateway session. If set in one test and not reset,
|
||||||
|
# the next test's get_session_env() reads stale values.
|
||||||
|
try:
|
||||||
|
from gateway import session_context as _sc_mod
|
||||||
|
for _cv in (
|
||||||
|
_sc_mod._SESSION_PLATFORM,
|
||||||
|
_sc_mod._SESSION_CHAT_ID,
|
||||||
|
_sc_mod._SESSION_CHAT_NAME,
|
||||||
|
_sc_mod._SESSION_THREAD_ID,
|
||||||
|
_sc_mod._SESSION_USER_ID,
|
||||||
|
_sc_mod._SESSION_USER_NAME,
|
||||||
|
_sc_mod._SESSION_KEY,
|
||||||
|
_sc_mod._CRON_AUTO_DELIVER_PLATFORM,
|
||||||
|
_sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
|
||||||
|
_sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
|
||||||
|
):
|
||||||
|
_cv.set(_sc_mod._UNSET)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# --- tools.env_passthrough — ContextVar<set[str]> with no default ---
|
||||||
|
# LookupError is normal if the test never set it. Setting it to an
|
||||||
|
# empty set unconditionally normalizes the starting state.
|
||||||
|
try:
|
||||||
|
from tools import env_passthrough as _envp_mod
|
||||||
|
_envp_mod._allowed_env_vars_var.set(set())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# --- tools.credential_files — ContextVar<dict> ---
|
||||||
|
try:
|
||||||
|
from tools import credential_files as _credf_mod
|
||||||
|
_credf_mod._registered_files_var.set({})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# --- tools.file_tools — per-task read history + file-ops cache ---
|
||||||
|
# _read_tracker accumulates per-task_id read history for loop detection,
|
||||||
|
# capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
|
||||||
|
# cap is hit faster than expected and capacity-related tests flake.
|
||||||
|
try:
|
||||||
|
from tools import file_tools as _ft_mod
|
||||||
|
with _ft_mod._read_tracker_lock:
|
||||||
|
_ft_mod._read_tracker.clear()
|
||||||
|
with _ft_mod._file_ops_lock:
|
||||||
|
_ft_mod._file_ops_cache.clear()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def tmp_dir(tmp_path):
|
def tmp_dir(tmp_path):
|
||||||
"""Provide a temporary directory that is cleaned up automatically."""
|
"""Provide a temporary directory that is cleaned up automatically."""
|
||||||
|
|
|
||||||
|
|
@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path
|
||||||
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
|
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
|
||||||
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
|
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
|
||||||
import gateway.run as gateway_run
|
import gateway.run as gateway_run
|
||||||
|
import gateway.pairing as pairing_mod
|
||||||
|
|
||||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||||
|
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
|
||||||
|
# import time from whichever HERMES_HOME was set then. Per-test
|
||||||
|
# HERMES_HOME redirection in conftest doesn't retroactively move it.
|
||||||
|
# Override directly so pairing rate-limit state lives in this test's
|
||||||
|
# tmp_path (and so stale state from prior xdist workers can't leak in).
|
||||||
|
pairing_dir = tmp_path / "pairing"
|
||||||
|
pairing_dir.mkdir()
|
||||||
|
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
|
||||||
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
||||||
|
|
||||||
# Clear env vars that could let all users through (loaded by
|
# Clear env vars that could let all users through (loaded by
|
||||||
|
|
|
||||||
|
|
@ -306,7 +306,13 @@ class TestSignalSessionSource:
|
||||||
class TestSignalPhoneRedaction:
|
class TestSignalPhoneRedaction:
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def _ensure_redaction_enabled(self, monkeypatch):
|
def _ensure_redaction_enabled(self, monkeypatch):
|
||||||
|
# agent.redact snapshots _REDACT_ENABLED at import time from the
|
||||||
|
# HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late —
|
||||||
|
# the module was already imported during test collection with
|
||||||
|
# whatever value was in the env then. Force the flag directly.
|
||||||
|
# See skill: xdist-cross-test-pollution Pattern 5.
|
||||||
monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
|
monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
|
||||||
|
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
|
||||||
|
|
||||||
def test_us_number(self):
|
def test_us_number(self):
|
||||||
from agent.redact import redact_sensitive_text
|
from agent.redact import redact_sensitive_text
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue