mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Three fixes that close the remaining structural sources of CI flakes after PR #13363. ## 1. Per-test reset of module-level singletons and ContextVars Python modules are singletons per process, and pytest-xdist workers are long-lived. Module-level dicts/sets and ContextVars persist across tests on the same worker. A test that sets state in `tools.approval._session_approved` and doesn't explicitly clear it leaks that state to every subsequent test on the same worker. New `_reset_module_state` autouse fixture in `tests/conftest.py` clears: - tools.approval: _session_approved, _session_yolo, _permanent_approved, _pending, _gateway_queues, _gateway_notify_cbs, _approval_session_key - tools.interrupt: _interrupted_threads - gateway.session_context: 10 session/cron ContextVars (reset to _UNSET) - tools.env_passthrough: _allowed_env_vars_var (reset to empty set) - tools.credential_files: _registered_files_var (reset to empty dict) - tools.file_tools: _read_tracker, _file_ops_cache This was the single biggest remaining class of CI flakes. `test_command_guards::test_warn_session_approved` and `test_combined_cli_session_approves_both` were failing 12/15 recent main runs specifically because `_session_approved` carried approvals from a prior test's session into these tests' `"default"` session lookup. ## 2. Unset platform allowlist env vars in hermetic fixture `TELEGRAM_ALLOWED_USERS`, `DISCORD_ALLOWED_USERS`, and 20 other `*_ALLOWED_USERS` / `*_ALLOW_ALL_USERS` vars are now unset per-test in the same place credential env vars already are. These aren't credentials but they change gateway auth behavior; if set from any source (user shell, leaky test, CI env) they flake button-authorization tests. Fixes three `test_telegram_approval_buttons` tests that were failing across recent runs of the full gateway directory. ## 3. Two specific tests with module-level captured state - `test_signal::TestSignalPhoneRedaction`: `agent.redact._REDACT_ENABLED` is captured at module import from `HERMES_REDACT_SECRETS`, not read per-call. `monkeypatch.delenv` at test time is too late. Added `monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)` per skill xdist-cross-test-pollution Pattern 5. - `test_internal_event_bypass_pairing::test_non_internal_event_without_user_triggers_pairing`: `gateway.pairing.PAIRING_DIR` is captured at module import from HERMES_HOME, so per-test HERMES_HOME redirection in conftest doesn't retroactively move it. Test now monkeypatches PAIRING_DIR directly to its tmp_path, preventing rate-limit state from prior xdist workers from letting the pairing send-call be suppressed. ## Validation - tests/tools/: 3494 pass (0 fail) including test_command_guards - tests/gateway/: 3504 pass (0 fail) across repeat runs - tests/agent/ + tests/hermes_cli/ + tests/run_agent/ + tests/tools/: 8371 pass, 37 skipped, 0 fail — full suite across directories No production code changed.
401 lines
13 KiB
Python
401 lines
13 KiB
Python
"""Tests that internal synthetic events (e.g. background process completion)
|
|
bypass user authorization and do not trigger DM pairing.
|
|
|
|
Regression test for the bug where ``_run_process_watcher`` with
|
|
``notify_on_complete=True`` injected a ``MessageEvent`` without ``user_id``,
|
|
causing ``_is_user_authorized`` to reject it and the gateway to send a
|
|
pairing code to the chat.
|
|
"""
|
|
|
|
import asyncio
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
import pytest
|
|
|
|
from gateway.config import GatewayConfig, Platform
|
|
from gateway.platforms.base import MessageEvent
|
|
from gateway.run import GatewayRunner
|
|
from gateway.session import SessionSource
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class _FakeRegistry:
|
|
"""Return pre-canned sessions, then None once exhausted."""
|
|
|
|
def __init__(self, sessions):
|
|
self._sessions = list(sessions)
|
|
self._completion_consumed: set = set()
|
|
|
|
def get(self, session_id):
|
|
if self._sessions:
|
|
return self._sessions.pop(0)
|
|
return None
|
|
|
|
def is_completion_consumed(self, session_id):
|
|
return session_id in self._completion_consumed
|
|
|
|
|
|
def _build_runner(monkeypatch, tmp_path) -> GatewayRunner:
|
|
"""Create a GatewayRunner with notifications set to 'all'."""
|
|
(tmp_path / "config.yaml").write_text(
|
|
"display:\n background_process_notifications: all\n",
|
|
encoding="utf-8",
|
|
)
|
|
|
|
import gateway.run as gateway_run
|
|
|
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
adapter = SimpleNamespace(send=AsyncMock(), handle_message=AsyncMock())
|
|
runner.adapters[Platform.DISCORD] = adapter
|
|
return runner
|
|
|
|
|
|
def _watcher_dict_with_notify():
|
|
return {
|
|
"session_id": "proc_test_internal",
|
|
"check_interval": 0,
|
|
"session_key": "agent:main:discord:dm:123",
|
|
"platform": "discord",
|
|
"chat_id": "123",
|
|
"thread_id": "",
|
|
"notify_on_complete": True,
|
|
}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_notify_on_complete_sets_internal_flag(monkeypatch, tmp_path):
|
|
"""Synthetic completion event must have internal=True."""
|
|
import tools.process_registry as pr_module
|
|
|
|
sessions = [
|
|
SimpleNamespace(
|
|
output_buffer="done\n", exited=True, exit_code=0, command="echo test"
|
|
),
|
|
]
|
|
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
|
|
|
async def _instant_sleep(*_a, **_kw):
|
|
pass
|
|
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
|
|
|
runner = _build_runner(monkeypatch, tmp_path)
|
|
adapter = runner.adapters[Platform.DISCORD]
|
|
|
|
await runner._run_process_watcher(_watcher_dict_with_notify())
|
|
|
|
assert adapter.handle_message.await_count == 1
|
|
event = adapter.handle_message.await_args.args[0]
|
|
assert isinstance(event, MessageEvent)
|
|
assert event.internal is True, "Synthetic completion event must be marked internal"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_internal_event_bypasses_authorization(monkeypatch, tmp_path):
|
|
"""An internal event should skip _is_user_authorized entirely."""
|
|
import gateway.run as gateway_run
|
|
|
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
|
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
|
|
# Create an internal event with no user_id (simulates the bug scenario)
|
|
source = SessionSource(
|
|
platform=Platform.DISCORD,
|
|
chat_id="123",
|
|
chat_type="dm",
|
|
)
|
|
event = MessageEvent(
|
|
text="[SYSTEM: Background process completed]",
|
|
source=source,
|
|
internal=True,
|
|
)
|
|
|
|
# Track if _is_user_authorized is called
|
|
auth_called = False
|
|
original_auth = GatewayRunner._is_user_authorized
|
|
|
|
def tracking_auth(self, src):
|
|
nonlocal auth_called
|
|
auth_called = True
|
|
return original_auth(self, src)
|
|
|
|
monkeypatch.setattr(GatewayRunner, "_is_user_authorized", tracking_auth)
|
|
|
|
# Stop execution before the agent runner so the test doesn't block in
|
|
# run_in_executor. Auth check happens before _handle_message_with_agent.
|
|
async def _raise(*_a, **_kw):
|
|
raise RuntimeError("sentinel — stop here")
|
|
monkeypatch.setattr(GatewayRunner, "_handle_message_with_agent", _raise)
|
|
|
|
try:
|
|
await runner._handle_message(event)
|
|
except RuntimeError:
|
|
pass # Expected sentinel
|
|
|
|
assert not auth_called, (
|
|
"_is_user_authorized should NOT be called for internal events"
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_internal_event_does_not_trigger_pairing(monkeypatch, tmp_path):
|
|
"""An internal event with no user_id must not generate a pairing code."""
|
|
import gateway.run as gateway_run
|
|
|
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
|
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
# Add adapter so pairing would have somewhere to send
|
|
adapter = SimpleNamespace(send=AsyncMock())
|
|
runner.adapters[Platform.DISCORD] = adapter
|
|
|
|
source = SessionSource(
|
|
platform=Platform.DISCORD,
|
|
chat_id="123",
|
|
chat_type="dm", # DM would normally trigger pairing
|
|
)
|
|
event = MessageEvent(
|
|
text="[SYSTEM: Background process completed]",
|
|
source=source,
|
|
internal=True,
|
|
)
|
|
|
|
# Track pairing code generation
|
|
generate_called = False
|
|
original_generate = runner.pairing_store.generate_code
|
|
|
|
def tracking_generate(*args, **kwargs):
|
|
nonlocal generate_called
|
|
generate_called = True
|
|
return original_generate(*args, **kwargs)
|
|
|
|
runner.pairing_store.generate_code = tracking_generate
|
|
|
|
# Stop execution before the agent runner so the test doesn't block in
|
|
# run_in_executor. Pairing check happens before _handle_message_with_agent.
|
|
async def _raise(*_a, **_kw):
|
|
raise RuntimeError("sentinel — stop here")
|
|
monkeypatch.setattr(GatewayRunner, "_handle_message_with_agent", _raise)
|
|
|
|
try:
|
|
await runner._handle_message(event)
|
|
except RuntimeError:
|
|
pass # Expected sentinel
|
|
|
|
assert not generate_called, (
|
|
"Pairing code should NOT be generated for internal events"
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_notify_on_complete_preserves_user_identity(monkeypatch, tmp_path):
|
|
"""Synthetic completion event should carry user_id and user_name from the watcher."""
|
|
import tools.process_registry as pr_module
|
|
|
|
sessions = [
|
|
SimpleNamespace(
|
|
output_buffer="done\n", exited=True, exit_code=0, command="echo test"
|
|
),
|
|
]
|
|
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
|
|
|
async def _instant_sleep(*_a, **_kw):
|
|
pass
|
|
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
|
|
|
runner = _build_runner(monkeypatch, tmp_path)
|
|
adapter = runner.adapters[Platform.DISCORD]
|
|
|
|
watcher = _watcher_dict_with_notify()
|
|
watcher["user_id"] = "user-42"
|
|
watcher["user_name"] = "alice"
|
|
|
|
await runner._run_process_watcher(watcher)
|
|
|
|
assert adapter.handle_message.await_count == 1
|
|
event = adapter.handle_message.await_args.args[0]
|
|
assert event.source.user_id == "user-42"
|
|
assert event.source.user_name == "alice"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_notify_on_complete_uses_session_store_origin_for_group_topic(monkeypatch, tmp_path):
|
|
import tools.process_registry as pr_module
|
|
from gateway.session import SessionSource
|
|
|
|
sessions = [
|
|
SimpleNamespace(
|
|
output_buffer="done\n", exited=True, exit_code=0, command="echo test"
|
|
),
|
|
]
|
|
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
|
|
|
async def _instant_sleep(*_a, **_kw):
|
|
pass
|
|
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
adapter = SimpleNamespace(send=AsyncMock(), handle_message=AsyncMock())
|
|
runner.adapters[Platform.TELEGRAM] = adapter
|
|
runner.session_store._entries["agent:main:telegram:group:-100:42"] = SimpleNamespace(
|
|
origin=SessionSource(
|
|
platform=Platform.TELEGRAM,
|
|
chat_id="-100",
|
|
chat_type="group",
|
|
thread_id="42",
|
|
user_id="user-42",
|
|
user_name="alice",
|
|
)
|
|
)
|
|
|
|
watcher = {
|
|
"session_id": "proc_test_internal",
|
|
"check_interval": 0,
|
|
"session_key": "agent:main:telegram:group:-100:42",
|
|
"platform": "telegram",
|
|
"chat_id": "-100",
|
|
"thread_id": "42",
|
|
"notify_on_complete": True,
|
|
}
|
|
|
|
await runner._run_process_watcher(watcher)
|
|
|
|
assert adapter.handle_message.await_count == 1
|
|
event = adapter.handle_message.await_args.args[0]
|
|
assert event.internal is True
|
|
assert event.source.platform == Platform.TELEGRAM
|
|
assert event.source.chat_id == "-100"
|
|
assert event.source.chat_type == "group"
|
|
assert event.source.thread_id == "42"
|
|
assert event.source.user_id == "user-42"
|
|
assert event.source.user_name == "alice"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_none_user_id_skips_pairing(monkeypatch, tmp_path):
|
|
"""A non-internal event with user_id=None should be silently dropped."""
|
|
import gateway.run as gateway_run
|
|
|
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
|
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
adapter = SimpleNamespace(send=AsyncMock())
|
|
runner.adapters[Platform.TELEGRAM] = adapter
|
|
|
|
source = SessionSource(
|
|
platform=Platform.TELEGRAM,
|
|
chat_id="123",
|
|
chat_type="dm",
|
|
user_id=None,
|
|
)
|
|
event = MessageEvent(
|
|
text="service message",
|
|
source=source,
|
|
internal=False,
|
|
)
|
|
|
|
result = await runner._handle_message(event)
|
|
|
|
# Should return None (dropped) and NOT send any pairing message
|
|
assert result is None
|
|
assert adapter.send.await_count == 0
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path):
|
|
"""A message with user_id=None must never call generate_code."""
|
|
import gateway.run as gateway_run
|
|
|
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
|
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
adapter = SimpleNamespace(send=AsyncMock())
|
|
runner.adapters[Platform.DISCORD] = adapter
|
|
|
|
generate_called = False
|
|
original_generate = runner.pairing_store.generate_code
|
|
|
|
def tracking_generate(*args, **kwargs):
|
|
nonlocal generate_called
|
|
generate_called = True
|
|
return original_generate(*args, **kwargs)
|
|
|
|
runner.pairing_store.generate_code = tracking_generate
|
|
|
|
source = SessionSource(
|
|
platform=Platform.DISCORD,
|
|
chat_id="456",
|
|
chat_type="dm",
|
|
user_id=None,
|
|
)
|
|
event = MessageEvent(text="anonymous", source=source, internal=False)
|
|
|
|
await runner._handle_message(event)
|
|
|
|
assert not generate_called, (
|
|
"Pairing code should NOT be generated for messages with user_id=None"
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
|
|
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
|
|
import gateway.run as gateway_run
|
|
import gateway.pairing as pairing_mod
|
|
|
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
|
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
|
|
# import time from whichever HERMES_HOME was set then. Per-test
|
|
# HERMES_HOME redirection in conftest doesn't retroactively move it.
|
|
# Override directly so pairing rate-limit state lives in this test's
|
|
# tmp_path (and so stale state from prior xdist workers can't leak in).
|
|
pairing_dir = tmp_path / "pairing"
|
|
pairing_dir.mkdir()
|
|
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
|
|
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
|
|
|
# Clear env vars that could let all users through (loaded by
|
|
# module-level dotenv in gateway/run.py from the real ~/.hermes/.env).
|
|
monkeypatch.delenv("DISCORD_ALLOW_ALL_USERS", raising=False)
|
|
monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)
|
|
monkeypatch.delenv("GATEWAY_ALLOW_ALL_USERS", raising=False)
|
|
monkeypatch.delenv("GATEWAY_ALLOWED_USERS", raising=False)
|
|
|
|
runner = GatewayRunner(GatewayConfig())
|
|
adapter = SimpleNamespace(send=AsyncMock())
|
|
runner.adapters[Platform.DISCORD] = adapter
|
|
|
|
source = SessionSource(
|
|
platform=Platform.DISCORD,
|
|
chat_id="123",
|
|
chat_type="dm",
|
|
user_id="unknown_user_999",
|
|
)
|
|
# Normal event (not internal)
|
|
event = MessageEvent(
|
|
text="hello",
|
|
source=source,
|
|
internal=False,
|
|
)
|
|
|
|
result = await runner._handle_message(event)
|
|
|
|
# Should return None (unauthorized) and send pairing message
|
|
assert result is None
|
|
assert adapter.send.await_count == 1
|
|
sent_text = adapter.send.await_args.args[1]
|
|
assert "don't recognize you" in sent_text
|