fix(tests): align CI tests with recent security hardening (#31470)

Four recent security PRs landed on main with stale/missing test updates, breaking 4 test shards on every subsequent PR's CI run: - test_discord_bot_auth_bypass.py (PR #30742 c3caca658): DISCORD_ALLOWED_ROLES no longer bypasses _is_user_authorized. Inverted 3 tests to assert the new (correct) behavior: role config alone does NOT authorize at the gateway layer. - test_msgraph_webhook.py (PR #30169 4ca77f105): adapter.is_connected is a @property, not a method. Test was calling it with () after the connect() change; TypeError: 'bool' is not callable. Removed the parens. - test_feishu_approval_buttons.py (PR #30744 bdb97b857): Card-action callbacks now go through _allow_group_message authorization. 3 tests in TestCardActionCallbackResponse didn't populate adapter._allowed_group_users so the operator's open_id got rejected. Added the allowlist setup to each test, matching the existing pattern in test_returns_card_for_approve_action. Also raise tolerance on test_wait_for_process_kills_subprocess_on_keyboardinterrupt: the SIGTERM → 3s TimeoutStopSec → SIGKILL → reap chain can exceed 10s under loaded xdist (40 workers). Bumped _wait_for_pgid_exit timeout 10→30s and worker join timeout 5→15s. Passes 100% in isolation already; this just makes it tolerant of CI-host load. Validation: 270/270 tests pass across the 5 affected files.
2026-07-14 14:12:44 +00:00 · 2026-05-24 06:54:16 -07:00 · 2026-05-24 06:54:16 -07:00 · 889903f0fa
commit 889903f0fa
parent 3bace071bf
4 changed files with 40 additions and 21 deletions
--- a/tests/gateway/test_discord_bot_auth_bypass.py
+++ b/tests/gateway/test_discord_bot_auth_bypass.py
@ -172,42 +172,49 @@ def test_bot_bypass_does_not_leak_to_other_platforms(monkeypatch):


 # -----------------------------------------------------------------------------
-# DISCORD_ALLOWED_ROLES gateway-layer bypass (#7871)
+# DISCORD_ALLOWED_ROLES no longer bypasses the gateway allowlist (#30742)
+#
+# Prior behavior: setting DISCORD_ALLOWED_ROLES caused _is_user_authorized
+# to return True for ANY Discord event, on the assumption that the adapter
+# pre-filter had already validated role membership.  That allowed slash
+# commands and synthetic voice events to bypass role checks.  PR #30742
+# removed the shortcut — Discord auth now flows through the same allowlist
+# / pairing / allow-all path as every other platform.
 # -----------------------------------------------------------------------------


-def test_discord_role_config_bypasses_gateway_allowlist(monkeypatch):
-    """When DISCORD_ALLOWED_ROLES is set, _is_user_authorized must trust
-    the adapter's pre-filter and authorize. Without this, role-only setups
-    (DISCORD_ALLOWED_ROLES populated, DISCORD_ALLOWED_USERS empty) would
-    hit the 'no allowlists configured' branch and get rejected.
+def test_discord_role_config_does_not_bypass_gateway_allowlist(monkeypatch):
+    """DISCORD_ALLOWED_ROLES alone must NOT authorize at the gateway layer
+    (regression guard for #30742).  Role-based access is enforced by the
+    adapter pre-filter on real message events; the gateway layer requires
+    an explicit allowlist hit or pairing approval.
    """
    runner = _make_bare_runner()

    monkeypatch.setenv("DISCORD_ALLOWED_ROLES", "1493705176387948674")
-    # Note: DISCORD_ALLOWED_USERS is NOT set — the entire point.
+    # DISCORD_ALLOWED_USERS deliberately NOT set — verifies the role
+    # config alone no longer grants authorization.

    source = _make_discord_human_source(user_id="999888777")
-    assert runner._is_user_authorized(source) is True
+    assert runner._is_user_authorized(source) is False


-def test_discord_role_config_still_authorizes_alongside_users(monkeypatch):
-    """Sanity: setting both DISCORD_ALLOWED_ROLES and DISCORD_ALLOWED_USERS
-    doesn't break the user-id path. Users in the allowlist should still be
-    authorized even if they don't have a role. (OR semantics.)
+def test_discord_user_allowlist_still_authorizes_when_role_is_also_configured(monkeypatch):
+    """Sanity: DISCORD_ALLOWED_USERS still authorizes users on the list,
+    independent of DISCORD_ALLOWED_ROLES.  This guards against a future
+    regression that ties the user-allowlist check to the (now-removed)
+    role bypass.
    """
    runner = _make_bare_runner()

    monkeypatch.setenv("DISCORD_ALLOWED_ROLES", "1493705176387948674")
    monkeypatch.setenv("DISCORD_ALLOWED_USERS", "100200300")

-    # User on the user allowlist, no role → still authorized at gateway
-    # level via the role bypass (adapter already approved them).
    source = _make_discord_human_source(user_id="100200300")
    assert runner._is_user_authorized(source) is True


-def test_discord_role_bypass_does_not_leak_to_other_platforms(monkeypatch):
+def test_discord_role_config_does_not_leak_to_other_platforms(monkeypatch):
    """DISCORD_ALLOWED_ROLES must only affect Discord. Setting it should
    not suddenly start authorizing Telegram users whose platform has its
    own empty allowlist.
--- a/tests/gateway/test_feishu_approval_buttons.py
+++ b/tests/gateway/test_feishu_approval_buttons.py
@ -506,6 +506,7 @@ class TestCardActionCallbackResponse:
        adapter = _make_adapter()
        adapter._loop = MagicMock()
        adapter._loop.is_closed = MagicMock(return_value=False)
+        adapter._allowed_group_users = {"ou_user1"}
        adapter._approval_state[2] = {
            "session_key": "sess-2",
            "message_id": "msg-2",
@ -552,6 +553,7 @@ class TestCardActionCallbackResponse:
        adapter = _make_adapter()
        adapter._loop = MagicMock()
        adapter._loop.is_closed = MagicMock(return_value=False)
+        adapter._allowed_group_users = {"ou_unknown"}
        adapter._approval_state[3] = {
            "session_key": "sess-3",
            "message_id": "msg-3",
@ -572,6 +574,7 @@ class TestCardActionCallbackResponse:
        adapter = _make_adapter()
        adapter._loop = MagicMock()
        adapter._loop.is_closed = MagicMock(return_value=False)
+        adapter._allowed_group_users = {"ou_expired"}
        adapter._approval_state[4] = {
            "session_key": "sess-4",
            "message_id": "msg-4",
--- a/tests/gateway/test_msgraph_webhook.py
+++ b/tests/gateway/test_msgraph_webhook.py
@ -77,7 +77,8 @@ class TestMSGraphValidationHandshake:
        adapter = MSGraphWebhookAdapter(PlatformConfig(enabled=True, extra={}))
        connected = await adapter.connect()
        assert connected is False
-        assert adapter.is_connected() is False
+        # is_connected is a @property on the base adapter, not a method.
+        assert adapter.is_connected is False

    @pytest.mark.anyio
    async def test_validation_token_echo_on_get(self):
--- a/tests/tools/test_local_interrupt_cleanup.py
+++ b/tests/tools/test_local_interrupt_cleanup.py
@ -48,8 +48,14 @@ def _process_group_snapshot(pgid: int) -> str:
    ).stdout.strip()


-def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
-    """Wait for a process group to disappear under loaded xdist hosts."""
+def _wait_for_pgid_exit(pgid: int, timeout: float = 30.0) -> bool:
+    """Wait for a process group to disappear under loaded xdist hosts.
+
+    The cleanup chain is: SIGTERM → 3s TimeoutStopSec → SIGKILL → reap.
+    Under heavy xdist load (40 parallel workers, 6-shard CI), the full
+    sequence can exceed 10s. Default timeout is generous to avoid CI
+    flakes; in practice the wait returns in <1s on quiet hosts.
+    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if not _pgid_still_alive(pgid):
@ -166,9 +172,11 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
        assert ret == 1, f"SetAsyncExc returned {ret}, expected 1"

        # Give the worker a moment to: hit the exception at the next poll,
-        # run the except-block cleanup (_kill_process), and exit.
-        t.join(timeout=5.0)
-        assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
+        # run the except-block cleanup (_kill_process), and exit.  Under
+        # xdist load the SIGTERM → 3s wait → SIGKILL chain can take longer
+        # than 5s before the worker's join() returns; bumped to 15s.
+        t.join(timeout=15.0)
+        assert not t.is_alive(), "worker didn't exit within 15 s of the interrupt"

        # The critical assertion: the subprocess GROUP must be dead.  Not
        # just the bash wrapper — the 'sleep 30' child too. Under xdist load,