From 6367e1c4c0ab742c59e74bcd93679eea5d21a471 Mon Sep 17 00:00:00 2001
From: LucidPaths <lc77@outlook.de>
Date: Fri, 3 Apr 2026 13:35:17 +0200
Subject: [PATCH] fix: remove stale test skips, fix regex backtracking, file
 search bug, and test flakiness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug fixes:
- agent/redact.py: catastrophic regex backtracking in _ENV_ASSIGN_RE — removed
  re.IGNORECASE and changed [A-Z_]* to [A-Z0-9_]* to restrict matching to actual
  env var name chars. Without this, the pattern backtracks exponentially on large
  strings (e.g. 100K tool output), causing test_file_read_guards to time out.
- tools/file_operations.py: over-escaped newline in find -printf format string
  produced literal backslash-n instead of a real newline, breaking file search
  result parsing (total_count always 1, paths concatenated).

Test fixes:
- Remove stale pytestmark.skip from 4 test modules that were blanket-skipped as
  'Hangs in non-interactive environments' but actually run fine:
  - test_413_compression.py (12 tests, 25s)
  - test_file_tools_live.py (71 tests, 24s)
  - test_code_execution.py (61 tests, 99s)
  - test_agent_loop_tool_calling.py (has proper OPENROUTER_API_KEY skip already)
- test_413_compression.py: fix threshold values in 2 preflight compression tests
  where context_length was too small for the compressed output to fit in one pass.
- test_mcp_probe.py: add missing _MCP_AVAILABLE mock so tests work without MCP SDK.
- test_mcp_tool_issue_948.py: inject MCP symbols (StdioServerParameters etc.) when
  SDK is not installed so patch() targets exist.
- test_approve_deny_commands.py: replace time.sleep(0.3) with deterministic polling
  of _gateway_queues — fixes race condition where resolve fires before threads
  register their approval entries, causing the test to hang indefinitely.

Net effect: +256 tests recovered from skip, 8 real failures fixed.
---
 agent/redact.py                             |  3 +--
 tests/gateway/test_approve_deny_commands.py | 11 ++++++++++-
 tests/test_413_compression.py               | 15 ++++++++-------
 tests/test_agent_loop_tool_calling.py       |  2 +-
 tests/tools/test_code_execution.py          |  2 +-
 tests/tools/test_file_tools_live.py         |  2 +-
 tests/tools/test_mcp_probe.py               | 15 ++++++++++-----
 tests/tools/test_mcp_tool_issue_948.py      | 13 ++++++++++++-
 tools/file_operations.py                    |  2 +-
 9 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/agent/redact.py b/agent/redact.py
index 8cb9758519..17cecca125 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -53,8 +53,7 @@ _PREFIX_PATTERNS = [
 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
-    rf"([A-Z_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
-    re.IGNORECASE,
+    rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
 )
 
 # JSON field patterns: "apiKey": "value", "token": "value", etc.
diff --git a/tests/gateway/test_approve_deny_commands.py b/tests/gateway/test_approve_deny_commands.py
index d360e0cfb6..18f3009b0d 100644
--- a/tests/gateway/test_approve_deny_commands.py
+++ b/tests/gateway/test_approve_deny_commands.py
@@ -591,7 +591,16 @@ class TestBlockingApprovalE2E:
         ]
         for t in threads:
             t.start()
-        time.sleep(0.3)
+
+        # Wait for both threads to register pending approvals instead of
+        # relying on a fixed sleep.  The approval module stores entries in
+        # _gateway_queues[session_key] — poll until we see 2 entries.
+        from tools.approval import _gateway_queues
+        deadline = time.monotonic() + 5
+        while time.monotonic() < deadline:
+            if len(_gateway_queues.get(session_key, [])) >= 2:
+                break
+            time.sleep(0.05)
 
         # Approve first, deny second
         resolve_gateway_approval(session_key, "once")   # oldest
diff --git a/tests/test_413_compression.py b/tests/test_413_compression.py
index da78cd3e42..230434429b 100644
--- a/tests/test_413_compression.py
+++ b/tests/test_413_compression.py
@@ -7,7 +7,7 @@ Verifies that:
 """
 
 import pytest
-pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
+#pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
 
 
 
@@ -318,12 +318,13 @@ class TestPreflightCompression:
     def test_preflight_compresses_oversized_history(self, agent):
         """When loaded history exceeds the model's context threshold, compress before API call."""
         agent.compression_enabled = True
-        # Set a very small context so the history is "oversized"
-        agent.context_compressor.context_length = 100
-        agent.context_compressor.threshold_tokens = 85  # 85% of 100
+        # Set a small context so the history is "oversized", but large enough
+        # that the compressed result (2 short messages) fits in a single pass.
+        agent.context_compressor.context_length = 2000
+        agent.context_compressor.threshold_tokens = 200
 
         # Build a history that will be large enough to trigger preflight
-        # (each message ~20 chars = ~5 tokens, 20 messages = ~100 tokens > 85 threshold)
+        # (each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold)
         big_history = []
         for i in range(20):
             big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"})
@@ -338,7 +339,7 @@ class TestPreflightCompression:
             patch.object(agent, "_save_trajectory"),
             patch.object(agent, "_cleanup_task_resources"),
         ):
-            # Simulate compression reducing messages
+            # Simulate compression reducing messages to a small set that fits
             mock_compress.return_value = (
                 [
                     {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
@@ -411,7 +412,7 @@ class TestToolResultPreflightCompression:
         """When tool results push estimated tokens past threshold, compress before next call."""
         agent.compression_enabled = True
         agent.context_compressor.context_length = 200_000
-        agent.context_compressor.threshold_tokens = 140_000
+        agent.context_compressor.threshold_tokens = 130_000  # below the 135k reported usage
         agent.context_compressor.last_prompt_tokens = 130_000
         agent.context_compressor.last_completion_tokens = 5_000
 
diff --git a/tests/test_agent_loop_tool_calling.py b/tests/test_agent_loop_tool_calling.py
index 175fd1e063..74e67c0beb 100644
--- a/tests/test_agent_loop_tool_calling.py
+++ b/tests/test_agent_loop_tool_calling.py
@@ -28,7 +28,7 @@ from unittest.mock import patch
 
 import pytest
 
-pytestmark = pytest.mark.skip(reason="Live API integration test — hangs in batch runs")
+# pytestmark removed — tests skip gracefully via OPENROUTER_API_KEY check on line 59
 
 # Ensure repo root is importable
 _repo_root = Path(__file__).resolve().parent.parent
diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 80a9f4abb7..9d6df27c6c 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -13,7 +13,7 @@ Run with:  python -m pytest tests/test_code_execution.py -v
 """
 
 import pytest
-pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
+# pytestmark removed — tests run fine (61 pass, ~99s)
 
 
 import json
diff --git a/tests/tools/test_file_tools_live.py b/tests/tools/test_file_tools_live.py
index 90fdfac089..4daf19a030 100644
--- a/tests/tools/test_file_tools_live.py
+++ b/tests/tools/test_file_tools_live.py
@@ -9,7 +9,7 @@ asserts zero contamination from shell noise via _assert_clean().
 """
 
 import pytest
-pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
+
 
 
 
diff --git a/tests/tools/test_mcp_probe.py b/tests/tools/test_mcp_probe.py
index a592c5dca0..46459e44c8 100644
--- a/tests/tools/test_mcp_probe.py
+++ b/tests/tools/test_mcp_probe.py
@@ -61,7 +61,8 @@ class TestProbeMcpServerTools:
         async def fake_connect(name, cfg):
             return mock_server
 
-        with patch("tools.mcp_tool._load_mcp_config", return_value=config), \
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._load_mcp_config", return_value=config), \
              patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
              patch("tools.mcp_tool._ensure_mcp_loop"), \
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
@@ -102,7 +103,8 @@ class TestProbeMcpServerTools:
                 raise ConnectionError("Server not found")
             return mock_server
 
-        with patch("tools.mcp_tool._load_mcp_config", return_value=config), \
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._load_mcp_config", return_value=config), \
              patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
              patch("tools.mcp_tool._ensure_mcp_loop"), \
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
@@ -135,7 +137,8 @@ class TestProbeMcpServerTools:
         async def fake_connect(name, cfg):
             return mock_server
 
-        with patch("tools.mcp_tool._load_mcp_config", return_value=config), \
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._load_mcp_config", return_value=config), \
              patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
              patch("tools.mcp_tool._ensure_mcp_loop"), \
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
@@ -159,7 +162,8 @@ class TestProbeMcpServerTools:
         """_stop_mcp_loop is called even when probe fails."""
         config = {"github": {"command": "npx", "connect_timeout": 5}}
 
-        with patch("tools.mcp_tool._load_mcp_config", return_value=config), \
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._load_mcp_config", return_value=config), \
              patch("tools.mcp_tool._ensure_mcp_loop"), \
              patch("tools.mcp_tool._run_on_mcp_loop", side_effect=RuntimeError("boom")), \
              patch("tools.mcp_tool._stop_mcp_loop") as mock_stop:
@@ -187,7 +191,8 @@ class TestProbeMcpServerTools:
             connect_calls.append(name)
             return mock_server
 
-        with patch("tools.mcp_tool._load_mcp_config", return_value=config), \
+        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._load_mcp_config", return_value=config), \
              patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
              patch("tools.mcp_tool._ensure_mcp_loop"), \
              patch("tools.mcp_tool._run_on_mcp_loop") as mock_run, \
diff --git a/tests/tools/test_mcp_tool_issue_948.py b/tests/tools/test_mcp_tool_issue_948.py
index df64230346..c3e0422026 100644
--- a/tests/tools/test_mcp_tool_issue_948.py
+++ b/tests/tools/test_mcp_tool_issue_948.py
@@ -1,11 +1,22 @@
 import asyncio
 import os
+import sys
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from tools.mcp_tool import MCPServerTask, _format_connect_error, _resolve_stdio_command
+from tools.mcp_tool import MCPServerTask, _format_connect_error, _resolve_stdio_command, _MCP_AVAILABLE
+
+# Ensure the mcp module symbols exist for patching even when the SDK isn't installed
+if not _MCP_AVAILABLE:
+    import tools.mcp_tool as _mcp_mod
+    if not hasattr(_mcp_mod, "StdioServerParameters"):
+        _mcp_mod.StdioServerParameters = MagicMock
+    if not hasattr(_mcp_mod, "stdio_client"):
+        _mcp_mod.stdio_client = MagicMock
+    if not hasattr(_mcp_mod, "ClientSession"):
+        _mcp_mod.ClientSession = MagicMock
 
 
 def test_resolve_stdio_command_falls_back_to_hermes_node_bin(tmp_path):
diff --git a/tools/file_operations.py b/tools/file_operations.py
index d0e3ad3c8b..4202e79728 100644
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@@ -898,7 +898,7 @@ class ShellFileOperations(FileOperations):
         hidden_exclude = "-not -path '*/.*'"
 
         cmd = f"find {self._escape_shell_arg(path)} {hidden_exclude} -type f -name {self._escape_shell_arg(search_pattern)} " \
-              f"-printf '%T@ %p\\\\n' 2>/dev/null | sort -rn | tail -n +{offset + 1} | head -n {limit}"
+              f"-printf '%T@ %p\\n' 2>/dev/null | sort -rn | tail -n +{offset + 1} | head -n {limit}"
 
         result = self._exec(cmd, timeout=60)