fix(security): normalize input before dangerous command detection (#3260)

detect_dangerous_command() ran regex patterns against raw command strings without normalization, allowing bypass via Unicode fullwidth chars, ANSI escape codes, null bytes, and 8-bit C1 controls.

Adds _normalize_command_for_detection() that:
- Strips ANSI escapes using the full ECMA-48 strip_ansi() from tools/ansi_strip (CSI, OSC, DCS, 8-bit C1, nF sequences)
- Removes null bytes
- Normalizes Unicode via NFKC (fullwidth Latin → ASCII, etc.)

Includes 12 regression tests covering fullwidth, ANSI, C1, null byte, and combined obfuscation bypasses.

Salvaged from PR #3089 by thakoreh — improved ANSI stripping to use existing comprehensive strip_ansi() instead of a weaker hand-rolled regex, and added test coverage.

Co-authored-by: Hiren <hiren.thakore58@gmail.com>
2026-04-25 00:51:20 +00:00 · 2026-03-26 14:33:18 -07:00 · 2026-03-26 14:33:18 -07:00 · 76ed15dd4d
commit 76ed15dd4d
parent a8e02c7d49
2 changed files with 90 additions and 1 deletions
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -13,6 +13,7 @@ import os
 import re
 import sys
 import threading
+import unicodedata
 from typing import Optional

 logger = logging.getLogger(__name__)
@@ -82,13 +83,31 @@ def _approval_key_aliases(pattern_key: str) -> set[str]:
 # Detection
 # =========================================================================

+def _normalize_command_for_detection(command: str) -> str:
+    """Normalize a command string before dangerous-pattern matching.
+
+    Strips ANSI escape sequences (full ECMA-48 via tools.ansi_strip),
+    null bytes, and normalizes Unicode fullwidth characters so that
+    obfuscation techniques cannot bypass the pattern-based detection.
+    """
+    from tools.ansi_strip import strip_ansi
+
+    # Strip all ANSI escape sequences (CSI, OSC, DCS, 8-bit C1, etc.)
+    command = strip_ansi(command)
+    # Strip null bytes
+    command = command.replace('\x00', '')
+    # Normalize Unicode (fullwidth Latin, halfwidth Katakana, etc.)
+    command = unicodedata.normalize('NFKC', command)
+    return command
+
+
 def detect_dangerous_command(command: str) -> tuple:
    """Check if a command matches any dangerous patterns.

    Returns:
        (is_dangerous, pattern_key, description) or (False, None, None)
    """
-    command_lower = command.lower()
+    command_lower = _normalize_command_for_detection(command).lower()
    for pattern, description in DANGEROUS_PATTERNS:
        if re.search(pattern, command_lower, re.IGNORECASE | re.DOTALL):
            pattern_key = description