fix(security): normalize input before dangerous command detection (#3260)

detect_dangerous_command() ran regex patterns against raw command strings
without normalization, allowing bypass via Unicode fullwidth chars,
ANSI escape codes, null bytes, and 8-bit C1 controls.

Adds _normalize_command_for_detection() that:
- Strips ANSI escapes using the full ECMA-48 strip_ansi() from
  tools/ansi_strip (CSI, OSC, DCS, 8-bit C1, nF sequences)
- Removes null bytes
- Normalizes Unicode via NFKC (fullwidth Latin → ASCII, etc.)

Includes 12 regression tests covering fullwidth, ANSI, C1, null byte,
and combined obfuscation bypasses.

Salvaged from PR #3089 by thakoreh — improved ANSI stripping to use
existing comprehensive strip_ansi() instead of a weaker hand-rolled
regex, and added test coverage.

Co-authored-by: Hiren <hiren.thakore58@gmail.com>
This commit is contained in:
Teknium 2026-03-26 14:33:18 -07:00 committed by GitHub
parent a8e02c7d49
commit 76ed15dd4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 90 additions and 1 deletions

View file

@ -13,6 +13,7 @@ import os
import re
import sys
import threading
import unicodedata
from typing import Optional
logger = logging.getLogger(__name__)
@ -82,13 +83,31 @@ def _approval_key_aliases(pattern_key: str) -> set[str]:
# Detection
# =========================================================================
def _normalize_command_for_detection(command: str) -> str:
"""Normalize a command string before dangerous-pattern matching.
Strips ANSI escape sequences (full ECMA-48 via tools.ansi_strip),
null bytes, and normalizes Unicode fullwidth characters so that
obfuscation techniques cannot bypass the pattern-based detection.
"""
from tools.ansi_strip import strip_ansi
# Strip all ANSI escape sequences (CSI, OSC, DCS, 8-bit C1, etc.)
command = strip_ansi(command)
# Strip null bytes
command = command.replace('\x00', '')
# Normalize Unicode (fullwidth Latin, halfwidth Katakana, etc.)
command = unicodedata.normalize('NFKC', command)
return command
def detect_dangerous_command(command: str) -> tuple:
"""Check if a command matches any dangerous patterns.
Returns:
(is_dangerous, pattern_key, description) or (False, None, None)
"""
command_lower = command.lower()
command_lower = _normalize_command_for_detection(command).lower()
for pattern, description in DANGEROUS_PATTERNS:
if re.search(pattern, command_lower, re.IGNORECASE | re.DOTALL):
pattern_key = description