hermes-agent/scripts/check-windows-footguns.py

#!/usr/bin/env python3
"""
Grep-based checker for Windows cross-platform footguns.

Flags common patterns that break silently on Windows. Run before PRs —
cheap, fast, catches regressions in a codebase that runs on three OSes.

Usage:
    # Scan staged changes (default when run from a git checkout)
    python scripts/check-windows-footguns.py

    # Scan the full tree (full-repo audit)
    python scripts/check-windows-footguns.py --all

    # Scan a specific file or directory
    python scripts/check-windows-footguns.py path/to/file.py path/to/dir/

    # Scan only modified files vs. main
    python scripts/check-windows-footguns.py --diff main

Exit status:
    0 — no Windows footguns found (or all matches suppressed)
    1 — at least one unsuppressed match

Suppress an intentional use (e.g. tests or platform-gated code) with:
    os.kill(pid, 0)  # windows-footgun: ok — only called on POSIX
"""

from __future__ import annotations

import argparse
import ast
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Iterable

# Two directory levels above this file's resolved location; every
# repo-relative path in this script is computed against it.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Inline suppression comment: `# windows-footgun: ok` (case-insensitive,
# optional whitespace around the colon). Anything after `ok` is free text.
SUPPRESS_MARKER = re.compile(r"#\s*windows-footgun\s*:\s*ok\b", re.IGNORECASE)

# Line-level guard hints. If a line contains any of these tokens, we assume
# the programmer wrote the line in full awareness of the Windows pitfall —
# e.g. `if hasattr(os, 'setsid'): ... os.setsid()`, or the classic
# `getattr(signal, 'SIGKILL', signal.SIGTERM)`, or `shutil.which("wmic")`.
# False negatives are fine here — the inline `# windows-footgun: ok` marker
# is still the authoritative suppression. This is just to reduce the noise
# floor on obviously-guarded lines so the signal-to-noise stays useful.
# Matching is a plain substring test against the raw line, so spacing and
# quoting must match one of the spellings below exactly.
GUARD_HINTS = (
    "hasattr(os,",
    "hasattr(signal,",
    "getattr(os,",
    "getattr(signal,",
    "shutil.which(",
    "if platform.system() != \"Windows\"",
    "if platform.system() != 'Windows'",
    "if sys.platform == \"win32\"",
    "if sys.platform != \"win32\"",
    "if sys.platform == 'win32'",
    "if sys.platform != 'win32'",
    "IS_WINDOWS",
    "is_windows",
)

# Dirs we never scan. Entries are compared against individual path
# components (and against directory names while walking a tree), so only
# single-component names can match.
# NOTE(review): "website/build" contains a separator and therefore never
# equals a single component; that directory is still skipped because
# "build" is listed on its own.
EXCLUDED_DIRS = {
    ".git",
    "node_modules",
    "venv",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    "site-packages",
    "website/build",
    "optional-skills",  # external skills
}

# Filename suffixes we never scan (beyond the dirs above). These are matched
# with str.endswith against the whole path string, not as glob patterns, so
# compound suffixes such as ".min.js" work.
EXCLUDED_SUFFIXES = {
    ".pyc",
    ".pyo",
    ".so",
    ".dll",
    ".exe",
    ".png",
    ".jpg",
    ".gif",
    ".ico",
    ".svg",
    ".mp4",
    ".mp3",
    ".wav",
    ".pdf",
    ".zip",
    ".tar",
    ".gz",
    ".whl",
    ".lock",
    ".min.js",
    ".min.css",
}

# Files we never scan (self-referential — this script mentions the
# patterns it detects — and the CONTRIBUTING docs that list them).
# Entries are POSIX-style paths relative to REPO_ROOT.
EXCLUDED_FILES = {
    "scripts/check-windows-footguns.py",
    "CONTRIBUTING.md",
}


@dataclass
class Footgun:
    """A Windows cross-platform footgun rule.

    Bundles the regex that detects the pattern with the explanation and the
    suggested fix that are printed when it matches.
    """

    # Short human-readable rule title, shown in brackets in the report.
    name: str
    # Compiled regex searched against the code portion of each line.
    pattern: re.Pattern
    # Why the matched construct is a problem on Windows.
    message: str
    # Suggested replacement; may span several lines.
    fix: str
    # If set, matches in files/paths containing any of these substrings are
    # silently ignored (e.g. tests that legitimately exercise the footgun
    # behind a platform guard). Prefer `# windows-footgun: ok` inline
    # suppression over this list; only use path_allowlist for whole files
    # that are inherently tests of the footgun itself.
    path_allowlist: tuple[str, ...] = ()
    # Optional post-match predicate, called as post_filter(match, line) with
    # the re.Match and the full source line. Returns True if the match is a
    # REAL footgun (not a false positive). Use this when the regex can't
    # fully distinguish (e.g. open() where mode may contain "b" for binary,
    # or the line may have `encoding=` elsewhere).
    post_filter: "Callable[[re.Match, str], bool] | None" = None


# Regex-based rules. Each rule's pattern is searched against one line of
# code at a time (comments and triple-quoted text are stripped first by the
# scanner); rules with a post_filter get a second, line-aware veto.
FOOTGUNS: list[Footgun] = [
    Footgun(
        name="open() without encoding= on text mode",
        # Match builtins.open() specifically — NOT os.open() or .open()
        # method calls (Path.open, tarfile.open, zf.open, webbrowser.open,
        # Image.open, wave.open, etc).  The pattern requires `open(` to be
        # at the start of the line or preceded by whitespace or one of
        # `( , ; =`, so `os.open` and `.open` never match.  `def open(` and
        # `async def open(` DO match the regex (a space precedes `open`) and
        # are rejected by post_filter below.
        # Note: Path.open() is ALSO affected by the encoding default, but
        # rather than flagging all `.open(` (huge noise), we require an
        # explicit builtins-style open() call.  Path.open() is rare in the
        # codebase compared to open() and can be audited separately.
        pattern=re.compile(
            r"""(?:^|[\s\(,;=])(?<![.\w])open\s*\(\s*[^,)]+\s*(?:,\s*['"](?P<mode>[^'"]*)['"])?"""
        ),
        message=(
            "open() without an explicit encoding= uses the platform default "
            "(UTF-8 on POSIX, cp1252/mbcs on Windows) — files round-tripped "
            "between hosts get mojibake. Always pass encoding='utf-8' for "
            "text files, or use open(path, 'rb')/'wb' for binary."
        ),
        fix=(
            "open(path, 'r', encoding='utf-8')  # or 'utf-8-sig' if the "
            "file may have a BOM"
        ),
        # Filter: only flag if mode is missing-or-text AND the line doesn't
        # already pass encoding=. Skip binary mode (contains "b").
        # `m` is the regex match; `line` is the full source line.
        post_filter=lambda m, line: (
            "b" not in (m.group("mode") or "")
            and "encoding=" not in line
            and "encoding =" not in line
            # Skip `def open(` and `async def open(` (method definitions)
            and not line.lstrip().startswith("def ")
            and not line.lstrip().startswith("async def ")
            # Skip open(path, **kwargs) patterns — encoding may be in the dict.
            # Too expensive to trace; require the author to set encoding in
            # the dict and trust them (or they can add a # windows-footgun: ok).
            and "**" not in line
        ),
    ),
    Footgun(
        name="os.kill(pid, 0)",
        # Only the literal signal value 0 as the second argument is matched.
        pattern=re.compile(r"\bos\.kill\s*\(\s*[^,]+,\s*0\s*\)"),
        message=(
            "os.kill(pid, 0) is NOT a no-op on Windows — it sends "
            "CTRL_C_EVENT to the target's console process group, "
            "hard-killing the target and potentially unrelated siblings. "
            "See bpo-14484."
        ),
        fix=(
            "Use psutil.pid_exists(pid) (psutil is a core dependency). "
            "Or gateway.status._pid_exists(pid) for the hermes wrapper "
            "with a stdlib fallback."
        ),
    ),
    Footgun(
        name="bare os.setsid",
        pattern=re.compile(r"(?<!hasattr\()\bos\.setsid\b"),
        message=(
            "os.setsid does not exist on Windows and raises "
            "AttributeError. Subprocesses that need detachment on "
            "Windows use creationflags instead."
        ),
        fix=(
            "if platform.system() != 'Windows':\n"
            "    kwargs['preexec_fn'] = os.setsid\n"
            "else:\n"
            "    kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP"
        ),
    ),
    Footgun(
        name="bare os.killpg",
        pattern=re.compile(r"\bos\.killpg\b"),
        message="os.killpg does not exist on Windows.",
        fix=(
            "Use psutil for cross-platform process-tree kill:\n"
            "  p = psutil.Process(pid)\n"
            "  for c in p.children(recursive=True): c.kill()\n"
            "  p.kill()"
        ),
    ),
    Footgun(
        name="bare os.getuid / os.geteuid / os.getgid",
        # The pattern also covers os.getegid, which the rule name omits.
        pattern=re.compile(r"\bos\.(?:getuid|geteuid|getgid|getegid)\b"),
        message=(
            "os.getuid / os.geteuid / os.getgid do not exist on Windows "
            "and raise AttributeError at import time if referenced."
        ),
        fix=(
            "Use getpass.getuser() for the username, or gate with "
            "hasattr(os, 'getuid')."
        ),
    ),
    Footgun(
        name="bare os.fork",
        # Requires the call parenthesis, so a bare `os.fork` reference
        # (no call) is not matched.
        pattern=re.compile(r"(?<!hasattr\()\bos\.fork\s*\("),
        message="os.fork does not exist on Windows.",
        fix=(
            "Use subprocess.Popen for daemonization, or guard with "
            "hasattr(os, 'fork') and a Windows fallback path."
        ),
    ),
    Footgun(
        name="bare signal.SIGKILL",
        pattern=re.compile(r"\bsignal\.SIGKILL\b"),
        message=(
            "signal.SIGKILL does not exist on Windows and raises "
            "AttributeError at import time."
        ),
        fix="Use getattr(signal, 'SIGKILL', signal.SIGTERM).",
    ),
    Footgun(
        name="bare signal.SIGHUP / SIGUSR1 / SIGUSR2 / SIGALRM / SIGCHLD / SIGPIPE / SIGQUIT",
        pattern=re.compile(
            r"\bsignal\.(?:SIGHUP|SIGUSR1|SIGUSR2|SIGALRM|SIGCHLD|SIGPIPE|SIGQUIT)\b"
        ),
        message=(
            "These POSIX signals don't exist on Windows; referencing "
            "them raises AttributeError at import time."
        ),
        fix=(
            "Use getattr(signal, 'SIGXXX', None) and check for None "
            "before using, or gate the whole block behind a platform check."
        ),
    ),
    Footgun(
        name="subprocess shebang script invocation",
        # Matches a list argv whose first element is a string literal
        # starting with "./" on the same line as the subprocess call opener.
        pattern=re.compile(
            r"subprocess\.(?:run|Popen|call|check_output|check_call)\s*\(\s*\[\s*['\"]\./"
        ),
        message=(
            "Running a script via './scriptname' doesn't work on Windows — "
            "shebang lines aren't honored. CreateProcessW can't execute "
            "bash/python scripts without an explicit interpreter."
        ),
        fix="Use [sys.executable, 'scriptname.py', ...] explicitly.",
    ),
    Footgun(
        name="wmic invocation without shutil.which guard",
        # Match wmic appearing as a subprocess argument — NOT the
        # shutil.which("wmic") guard pattern itself. Looks for "wmic" as
        # the first element of a list passed to subprocess.<func>(, or a
        # quoted "wmic.exe" anywhere on the line.
        pattern=re.compile(
            r"""(?:subprocess\.\w+\s*\(\s*\[\s*['"]wmic['"]|['"]wmic\.exe['"])"""
        ),
        message=(
            "wmic was removed in Windows 10 21H1 and later. Always "
            "gate with shutil.which('wmic') and fall back to "
            "PowerShell (Get-CimInstance Win32_Process)."
        ),
        fix=(
            "if shutil.which('wmic'):\n"
            "    ... wmic path ...\n"
            "else:\n"
            "    subprocess.run(['powershell', '-NoProfile', '-Command',\n"
            "                    'Get-CimInstance Win32_Process | ...'])"
        ),
    ),
    Footgun(
        name="hardcoded ~/Desktop (OneDrive trap)",
        # Matches a string literal that starts with `~`, `~/`, or a
        # drive-letter `X:\Users\<name>\` prefix immediately followed by
        # `Desktop`.
        pattern=re.compile(
            r"""['"](?:~|~/|[A-Z]:[/\\]Users[/\\][^/\\'"]+[/\\])Desktop\b"""
        ),
        message=(
            "When OneDrive Backup is enabled on Windows, the real Desktop "
            "is at %USERPROFILE%\\OneDrive\\Desktop, not %USERPROFILE%\\"
            "Desktop (which exists as an empty husk)."
        ),
        fix=(
            "On Windows, resolve via ctypes + SHGetKnownFolderPath, or "
            "read the Shell Folders registry key, or run PowerShell "
            "[Environment]::GetFolderPath('Desktop')."
        ),
    ),
    Footgun(
        name="asyncio add_signal_handler without try/except",
        # Line-oriented: flags every `.add_signal_handler(` call; the regex
        # cannot see an enclosing try/except, so guarded calls need the
        # inline suppression marker.
        pattern=re.compile(r"\.add_signal_handler\s*\("),
        message=(
            "loop.add_signal_handler raises NotImplementedError on "
            "Windows — always wrap in try/except or gate with a "
            "platform check."
        ),
        fix=(
            "try:\n"
            "    loop.add_signal_handler(sig, handler, sig)\n"
            "except NotImplementedError:\n"
            "    pass  # Windows asyncio doesn't support signal handlers"
        ),
    ),
]


# -----------------------------------------------------------------------------
# AST-based rule: subprocess calls that flash a console window on Windows
# -----------------------------------------------------------------------------
#
# This is the high-volume Windows complaint: every `subprocess.run(...)` /
# `subprocess.Popen(...)` of a console program on Windows briefly flashes a
# cmd window unless the child either (a) inherits the parent's stdio handles
# via output redirection, or (b) is spawned with a no-window creationflag
# (CREATE_NO_WINDOW / DETACHED_PROCESS).  The fix landscape already exists in
# `hermes_cli/_subprocess_compat.py` (windows_hide_flags / windows_detach_*),
# but nothing stopped new bare calls from re-introducing the popup — so the
# bug kept coming back PR after PR.  This rule is the chokepoint.
#
# It is AST-based (not regex) because the deciding factor — whether the call
# redirects stdout/stderr — frequently lives several lines below the
# `subprocess.run(` opener, which a line-oriented regex cannot see.
#
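# Comprehensive, not restrictive: a call is only flagged when it can ACTUALLY
# create a new console.  Calls that pass creationflags= or a **kwargs spread,
# or whose program is a known POSIX-only executable, are silently ignored — no
# suppression comment needed.  Calls that capture or redirect output
# (capture_output=, stdout=, stderr=), or use check_output (which always
# captures), are also ignored UNLESS the program is a known Windows console
# program (git, node, python, …): those allocate a console regardless of where
# their output goes, so they are flagged even when captured.  The intent is
# that the overwhelming majority of subprocess calls require no change at
# all; only the genuine window-spawners do.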

# The subprocess functions that can spawn a child process.
_SUBPROCESS_FUNCS = frozenset({"run", "Popen", "call", "check_call", "check_output"})
# Module aliases we recognise as the stdlib subprocess module. Only calls
# written as `<alias>.<func>(...)` with a bare-name alias are detected.
_SUBPROCESS_ALIASES = frozenset({"subprocess", "sp"})

# Executables that simply do not exist on Windows. A subprocess call whose
# program is one of these can never create a Windows console window, so the
# no-window flag is irrelevant — flagging them would force pointless
# suppression comments on macOS/Linux-only service-management and packaging
# code (launchctl, systemctl, brew, codesign …). Matched against the FIRST
# element of a list/tuple argv literal only; anything dynamic still gets
# flagged (we can't prove it's POSIX-only).
_POSIX_ONLY_PROGRAMS = frozenset(
    {
        "launchctl",
        "systemctl",
        "journalctl",
        "loginctl",
        "osascript",
        "codesign",
        "xattr",
        "defaults",
        "brew",
        "apt",
        "apt-get",
        "dpkg",
        "pacman",
        "dnf",
        "yum",
        "sudo",
        "open",  # macOS `open`
        "tail",
        "sw_vers",
        "scutil",
        "diskutil",
        "hdiutil",
        "dscl",
    }
)

# Cross-platform console programs that DO exist on Windows and allocate a
# console window when spawned from a console-less parent (Desktop/Electron,
# pythonw.exe, a detached gateway/cron). For these, capturing or redirecting
# stdio is NOT a safety boundary — stream redirection controls where the
# child's output goes, it does NOT suppress console *allocation*. Only
# CREATE_NO_WINDOW (or routing through hermes_cli._subprocess_compat.run/popen,
# which injects it) prevents the flash. So a call to one of these is flagged
# even with capture_output=/stdout=/stderr= set. Matched against the first
# element of a literal argv (bare name or .exe, path-stripped).
_WINDOWS_FLASHING_PROGRAMS = frozenset(
    {
        "git",
        "gh",
        "node",
        "npm",
        "npx",
        "yarn",
        "pnpm",
        "python",
        "python3",
        "pythonw",
        "pip",
        "uv",
        "uvx",
        "ffmpeg",
        "ffprobe",
        "ollama",
        "docker",
        "cmd",
        "cmd.exe",
        "powershell",
        "powershell.exe",
        "pwsh",
        "where",
        "taskkill",
        "schtasks",
        "wmic",
        "tasklist",
        "netstat",
    }
)

# Report text for the AST-based subprocess rule. The message must agree with
# what the checker actually enforces: capturing/redirecting output is NOT
# treated as sufficient for known Windows console programs, so the message
# must not promise that it is.
SUBPROCESS_FOOTGUN_NAME = "subprocess without Windows no-window flag"
SUBPROCESS_FOOTGUN_MESSAGE = (
    "subprocess.run/Popen/call on Windows flashes a console (cmd) window "
    "unless the child is spawned with a no-window creationflag. Capturing or "
    "redirecting output is NOT enough for console programs such as git, node, "
    "python or ffmpeg when the parent has no console. This is the #1 source "
    "of Windows 'terminal popup' bug reports."
)
SUBPROCESS_FOOTGUN_FIX = (
    "Pass creationflags=windows_hide_flags() (for short-lived/captured spawns) "
    "or **windows_detach_popen_kwargs() (for detached daemons) from "
    "hermes_cli._subprocess_compat (both no-op on POSIX). If a visible window "
    "is intended (interactive launch, shell hand-off), add "
    "'# windows-footgun: ok' on the call line."
)


def _call_attr_name(node: ast.Call) -> str | None:
    """Identify a ``<alias>.<func>(...)`` subprocess call.

    Returns the attribute name ('run', 'Popen', ...) when the callee is an
    attribute access whose name is in ``_SUBPROCESS_FUNCS`` and whose owner
    is a bare name listed in ``_SUBPROCESS_ALIASES``; otherwise ``None``.
    """
    callee = node.func
    is_subprocess_call = (
        isinstance(callee, ast.Attribute)
        and callee.attr in _SUBPROCESS_FUNCS
        # Owners that are not plain names (e.g. `a.b.run`) have no `.id`.
        and getattr(callee.value, "id", None) in _SUBPROCESS_ALIASES
    )
    return callee.attr if is_subprocess_call else None


def _suppresses_window(node: ast.Call, func_name: str) -> bool:
    """Decide whether a subprocess call is exempt from the console-window rule.

    Capturing or redirecting stdio is not the same thing as suppressing
    console allocation: from a console-less parent (Desktop/Electron,
    pythonw.exe, a detached gateway/cron) a console-subsystem child still
    allocates — and flashes — a window even with capture_output=True. Only
    CREATE_NO_WINDOW (or routing through
    hermes_cli._subprocess_compat.run/popen, which injects it) prevents that.

    Unconditionally exempt:
      * ``creationflags=`` is passed — the author already manages the console.
      * a ``**spread`` is present — it may carry a _subprocess_compat helper,
        which is the recommended fix, so it must not be penalised.
      * the program is a statically-known POSIX-only executable.

    Exempt only when the program is NOT a known Windows console program
    (see ``_WINDOWS_FLASHING_PROGRAMS``):
      * ``check_output`` / ``capture_output=`` / ``stdout=`` / ``stderr=``.

    ``func_name`` is the subprocess function name as returned by
    ``_call_attr_name``.
    """
    named_kwargs = set()
    has_spread = False
    for keyword in node.keywords:
        if keyword.arg is None:  # `**kwargs` entries carry no name
            has_spread = True
        elif keyword.arg:
            named_kwargs.add(keyword.arg)

    if "creationflags" in named_kwargs or has_spread:
        return True
    if _is_posix_only_program(node):
        return True
    # For known console programs, capture/redirect is not a safety boundary.
    if _is_windows_flashing_program(node):
        return False
    redirects_output = bool(named_kwargs & {"stdout", "stderr", "capture_output"})
    return func_name == "check_output" or redirects_output


def _argv_head(node: ast.Call) -> str | None:
    """Extract the program name from a literal argv, without its directory.

    Looks at the call's first positional argument. When that is a non-empty
    list or tuple literal whose first element is a string constant, returns
    that string with everything up to the last ``/`` and then the last ``\\``
    removed. Returns ``None`` in every other case (no positional args,
    dynamic argv, non-string head, a plain command string, ...).
    """
    if not node.args:
        return None
    argv = node.args[0]
    if not isinstance(argv, (ast.List, ast.Tuple)) or not argv.elts:
        return None
    program = argv.elts[0]
    if not (isinstance(program, ast.Constant) and isinstance(program.value, str)):
        return None
    name = program.value
    # Strip the POSIX-style directory prefix first, then the Windows-style one.
    for separator in ("/", "\\"):
        name = name.rpartition(separator)[2]
    return name


def _is_windows_flashing_program(node: ast.Call) -> bool:
    """True if the call's program is a known cross-platform console exe that
    allocates a Windows console window (so capture is NOT a safe boundary).

    The program is the path-stripped first element of a literal argv (see
    ``_argv_head``). Besides an exact match, the name is also compared
    case-insensitively with a trailing ``.exe`` removed, so ``git.exe`` and
    ``Git`` are recognised just like ``git``. Dynamic argv yields ``False``.
    """
    prog = _argv_head(node)
    if prog is None:
        return False
    if prog in _WINDOWS_FLASHING_PROGRAMS:
        return True
    # Windows resolves executables case-insensitively and with an optional
    # ".exe" suffix; normalise both before the second lookup.
    stem = prog.lower()
    if stem.endswith(".exe"):
        stem = stem[: -len(".exe")]
    return stem in _WINDOWS_FLASHING_PROGRAMS


def _is_posix_only_program(node: ast.Call) -> bool:
    """True if the call's program is a statically-known POSIX-only executable.

    Only inspects a literal list/tuple first arg whose first element is a
    string constant (e.g. ``["launchctl", "bootout", target]``). Dynamic
    argv (variables, f-strings) is treated as unknown and still flagged.

    The program name is obtained through ``_argv_head`` so that both program
    classifiers strip directory prefixes the same way (``/`` and ``\\``).
    """
    prog = _argv_head(node)
    return prog is not None and prog in _POSIX_ONLY_PROGRAMS


def scan_subprocess_window_footguns(
    path: Path, text: str
) -> list[tuple[int, str, Footgun]]:
    """AST pass: flag subprocess calls that can flash a Windows console.

    Honours the same `# windows-footgun: ok` line suppression as the regex
    rules. Returns the same (lineno, line, Footgun) shape so results merge
    cleanly into scan_file's output.

    Args:
        path: The file being scanned (currently unused by this pass).
        text: The file's source text.

    Returns:
        One entry per flagged call, in ``ast.walk`` order. An empty list when
        the source cannot be parsed.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        # Besides plain syntax errors, parsing can fail with ValueError
        # (e.g. null bytes in the source) or RecursionError (pathologically
        # nested code). An unparseable file is skipped, never fatal.
        return []
    # Split on the newline forms the Python parser recognises. str.splitlines
    # would also split on form feeds and other separators, which shifts
    # indices away from the AST's line numbers.
    lines = re.split(r"\r\n|\r|\n", text)
    rule = Footgun(
        name=SUBPROCESS_FOOTGUN_NAME,
        pattern=re.compile(r"^$"),  # unused; AST-driven
        message=SUBPROCESS_FOOTGUN_MESSAGE,
        fix=SUBPROCESS_FOOTGUN_FIX,
    )
    out: list[tuple[int, str, Footgun]] = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        func_name = _call_attr_name(node)
        if func_name is None:
            continue
        if _suppresses_window(node, func_name):
            continue
        lineno = node.lineno
        # Inline suppression — check the opener line AND, for multi-line calls,
        # any line in the call's span (a developer may mark the closing paren).
        end = getattr(node, "end_lineno", lineno) or lineno
        span = lines[lineno - 1 : end]
        if any(SUPPRESS_MARKER.search(span_line) for span_line in span):
            continue
        line = lines[lineno - 1] if 0 <= lineno - 1 < len(lines) else ""
        out.append((lineno, line.rstrip(), rule))
    return out


def should_scan_file(path: Path) -> bool:
    """Return True if this file is in scope for the checker.

    A file is in scope when none of its path components is an excluded
    directory, its name has no excluded suffix, it is not one of the
    explicitly excluded files, and it is Python source (.py/.pyw/.pyi).

    Paths that are not located under REPO_ROOT (or are relative) are
    accepted: they are matched against EXCLUDED_FILES by their own POSIX
    form instead of raising.
    """
    # Skip anything inside an excluded directory.
    if EXCLUDED_DIRS.intersection(path.parts):
        return False
    # Skip excluded suffixes (str.endswith accepts a tuple of candidates).
    if str(path).endswith(tuple(EXCLUDED_SUFFIXES)):
        return False
    # Skip self and docs that intentionally mention the patterns.
    try:
        rel = path.relative_to(REPO_ROOT).as_posix()
    except ValueError:
        # relative_to raises when `path` is not under REPO_ROOT; fall back to
        # the path as given so repo-relative inputs can still match.
        rel = path.as_posix()
    if rel in EXCLUDED_FILES:
        return False
    # Only Python source is scanned; every rule is Python-specific.
    return path.suffix in {".py", ".pyw", ".pyi"}


def iter_files(paths: Iterable[Path]) -> Iterable[Path]:
    """Yield every in-scope file reachable from *paths*.

    Regular files are yielded directly when ``should_scan_file`` accepts
    them; directories are walked recursively, never descending into
    ``EXCLUDED_DIRS``. Entries that are neither are ignored.
    """
    for entry in paths:
        if entry.is_file():
            if should_scan_file(entry):
                yield entry
            continue
        if not entry.is_dir():
            continue
        for dirpath, subdirs, filenames in os.walk(entry):
            # Assigning through the slice prunes os.walk's own list, so
            # excluded directories are never entered.
            subdirs[:] = [name for name in subdirs if name not in EXCLUDED_DIRS]
            base = Path(dirpath)
            for filename in filenames:
                candidate = base / filename
                if should_scan_file(candidate):
                    yield candidate


def _strip_code(line: str) -> str:
    """Return the code portion of *line* with any trailing comment removed.

    A line whose first non-blank character is ``#`` is entirely a comment
    and yields ``""``. Otherwise the line is cut at the first ``#`` that
    ``_find_unquoted_hash`` reports as lying outside a string; with no such
    ``#`` the line is returned unchanged.

    Heuristic only (we don't parse Python); good enough to avoid flagging
    prose such as `# ``os.kill(pid, 0)`` is NOT a no-op` in comments.
    """
    if line.lstrip().startswith("#"):
        return ""
    comment_start = _find_unquoted_hash(line)
    return line if comment_start is None else line[:comment_start]


def _find_unquoted_hash(line: str) -> int | None:
    """Index of the first ``#`` that is not inside a quoted string, or None.

    Tracks a single open quote character (``'`` or ``"``) while walking the
    line left to right. Inside a string a backslash skips the following
    character, so escaped quotes do not end the string. Good enough for the
    common "code, then optional trailing comment" shape; it does not parse
    Python.
    """
    open_quote = None  # the quote char of the string we are in, if any
    skip_next = False
    for index, char in enumerate(line):
        if skip_next:
            skip_next = False
            continue
        if open_quote is not None:
            if char == "\\":
                skip_next = True  # escaped character inside the string
            elif char == open_quote:
                open_quote = None
            continue
        if char == "'" or char == '"':
            open_quote = char
        elif char == "#":
            return index
    return None


def scan_file(path: Path, footguns: list[Footgun]) -> list[tuple[int, str, Footgun]]:
    """Return a list of (line_number, line, footgun) for unsuppressed matches.

    Two passes feed the result:

    1. For Python files, the AST-based subprocess console-window rule.
    2. For every line, the regex rules in *footguns*, applied to the code
       portion of the line only (triple-quoted text and trailing comments
       are removed first).

    A line is skipped by the regex pass when it carries the inline
    suppression marker or any of the GUARD_HINTS substrings. Files that
    cannot be read yield an empty list.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []
    matches: list[tuple[int, str, Footgun]] = []

    # AST-based rule (subprocess console-window footgun). Runs only on Python
    # source; merges into the same result list as the regex rules below.
    if path.suffix in {".py", ".pyw", ".pyi"}:
        matches.extend(scan_subprocess_window_footguns(path, text))

    # Track whether we're inside a triple-quoted string (docstring/raw block).
    # Simple state machine — handles both ''' and """, toggled by the FIRST
    # triple-quote we see; we don't try to handle nested or f-string cases.
    in_triple: str | None = None  # None, "'''", or '"""'

    for i, line in enumerate(text.splitlines(), start=1):
        code_for_scan = line
        if in_triple:
            # Inside a triple-quoted block: the line is prose unless the
            # block closes here, in which case only the tail is code.
            if in_triple not in line:
                continue
            code_for_scan = line.split(in_triple, 1)[1]
            in_triple = None
        # Look for a triple-quoted string opening in the remaining text.
        # Only the first delimiter kind found on the line is considered.
        for delim in ('"""', "'''"):
            if delim not in code_for_scan:
                continue
            if code_for_scan.count(delim) % 2 == 1:
                # Odd count — a multi-line block starts here. Scan only the
                # part BEFORE the opening delimiter.
                code_for_scan = code_for_scan.split(delim, 1)[0]
                in_triple = delim
            else:
                # Even count — the string opens and closes on this line.
                # Keep the chunks outside the delimiters (even indices) and
                # drop the quoted prose in between.
                code_for_scan = "".join(code_for_scan.split(delim)[::2])
            break

        if SUPPRESS_MARKER.search(line):
            continue
        # Skip if the line has an obvious guard — e.g. hasattr/getattr/
        # shutil.which or a platform check. False negatives are acceptable;
        # the inline suppression marker is the authoritative override.
        if any(hint in line for hint in GUARD_HINTS):
            continue
        code = _strip_code(code_for_scan)
        if not code.strip():
            continue
        for fg in footguns:
            if fg.path_allowlist and any(s in str(path) for s in fg.path_allowlist):
                continue
            match = fg.pattern.search(code)
            if not match:
                continue
            if fg.post_filter is not None:
                try:
                    # The predicate sees the full original line, not just
                    # the stripped code that the regex was run against.
                    if not fg.post_filter(match, line):
                        continue
                except (IndexError, AttributeError):
                    # Post-filter assumed a named group that isn't there — skip.
                    continue
            matches.append((i, line.rstrip(), fg))
    return matches


def get_staged_files() -> list[Path]:
    """List the files staged in the git index, as paths under REPO_ROOT.

    Asks git (run from REPO_ROOT) for added/copied/modified/renamed entries
    only. Returns an empty list when git fails or is not installed.
    """
    try:
        listing = subprocess.check_output(  # windows-footgun: ok — dev-only checker, runs on Linux CI
            ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR"],
            cwd=REPO_ROOT,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return []
    staged: list[Path] = []
    for entry in listing.splitlines():
        if entry.strip():  # ignore blank lines in git's output
            staged.append(REPO_ROOT / entry)
    return staged


def get_diff_files(ref: str) -> list[Path]:
    """List the files changed between *ref* and HEAD, as paths under REPO_ROOT.

    Uses git's three-dot range (``<ref>...HEAD``) and keeps only
    added/copied/modified/renamed entries. Returns an empty list when git
    fails (for example on an unknown ref) or is not installed.
    """
    try:
        listing = subprocess.check_output(  # windows-footgun: ok — dev-only checker, runs on Linux CI
            ["git", "diff", f"{ref}...HEAD", "--name-only", "--diff-filter=ACMR"],
            cwd=REPO_ROOT,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return []
    changed: list[Path] = []
    for entry in listing.splitlines():
        if entry.strip():  # ignore blank lines in git's output
            changed.append(REPO_ROOT / entry)
    return changed


def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command line (without the program name).

    The resulting namespace has ``paths`` (list of Path, possibly empty),
    ``all`` (bool), ``diff`` (git ref or None) and ``list`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Flag Windows cross-platform footguns in Python code."
    )
    # Positional targets; zero or more.
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        help="Specific files/dirs to scan (default: staged changes).",
    )
    # Alternative ways of choosing what to scan.
    parser.add_argument(
        "--all",
        action="store_true",
        help="Scan the full repository (hermes_cli/, gateway/, tools/, cron/, etc.).",
    )
    parser.add_argument(
        "--diff",
        metavar="REF",
        help="Scan files changed vs. the given git ref (e.g. --diff main).",
    )
    # Informational mode: no scanning.
    parser.add_argument(
        "--list",
        action="store_true",
        help="List all known footgun rules and exit.",
    )
    namespace = parser.parse_args(argv)
    return namespace


def print_rules() -> None:
    """Print a numbered catalogue of every rule to stdout.

    The regex rules from FOOTGUNS come first, in list order, followed by
    the AST-based subprocess rule, which is not part of that list.
    """
    print("Known Windows footguns checked by this script:\n")
    catalogue = [(fg.name, fg.message, fg.fix) for fg in FOOTGUNS]
    # AST-based rule (not in the regex FOOTGUNS list).
    catalogue.append(
        (
            f"{SUBPROCESS_FOOTGUN_NAME}  (AST-based)",
            SUBPROCESS_FOOTGUN_MESSAGE,
            SUBPROCESS_FOOTGUN_FIX,
        )
    )
    for number, (title, message, fix) in enumerate(catalogue, start=1):
        print(f"{number:2}. {title}")
        print(f"    {message}")
        print(f"    Fix: {fix}")
        print()


def main(argv: list[str]) -> int:
    """Run the checker and return the process exit status.

    Target selection, in priority order: ``--all`` (a fixed set of repo
    packages), ``--diff REF``, explicit paths, then staged files.

    Returns:
        0 when no unsuppressed footgun was found (including ``--list`` and
        the "nothing staged" case), 1 otherwise.
    """
    # Windows terminals default to cp1252, which can't encode the ✓/✗
    # characters used in the output. Reconfigure streams to UTF-8 so the
    # script works correctly on the very platform it is designed to help.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8")

    args = parse_args(argv)

    if args.list:
        print_rules()
        return 0

    if args.all:
        # Scan main Python packages + scripts
        roots = [
            REPO_ROOT / "hermes_cli",
            REPO_ROOT / "gateway",
            REPO_ROOT / "tools",
            REPO_ROOT / "cron",
            REPO_ROOT / "agent",
            REPO_ROOT / "plugins",
            REPO_ROOT / "scripts",
            REPO_ROOT / "acp_adapter",
            REPO_ROOT / "acp_registry",
        ]
        roots = [r for r in roots if r.exists()]
    elif args.diff:
        roots = get_diff_files(args.diff)
    elif args.paths:
        roots = [p.resolve() for p in args.paths]
    else:
        # Default: staged changes
        roots = get_staged_files()
        if not roots:
            print(
                "No staged files to scan. Pass --all for a full-repo scan, "
                "--diff <ref> for a range diff, or paths explicitly.",
                file=sys.stderr,
            )
            return 0

    total_matches = 0
    files_scanned = 0
    for path in iter_files(roots):
        files_scanned += 1
        matches = scan_file(path, FOOTGUNS)
        if not matches:
            continue
        # Report repo-relative paths where possible. relative_to raises
        # ValueError for files outside REPO_ROOT; show those by full path
        # rather than aborting the whole run.
        try:
            rel = path.relative_to(REPO_ROOT).as_posix()
        except ValueError:
            rel = path.as_posix()
        for lineno, line, fg in matches:
            print(f"{rel}:{lineno}: [{fg.name}]")
            print(f"    {line.strip()}")
            print(f"    — {fg.message}")
            # Only the first line of a multi-line fix fits the compact report.
            print(f"    Fix: {fg.fix.splitlines()[0]}")
            print()
            total_matches += 1

    if total_matches:
        print(
            f"\n✗ {total_matches} Windows footgun(s) found across "
            f"{files_scanned} file(s) scanned.",
            file=sys.stderr,
        )
        print(
            "  If an individual match is a false positive or intentionally "
            "platform-gated, suppress it with `# windows-footgun: ok` on "
            "the same line.\n  Run with --list to see all rules.",
            file=sys.stderr,
        )
        return 1

    print(
        f"✓ No Windows footguns found ({files_scanned} file(s) scanned)."
    )
    return 0


# Script entry point: pass the command-line arguments (minus the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))