hermes-agent/scripts/check-windows-footguns.py
Teknium 2ecca1e7d3
fix(windows): capture is not a no-window boundary; route flashing spawns through chokepoint (#53829)
Follow-up to #53791 addressing review feedback: the footgun checker treated
capture_output=/stdout=/stderr=/check_output as proof a subprocess can't pop a
Windows console. That invariant is false — stream redirection controls where a
child's output goes, not whether a console is allocated. From a console-less
parent (Desktop/Electron, pythonw.exe, detached gateway/cron) a console-subsystem
child still flashes a window even when fully captured.

- check-windows-footguns.py: capture/redirect/check_output is no longer a blanket
  safe-pass. Added _WINDOWS_FLASHING_PROGRAMS (git/gh/npm/node/python/uv/ffmpeg/
  docker/powershell/…); calls to those are flagged even when captured. Non-flashing
  programs keep the capture exemption (no 271-site noise). _subprocess_compat.run/
  popen calls are inherently safe (wrapper injects CREATE_NO_WINDOW).
- Routed the 35 genuine flashing git/gh/npm/uv/ffmpeg/docker spawns through the
  _subprocess_compat.run/popen chokepoint (Brooklyn's wrapper from #53810) — the
  durable fix, not per-site annotations. cmd.exe /c start stays # ok (intentional).
- Updated tests + CONTRIBUTING.md rule #17 to the corrected invariant.
2026-06-27 14:49:41 -07:00

898 lines
33 KiB
Python

#!/usr/bin/env python3
"""
Grep-based checker for Windows cross-platform footguns.
Flags common patterns that break silently on Windows. Run before PRs —
cheap, fast, catches regressions in a codebase that runs on three OSes.
Usage:
# Scan staged changes (default when run from a git checkout)
python scripts/check-windows-footguns.py
# Scan the full tree (full-repo audit)
python scripts/check-windows-footguns.py --all
# Scan a specific file or directory
python scripts/check-windows-footguns.py path/to/file.py path/to/dir/
# Scan only modified files vs. main
python scripts/check-windows-footguns.py --diff main
Exit status:
0 — no Windows footguns found (or all matches suppressed)
1 — at least one unsuppressed match
Suppress an intentional use (e.g. tests or platform-gated code) with:
os.kill(pid, 0) # windows-footgun: ok — only called on POSIX
"""
from __future__ import annotations
import argparse
import ast
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
REPO_ROOT = Path(__file__).resolve().parent.parent
SUPPRESS_MARKER = re.compile(r"#\s*windows-footgun\s*:\s*ok\b", re.IGNORECASE)
# Line-level guard hints. If a line contains any of these tokens, we assume
# the programmer wrote the line in full awareness of the Windows pitfall —
# e.g. `if hasattr(os, 'setsid'): ... os.setsid()`, or the classic
# `getattr(signal, 'SIGKILL', signal.SIGTERM)`, or `shutil.which("wmic")`.
# False negatives are fine here — the inline `# windows-footgun: ok` marker
# is still the authoritative suppression. This is just to reduce the noise
# floor on obviously-guarded lines so the signal-to-noise stays useful.
GUARD_HINTS = (
"hasattr(os,",
"hasattr(signal,",
"getattr(os,",
"getattr(signal,",
"shutil.which(",
"if platform.system() != \"Windows\"",
"if platform.system() != 'Windows'",
"if sys.platform == \"win32\"",
"if sys.platform != \"win32\"",
"if sys.platform == 'win32'",
"if sys.platform != 'win32'",
"IS_WINDOWS",
"is_windows",
)
# Dirs we never scan.
EXCLUDED_DIRS = {
".git",
"node_modules",
"venv",
".venv",
"__pycache__",
"build",
"dist",
".tox",
".mypy_cache",
".pytest_cache",
"site-packages",
"website/build",
"optional-skills", # external skills
}
# File globs we never scan (beyond the dirs above).
EXCLUDED_SUFFIXES = {
".pyc",
".pyo",
".so",
".dll",
".exe",
".png",
".jpg",
".gif",
".ico",
".svg",
".mp4",
".mp3",
".wav",
".pdf",
".zip",
".tar",
".gz",
".whl",
".lock",
".min.js",
".min.css",
}
# Files we never scan (self-referential — this script mentions the
# patterns it detects — and the CONTRIBUTING docs that list them).
EXCLUDED_FILES = {
"scripts/check-windows-footguns.py",
"CONTRIBUTING.md",
}
@dataclass
class Footgun:
"""A Windows cross-platform footgun pattern."""
name: str
pattern: re.Pattern
message: str
fix: str
# If set, matches in files/paths containing any of these substrings are
# silently ignored (e.g. tests that legitimately exercise the footgun
# behind a platform guard). Prefer `# windows-footgun: ok` inline
# suppression over this list; only use path_allowlist for whole files
# that are inherently tests of the footgun itself.
path_allowlist: tuple[str, ...] = ()
# Optional post-match predicate. Takes the re.Match and returns True
# if the match is a REAL footgun (not a false positive). Use this when
# the regex can't fully distinguish (e.g. open() where mode may contain
# "b" for binary, or the line may have `encoding=` elsewhere).
post_filter: "callable | None" = None
FOOTGUNS: list[Footgun] = [
Footgun(
name="open() without encoding= on text mode",
# Match builtins.open() specifically — NOT os.open(), .open()
# method calls (Path.open, tarfile.open, zf.open, webbrowser.open,
# Image.open, wave.open, etc), or `async def open()` method
# definitions. The pattern requires a start-of-identifier boundary
# before `open(` so `os.open`, `.open`, `def open` are all skipped.
# Note: Path.open() is ALSO affected by the encoding default, but
# rather than flagging all `.open(` (huge noise), we require an
# explicit builtins-style open() call. Path.open() is rare in the
# codebase compared to open() and can be audited separately.
pattern=re.compile(
r"""(?:^|[\s\(,;=])(?<![.\w])open\s*\(\s*[^,)]+\s*(?:,\s*['"](?P<mode>[^'"]*)['"])?"""
),
message=(
"open() without an explicit encoding= uses the platform default "
"(UTF-8 on POSIX, cp1252/mbcs on Windows) — files round-tripped "
"between hosts get mojibake. Always pass encoding='utf-8' for "
"text files, or use open(path, 'rb')/'wb' for binary."
),
fix=(
"open(path, 'r', encoding='utf-8') # or 'utf-8-sig' if the "
"file may have a BOM"
),
# Filter: only flag if mode is missing-or-text AND the line doesn't
# already pass encoding=. Skip binary mode (contains "b").
post_filter=lambda m, line: (
"b" not in (m.group("mode") or "")
and "encoding=" not in line
and "encoding =" not in line
# Skip `def open(` and `async def open(` (method definitions)
and not line.lstrip().startswith("def ")
and not line.lstrip().startswith("async def ")
# Skip open(path, **kwargs) patterns — encoding may be in the dict.
# Too expensive to trace; require the author to set encoding in
# the dict and trust them (or they can add a # windows-footgun: ok).
and "**" not in line
),
),
Footgun(
name="os.kill(pid, 0)",
pattern=re.compile(r"\bos\.kill\s*\(\s*[^,]+,\s*0\s*\)"),
message=(
"os.kill(pid, 0) is NOT a no-op on Windows — it sends "
"CTRL_C_EVENT to the target's console process group, "
"hard-killing the target and potentially unrelated siblings. "
"See bpo-14484."
),
fix=(
"Use psutil.pid_exists(pid) (psutil is a core dependency). "
"Or gateway.status._pid_exists(pid) for the hermes wrapper "
"with a stdlib fallback."
),
),
Footgun(
name="bare os.setsid",
pattern=re.compile(r"(?<!hasattr\()\bos\.setsid\b"),
message=(
"os.setsid does not exist on Windows and raises "
"AttributeError. Subprocesses that need detachment on "
"Windows use creationflags instead."
),
fix=(
"if platform.system() != 'Windows':\n"
" kwargs['preexec_fn'] = os.setsid\n"
"else:\n"
" kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP"
),
),
Footgun(
name="bare os.killpg",
pattern=re.compile(r"\bos\.killpg\b"),
message="os.killpg does not exist on Windows.",
fix=(
"Use psutil for cross-platform process-tree kill:\n"
" p = psutil.Process(pid)\n"
" for c in p.children(recursive=True): c.kill()\n"
" p.kill()"
),
),
Footgun(
name="bare os.getuid / os.geteuid / os.getgid",
pattern=re.compile(r"\bos\.(?:getuid|geteuid|getgid|getegid)\b"),
message=(
"os.getuid / os.geteuid / os.getgid do not exist on Windows "
"and raise AttributeError at import time if referenced."
),
fix=(
"Use getpass.getuser() for the username, or gate with "
"hasattr(os, 'getuid')."
),
),
Footgun(
name="bare os.fork",
pattern=re.compile(r"(?<!hasattr\()\bos\.fork\s*\("),
message="os.fork does not exist on Windows.",
fix=(
"Use subprocess.Popen for daemonization, or guard with "
"hasattr(os, 'fork') and a Windows fallback path."
),
),
Footgun(
name="bare signal.SIGKILL",
pattern=re.compile(r"\bsignal\.SIGKILL\b"),
message=(
"signal.SIGKILL does not exist on Windows and raises "
"AttributeError at import time."
),
fix="Use getattr(signal, 'SIGKILL', signal.SIGTERM).",
),
Footgun(
name="bare signal.SIGHUP / SIGUSR1 / SIGUSR2 / SIGALRM / SIGCHLD / SIGPIPE / SIGQUIT",
pattern=re.compile(
r"\bsignal\.(?:SIGHUP|SIGUSR1|SIGUSR2|SIGALRM|SIGCHLD|SIGPIPE|SIGQUIT)\b"
),
message=(
"These POSIX signals don't exist on Windows; referencing "
"them raises AttributeError at import time."
),
fix=(
"Use getattr(signal, 'SIGXXX', None) and check for None "
"before using, or gate the whole block behind a platform check."
),
),
Footgun(
name="subprocess shebang script invocation",
pattern=re.compile(
r"subprocess\.(?:run|Popen|call|check_output|check_call)\s*\(\s*\[\s*['\"]\./"
),
message=(
"Running a script via './scriptname' doesn't work on Windows — "
"shebang lines aren't honored. CreateProcessW can't execute "
"bash/python scripts without an explicit interpreter."
),
fix="Use [sys.executable, 'scriptname.py', ...] explicitly.",
),
Footgun(
name="wmic invocation without shutil.which guard",
# Match wmic appearing as a subprocess argument — NOT the
# shutil.which("wmic") guard pattern itself. Looks for wmic in a
# list or as first arg of subprocess.run/Popen.
pattern=re.compile(
r"""(?:subprocess\.\w+\s*\(\s*\[\s*['"]wmic['"]|['"]wmic\.exe['"])"""
),
message=(
"wmic was removed in Windows 10 21H1 and later. Always "
"gate with shutil.which('wmic') and fall back to "
"PowerShell (Get-CimInstance Win32_Process)."
),
fix=(
"if shutil.which('wmic'):\n"
" ... wmic path ...\n"
"else:\n"
" subprocess.run(['powershell', '-NoProfile', '-Command',\n"
" 'Get-CimInstance Win32_Process | ...'])"
),
),
Footgun(
name="hardcoded ~/Desktop (OneDrive trap)",
pattern=re.compile(
r"""['"](?:~|~/|[A-Z]:[/\\]Users[/\\][^/\\'"]+[/\\])Desktop\b"""
),
message=(
"When OneDrive Backup is enabled on Windows, the real Desktop "
"is at %USERPROFILE%\\OneDrive\\Desktop, not %USERPROFILE%\\"
"Desktop (which exists as an empty husk)."
),
fix=(
"On Windows, resolve via ctypes + SHGetKnownFolderPath, or "
"read the Shell Folders registry key, or run PowerShell "
"[Environment]::GetFolderPath('Desktop')."
),
),
Footgun(
name="asyncio add_signal_handler without try/except",
pattern=re.compile(r"\.add_signal_handler\s*\("),
message=(
"loop.add_signal_handler raises NotImplementedError on "
"Windows — always wrap in try/except or gate with a "
"platform check."
),
fix=(
"try:\n"
" loop.add_signal_handler(sig, handler, sig)\n"
"except NotImplementedError:\n"
" pass # Windows asyncio doesn't support signal handlers"
),
),
]
# -----------------------------------------------------------------------------
# AST-based rule: subprocess calls that flash a console window on Windows
# -----------------------------------------------------------------------------
#
# This is the high-volume Windows complaint: every `subprocess.run(...)` /
# `subprocess.Popen(...)` of a console program on Windows briefly flashes a
# cmd window unless the child either (a) inherits the parent's stdio handles
# via output redirection, or (b) is spawned with a no-window creationflag
# (CREATE_NO_WINDOW / DETACHED_PROCESS). The fix landscape already exists in
# `hermes_cli/_subprocess_compat.py` (windows_hide_flags / windows_detach_*),
# but nothing stopped new bare calls from re-introducing the popup — so the
# bug kept coming back PR after PR. This rule is the chokepoint.
#
# It is AST-based (not regex) because the deciding factor — whether the call
# redirects stdout/stderr — frequently lives several lines below the
# `subprocess.run(` opener, which a line-oriented regex cannot see.
#
# Comprehensive, not restrictive: a call is only flagged when it can ACTUALLY
# create a new console. Calls that capture or redirect output (capture_output=,
# stdout=, stderr=), or use check_output (which always captures), cannot pop a
# window and are silently ignored — no suppression comment needed. The intent
# is that the overwhelming majority of subprocess calls require no change at
# all; only the genuine window-spawners do.
# The subprocess functions that can spawn a child process.
_SUBPROCESS_FUNCS = frozenset({"run", "Popen", "call", "check_call", "check_output"})
# Module aliases we recognise as the stdlib subprocess module.
_SUBPROCESS_ALIASES = frozenset({"subprocess", "sp"})
# Executables that simply do not exist on Windows. A subprocess call whose
# program is one of these can never create a Windows console window, so the
# no-window flag is irrelevant — flagging them would force pointless
# suppression comments on macOS/Linux-only service-management and packaging
# code (launchctl, systemctl, brew, codesign …). Matched against the FIRST
# element of a list/tuple argv literal only; anything dynamic still gets
# flagged (we can't prove it's POSIX-only).
_POSIX_ONLY_PROGRAMS = frozenset(
{
"launchctl",
"systemctl",
"journalctl",
"loginctl",
"osascript",
"codesign",
"xattr",
"defaults",
"brew",
"apt",
"apt-get",
"dpkg",
"pacman",
"dnf",
"yum",
"sudo",
"open", # macOS `open`
"tail",
"sw_vers",
"scutil",
"diskutil",
"hdiutil",
"dscl",
}
)
# Cross-platform console programs that DO exist on Windows and allocate a
# console window when spawned from a console-less parent (Desktop/Electron,
# pythonw.exe, a detached gateway/cron). For these, capturing or redirecting
# stdio is NOT a safety boundary — stream redirection controls where the
# child's output goes, it does NOT suppress console *allocation*. Only
# CREATE_NO_WINDOW (or routing through hermes_cli._subprocess_compat.run/popen,
# which injects it) prevents the flash. So a call to one of these is flagged
# even with capture_output=/stdout=/stderr= set. Matched against the first
# element of a literal argv (bare name or .exe, path-stripped).
_WINDOWS_FLASHING_PROGRAMS = frozenset(
{
"git",
"gh",
"node",
"npm",
"npx",
"yarn",
"pnpm",
"python",
"python3",
"pythonw",
"pip",
"uv",
"uvx",
"ffmpeg",
"ffprobe",
"ollama",
"docker",
"cmd",
"cmd.exe",
"powershell",
"powershell.exe",
"pwsh",
"where",
"taskkill",
"schtasks",
"wmic",
"tasklist",
"netstat",
}
)
SUBPROCESS_FOOTGUN_NAME = "subprocess without Windows no-window flag"
SUBPROCESS_FOOTGUN_MESSAGE = (
"subprocess.run/Popen/call on Windows flashes a console (cmd) window "
"unless the child inherits stdio (output is captured/redirected) or is "
"spawned with a no-window creationflag. This is the #1 source of Windows "
"'terminal popup' bug reports."
)
SUBPROCESS_FOOTGUN_FIX = (
"Pass creationflags=windows_hide_flags() (for short-lived/captured spawns) "
"or **windows_detach_popen_kwargs() (for detached daemons) from "
"hermes_cli._subprocess_compat (both no-op on POSIX). If a visible window "
"is intended (interactive launch, shell hand-off), add "
"'# windows-footgun: ok' on the call line."
)
def _call_attr_name(node: ast.Call) -> str | None:
"""Return 'run'/'Popen'/... when node is subprocess.<func>(...), else None."""
f = node.func
if not isinstance(f, ast.Attribute):
return None
if f.attr not in _SUBPROCESS_FUNCS:
return None
mod = getattr(f.value, "id", None)
if mod not in _SUBPROCESS_ALIASES:
return None
return f.attr
def _suppresses_window(node: ast.Call, func_name: str) -> bool:
"""True if this subprocess call cannot create a new console window.
The honest invariant (corrected after review of PR #53791): capturing or
redirecting stdio is NOT the same as suppressing console allocation. From a
console-less parent (Desktop/Electron, pythonw.exe, a detached gateway/cron)
a console-subsystem child still allocates — and flashes — a window even with
capture_output=True. Only CREATE_NO_WINDOW (or routing through
hermes_cli._subprocess_compat.run/popen, which injects it) prevents it.
So capture/stdout/stderr/check_output is treated as window-safe ONLY when the
program is not a known cross-platform console exe that flashes on Windows
(see _WINDOWS_FLASHING_PROGRAMS — git/gh/npm/node/python/uv/ffmpeg/docker/…).
For those, even a fully-captured call is flagged.
Always window-safe regardless of program:
* creationflags=... — author is already managing the console
* **<spread> — kwargs may carry a _subprocess_compat helper;
flag-via-spread is the recommended fix, so we
must not penalise it.
* POSIX-only program — can't run on Windows, can't flash.
Conditionally safe (only when NOT a known flashing program):
* check_output / capture_output= / stdout= / stderr=
"""
explicit = {kw.arg for kw in node.keywords if kw.arg}
if "creationflags" in explicit:
return True
if any(kw.arg is None for kw in node.keywords): # **kwargs spread
return True
if _is_posix_only_program(node):
return True
# Capture/redirect is only a safety boundary for programs that don't
# allocate a Windows console — NOT for git/npm/node/python/ffmpeg/etc.
if not _is_windows_flashing_program(node):
if func_name == "check_output":
return True
if explicit & {"stdout", "stderr", "capture_output"}:
return True
return False
def _argv_head(node: ast.Call) -> str | None:
"""Return the path-stripped first argv element if it's a string literal."""
if not node.args:
return None
first = node.args[0]
if isinstance(first, (ast.List, ast.Tuple)) and first.elts:
head = first.elts[0]
if isinstance(head, ast.Constant) and isinstance(head.value, str):
return head.value.rsplit("/", 1)[-1].rsplit("\\", 1)[-1]
return None
def _is_windows_flashing_program(node: ast.Call) -> bool:
"""True if the call's program is a known cross-platform console exe that
allocates a Windows console window (so capture is NOT a safe boundary)."""
prog = _argv_head(node)
return prog is not None and prog in _WINDOWS_FLASHING_PROGRAMS
def _is_posix_only_program(node: ast.Call) -> bool:
"""True if the call's program is a statically-known POSIX-only executable.
Only inspects a literal list/tuple first arg whose first element is a
string constant (e.g. ``["launchctl", "bootout", target]``). Dynamic
argv (variables, f-strings) is treated as unknown and still flagged.
"""
if not node.args:
return False
first = node.args[0]
if isinstance(first, (ast.List, ast.Tuple)) and first.elts:
head = first.elts[0]
if isinstance(head, ast.Constant) and isinstance(head.value, str):
prog = head.value.rsplit("/", 1)[-1]
return prog in _POSIX_ONLY_PROGRAMS
return False
def scan_subprocess_window_footguns(
path: Path, text: str
) -> list[tuple[int, str, Footgun]]:
"""AST pass: flag subprocess calls that can flash a Windows console.
Honours the same `# windows-footgun: ok` line suppression as the regex
rules. Returns the same (lineno, line, Footgun) shape so results merge
cleanly into scan_file's output.
"""
try:
tree = ast.parse(text)
except SyntaxError:
return []
lines = text.splitlines()
rule = Footgun(
name=SUBPROCESS_FOOTGUN_NAME,
pattern=re.compile(r"^$"), # unused; AST-driven
message=SUBPROCESS_FOOTGUN_MESSAGE,
fix=SUBPROCESS_FOOTGUN_FIX,
)
out: list[tuple[int, str, Footgun]] = []
for node in ast.walk(tree):
if not isinstance(node, ast.Call):
continue
func_name = _call_attr_name(node)
if func_name is None:
continue
if _suppresses_window(node, func_name):
continue
lineno = node.lineno
line = lines[lineno - 1] if 0 <= lineno - 1 < len(lines) else ""
# Inline suppression — check the opener line AND, for multi-line calls,
# any line in the call's span (a developer may mark the closing paren).
end = getattr(node, "end_lineno", lineno) or lineno
span = lines[lineno - 1 : end]
if any(SUPPRESS_MARKER.search(l) for l in span):
continue
out.append((lineno, line.rstrip(), rule))
return out
def should_scan_file(path: Path) -> bool:
"""Return True if this file is in scope for the checker."""
# Skip the excluded dirs
parts = set(path.parts)
if parts & EXCLUDED_DIRS:
return False
# Skip excluded suffixes
for suffix in EXCLUDED_SUFFIXES:
if str(path).endswith(suffix):
return False
# Skip self and docs that intentionally mention the patterns
rel = path.relative_to(REPO_ROOT).as_posix()
if rel in EXCLUDED_FILES:
return False
# Only scan text files (rough heuristic — .py, .md, .sh, .ps1, .yaml, etc.)
if path.suffix in {".py", ".pyw", ".pyi"}:
return True
# Other file types are read but only Python-specific patterns would match;
# that's fine and cheap to skip.
return False
def iter_files(paths: Iterable[Path]) -> Iterable[Path]:
for p in paths:
if p.is_file():
if should_scan_file(p):
yield p
elif p.is_dir():
for root, dirs, files in os.walk(p):
# prune excluded dirs in-place for speed
dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
for fname in files:
fpath = Path(root) / fname
if should_scan_file(fpath):
yield fpath
def _strip_code(line: str) -> str:
"""Return just the code portion of a line — strip trailing comments and
skip lines that are entirely inside a string literal or comment.
Heuristic only (we don't parse Python); good enough to avoid flagging
our own `# ``os.kill(pid, 0)`` is NOT a no-op` docstring-style comments.
"""
stripped = line.lstrip()
# Line starts with # — entirely a comment.
if stripped.startswith("#"):
return ""
# Remove trailing "# ..." inline comment. Naive — doesn't handle `#`
# inside strings — but on balance reduces noise far more than it adds.
hash_idx = _find_unquoted_hash(line)
if hash_idx is not None:
return line[:hash_idx]
return line
def _find_unquoted_hash(line: str) -> int | None:
"""Index of the first `#` not inside a single/double/triple-quoted string.
Simple state machine — good enough for the 99% case of "code, then
optional trailing comment."
"""
i = 0
n = len(line)
in_s = False # single-quote string
in_d = False # double-quote string
while i < n:
c = line[i]
if c == "\\" and (in_s or in_d) and i + 1 < n:
i += 2
continue
if not in_d and c == "'":
in_s = not in_s
elif not in_s and c == '"':
in_d = not in_d
elif c == "#" and not in_s and not in_d:
return i
i += 1
return None
def scan_file(path: Path, footguns: list[Footgun]) -> list[tuple[int, str, Footgun]]:
"""Return a list of (line_number, line, footgun) for unsuppressed matches."""
try:
text = path.read_text(encoding="utf-8", errors="replace")
except OSError:
return []
matches: list[tuple[int, str, Footgun]] = []
# AST-based rule (subprocess console-window footgun). Runs only on Python
# source; merges into the same result list as the regex rules below.
if path.suffix in {".py", ".pyw", ".pyi"}:
matches.extend(scan_subprocess_window_footguns(path, text))
# Track whether we're inside a triple-quoted string (docstring/raw block).
# Simple state machine — handles both ''' and """, toggled by the FIRST
# triple-quote we see; we don't try to handle nested or f-string cases.
in_triple: str | None = None # None, "'''", or '"""'
for i, line in enumerate(text.splitlines(), start=1):
# Update triple-quote state based on this line's occurrences.
code_for_scan = line
if in_triple:
# We're inside a docstring — skip the whole line's scan.
# Check if it closes here.
if in_triple in line:
# Find the closing delimiter; anything after it is real code.
after = line.split(in_triple, 1)[1]
in_triple = None
code_for_scan = after
else:
continue
# Now check for docstring-open in the (possibly after-triple) portion.
# Scan for the first unescaped '''/""" in the current code_for_scan.
stripped = code_for_scan.strip()
for delim in ('"""', "'''"):
if delim in code_for_scan:
# Count occurrences — even count means single-line docstring,
# odd means we've entered a multi-line one.
count = code_for_scan.count(delim)
if count % 2 == 1:
# Odd — we're now inside the triple-quoted block.
# Scan only the part BEFORE the opening delimiter.
before = code_for_scan.split(delim, 1)[0]
code_for_scan = before
in_triple = delim
break
else:
# Even — entire docstring fits on one line. Strip it
# from the scan text to avoid matching on prose.
parts = code_for_scan.split(delim)
# Keep the "outside" parts (every other chunk, starting
# with index 0) as code, drop the "inside" parts.
code_for_scan = "".join(parts[::2])
break
if SUPPRESS_MARKER.search(line):
continue
# Skip if the line has an obvious guard — e.g. hasattr/getattr/
# shutil.which or a platform check. False negatives are acceptable;
# the inline suppression marker is the authoritative override.
if any(hint in line for hint in GUARD_HINTS):
continue
code = _strip_code(code_for_scan)
if not code.strip():
continue
for fg in footguns:
if fg.path_allowlist and any(s in str(path) for s in fg.path_allowlist):
continue
match = fg.pattern.search(code)
if not match:
continue
if fg.post_filter is not None:
try:
if not fg.post_filter(match, line):
continue
except (IndexError, AttributeError):
# Post-filter assumed a named group that isn't there — skip.
continue
matches.append((i, line.rstrip(), fg))
return matches
def get_staged_files() -> list[Path]:
"""Return paths staged in the current git index. Empty on non-git trees."""
try:
out = subprocess.check_output( # windows-footgun: ok — dev-only checker, runs on Linux CI
["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR"],
cwd=REPO_ROOT,
stderr=subprocess.DEVNULL,
text=True,
)
except (subprocess.CalledProcessError, FileNotFoundError):
return []
return [REPO_ROOT / f for f in out.splitlines() if f.strip()]
def get_diff_files(ref: str) -> list[Path]:
"""Return paths modified vs. the given git ref."""
try:
out = subprocess.check_output( # windows-footgun: ok — dev-only checker, runs on Linux CI
["git", "diff", f"{ref}...HEAD", "--name-only", "--diff-filter=ACMR"],
cwd=REPO_ROOT,
stderr=subprocess.DEVNULL,
text=True,
)
except (subprocess.CalledProcessError, FileNotFoundError):
return []
return [REPO_ROOT / f for f in out.splitlines() if f.strip()]
def parse_args(argv: list[str]) -> argparse.Namespace:
p = argparse.ArgumentParser(
description="Flag Windows cross-platform footguns in Python code."
)
p.add_argument(
"paths",
nargs="*",
type=Path,
help="Specific files/dirs to scan (default: staged changes).",
)
p.add_argument(
"--all",
action="store_true",
help="Scan the full repository (hermes_cli/, gateway/, tools/, cron/, etc.).",
)
p.add_argument(
"--diff",
metavar="REF",
help="Scan files changed vs. the given git ref (e.g. --diff main).",
)
p.add_argument(
"--list",
action="store_true",
help="List all known footgun rules and exit.",
)
return p.parse_args(argv)
def print_rules() -> None:
print("Known Windows footguns checked by this script:\n")
for i, fg in enumerate(FOOTGUNS, start=1):
print(f"{i:2}. {fg.name}")
print(f" {fg.message}")
print(f" Fix: {fg.fix}")
print()
# AST-based rule (not in the regex FOOTGUNS list).
n = len(FOOTGUNS) + 1
print(f"{n:2}. {SUBPROCESS_FOOTGUN_NAME} (AST-based)")
print(f" {SUBPROCESS_FOOTGUN_MESSAGE}")
print(f" Fix: {SUBPROCESS_FOOTGUN_FIX}")
print()
def main(argv: list[str]) -> int:
# Windows terminals default to cp1252, which can't encode the ✓/✗
# characters used in the output. Reconfigure streams to UTF-8 so the
# script works correctly on the very platform it is designed to help.
if hasattr(sys.stdout, "reconfigure"):
sys.stdout.reconfigure(encoding="utf-8")
if hasattr(sys.stderr, "reconfigure"):
sys.stderr.reconfigure(encoding="utf-8")
args = parse_args(argv)
if args.list:
print_rules()
return 0
if args.all:
# Scan main Python packages + scripts
roots = [
REPO_ROOT / "hermes_cli",
REPO_ROOT / "gateway",
REPO_ROOT / "tools",
REPO_ROOT / "cron",
REPO_ROOT / "agent",
REPO_ROOT / "plugins",
REPO_ROOT / "scripts",
REPO_ROOT / "acp_adapter",
REPO_ROOT / "acp_registry",
]
roots = [r for r in roots if r.exists()]
elif args.diff:
roots = get_diff_files(args.diff)
elif args.paths:
roots = [p.resolve() for p in args.paths]
else:
# Default: staged changes
roots = get_staged_files()
if not roots:
print(
"No staged files to scan. Pass --all for a full-repo scan, "
"--diff <ref> for a range diff, or paths explicitly.",
file=sys.stderr,
)
return 0
total_matches = 0
files_scanned = 0
for path in iter_files(roots):
files_scanned += 1
matches = scan_file(path, FOOTGUNS)
for lineno, line, fg in matches:
rel = path.relative_to(REPO_ROOT).as_posix()
print(f"{rel}:{lineno}: [{fg.name}]")
print(f" {line.strip()}")
print(f"{fg.message}")
print(f" Fix: {fg.fix.splitlines()[0]}")
print()
total_matches += 1
if total_matches:
print(
f"\n{total_matches} Windows footgun(s) found across "
f"{files_scanned} file(s) scanned.",
file=sys.stderr,
)
print(
" If an individual match is a false positive or intentionally "
"platform-gated, suppress it with `# windows-footgun: ok` on "
"the same line.\n Run with --list to see all rules.",
file=sys.stderr,
)
return 1
print(
f"✓ No Windows footguns found ({files_scanned} file(s) scanned)."
)
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))