mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
Follow-up to #53791 addressing review feedback: the footgun checker treated capture_output=/stdout=/stderr=/check_output as proof a subprocess can't pop a Windows console. That invariant is false — stream redirection controls where a child's output goes, not whether a console is allocated. From a console-less parent (Desktop/Electron, pythonw.exe, detached gateway/cron) a console-subsystem child still flashes a window even when fully captured. - check-windows-footguns.py: capture/redirect/check_output is no longer a blanket safe-pass. Added _WINDOWS_FLASHING_PROGRAMS (git/gh/npm/node/python/uv/ffmpeg/ docker/powershell/…); calls to those are flagged even when captured. Non-flashing programs keep the capture exemption (no 271-site noise). _subprocess_compat.run/ popen calls are inherently safe (wrapper injects CREATE_NO_WINDOW). - Routed the 35 genuine flashing git/gh/npm/uv/ffmpeg/docker spawns through the _subprocess_compat.run/popen chokepoint (Brooklyn's wrapper from #53810) — the durable fix, not per-site annotations. cmd.exe /c start stays # ok (intentional). - Updated tests + CONTRIBUTING.md rule #17 to the corrected invariant.
898 lines
33 KiB
Python
898 lines
33 KiB
Python
#!/usr/bin/env python3
"""
Grep- and AST-based checker for Windows cross-platform footguns.

Flags common patterns that break silently on Windows. Run before PRs —
cheap, fast, catches regressions in a codebase that runs on three OSes.

Usage:
    # Scan staged changes (default when run from a git checkout)
    python scripts/check-windows-footguns.py

    # Scan the full tree (full-repo audit)
    python scripts/check-windows-footguns.py --all

    # Scan a specific file or directory
    python scripts/check-windows-footguns.py path/to/file.py path/to/dir/

    # Scan only modified files vs. main
    python scripts/check-windows-footguns.py --diff main

    # List every rule the checker knows about
    python scripts/check-windows-footguns.py --list

Exit status:
    0 — no Windows footguns found (or all matches suppressed)
    1 — at least one unsuppressed match

Suppress an intentional use (e.g. tests or platform-gated code) with:
    os.kill(pid, 0)  # windows-footgun: ok — only called on POSIX
"""

from __future__ import annotations

import argparse
import ast
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Iterable

# Repository root, taken as two directory levels above this script's resolved
# location.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Inline suppression marker (``# windows-footgun: ok``). Matched
# case-insensitively, with optional whitespace after ``#`` and around the
# colon; ``ok`` must end at a word boundary.
SUPPRESS_MARKER = re.compile(r"#\s*windows-footgun\s*:\s*ok\b", flags=re.IGNORECASE)

# Line-level guard hints. A line containing any of these substrings is
# assumed to have been written with the Windows pitfall in mind — for example
# `if hasattr(os, 'setsid'): ... os.setsid()`, the classic
# `getattr(signal, 'SIGKILL', signal.SIGTERM)`, or `shutil.which("wmic")` —
# and is skipped by the regex rules. Missing a real problem this way is an
# accepted trade-off: the inline `# windows-footgun: ok` marker remains the
# authoritative suppression, and these hints only keep obviously-guarded
# lines from drowning out the useful findings.
GUARD_HINTS = (
    # Attribute probes on the POSIX-flavoured modules.
    "hasattr(os,",
    "hasattr(signal,",
    "getattr(os,",
    "getattr(signal,",
    # Executable lookups.
    "shutil.which(",
    # Explicit platform checks, in both quoting styles.
    'if platform.system() != "Windows"',
    "if platform.system() != 'Windows'",
    'if sys.platform == "win32"',
    'if sys.platform != "win32"',
    "if sys.platform == 'win32'",
    "if sys.platform != 'win32'",
    # Project-style platform flags / helpers.
    "IS_WINDOWS",
    "is_windows",
)

# Directory names that are never scanned.
# NOTE(review): entries are compared against single path components, so the
# two-component "website/build" entry can never match on its own; the plain
# "build" entry is what excludes that directory.
EXCLUDED_DIRS = {
    # VCS metadata and dependency trees
    ".git",
    "node_modules",
    "site-packages",
    # virtual environments
    "venv",
    ".venv",
    # caches
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    # build output
    "build",
    "dist",
    "website/build",
    # external skills
    "optional-skills",
}

# File-name endings that are never scanned (in addition to the excluded
# directories). Each entry is compared with ``str.endswith`` against the
# whole path string, which is why multi-dot endings such as ".min.js" work.
EXCLUDED_SUFFIXES = {
    # compiled / native binaries
    ".pyc", ".pyo", ".so", ".dll", ".exe",
    # images
    ".png", ".jpg", ".gif", ".ico", ".svg",
    # audio / video
    ".mp4", ".mp3", ".wav",
    # documents and archives
    ".pdf", ".zip", ".tar", ".gz", ".whl",
    # generated text
    ".lock", ".min.js", ".min.css",
}

# Repo-relative POSIX paths that are never scanned: this script itself (it
# necessarily spells out the patterns it detects) and the CONTRIBUTING docs
# that list them.
EXCLUDED_FILES = {"scripts/check-windows-footguns.py", "CONTRIBUTING.md"}


@dataclass
class Footgun:
    """A Windows cross-platform footgun pattern.

    One instance describes one rule: how to detect it (``pattern`` plus the
    optional ``post_filter``), what to tell the user (``message`` / ``fix``),
    and where it must not fire (``path_allowlist``).
    """

    # Short rule name; shown in reports as ``[name]``.
    name: str
    # Compiled regex searched against the code portion of each scanned line.
    pattern: re.Pattern[str]
    # Explanation of why the matched construct breaks on Windows.
    message: str
    # Suggested replacement. May span several lines; the per-match report
    # prints only the first line, ``--list`` prints all of it.
    fix: str
    # If set, matches in files whose path contains any of these substrings
    # are silently ignored (e.g. tests that legitimately exercise the footgun
    # behind a platform guard). Prefer `# windows-footgun: ok` inline
    # suppression over this list; only use path_allowlist for whole files
    # that are inherently tests of the footgun itself.
    path_allowlist: tuple[str, ...] = ()
    # Optional post-match predicate, called as ``post_filter(match, line)``
    # with the re.Match and the full original source line. It returns True if
    # the match is a REAL footgun (not a false positive). Use this when the
    # regex can't fully distinguish (e.g. open() where mode may contain "b"
    # for binary, or the line may have `encoding=` elsewhere).
    post_filter: Callable[[re.Match[str], str], bool] | None = None


def _open_lacks_encoding(m: re.Match[str], line: str) -> bool:
    """Post-filter for the open() rule: True when the match is a real footgun.

    A match is dismissed when the mode string is binary (contains "b"), when
    the line already passes ``encoding=``, when the line is a ``def open(`` /
    ``async def open(`` definition, or when the line spreads ``**kwargs``
    (the encoding may be inside the dict — too expensive to trace, so the
    author is trusted, or can add a ``# windows-footgun: ok``).
    """
    mode = m.group("mode") or ""
    if "b" in mode:
        return False
    if "encoding=" in line or "encoding =" in line:
        return False
    code_start = line.lstrip()
    if code_start.startswith("def ") or code_start.startswith("async def "):
        return False
    return "**" not in line


# The regex-driven rules, applied line by line by scan_file().
FOOTGUNS: list[Footgun] = [
    # --- text-mode open() without an explicit encoding ---------------------
    # Only the builtin open() is targeted — NOT os.open(), `.open()` method
    # calls (Path.open, tarfile.open, zf.open, webbrowser.open, Image.open,
    # wave.open, ...) or `def open()` definitions: the pattern demands a
    # start-of-identifier boundary before `open(`. Path.open() shares the
    # encoding default, but flagging every `.open(` would be very noisy; it
    # is rare enough in the codebase to be audited separately.
    Footgun(
        name="open() without encoding= on text mode",
        pattern=re.compile(
            r"""(?:^|[\s\(,;=])(?<![.\w])open\s*\(\s*[^,)]+\s*(?:,\s*['"](?P<mode>[^'"]*)['"])?"""
        ),
        message=(
            "open() without an explicit encoding= uses the platform default "
            "(UTF-8 on POSIX, cp1252/mbcs on Windows) — files round-tripped "
            "between hosts get mojibake. Always pass encoding='utf-8' for "
            "text files, or use open(path, 'rb')/'wb' for binary."
        ),
        fix=(
            "open(path, 'r', encoding='utf-8') # or 'utf-8-sig' if the "
            "file may have a BOM"
        ),
        post_filter=_open_lacks_encoding,
    ),
    # --- POSIX-only process / identity APIs --------------------------------
    Footgun(
        name="os.kill(pid, 0)",
        pattern=re.compile(r"\bos\.kill\s*\(\s*[^,]+,\s*0\s*\)"),
        message=(
            "os.kill(pid, 0) is NOT a no-op on Windows — it sends "
            "CTRL_C_EVENT to the target's console process group, "
            "hard-killing the target and potentially unrelated siblings. "
            "See bpo-14484."
        ),
        fix=(
            "Use psutil.pid_exists(pid) (psutil is a core dependency). "
            "Or gateway.status._pid_exists(pid) for the hermes wrapper "
            "with a stdlib fallback."
        ),
    ),
    Footgun(
        name="bare os.setsid",
        pattern=re.compile(r"(?<!hasattr\()\bos\.setsid\b"),
        message=(
            "os.setsid does not exist on Windows and raises "
            "AttributeError. Subprocesses that need detachment on "
            "Windows use creationflags instead."
        ),
        fix=(
            "if platform.system() != 'Windows':\n"
            " kwargs['preexec_fn'] = os.setsid\n"
            "else:\n"
            " kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP"
        ),
    ),
    Footgun(
        name="bare os.killpg",
        pattern=re.compile(r"\bos\.killpg\b"),
        message="os.killpg does not exist on Windows.",
        fix=(
            "Use psutil for cross-platform process-tree kill:\n"
            " p = psutil.Process(pid)\n"
            " for c in p.children(recursive=True): c.kill()\n"
            " p.kill()"
        ),
    ),
    Footgun(
        name="bare os.getuid / os.geteuid / os.getgid",
        pattern=re.compile(r"\bos\.(?:getuid|geteuid|getgid|getegid)\b"),
        message=(
            "os.getuid / os.geteuid / os.getgid do not exist on Windows "
            "and raise AttributeError at import time if referenced."
        ),
        fix=(
            "Use getpass.getuser() for the username, or gate with "
            "hasattr(os, 'getuid')."
        ),
    ),
    Footgun(
        name="bare os.fork",
        pattern=re.compile(r"(?<!hasattr\()\bos\.fork\s*\("),
        message="os.fork does not exist on Windows.",
        fix=(
            "Use subprocess.Popen for daemonization, or guard with "
            "hasattr(os, 'fork') and a Windows fallback path."
        ),
    ),
    # --- POSIX-only signals --------------------------------------------------
    Footgun(
        name="bare signal.SIGKILL",
        pattern=re.compile(r"\bsignal\.SIGKILL\b"),
        message=(
            "signal.SIGKILL does not exist on Windows and raises "
            "AttributeError at import time."
        ),
        fix="Use getattr(signal, 'SIGKILL', signal.SIGTERM).",
    ),
    Footgun(
        name="bare signal.SIGHUP / SIGUSR1 / SIGUSR2 / SIGALRM / SIGCHLD / SIGPIPE / SIGQUIT",
        pattern=re.compile(
            r"\bsignal\.(?:SIGHUP|SIGUSR1|SIGUSR2|SIGALRM|SIGCHLD|SIGPIPE|SIGQUIT)\b"
        ),
        message=(
            "These POSIX signals don't exist on Windows; referencing "
            "them raises AttributeError at import time."
        ),
        fix=(
            "Use getattr(signal, 'SIGXXX', None) and check for None "
            "before using, or gate the whole block behind a platform check."
        ),
    ),
    # --- subprocess invocations ----------------------------------------------
    Footgun(
        name="subprocess shebang script invocation",
        pattern=re.compile(
            r"subprocess\.(?:run|Popen|call|check_output|check_call)\s*\(\s*\[\s*['\"]\./"
        ),
        message=(
            "Running a script via './scriptname' doesn't work on Windows — "
            "shebang lines aren't honored. CreateProcessW can't execute "
            "bash/python scripts without an explicit interpreter."
        ),
        fix="Use [sys.executable, 'scriptname.py', ...] explicitly.",
    ),
    # wmic as the head of a subprocess argv list, or a quoted 'wmic.exe'
    # anywhere on the line — but NOT the shutil.which("wmic") guard itself.
    Footgun(
        name="wmic invocation without shutil.which guard",
        pattern=re.compile(
            r"""(?:subprocess\.\w+\s*\(\s*\[\s*['"]wmic['"]|['"]wmic\.exe['"])"""
        ),
        message=(
            "wmic was removed in Windows 10 21H1 and later. Always "
            "gate with shutil.which('wmic') and fall back to "
            "PowerShell (Get-CimInstance Win32_Process)."
        ),
        fix=(
            "if shutil.which('wmic'):\n"
            " ... wmic path ...\n"
            "else:\n"
            " subprocess.run(['powershell', '-NoProfile', '-Command',\n"
            " 'Get-CimInstance Win32_Process | ...'])"
        ),
    ),
    # --- filesystem layout -----------------------------------------------------
    Footgun(
        name="hardcoded ~/Desktop (OneDrive trap)",
        pattern=re.compile(
            r"""['"](?:~|~/|[A-Z]:[/\\]Users[/\\][^/\\'"]+[/\\])Desktop\b"""
        ),
        message=(
            "When OneDrive Backup is enabled on Windows, the real Desktop "
            "is at %USERPROFILE%\\OneDrive\\Desktop, not %USERPROFILE%\\"
            "Desktop (which exists as an empty husk)."
        ),
        fix=(
            "On Windows, resolve via ctypes + SHGetKnownFolderPath, or "
            "read the Shell Folders registry key, or run PowerShell "
            "[Environment]::GetFolderPath('Desktop')."
        ),
    ),
    # --- asyncio -----------------------------------------------------------------
    Footgun(
        name="asyncio add_signal_handler without try/except",
        pattern=re.compile(r"\.add_signal_handler\s*\("),
        message=(
            "loop.add_signal_handler raises NotImplementedError on "
            "Windows — always wrap in try/except or gate with a "
            "platform check."
        ),
        fix=(
            "try:\n"
            " loop.add_signal_handler(sig, handler, sig)\n"
            "except NotImplementedError:\n"
            " pass # Windows asyncio doesn't support signal handlers"
        ),
    ),
]


# -----------------------------------------------------------------------------
# AST-based rule: subprocess calls that flash a console window on Windows
# -----------------------------------------------------------------------------
#
# This is the high-volume Windows complaint: a `subprocess.run(...)` /
# `subprocess.Popen(...)` of a console program briefly flashes a cmd window
# when the parent process has no console of its own (Desktop/Electron,
# pythonw.exe, a detached gateway/cron), unless the child is spawned with a
# no-window creationflag (CREATE_NO_WINDOW / DETACHED_PROCESS). Capturing or
# redirecting the child's output only changes where that output goes; it does
# NOT stop a console from being allocated. The fix landscape already exists in
# `hermes_cli/_subprocess_compat.py` (windows_hide_flags / windows_detach_*),
# but nothing stopped new bare calls from re-introducing the popup — so the
# bug kept coming back PR after PR. This rule is the chokepoint.
#
# It is AST-based (not regex) because the deciding factors — the keyword
# arguments of the call (creationflags=, a ** spread, capture_output=,
# stdout=, stderr=) — frequently live several lines below the
# `subprocess.run(` opener, which a line-oriented regex cannot see.
#
# Comprehensive, not restrictive. A call is never flagged when it passes
# creationflags=, spreads **kwargs, or runs a statically-known POSIX-only
# program. Calls that capture or redirect output (capture_output=, stdout=,
# stderr=) or use check_output are also ignored — no suppression comment
# needed — EXCEPT when the program is a known Windows console program (see
# _WINDOWS_FLASHING_PROGRAMS), which is flagged even when fully captured. The
# intent is that the overwhelming majority of subprocess calls require no
# change at all; only the genuine window-spawners do.

# Attribute names on the subprocess module that spawn a child process.
_SUBPROCESS_FUNCS = frozenset(("run", "Popen", "call", "check_call", "check_output"))
# Names under which the stdlib subprocess module is recognised at a call site.
_SUBPROCESS_ALIASES = frozenset(("subprocess", "sp"))

# Executables that simply do not exist on Windows. A subprocess call that
# runs one of them can never create a Windows console window, so demanding a
# no-window flag would only force pointless suppression comments onto
# macOS/Linux-only service-management and packaging code. Only the FIRST
# element of a literal list/tuple argv is compared against this set; a
# dynamic argv cannot be proven POSIX-only and is still flagged.
_POSIX_ONLY_PROGRAMS = frozenset(
    {
        # service management / logging
        "launchctl", "systemctl", "journalctl", "loginctl",
        # macOS tooling
        "osascript", "codesign", "xattr", "defaults", "sw_vers", "scutil",
        "diskutil", "hdiutil", "dscl",
        "open",  # macOS `open`
        # package managers
        "brew", "apt", "apt-get", "dpkg", "pacman", "dnf", "yum",
        # misc POSIX utilities
        "sudo", "tail",
    }
)

# Cross-platform console programs that DO exist on Windows and allocate a
# console window when spawned from a console-less parent (Desktop/Electron,
# pythonw.exe, a detached gateway/cron). For these, capturing or redirecting
# stdio is NOT a safety boundary: redirection decides where the child's
# output goes, not whether a console is *allocated*. Only CREATE_NO_WINDOW
# (or routing through hermes_cli._subprocess_compat.run/popen, which injects
# it) prevents the flash, so a call to one of these is flagged even with
# capture_output=/stdout=/stderr= set. The set is consulted with the
# path-stripped first element of a literal argv.
_WINDOWS_FLASHING_PROGRAMS = frozenset(
    {
        # version control
        "git", "gh",
        # JavaScript toolchain
        "node", "npm", "npx", "yarn", "pnpm",
        # Python toolchain
        "python", "python3", "pythonw", "pip", "uv", "uvx",
        # media / models / containers
        "ffmpeg", "ffprobe", "ollama", "docker",
        # shells
        "cmd", "cmd.exe", "powershell", "powershell.exe", "pwsh",
        # Windows system utilities
        "where", "taskkill", "schtasks", "wmic", "tasklist", "netstat",
    }
)

# Report texts for the AST-based subprocess rule. The message states the
# corrected invariant: capturing/redirecting output is not what prevents the
# console flash — a no-window creationflag is.
SUBPROCESS_FOOTGUN_NAME = "subprocess without Windows no-window flag"
SUBPROCESS_FOOTGUN_MESSAGE = (
    "subprocess.run/Popen/call of a console program on Windows flashes a "
    "console (cmd) window when the parent process has no console, unless the "
    "child is spawned with a no-window creationflag. Capturing or "
    "redirecting output does not prevent the flash for known console "
    "programs (git, npm, python, ...). This is the #1 source of Windows "
    "'terminal popup' bug reports."
)
# Kept on a single line: the per-match report prints only the first line.
SUBPROCESS_FOOTGUN_FIX = (
    "Route the call through hermes_cli._subprocess_compat.run/popen, or pass "
    "creationflags=windows_hide_flags() (for short-lived/captured spawns) "
    "or **windows_detach_popen_kwargs() (for detached daemons) from "
    "hermes_cli._subprocess_compat (both no-op on POSIX). If a visible window "
    "is intended (interactive launch, shell hand-off), add "
    "'# windows-footgun: ok' on the call line."
)


def _call_attr_name(node: ast.Call) -> str | None:
    """Identify a ``subprocess.<func>(...)`` call.

    Returns the attribute name ('run', 'Popen', ...) when the callee is an
    attribute access whose name is in _SUBPROCESS_FUNCS and whose base is a
    bare name listed in _SUBPROCESS_ALIASES; otherwise returns None.
    """
    target = node.func
    is_subprocess_call = (
        isinstance(target, ast.Attribute)
        and target.attr in _SUBPROCESS_FUNCS
        # The base must be a plain Name (it has ``.id``); anything else,
        # such as a nested attribute, yields None here and is rejected.
        and getattr(target.value, "id", None) in _SUBPROCESS_ALIASES
    )
    return target.attr if is_subprocess_call else None


def _suppresses_window(node: ast.Call, func_name: str) -> bool:
    """Decide whether a subprocess call is treated as unable to flash a console.

    Capturing or redirecting stdio is not the same as suppressing console
    allocation: from a console-less parent (Desktop/Electron, pythonw.exe, a
    detached gateway/cron) a console-subsystem child still allocates — and
    flashes — a window even with capture_output=True. Only CREATE_NO_WINDOW
    (or routing through hermes_cli._subprocess_compat.run/popen, which
    injects it) prevents that.

    Unconditionally safe:
      * ``creationflags=...`` — the author is already managing the console.
      * a ``**`` spread       — the kwargs may carry a _subprocess_compat
                                helper; that is the recommended fix, so it
                                must not be penalised.
      * a POSIX-only program  — cannot run on Windows, cannot flash.

    Safe only when the program is NOT in _WINDOWS_FLASHING_PROGRAMS:
      * ``check_output`` / ``capture_output=`` / ``stdout=`` / ``stderr=``

    Args:
        node: The ``ast.Call`` for the subprocess invocation.
        func_name: The subprocess function name ('run', 'Popen', ...).

    Returns:
        True when the call should not be reported, False otherwise.
    """
    named: set[str] = set()
    has_spread = False
    for kw in node.keywords:
        if kw.arg is None:  # ``**mapping`` has no keyword name
            has_spread = True
        elif kw.arg:
            named.add(kw.arg)

    if "creationflags" in named or has_spread:
        return True
    if _is_posix_only_program(node):
        return True
    # For git/npm/node/python/ffmpeg/etc. capture or redirection is not a
    # safety boundary, so nothing below can exempt the call.
    if _is_windows_flashing_program(node):
        return False
    if func_name == "check_output":
        return True
    return bool(named & {"stdout", "stderr", "capture_output"})


def _argv_head(node: ast.Call) -> str | None:
    """Return the program name from a literal argv, with any directory removed.

    Only a list/tuple literal as the first positional argument whose first
    element is a string constant is understood; both ``/`` and ``\\`` count
    as path separators. Every other shape (no arguments, a variable, an
    empty list, a non-string head) yields None.
    """
    argv = node.args[0] if node.args else None
    if not isinstance(argv, (ast.List, ast.Tuple)) or not argv.elts:
        return None
    head = argv.elts[0]
    if not (isinstance(head, ast.Constant) and isinstance(head.value, str)):
        return None
    # Normalise the separators, then keep what follows the last one.
    return head.value.replace("\\", "/").rpartition("/")[2]


def _is_windows_flashing_program(node: ast.Call) -> bool:
    """True if the call's program is a known cross-platform console exe that
    allocates a Windows console window (so capture is NOT a safe boundary).

    The literal argv head is compared case-insensitively, and a trailing
    ``.exe`` / ``.cmd`` / ``.bat`` is ignored, so ``"git"``, ``"Git.exe"``
    and ``"npm.cmd"`` are all recognised. A dynamic or missing argv head
    returns False.
    """
    prog = _argv_head(node)
    if prog is None:
        return False
    name = prog.lower()
    if name in _WINDOWS_FLASHING_PROGRAMS:
        return True
    # Retry without a Windows executable extension (git.exe -> git).
    stem, dot, ext = name.rpartition(".")
    return bool(dot) and ext in ("exe", "cmd", "bat") and stem in _WINDOWS_FLASHING_PROGRAMS


def _is_posix_only_program(node: ast.Call) -> bool:
    """Report whether the call runs a statically-known POSIX-only executable.

    The check applies only when the first positional argument is a list or
    tuple literal whose first element is a string constant, for example
    ``["launchctl", "bootout", target]``; any directory prefix ending in
    ``/`` is dropped before the lookup in _POSIX_ONLY_PROGRAMS. A dynamic
    argv (variables, f-strings) is unknown and therefore returns False.
    """
    if not node.args:
        return False
    argv = node.args[0]
    if not isinstance(argv, (ast.List, ast.Tuple)):
        return False
    if not argv.elts:
        return False
    head = argv.elts[0]
    if isinstance(head, ast.Constant) and isinstance(head.value, str):
        return head.value.rpartition("/")[2] in _POSIX_ONLY_PROGRAMS
    return False


def scan_subprocess_window_footguns(
    path: Path, text: str
) -> list[tuple[int, str, Footgun]]:
    """AST pass: flag subprocess calls that can flash a Windows console.

    Honours the same `# windows-footgun: ok` suppression as the regex rules;
    for a multi-line call the marker may sit on any line of the call's span.

    Args:
        path: The file being scanned (not consulted by this pass; kept so the
            signature parallels the per-file scanners).
        text: The file's source text.

    Returns:
        ``(lineno, line, Footgun)`` tuples — the same shape scan_file()
        produces — one per unsuppressed call. Empty when ``text`` cannot be
        parsed as Python.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        # SyntaxError: not valid Python. ValueError: the source contains NUL
        # bytes (how older interpreters report it). Either way there is
        # nothing this pass can analyse, so report no findings.
        return []
    lines = text.splitlines()
    rule = Footgun(
        name=SUBPROCESS_FOOTGUN_NAME,
        pattern=re.compile(r"^$"),  # unused; AST-driven
        message=SUBPROCESS_FOOTGUN_MESSAGE,
        fix=SUBPROCESS_FOOTGUN_FIX,
    )
    out: list[tuple[int, str, Footgun]] = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        func_name = _call_attr_name(node)
        if func_name is None or _suppresses_window(node, func_name):
            continue
        lineno = node.lineno
        # Inline suppression — check the opener line AND, for multi-line
        # calls, every line of the call (a developer may mark the closing
        # paren).
        end = getattr(node, "end_lineno", lineno) or lineno
        if any(SUPPRESS_MARKER.search(src_line) for src_line in lines[lineno - 1 : end]):
            continue
        line = lines[lineno - 1] if 0 <= lineno - 1 < len(lines) else ""
        out.append((lineno, line.rstrip(), rule))
    return out


def should_scan_file(path: Path) -> bool:
    """Return True if this file is in scope for the checker.

    A file is in scope when none of its path components is an excluded
    directory, its name does not end with an excluded suffix, it is not one
    of the explicitly excluded files, and it is Python source (``.py``,
    ``.pyw`` or ``.pyi``).

    Paths that do not live under REPO_ROOT (including relative paths) are
    accepted: their own POSIX form is used for the excluded-files check
    instead of raising.
    """
    # Skip anything inside an excluded directory.
    if set(path.parts) & EXCLUDED_DIRS:
        return False
    # Skip excluded suffixes (str.endswith accepts a tuple of candidates).
    if str(path).endswith(tuple(EXCLUDED_SUFFIXES)):
        return False
    # Skip self and docs that intentionally mention the patterns.
    try:
        rel = path.relative_to(REPO_ROOT).as_posix()
    except ValueError:
        # Not under the repository root (or not absolute): fall back to the
        # path as given rather than crashing the whole scan.
        rel = path.as_posix()
    if rel in EXCLUDED_FILES:
        return False
    # Only Python source is scanned; the rules are Python-specific, so other
    # file types are cheap to skip.
    return path.suffix in {".py", ".pyw", ".pyi"}


def iter_files(paths: Iterable[Path]) -> Iterable[Path]:
    """Yield every in-scope file reachable from ``paths``.

    A directory is walked recursively, skipping excluded directory names; a
    regular file is yielded directly. In both cases should_scan_file()
    decides whether a file qualifies. Entries that are neither a file nor a
    directory (e.g. missing paths) are ignored.
    """
    for entry in paths:
        if entry.is_dir():
            for root, dirs, files in os.walk(entry):
                # Prune in place so os.walk never descends into excluded dirs.
                dirs[:] = [name for name in dirs if name not in EXCLUDED_DIRS]
                for fname in files:
                    candidate = Path(root) / fname
                    if should_scan_file(candidate):
                        yield candidate
        elif entry.is_file() and should_scan_file(entry):
            yield entry


def _strip_code(line: str) -> str:
    """Return only the code portion of ``line``.

    A line whose first non-blank character is ``#`` is a pure comment and
    yields the empty string. Otherwise the text from the first ``#`` found
    outside a quoted string onwards is cut off; a line with no such ``#`` is
    returned unchanged.

    Heuristic only (this does not parse Python) — good enough to keep prose
    in comments from being reported as code.
    """
    if line.lstrip().startswith("#"):
        return ""
    cut = _find_unquoted_hash(line)
    return line if cut is None else line[:cut]


def _find_unquoted_hash(line: str) -> int | None:
    """Locate the first ``#`` that is not inside a quoted string.

    Tracks a single "currently open quote" state over the characters of the
    line. Inside a string a backslash also consumes the character after it,
    so an escaped quote does not close the string. Returns the index of the
    ``#``, or None when the line has no unquoted ``#``.

    Simple state machine — aimed at the common "code, then optional trailing
    comment" shape.
    """
    quote: str | None = None  # the quote character that opened the string
    skip_next = False
    for idx, ch in enumerate(line):
        if skip_next:
            skip_next = False
            continue
        if quote is not None:
            if ch == "\\":
                skip_next = True
            elif ch == quote:
                quote = None
        elif ch in ("'", '"'):
            quote = ch
        elif ch == "#":
            return idx
    return None


def scan_file(path: Path, footguns: list[Footgun]) -> list[tuple[int, str, Footgun]]:
    """Return a list of (line_number, line, footgun) for unsuppressed matches.

    The file is read as UTF-8 with undecodable bytes replaced; an unreadable
    file yields no matches. Python sources first go through the AST-based
    subprocess rule, then every line is checked against the regex rules in
    ``footguns``. Text inside triple-quoted blocks, suppressed lines,
    guard-hinted lines and comment-only lines are not matched.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    found: list[tuple[int, str, Footgun]] = []

    # AST-based rule (subprocess console-window footgun). Python source only;
    # its findings share the result list with the regex findings below.
    if path.suffix in {".py", ".pyw", ".pyi"}:
        found.extend(scan_subprocess_window_footguns(path, text))

    path_str = str(path)
    # Delimiter of the triple-quoted block we are currently inside, if any
    # ("'''" or '"""'). Deliberately simple: nested and f-string cases are
    # not handled.
    open_delim: str | None = None

    for lineno, raw in enumerate(text.splitlines(), start=1):
        visible = raw
        if open_delim:
            # Inside a docstring: nothing is scanned until it closes, and
            # only the text after the closing delimiter is real code.
            _, closer, tail = raw.partition(open_delim)
            if not closer:
                continue
            open_delim = None
            visible = tail

        # Handle the first delimiter kind present on the (remaining) line.
        delim = next((d for d in ('"""', "'''") if d in visible), None)
        if delim is not None:
            if visible.count(delim) % 2 == 1:
                # Odd count: a multi-line block opens here, so only the part
                # before the opening delimiter is code.
                visible = visible.split(delim, 1)[0]
                open_delim = delim
            else:
                # Even count: the quoted text starts and ends on this line.
                # Keep the chunks outside the quotes, drop those inside.
                visible = "".join(visible.split(delim)[::2])

        if SUPPRESS_MARKER.search(raw):
            continue
        # Lines with an obvious guard (hasattr/getattr/shutil.which or a
        # platform check) are skipped. False negatives are acceptable; the
        # inline suppression marker is the authoritative override.
        if any(hint in raw for hint in GUARD_HINTS):
            continue

        code = _strip_code(visible)
        if not code.strip():
            continue

        for rule in footguns:
            if rule.path_allowlist and any(frag in path_str for frag in rule.path_allowlist):
                continue
            hit = rule.pattern.search(code)
            if not hit:
                continue
            if rule.post_filter is not None:
                try:
                    is_real = rule.post_filter(hit, raw)
                except (IndexError, AttributeError):
                    # The post-filter assumed a named group that isn't
                    # there — skip the match.
                    continue
                if not is_real:
                    continue
            found.append((lineno, raw.rstrip(), rule))
    return found


def get_staged_files() -> list[Path]:
    """List the files staged in the git index, as paths under REPO_ROOT.

    Runs ``git diff --cached`` restricted to added/copied/modified/renamed
    entries. Returns an empty list when git fails or is not installed.
    """
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR"]
    try:
        listing = subprocess.check_output(  # windows-footgun: ok — dev-only checker, runs on Linux CI
            cmd,
            cwd=REPO_ROOT,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return []
    return [REPO_ROOT / name for name in listing.splitlines() if name.strip()]


def get_diff_files(ref: str) -> list[Path]:
    """List the files changed between ``ref`` and HEAD, as paths under REPO_ROOT.

    Runs ``git diff <ref>...HEAD`` restricted to added/copied/modified/renamed
    entries. Returns an empty list when git fails (for instance on an unknown
    ref) or is not installed.
    """
    cmd = ["git", "diff", f"{ref}...HEAD", "--name-only", "--diff-filter=ACMR"]
    try:
        listing = subprocess.check_output(  # windows-footgun: ok — dev-only checker, runs on Linux CI
            cmd,
            cwd=REPO_ROOT,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return []
    return [REPO_ROOT / name for name in listing.splitlines() if name.strip()]


def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command line.

    Recognises optional positional ``paths`` (converted to ``Path``), the
    ``--all`` and ``--list`` switches, and ``--diff REF``.
    """
    parser = argparse.ArgumentParser(
        description="Flag Windows cross-platform footguns in Python code."
    )
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        help="Specific files/dirs to scan (default: staged changes).",
    )
    # The two boolean switches share everything except their name and help.
    switches = (
        ("--all", "Scan the full repository (hermes_cli/, gateway/, tools/, cron/, etc.)."),
        ("--list", "List all known footgun rules and exit."),
    )
    parser.add_argument(switches[0][0], action="store_true", help=switches[0][1])
    parser.add_argument(
        "--diff",
        metavar="REF",
        help="Scan files changed vs. the given git ref (e.g. --diff main).",
    )
    parser.add_argument(switches[1][0], action="store_true", help=switches[1][1])
    return parser.parse_args(argv)


def print_rules() -> None:
    """Print every rule — the regex rules, then the AST-based subprocess rule.

    Each entry is numbered from 1 and shows the rule name, its message and
    its complete fix text, followed by a blank line.
    """
    print("Known Windows footguns checked by this script:\n")
    entries = [(fg.name, fg.message, fg.fix) for fg in FOOTGUNS]
    # The AST-based rule is not part of the regex FOOTGUNS list; list it last.
    entries.append(
        (
            f"{SUBPROCESS_FOOTGUN_NAME} (AST-based)",
            SUBPROCESS_FOOTGUN_MESSAGE,
            SUBPROCESS_FOOTGUN_FIX,
        )
    )
    for number, (title, message, fix) in enumerate(entries, start=1):
        print(f"{number:2}. {title}")
        print(f" {message}")
        print(f" Fix: {fix}")
        print()


def main(argv: list[str]) -> int:
    """Run the checker and return the process exit status.

    Args:
        argv: Command-line arguments, without the program name.

    Returns:
        0 when no unsuppressed footgun is found (also after ``--list`` and
        when there is nothing staged to scan), 1 when at least one is found.
    """
    # Windows terminals default to cp1252, which can't encode the ✓/✗
    # characters used in the output. Reconfigure streams to UTF-8 so the
    # script works correctly on the very platform it is designed to help.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8")

    args = parse_args(argv)

    if args.list:
        print_rules()
        return 0

    if args.all:
        # Scan main Python packages + scripts (those that exist).
        package_names = (
            "hermes_cli",
            "gateway",
            "tools",
            "cron",
            "agent",
            "plugins",
            "scripts",
            "acp_adapter",
            "acp_registry",
        )
        roots = [
            REPO_ROOT / name for name in package_names if (REPO_ROOT / name).exists()
        ]
    elif args.diff:
        roots = get_diff_files(args.diff)
    elif args.paths:
        roots = [p.resolve() for p in args.paths]
    else:
        # Default: staged changes
        roots = get_staged_files()
        if not roots:
            print(
                "No staged files to scan. Pass --all for a full-repo scan, "
                "--diff <ref> for a range diff, or paths explicitly.",
                file=sys.stderr,
            )
            return 0

    total_matches = 0
    files_scanned = 0
    for path in iter_files(roots):
        files_scanned += 1
        matches = scan_file(path, FOOTGUNS)
        if not matches:
            continue
        # Report repo-relative paths when possible; a file outside the
        # repository is shown with its full path instead of raising.
        try:
            rel = path.relative_to(REPO_ROOT).as_posix()
        except ValueError:
            rel = path.as_posix()
        for lineno, line, fg in matches:
            print(f"{rel}:{lineno}: [{fg.name}]")
            print(f" {line.strip()}")
            print(f" — {fg.message}")
            print(f" Fix: {fg.fix.splitlines()[0]}")
            print()
            total_matches += 1

    if total_matches:
        print(
            f"\n✗ {total_matches} Windows footgun(s) found across "
            f"{files_scanned} file(s) scanned.",
            file=sys.stderr,
        )
        print(
            " If an individual match is a false positive or intentionally "
            "platform-gated, suppress it with `# windows-footgun: ok` on "
            "the same line.\n Run with --list to see all rules.",
            file=sys.stderr,
        )
        return 1

    print(
        f"✓ No Windows footguns found ({files_scanned} file(s) scanned)."
    )
    return 0


# Script entry point: the checker's return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))