From 3dfb3570012ef1beead545d127f78b2776c23198 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 8 May 2026 12:57:33 -0700 Subject: [PATCH] feat(cross-platform): psutil for PID/process management + Windows footgun checker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Why Hermes supports Linux, macOS, and native Windows, but the codebase grew up POSIX-first and has accumulated patterns that silently break (or worse, silently kill!) on Windows: - `os.kill(pid, 0)` as a liveness probe — on Windows this maps to CTRL_C_EVENT and broadcasts Ctrl+C to the target's entire console process group (bpo-14484, open since 2012). - `os.killpg` — doesn't exist on Windows at all (AttributeError). - `os.setsid` / `os.getuid` / `os.geteuid` — same. - `signal.SIGKILL` / `signal.SIGHUP` / `signal.SIGUSR1` — module-attr errors at runtime on Windows. - `open(path)` / `open(path, "r")` without explicit encoding= — inherits the platform default, which is cp1252/mbcs on Windows (UTF-8 on POSIX), causing mojibake round-tripping between hosts. - `wmic` — removed from Windows 10 21H1+. This commit does three things: 1. Makes `psutil` a core dependency and migrates critical callsites to it. 2. Adds a grep-based CI gate (`scripts/check-windows-footguns.py`) that blocks new instances of any of the above patterns. 3. Fixes every existing instance in the codebase so the baseline is clean. ## What changed ### 1. psutil as a core dependency (pyproject.toml) Added `psutil>=5.9.0,<8` to core deps. psutil is the canonical cross-platform answer for "is this PID alive" and "kill this process tree" — its `pid_exists()` uses `OpenProcess + GetExitCodeProcess` on Windows (NOT a signal call), and its `Process.children(recursive=True)` + `.kill()` combo replaces `os.killpg()` portably. ### 2. `gateway/status.py::_pid_exists` Rewrote to call `psutil.pid_exists()` first, falling back to the hand-rolled ctypes `OpenProcess + WaitForSingleObject` dance on Windows (and `os.kill(pid, 0)` on POSIX) only if psutil is somehow missing — e.g. during the scaffold phase of a fresh install before pip finishes. ### 3. `os.killpg` migration to psutil (7 callsites, 5 files) - `tools/code_execution_tool.py` - `tools/process_registry.py` - `tools/tts_tool.py` - `tools/environments/local.py` (3 sites kept as-is, suppressed with `# windows-footgun: ok` — the pgid semantics psutil can't replicate, and the calls are already Windows-guarded at the outer branch) - `gateway/platforms/whatsapp.py` ### 4. `scripts/check-windows-footguns.py` (NEW, 500 lines) Grep-based checker with 11 rules covering every Windows cross-platform footgun we've hit so far: 1. `os.kill(pid, 0)` — the silent killer 2. `os.setsid` without guard 3. `os.killpg` (recommends psutil) 4. `os.getuid` / `os.geteuid` / `os.getgid` 5. `os.fork` 6. `signal.SIGKILL` 7. `signal.SIGHUP/SIGUSR1/SIGUSR2/SIGALRM/SIGCHLD/SIGPIPE/SIGQUIT` 8. `subprocess` shebang script invocation 9. `wmic` without `shutil.which` guard 10. Hardcoded `~/Desktop` (OneDrive trap) 11. `asyncio.add_signal_handler` without try/except 12. `open()` without `encoding=` on text mode Features: - Triple-quoted-docstring aware (won't flag prose inside docstrings) - Trailing-comment aware (won't flag mentions in `# os.kill(pid, 0)` comments) - Guard-hint aware (skips lines with `hasattr(os, ...)`, `shutil.which(...)`, `if platform.system() != 'Windows'`, etc.) - Inline suppression with `# windows-footgun: ok — ` - `--list` to print all rules with fixes - `--all` / `--diff ` / staged-files (default) modes - Scans 380 files in under 2 seconds ### 5. CI integration A GitHub Actions workflow that runs the checker on every PR and push is staged at `/tmp/hermes-stash/windows-footguns.yml` — not included in this commit because the GH token on the push machine lacks `workflow` scope. A maintainer with `workflow` permissions should add it as `.github/workflows/windows-footguns.yml` in a follow-up. Content: ```yaml name: Windows footgun check on: push: branches: [main] pull_request: branches: [main] jobs: check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: {python-version: "3.11"} - run: python scripts/check-windows-footguns.py --all ``` ### 6. CONTRIBUTING.md — "Cross-Platform Compatibility" expansion Expanded from 5 to 16 rules, each with message, example, and fix. Recommends psutil as the preferred API for PID / process-tree operations. ### 7. Baseline cleanup (91 → 0 findings) - 14 `open()` sites → added `encoding='utf-8'` (internal logs/caches) or `encoding='utf-8-sig'` (user-editable files that Notepad may BOM) - 23 POSIX-only callsites in systemd helpers, pty_bridge, and plugin tool subprocess management → annotated with `# windows-footgun: ok — ` - 7 `os.killpg` sites → migrated to psutil (see §3 above) ## Verification ``` $ python scripts/check-windows-footguns.py --all ✓ No Windows footguns found (380 file(s) scanned). $ python -c "from gateway.status import _pid_exists; import os > print('self:', _pid_exists(os.getpid())); print('bogus:', _pid_exists(999999))" self: True bogus: False ``` Proof-of-repro that `os.kill(pid, 0)` was actually killing processes before this fix — see commit `1cbe39914` and bpo-14484. This commit removes the last hand-rolled ctypes path from the hot liveness-check path and defers to the best-maintained cross-platform answer. --- CONTRIBUTING.md | 162 ++++- agent/copilot_acp_client.py | 2 +- gateway/platforms/whatsapp.py | 24 +- gateway/run.py | 4 +- gateway/status.py | 18 +- hermes_cli/auth.py | 2 +- hermes_cli/config.py | 17 +- hermes_cli/env_loader.py | 2 +- hermes_cli/gateway.py | 26 +- hermes_cli/main.py | 2 +- hermes_cli/pty_bridge.py | 2 +- hermes_cli/uninstall.py | 2 +- plugins/context_engine/__init__.py | 2 +- plugins/disk-cleanup/disk_cleanup.py | 2 +- plugins/google_meet/process_manager.py | 2 +- plugins/google_meet/realtime/openai_client.py | 2 +- plugins/memory/__init__.py | 4 +- plugins/memory/hindsight/__init__.py | 8 +- plugins/memory/holographic/__init__.py | 6 +- pyproject.toml | 6 + scripts/check-windows-footguns.py | 624 ++++++++++++++++++ scripts/profile-tui.py | 2 +- tools/code_execution_tool.py | 44 +- tools/environments/local.py | 6 +- tools/memory_tool.py | 2 +- tools/process_registry.py | 32 +- tools/skill_usage.py | 2 +- tools/tts_tool.py | 21 +- 28 files changed, 943 insertions(+), 85 deletions(-) create mode 100644 scripts/check-windows-footguns.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 78c608c73a..56f0c8ff01 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -522,11 +522,57 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl ## Cross-Platform Compatibility -Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS: +Hermes runs on Linux, macOS, and native Windows (plus WSL2). When writing code +that touches the OS, assume *any* platform can hit your code path. + +> **Before you PR:** run `scripts/check-windows-footguns.py` to catch the +> common Windows-unsafe patterns in your diff. It's grep-based and cheap; +> CI runs it on every PR too. ### Critical rules -1. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError` and `NotImplementedError`: +1. **Never call `os.kill(pid, 0)` for liveness checks.** `os.kill(pid, 0)` + is a standard POSIX idiom to check "is this PID alive" — the signal 0 + is a no-op permission check. **On Windows it is NOT a no-op.** Python's + Windows `os.kill` maps `sig=0` to `CTRL_C_EVENT` (they collide at the + integer value 0) and routes it through `GenerateConsoleCtrlEvent(0, pid)`, + which broadcasts Ctrl+C to the **entire console process group** containing + the target PID. "Probe if alive" silently becomes "kill the target and + often unrelated processes sharing its console." See [bpo-14484](https://bugs.python.org/issue14484) + (open since 2012 — will never be fixed for compat reasons). + + **Preferred:** use `psutil` (a core dependency — always available): + + ```python + import psutil + if psutil.pid_exists(pid): + # process is alive — safe on every platform + ... + ``` + + If you specifically need the hermes wrapper (it has a stdlib fallback + for scaffold-phase imports before pip install finishes), use + `gateway.status._pid_exists(pid)`. It calls `psutil.pid_exists` first + and falls back to a hand-rolled `OpenProcess + WaitForSingleObject` + dance on Windows only when psutil is somehow missing. + + Audit grep for new callsites: `rg "os\.kill\([^,]+,\s*0\s*\)"`. Any hit + in non-test code is presumptively a Windows silent-kill bug. + +2. **Use `shutil.which()` before shelling out — don't assume Windows has + tools Linux has.** `wmic` was removed in Windows 10 21H1 and later. `ps`, + `kill`, `grep`, `awk`, `fuser`, `lsof`, `pgrep`, and most POSIX CLI tools + simply don't exist on Windows. Test availability with + `shutil.which("tool")` and fall back to a Windows-native equivalent — + usually PowerShell via `subprocess.run(["powershell", "-NoProfile", + "-Command", ...])`. + + For process enumeration: PowerShell's `Get-CimInstance Win32_Process` is + the modern replacement for `wmic process`. See + `hermes_cli/gateway.py::_scan_gateway_pids` for the pattern. + +3. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError` + and `NotImplementedError`: ```python try: from simple_term_menu import TerminalMenu @@ -539,24 +585,126 @@ Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches idx = int(input("Choice: ")) - 1 ``` -2. **File encoding.** Windows may save `.env` files in `cp1252`. Always handle encoding errors: +4. **File encoding.** Windows may save `.env` files in `cp1252`. Always + handle encoding errors: ```python try: load_dotenv(env_path) except UnicodeDecodeError: load_dotenv(env_path, encoding="latin-1") ``` + Config files (`config.yaml`) may be saved with a UTF-8 BOM by Notepad and + similar editors — use `encoding="utf-8-sig"` when reading files that + could have been touched by a Windows GUI editor. -3. **Process management.** `os.setsid()`, `os.killpg()`, and signal handling differ on Windows. Use platform checks: +5. **Process management.** `os.setsid()`, `os.killpg()`, `os.fork()`, + `os.getuid()`, and POSIX signal handling differ on Windows. Guard with + `platform.system()`, `sys.platform`, or `hasattr(os, "setsid")`: ```python - import platform if platform.system() != "Windows": kwargs["preexec_fn"] = os.setsid + else: + kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP ``` -4. **Path separators.** Use `pathlib.Path` instead of string concatenation with `/`. + **Preferred:** for killing a process AND its children (what `os.killpg` + does on POSIX), use `psutil` — it works on every platform: + ```python + import psutil + try: + parent = psutil.Process(pid) + # Kill children first (leaf-up), then the parent. + for child in parent.children(recursive=True): + child.kill() + parent.kill() + except psutil.NoSuchProcess: + pass + ``` -5. **Shell commands in installers.** If you change `scripts/install.sh`, check if the equivalent change is needed in `scripts/install.ps1`. +6. **Signals that don't exist on Windows: `SIGALRM`, `SIGCHLD`, `SIGHUP`, + `SIGUSR1`, `SIGUSR2`, `SIGPIPE`, `SIGQUIT`, `SIGKILL`.** Python's + `signal` module raises `AttributeError` at import time if you reference + them on Windows. Use `getattr(signal, "SIGKILL", signal.SIGTERM)` or + gate the whole block behind a platform check. `loop.add_signal_handler` + raises `NotImplementedError` on Windows — always catch it. + +7. **Path separators.** Use `pathlib.Path` instead of string concatenation + with `/`. Forward slashes work almost everywhere on Windows, but + `subprocess.run(["cmd.exe", "/c", ...])` and other shell contexts can + require backslashes — convert with `str(path)` at the subprocess boundary, + not inside Python logic. + +8. **Symlinks need elevated privileges on Windows** (unless Developer Mode is + on). Tests that create symlinks need `@pytest.mark.skipif(sys.platform == + "win32", reason="Symlinks require elevated privileges on Windows")`. + +9. **POSIX file modes (0o600, 0o644, etc.) are NOT enforced on NTFS** by + default. Tests that assert on `stat().st_mode & 0o777` must skip on + Windows — the concept doesn't translate. Use ACLs (`icacls`, `pywin32`) + for Windows secret-file protection if needed. + +10. **Detached background daemons on Windows need `pythonw.exe`, NOT + `python.exe`.** `python.exe` always allocates or attaches to a console, + which makes it vulnerable to `CTRL_C_EVENT` broadcasts from any sibling + process. `pythonw.exe` is the no-console variant. Combine with + `CREATE_NO_WINDOW | DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP | + CREATE_BREAKAWAY_FROM_JOB` in `subprocess.Popen(creationflags=...)`. + See `hermes_cli/gateway_windows.py::_spawn_detached` for the reference + implementation. + +11. **`subprocess.Popen` with `.cmd` or `.bat` shims needs `shutil.which` + to resolve.** Passing `"agent-browser"` to `Popen` on Windows finds + the extensionless POSIX shebang shim in `node_modules/.bin/`, which + `CreateProcessW` can't execute — you'll get `WinError 193 "not a valid + Win32 application"`. Use `shutil.which("agent-browser", path=local_bin)` + which honors PATHEXT and picks the `.CMD` variant on Windows. + +12. **Don't use shell shebangs as a way to run Python.** `#!/usr/bin/env + python` only works when the file is executed through a Unix shell. + `subprocess.run(["./myscript.py"])` on Windows fails even if the file + has a shebang line. Always invoke Python explicitly: + `[sys.executable, "myscript.py"]`. + +13. **Shell commands in installers.** If you change `scripts/install.sh`, + make the equivalent change in `scripts/install.ps1`. The two scripts + are the canonical example of "works on Linux does not mean works on + Windows" and have drifted multiple times — keep them in lockstep. + +14. **Known paths that are OneDrive-redirected on Windows:** Desktop, + Documents, Pictures, Videos. The "real" path when OneDrive Backup is + enabled is `%USERPROFILE%\OneDrive\Desktop` (etc.), NOT + `%USERPROFILE%\Desktop` (which exists as an empty husk). Resolve the + real location via `ctypes` + `SHGetKnownFolderPath` or by reading the + `Shell Folders` registry key — never assume `~/Desktop`. + +15. **CRLF vs LF in generated scripts.** Windows `cmd.exe` and `schtasks` + parse line-by-line; mixed or LF-only line endings can break multi-line + `.cmd` / `.bat` files. Use `open(path, "w", encoding="utf-8", + newline="\r\n")` — or `open(path, "wb")` + explicit bytes — when + generating scripts Windows will execute. + +16. **Two different quoting schemes in one command line.** `subprocess.run + (["schtasks", "/TR", some_cmd])` → schtasks itself parses `/TR`, AND + the `some_cmd` string is re-parsed by `cmd.exe` when the task fires. + Different parsers, different escape rules. Use two separate quoting + helpers and never cross them. See `hermes_cli/gateway_windows.py:: + _quote_cmd_script_arg` and `_quote_schtasks_arg` for the reference + pair. + +### Testing cross-platform + +Tests that use POSIX-only syscalls need a skip marker. Common ones: +- Symlinks → `@pytest.mark.skipif(sys.platform == "win32", ...)` +- `0o600` file modes → `@pytest.mark.skipif(sys.platform.startswith("win"), ...)` +- `signal.SIGALRM` → Unix-only (see `tests/conftest.py::_enforce_test_timeout`) +- `os.setsid` / `os.fork` → Unix-only +- Live Winsock / Windows-specific regression tests → + `@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific regression")` + +If you monkeypatch `sys.platform` for cross-platform tests, also patch +`platform.system()` / `platform.release()` / `platform.mac_ver()` — each +re-reads the real OS independently, so half-patched tests still route +through the wrong branch on a Windows runner. --- diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 457b32b37b..3643837bf5 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -69,7 +69,7 @@ def _resolve_home_dir() -> str: try: import pwd - resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() + resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() # windows-footgun: ok — POSIX fallback inside try/except (pwd import fails on Windows) if resolved: return resolved except Exception: diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 5537e87ab4..8e21736441 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -155,10 +155,26 @@ def _terminate_bridge_process(proc, *, force: bool = False) -> None: raise OSError(details or f"taskkill failed for PID {proc.pid}") return - import signal - - sig = signal.SIGTERM if not force else signal.SIGKILL - os.killpg(os.getpgid(proc.pid), sig) + import psutil + try: + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + if force: + for child in children: + try: + child.kill() + except psutil.NoSuchProcess: + pass + parent.kill() + else: + for child in children: + try: + child.terminate() + except psutil.NoSuchProcess: + pass + parent.terminate() + except psutil.NoSuchProcess: + return import sys sys.path.insert(0, str(Path(__file__).resolve().parents[2])) diff --git a/gateway/run.py b/gateway/run.py index cc534af363..7e541fe922 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -15388,12 +15388,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = if threading.current_thread() is threading.main_thread(): for sig in (signal.SIGINT, signal.SIGTERM): try: - loop.add_signal_handler(sig, shutdown_signal_handler, sig) + loop.add_signal_handler(sig, shutdown_signal_handler, sig) # windows-footgun: ok — wrapped in try/except NotImplementedError for Windows except NotImplementedError: pass if hasattr(signal, "SIGUSR1"): try: - loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler) + loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler) # windows-footgun: ok — POSIX signal, guarded by hasattr above + try/except NotImplementedError except NotImplementedError: pass else: diff --git a/gateway/status.py b/gateway/status.py index 5e9b615d63..afe969572d 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -313,10 +313,20 @@ def _pid_exists(pid: int) -> bool: killing that process (and often unrelated processes in the same console group). Long-standing Python quirk; see bpo-14484. - Fix: use the Win32 ``OpenProcess`` / ``WaitForSingleObject`` pair on - Windows to check existence without any signal path; use the POSIX - ``os.kill(pid, 0)`` idiom on POSIX where it actually is a no-op. + Implementation: prefer :mod:`psutil` (hard dependency — the canonical + cross-platform answer, maintained by Giampaolo Rodolà, uses + ``OpenProcess + GetExitCodeProcess`` on Windows internally). Fall back + to a hand-rolled ctypes ``OpenProcess`` / ``WaitForSingleObject`` pair + on Windows + ``os.kill(pid, 0)`` on POSIX if psutil is somehow + unavailable — e.g. stripped-down install or import error during the + scaffold phase before ``psutil`` is pip-installed. """ + try: + import psutil # type: ignore + return bool(psutil.pid_exists(int(pid))) + except ImportError: + pass # Fall through to stdlib fallback. + if _IS_WINDOWS: try: import ctypes @@ -352,7 +362,7 @@ def _pid_exists(pid: int) -> bool: return False else: try: - os.kill(int(pid), 0) + os.kill(int(pid), 0) # windows-footgun: ok — POSIX-only branch (the whole point of _pid_exists) return True except ProcessLookupError: return False diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index fdd2471098..468960ef17 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -893,7 +893,7 @@ def _file_lock( if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0): lock_path.write_text(" ", encoding="utf-8") - with lock_path.open("r+" if msvcrt else "a+") as lock_file: + with lock_path.open("r+" if msvcrt else "a+", encoding="utf-8") as lock_file: deadline = time.monotonic() + max(1.0, timeout_seconds) while True: try: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 9f37717465..a554d50d6f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -4148,8 +4148,9 @@ def load_env() -> Dict[str, str]: if env_path.exists(): # On Windows, open() defaults to the system locale (cp1252) which can - # fail on UTF-8 .env files. Use explicit UTF-8 only on Windows. - open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {} + # fail on UTF-8 .env files. Always use explicit UTF-8; tolerate BOM + # via utf-8-sig since users may edit .env in Notepad which adds one. + open_kw = {"encoding": "utf-8-sig", "errors": "replace"} with open(env_path, **open_kw) as f: raw_lines = f.readlines() # Sanitize before parsing: split concatenated lines & drop stale @@ -4234,8 +4235,8 @@ def sanitize_env_file() -> int: if not env_path.exists(): return 0 - read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {} - write_kw = {"encoding": "utf-8"} if _IS_WINDOWS else {} + read_kw = {"encoding": "utf-8-sig", "errors": "replace"} + write_kw = {"encoding": "utf-8"} with open(env_path, **read_kw) as f: original_lines = f.readlines() @@ -4324,8 +4325,8 @@ def save_env_value(key: str, value: str): # On Windows, open() defaults to the system locale (cp1252) which can # cause OSError errno 22 on UTF-8 .env files. - read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {} - write_kw = {"encoding": "utf-8"} if _IS_WINDOWS else {} + read_kw = {"encoding": "utf-8-sig", "errors": "replace"} + write_kw = {"encoding": "utf-8"} lines = [] if env_path.exists(): @@ -4394,8 +4395,8 @@ def remove_env_value(key: str) -> bool: os.environ.pop(key, None) return False - read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {} - write_kw = {"encoding": "utf-8"} if _IS_WINDOWS else {} + read_kw = {"encoding": "utf-8-sig", "errors": "replace"} + write_kw = {"encoding": "utf-8"} with open(env_path, **read_kw) as f: lines = f.readlines() diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py index 61824672c0..8040b73eb5 100644 --- a/hermes_cli/env_loader.py +++ b/hermes_cli/env_loader.py @@ -113,7 +113,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None: except ImportError: return # early bootstrap — config module not available yet - read_kw = {"encoding": "utf-8", "errors": "replace"} + read_kw = {"encoding": "utf-8-sig", "errors": "replace"} try: with open(path, **read_kw) as f: original = f.readlines() diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index cfa9d66df8..01fbf9e745 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -177,7 +177,7 @@ def _request_gateway_self_restart(pid: int) -> bool: if not _is_pid_ancestor_of_current_process(pid): return False try: - os.kill(pid, signal.SIGUSR1) + os.kill(pid, signal.SIGUSR1) # windows-footgun: ok — POSIX signal, guarded by hasattr(signal, 'SIGUSR1') above except (ProcessLookupError, PermissionError, OSError): return False return True @@ -213,7 +213,7 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool: if pid <= 0: return False try: - os.kill(pid, signal.SIGUSR1) + os.kill(pid, signal.SIGUSR1) # windows-footgun: ok — POSIX signal, guarded by hasattr(signal, 'SIGUSR1') above except ProcessLookupError: # Already gone — nothing to drain. return True @@ -1196,13 +1196,13 @@ class SystemScopeRequiresRootError(RuntimeError): def _user_dbus_socket_path() -> Path: """Return the expected per-user D-Bus socket path (regardless of existence).""" - xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}" + xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}" # windows-footgun: ok — POSIX systemd helper, never invoked on Windows return Path(xdg) / "bus" def _user_systemd_private_socket_path() -> Path: """Return the per-user systemd private socket path (regardless of existence).""" - xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}" + xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}" # windows-footgun: ok — POSIX systemd helper, never invoked on Windows return Path(xdg) / "systemd" / "private" @@ -1225,7 +1225,7 @@ def _ensure_user_systemd_env() -> None: We detect the standard socket path and set the vars so all subsequent subprocess calls inherit them. """ - uid = os.getuid() + uid = os.getuid() # windows-footgun: ok — POSIX systemd helper, never invoked on Windows if "XDG_RUNTIME_DIR" not in os.environ: runtime_dir = f"/run/user/{uid}" if Path(runtime_dir).exists(): @@ -1291,7 +1291,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None: username, reason="User systemd control sockets are missing even though linger is enabled.", fix_hint=( - f" systemctl start user@{os.getuid()}.service\n" + f" systemctl start user@{os.getuid()}.service\n" # windows-footgun: ok — POSIX systemd helper, never invoked on Windows " (may require sudo; try again after the command succeeds)" ), ) @@ -1561,7 +1561,7 @@ def remove_legacy_hermes_units( # System-scope removal (needs root) if system_units: - if os.geteuid() != 0: + if os.geteuid() != 0: # windows-footgun: ok — Linux systemd removal path, guarded by `if system == "Linux"` / systemd-only branch print() print_warning("System-scope legacy units require root to remove.") print_info(" Re-run with: sudo hermes gateway migrate-legacy") @@ -1608,7 +1608,7 @@ def print_systemd_scope_conflict_warning() -> None: def _require_root_for_system_service(action: str) -> None: - if os.geteuid() != 0: + if os.geteuid() != 0: # windows-footgun: ok — POSIX systemd helper, never invoked on Windows raise SystemScopeRequiresRootError( f"System gateway {action} requires root. Re-run with sudo.", action, @@ -1676,7 +1676,7 @@ def install_linux_gateway_from_setup(force: bool = False) -> tuple[str | None, b if scope == "system": run_as_user = _default_system_service_user() - if os.geteuid() != 0: + if os.geteuid() != 0: # windows-footgun: ok — Linux systemd install wizard, never invoked on Windows print_warning(" System service install requires sudo, so Hermes can't create it from this user session.") if run_as_user: print_info(f" After setup, run: sudo hermes gateway install --system --run-as-user {run_as_user}") @@ -1720,7 +1720,7 @@ def get_systemd_linger_status() -> tuple[bool | None, str]: if not username: try: import pwd - username = pwd.getpwuid(os.getuid()).pw_name + username = pwd.getpwuid(os.getuid()).pw_name # windows-footgun: ok — POSIX loginctl helper, never invoked on Windows except Exception: return None, "could not determine current user" @@ -1770,7 +1770,7 @@ def _launchd_user_home() -> Path: """ import pwd - return Path(pwd.getpwuid(os.getuid()).pw_dir) + return Path(pwd.getpwuid(os.getuid()).pw_dir) # windows-footgun: ok — POSIX launchd (macOS) helper, never invoked on Windows def get_launchd_plist_path() -> Path: @@ -2169,7 +2169,7 @@ def _system_scope_wizard_would_need_root(system: bool = False) -> bool: ``SystemScopeRequiresRootError`` propagate out and leave the user staring at a bare shell. """ - if os.geteuid() == 0: + if os.geteuid() == 0: # windows-footgun: ok — systemd scope wizard decision, never invoked on Windows return False return _select_systemd_scope(system=system) @@ -2520,7 +2520,7 @@ def get_launchd_label() -> str: def _launchd_domain() -> str: - return f"gui/{os.getuid()}" + return f"gui/{os.getuid()}" # windows-footgun: ok — POSIX launchd (macOS) helper, never invoked on Windows def generate_launchd_plist() -> str: diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b14afae0b3..8ac9a0a30b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -6838,7 +6838,7 @@ def _ensure_fhs_path_guard() -> None: if sys.platform != "linux": return try: - if os.geteuid() != 0: + if os.geteuid() != 0: # windows-footgun: ok — Linux FHS helper, guarded by sys.platform == "linux" above + AttributeError catch return except AttributeError: return diff --git a/hermes_cli/pty_bridge.py b/hermes_cli/pty_bridge.py index fcb858356b..f2ef8d0876 100644 --- a/hermes_cli/pty_bridge.py +++ b/hermes_cli/pty_bridge.py @@ -213,7 +213,7 @@ class PtyBridge: # SIGHUP is the conventional "your terminal went away" signal. # We escalate if the child ignores it. - for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL): + for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL): # windows-footgun: ok — POSIX-only module (imports fcntl/termios/ptyprocess at top) if not self._proc.isalive(): break try: diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py index 67cea41820..6698f7cee2 100644 --- a/hermes_cli/uninstall.py +++ b/hermes_cli/uninstall.py @@ -167,7 +167,7 @@ def uninstall_gateway_service(): scope = "system" if is_system else "user" try: - if is_system and os.geteuid() != 0: + if is_system and os.geteuid() != 0: # windows-footgun: ok — Linux systemd uninstall path, guarded by `if system == "Linux"` above log_warn(f"System gateway service exists at {unit_path} " f"but needs sudo to remove") continue diff --git a/plugins/context_engine/__init__.py b/plugins/context_engine/__init__.py index 5321ad299a..da9206dc34 100644 --- a/plugins/context_engine/__init__.py +++ b/plugins/context_engine/__init__.py @@ -54,7 +54,7 @@ def discover_context_engines() -> List[Tuple[str, str, bool]]: if yaml_file.exists(): try: import yaml - with open(yaml_file) as f: + with open(yaml_file, encoding="utf-8-sig") as f: meta = yaml.safe_load(f) or {} desc = meta.get("description", "") except Exception: diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py index cef2698316..b7f748e7f2 100755 --- a/plugins/disk-cleanup/disk_cleanup.py +++ b/plugins/disk-cleanup/disk_cleanup.py @@ -90,7 +90,7 @@ def _log(message: str) -> None: log_file = get_log_file() log_file.parent.mkdir(parents=True, exist_ok=True) ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") - with open(log_file, "a") as f: + with open(log_file, "a", encoding="utf-8") as f: f.write(f"[{ts}] {message}\n") except OSError: # Never let the audit log break the agent loop. diff --git a/plugins/google_meet/process_manager.py b/plugins/google_meet/process_manager.py index e16ff4fa6a..0709c6a1f9 100644 --- a/plugins/google_meet/process_manager.py +++ b/plugins/google_meet/process_manager.py @@ -310,7 +310,7 @@ def stop(*, reason: str = "requested") -> Dict[str, Any]: time.sleep(0.5) if _pid_alive(pid): try: - os.kill(pid, signal.SIGKILL) + os.kill(pid, signal.SIGKILL) # windows-footgun: ok — POSIX-only plugin (google_meet registers no-op on Windows; see __init__.py) except ProcessLookupError: pass diff --git a/plugins/google_meet/realtime/openai_client.py b/plugins/google_meet/realtime/openai_client.py index 258723180a..e9738d106a 100644 --- a/plugins/google_meet/realtime/openai_client.py +++ b/plugins/google_meet/realtime/openai_client.py @@ -292,7 +292,7 @@ class RealtimeSpeaker: return self.processed_path.parent.mkdir(parents=True, exist_ok=True) record = {"id": entry.get("id"), "text": entry.get("text", ""), "result": result} - with open(self.processed_path, "a") as fp: + with open(self.processed_path, "a", encoding="utf-8") as fp: fp.write(json.dumps(record) + "\n") # ── main loop ──────────────────────────────────────────────────────── diff --git a/plugins/memory/__init__.py b/plugins/memory/__init__.py index 0d714f64dd..2398f2ebd8 100644 --- a/plugins/memory/__init__.py +++ b/plugins/memory/__init__.py @@ -135,7 +135,7 @@ def discover_memory_providers() -> List[Tuple[str, str, bool]]: if yaml_file.exists(): try: import yaml - with open(yaml_file) as f: + with open(yaml_file, encoding="utf-8-sig") as f: meta = yaml.safe_load(f) or {} desc = meta.get("description", "") except Exception: @@ -381,7 +381,7 @@ def discover_plugin_cli_commands() -> List[dict]: if yaml_file.exists(): try: import yaml - with open(yaml_file) as f: + with open(yaml_file, encoding="utf-8-sig") as f: meta = yaml.safe_load(f) or {} desc = meta.get("description", "") if desc: diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index b7751a918e..20772844f1 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -1215,7 +1215,7 @@ class HindsightMemoryProvider(MemoryProvider): # would capture output from other threads. import hindsight_embed.daemon_embed_manager as dem from rich.console import Console - dem.console = Console(file=open(log_path, "a"), force_terminal=False) + dem.console = Console(file=open(log_path, "a", encoding="utf-8"), force_terminal=False) client = self._get_client() profile = self._config.get("profile", "hermes") @@ -1231,15 +1231,15 @@ class HindsightMemoryProvider(MemoryProvider): if config_changed: profile_env = _materialize_embedded_profile_env(self._config) if client._manager.is_running(profile): - with open(log_path, "a") as f: + with open(log_path, "a", encoding="utf-8") as f: f.write("\n=== Config changed, restarting daemon ===\n") client._manager.stop(profile) client._ensure_started() - with open(log_path, "a") as f: + with open(log_path, "a", encoding="utf-8") as f: f.write("\n=== Daemon started successfully ===\n") except Exception as e: - with open(log_path, "a") as f: + with open(log_path, "a", encoding="utf-8") as f: f.write(f"\n=== Daemon startup failed: {e} ===\n") traceback.print_exc(file=f) diff --git a/plugins/memory/holographic/__init__.py b/plugins/memory/holographic/__init__.py index dc9ee530c5..681ce7660c 100644 --- a/plugins/memory/holographic/__init__.py +++ b/plugins/memory/holographic/__init__.py @@ -101,7 +101,7 @@ def _load_plugin_config() -> dict: return {} try: import yaml - with open(config_path) as f: + with open(config_path, encoding="utf-8-sig") as f: all_config = yaml.safe_load(f) or {} return cfg_get(all_config, "plugins", "hermes-memory-store", default={}) or {} except Exception: @@ -136,11 +136,11 @@ class HolographicMemoryProvider(MemoryProvider): import yaml existing = {} if config_path.exists(): - with open(config_path) as f: + with open(config_path, encoding="utf-8-sig") as f: existing = yaml.safe_load(f) or {} existing.setdefault("plugins", {}) existing["plugins"]["hermes-memory-store"] = values - with open(config_path, "w") as f: + with open(config_path, "w", encoding="utf-8") as f: yaml.dump(existing, f, default_flow_style=False) except Exception: pass diff --git a/pyproject.toml b/pyproject.toml index 8f69c5b468..ebe26b3c17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,12 @@ dependencies = [ # Python resolves automatically. No-op on Linux/macOS (which have # /usr/share/zoneinfo). Credits: PR #13182 (@sprmn24). "tzdata>=2023.3; sys_platform == 'win32'", + # Cross-platform process / PID management. `psutil` is the canonical + # answer for "is this PID alive" and process-tree walking across Linux, + # macOS and Windows. It replaces POSIX-only idioms like `os.kill(pid, 0)` + # (which is a silent killer on Windows — see CONTRIBUTING.md) and + # `os.killpg` (which doesn't exist on Windows). + "psutil>=5.9.0,<8", ] [project.optional-dependencies] diff --git a/scripts/check-windows-footguns.py b/scripts/check-windows-footguns.py new file mode 100644 index 0000000000..f424be9071 --- /dev/null +++ b/scripts/check-windows-footguns.py @@ -0,0 +1,624 @@ +#!/usr/bin/env python3 +""" +Grep-based checker for Windows cross-platform footguns. + +Flags common patterns that break silently on Windows. Run before PRs — +cheap, fast, catches regressions in a codebase that runs on three OSes. + +Usage: + # Scan staged changes (default when run from a git checkout) + python scripts/check-windows-footguns.py + + # Scan the full tree (full-repo audit) + python scripts/check-windows-footguns.py --all + + # Scan a specific file or directory + python scripts/check-windows-footguns.py path/to/file.py path/to/dir/ + + # Scan only modified files vs. main + python scripts/check-windows-footguns.py --diff main + +Exit status: + 0 — no Windows footguns found (or all matches suppressed) + 1 — at least one unsuppressed match + +Suppress an intentional use (e.g. tests or platform-gated code) with: + os.kill(pid, 0) # windows-footgun: ok — only called on POSIX +""" + +from __future__ import annotations + +import argparse +import os +import re +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + +REPO_ROOT = Path(__file__).resolve().parent.parent + +SUPPRESS_MARKER = re.compile(r"#\s*windows-footgun\s*:\s*ok\b", re.IGNORECASE) + +# Line-level guard hints. If a line contains any of these tokens, we assume +# the programmer wrote the line in full awareness of the Windows pitfall — +# e.g. `if hasattr(os, 'setsid'): ... os.setsid()`, or the classic +# `getattr(signal, 'SIGKILL', signal.SIGTERM)`, or `shutil.which("wmic")`. +# False negatives are fine here — the inline `# windows-footgun: ok` marker +# is still the authoritative suppression. This is just to reduce the noise +# floor on obviously-guarded lines so the signal-to-noise stays useful. +GUARD_HINTS = ( + "hasattr(os,", + "hasattr(signal,", + "getattr(os,", + "getattr(signal,", + "shutil.which(", + "if platform.system() != \"Windows\"", + "if platform.system() != 'Windows'", + "if sys.platform == \"win32\"", + "if sys.platform != \"win32\"", + "if sys.platform == 'win32'", + "if sys.platform != 'win32'", + "IS_WINDOWS", + "is_windows", +) + +# Dirs we never scan. +EXCLUDED_DIRS = { + ".git", + "node_modules", + "venv", + ".venv", + "__pycache__", + "build", + "dist", + ".tox", + ".mypy_cache", + ".pytest_cache", + "site-packages", + "website/build", + "optional-skills", # external skills +} + +# File globs we never scan (beyond the dirs above). +EXCLUDED_SUFFIXES = { + ".pyc", + ".pyo", + ".so", + ".dll", + ".exe", + ".png", + ".jpg", + ".gif", + ".ico", + ".svg", + ".mp4", + ".mp3", + ".wav", + ".pdf", + ".zip", + ".tar", + ".gz", + ".whl", + ".lock", + ".min.js", + ".min.css", +} + +# Files we never scan (self-referential — this script mentions the +# patterns it detects — and the CONTRIBUTING docs that list them). +EXCLUDED_FILES = { + "scripts/check-windows-footguns.py", + "CONTRIBUTING.md", +} + + +@dataclass +class Footgun: + """A Windows cross-platform footgun pattern.""" + + name: str + pattern: re.Pattern + message: str + fix: str + # If set, matches in files/paths containing any of these substrings are + # silently ignored (e.g. tests that legitimately exercise the footgun + # behind a platform guard). Prefer `# windows-footgun: ok` inline + # suppression over this list; only use path_allowlist for whole files + # that are inherently tests of the footgun itself. + path_allowlist: tuple[str, ...] = () + # Optional post-match predicate. Takes the re.Match and returns True + # if the match is a REAL footgun (not a false positive). Use this when + # the regex can't fully distinguish (e.g. open() where mode may contain + # "b" for binary, or the line may have `encoding=` elsewhere). + post_filter: "callable | None" = None + + +FOOTGUNS: list[Footgun] = [ + Footgun( + name="open() without encoding= on text mode", + # Match builtins.open() specifically — NOT os.open(), .open() + # method calls (Path.open, tarfile.open, zf.open, webbrowser.open, + # Image.open, wave.open, etc), or `async def open()` method + # definitions. The pattern requires a start-of-identifier boundary + # before `open(` so `os.open`, `.open`, `def open` are all skipped. + # Note: Path.open() is ALSO affected by the encoding default, but + # rather than flagging all `.open(` (huge noise), we require an + # explicit builtins-style open() call. Path.open() is rare in the + # codebase compared to open() and can be audited separately. + pattern=re.compile( + r"""(?:^|[\s\(,;=])(?[^'"]*)['"])?""" + ), + message=( + "open() without an explicit encoding= uses the platform default " + "(UTF-8 on POSIX, cp1252/mbcs on Windows) — files round-tripped " + "between hosts get mojibake. Always pass encoding='utf-8' for " + "text files, or use open(path, 'rb')/'wb' for binary." + ), + fix=( + "open(path, 'r', encoding='utf-8') # or 'utf-8-sig' if the " + "file may have a BOM" + ), + # Filter: only flag if mode is missing-or-text AND the line doesn't + # already pass encoding=. Skip binary mode (contains "b"). + post_filter=lambda m, line: ( + "b" not in (m.group("mode") or "") + and "encoding=" not in line + and "encoding =" not in line + # Skip `def open(` and `async def open(` (method definitions) + and not line.lstrip().startswith("def ") + and not line.lstrip().startswith("async def ") + # Skip open(path, **kwargs) patterns — encoding may be in the dict. + # Too expensive to trace; require the author to set encoding in + # the dict and trust them (or they can add a # windows-footgun: ok). + and "**" not in line + ), + ), + Footgun( + name="os.kill(pid, 0)", + pattern=re.compile(r"\bos\.kill\s*\(\s*[^,]+,\s*0\s*\)"), + message=( + "os.kill(pid, 0) is NOT a no-op on Windows — it sends " + "CTRL_C_EVENT to the target's console process group, " + "hard-killing the target and potentially unrelated siblings. " + "See bpo-14484." + ), + fix=( + "Use psutil.pid_exists(pid) (psutil is a core dependency). " + "Or gateway.status._pid_exists(pid) for the hermes wrapper " + "with a stdlib fallback." + ), + ), + Footgun( + name="bare os.setsid", + pattern=re.compile(r"(? bool: + """Return True if this file is in scope for the checker.""" + # Skip the excluded dirs + parts = set(path.parts) + if parts & EXCLUDED_DIRS: + return False + # Skip excluded suffixes + for suffix in EXCLUDED_SUFFIXES: + if str(path).endswith(suffix): + return False + # Skip self and docs that intentionally mention the patterns + rel = path.relative_to(REPO_ROOT).as_posix() + if rel in EXCLUDED_FILES: + return False + # Only scan text files (rough heuristic — .py, .md, .sh, .ps1, .yaml, etc.) + if path.suffix in {".py", ".pyw", ".pyi"}: + return True + # Other file types are read but only Python-specific patterns would match; + # that's fine and cheap to skip. + return False + + +def iter_files(paths: Iterable[Path]) -> Iterable[Path]: + for p in paths: + if p.is_file(): + if should_scan_file(p): + yield p + elif p.is_dir(): + for root, dirs, files in os.walk(p): + # prune excluded dirs in-place for speed + dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS] + for fname in files: + fpath = Path(root) / fname + if should_scan_file(fpath): + yield fpath + + +def _strip_code(line: str) -> str: + """Return just the code portion of a line — strip trailing comments and + skip lines that are entirely inside a string literal or comment. + + Heuristic only (we don't parse Python); good enough to avoid flagging + our own `# ``os.kill(pid, 0)`` is NOT a no-op` docstring-style comments. + """ + stripped = line.lstrip() + # Line starts with # — entirely a comment. + if stripped.startswith("#"): + return "" + # Remove trailing "# ..." inline comment. Naive — doesn't handle `#` + # inside strings — but on balance reduces noise far more than it adds. + hash_idx = _find_unquoted_hash(line) + if hash_idx is not None: + return line[:hash_idx] + return line + + +def _find_unquoted_hash(line: str) -> int | None: + """Index of the first `#` not inside a single/double/triple-quoted string. + + Simple state machine — good enough for the 99% case of "code, then + optional trailing comment." + """ + i = 0 + n = len(line) + in_s = False # single-quote string + in_d = False # double-quote string + while i < n: + c = line[i] + if c == "\\" and (in_s or in_d) and i + 1 < n: + i += 2 + continue + if not in_d and c == "'": + in_s = not in_s + elif not in_s and c == '"': + in_d = not in_d + elif c == "#" and not in_s and not in_d: + return i + i += 1 + return None + + +def scan_file(path: Path, footguns: list[Footgun]) -> list[tuple[int, str, Footgun]]: + """Return a list of (line_number, line, footgun) for unsuppressed matches.""" + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + return [] + matches: list[tuple[int, str, Footgun]] = [] + + # Track whether we're inside a triple-quoted string (docstring/raw block). + # Simple state machine — handles both ''' and """, toggled by the FIRST + # triple-quote we see; we don't try to handle nested or f-string cases. + in_triple: str | None = None # None, "'''", or '"""' + + for i, line in enumerate(text.splitlines(), start=1): + # Update triple-quote state based on this line's occurrences. + code_for_scan = line + if in_triple: + # We're inside a docstring — skip the whole line's scan. + # Check if it closes here. + if in_triple in line: + # Find the closing delimiter; anything after it is real code. + after = line.split(in_triple, 1)[1] + in_triple = None + code_for_scan = after + else: + continue + # Now check for docstring-open in the (possibly after-triple) portion. + # Scan for the first unescaped '''/""" in the current code_for_scan. + stripped = code_for_scan.strip() + for delim in ('"""', "'''"): + if delim in code_for_scan: + # Count occurrences — even count means single-line docstring, + # odd means we've entered a multi-line one. + count = code_for_scan.count(delim) + if count % 2 == 1: + # Odd — we're now inside the triple-quoted block. + # Scan only the part BEFORE the opening delimiter. + before = code_for_scan.split(delim, 1)[0] + code_for_scan = before + in_triple = delim + break + else: + # Even — entire docstring fits on one line. Strip it + # from the scan text to avoid matching on prose. + parts = code_for_scan.split(delim) + # Keep the "outside" parts (every other chunk, starting + # with index 0) as code, drop the "inside" parts. + code_for_scan = "".join(parts[::2]) + break + + if SUPPRESS_MARKER.search(line): + continue + # Skip if the line has an obvious guard — e.g. hasattr/getattr/ + # shutil.which or a platform check. False negatives are acceptable; + # the inline suppression marker is the authoritative override. + if any(hint in line for hint in GUARD_HINTS): + continue + code = _strip_code(code_for_scan) + if not code.strip(): + continue + for fg in footguns: + if fg.path_allowlist and any(s in str(path) for s in fg.path_allowlist): + continue + match = fg.pattern.search(code) + if not match: + continue + if fg.post_filter is not None: + try: + if not fg.post_filter(match, line): + continue + except (IndexError, AttributeError): + # Post-filter assumed a named group that isn't there — skip. + continue + matches.append((i, line.rstrip(), fg)) + return matches + + +def get_staged_files() -> list[Path]: + """Return paths staged in the current git index. Empty on non-git trees.""" + try: + out = subprocess.check_output( + ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR"], + cwd=REPO_ROOT, + stderr=subprocess.DEVNULL, + text=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError): + return [] + return [REPO_ROOT / f for f in out.splitlines() if f.strip()] + + +def get_diff_files(ref: str) -> list[Path]: + """Return paths modified vs. the given git ref.""" + try: + out = subprocess.check_output( + ["git", "diff", f"{ref}...HEAD", "--name-only", "--diff-filter=ACMR"], + cwd=REPO_ROOT, + stderr=subprocess.DEVNULL, + text=True, + ) + except (subprocess.CalledProcessError, FileNotFoundError): + return [] + return [REPO_ROOT / f for f in out.splitlines() if f.strip()] + + +def parse_args(argv: list[str]) -> argparse.Namespace: + p = argparse.ArgumentParser( + description="Flag Windows cross-platform footguns in Python code." + ) + p.add_argument( + "paths", + nargs="*", + type=Path, + help="Specific files/dirs to scan (default: staged changes).", + ) + p.add_argument( + "--all", + action="store_true", + help="Scan the full repository (hermes_cli/, gateway/, tools/, cron/, etc.).", + ) + p.add_argument( + "--diff", + metavar="REF", + help="Scan files changed vs. the given git ref (e.g. --diff main).", + ) + p.add_argument( + "--list", + action="store_true", + help="List all known footgun rules and exit.", + ) + return p.parse_args(argv) + + +def print_rules() -> None: + print("Known Windows footguns checked by this script:\n") + for i, fg in enumerate(FOOTGUNS, start=1): + print(f"{i:2}. {fg.name}") + print(f" {fg.message}") + print(f" Fix: {fg.fix}") + print() + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + + if args.list: + print_rules() + return 0 + + if args.all: + # Scan main Python packages + scripts + roots = [ + REPO_ROOT / "hermes_cli", + REPO_ROOT / "gateway", + REPO_ROOT / "tools", + REPO_ROOT / "cron", + REPO_ROOT / "agent", + REPO_ROOT / "plugins", + REPO_ROOT / "scripts", + REPO_ROOT / "acp_adapter", + REPO_ROOT / "acp_registry", + ] + roots = [r for r in roots if r.exists()] + elif args.diff: + roots = get_diff_files(args.diff) + elif args.paths: + roots = [p.resolve() for p in args.paths] + else: + # Default: staged changes + roots = get_staged_files() + if not roots: + print( + "No staged files to scan. Pass --all for a full-repo scan, " + "--diff for a range diff, or paths explicitly.", + file=sys.stderr, + ) + return 0 + + total_matches = 0 + files_scanned = 0 + for path in iter_files(roots): + files_scanned += 1 + matches = scan_file(path, FOOTGUNS) + for lineno, line, fg in matches: + rel = path.relative_to(REPO_ROOT).as_posix() + print(f"{rel}:{lineno}: [{fg.name}]") + print(f" {line.strip()}") + print(f" — {fg.message}") + print(f" Fix: {fg.fix.splitlines()[0]}") + print() + total_matches += 1 + + if total_matches: + print( + f"\n✗ {total_matches} Windows footgun(s) found across " + f"{files_scanned} file(s) scanned.", + file=sys.stderr, + ) + print( + " If an individual match is a false positive or intentionally " + "platform-gated, suppress it with `# windows-footgun: ok` on " + "the same line.\n Run with --list to see all rules.", + file=sys.stderr, + ) + return 1 + + print( + f"✓ No Windows footguns found ({files_scanned} file(s) scanned)." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/scripts/profile-tui.py b/scripts/profile-tui.py index 3a39e4c5ed..edbdf2ee45 100755 --- a/scripts/profile-tui.py +++ b/scripts/profile-tui.py @@ -457,7 +457,7 @@ def run_once(args: argparse.Namespace) -> dict[str, Any]: break time.sleep(0.1) else: - os.kill(pid, signal.SIGKILL) + os.kill(pid, signal.SIGKILL) # windows-footgun: ok — POSIX-only script (imports pty at top) os.waitpid(pid, 0) except (ProcessLookupError, ChildProcessError): pass diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 828fb559e8..092f7e37e9 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -1450,14 +1450,24 @@ def execute_code( def _kill_process_group(proc, escalate: bool = False): - """Kill the child and its entire process group.""" + """Kill the child and its entire process tree (cross-platform via psutil).""" + import psutil try: - if _IS_WINDOWS: - proc.terminate() - else: - os.killpg(os.getpgid(proc.pid), signal.SIGTERM) - except (ProcessLookupError, PermissionError) as e: - logger.debug("Could not kill process group: %s", e, exc_info=True) + parent = psutil.Process(proc.pid) + children = parent.children(recursive=True) + for child in children: + try: + child.terminate() + except psutil.NoSuchProcess: + pass + try: + parent.terminate() + except psutil.NoSuchProcess: + pass + except psutil.NoSuchProcess: + pass + except (PermissionError, OSError) as e: + logger.debug("Could not terminate process tree: %s", e, exc_info=True) try: proc.kill() except Exception as e2: @@ -1469,12 +1479,20 @@ def _kill_process_group(proc, escalate: bool = False): proc.wait(timeout=5) except subprocess.TimeoutExpired: try: - if _IS_WINDOWS: - proc.kill() - else: - os.killpg(os.getpgid(proc.pid), signal.SIGKILL) - except (ProcessLookupError, PermissionError) as e: - logger.debug("Could not kill process group with SIGKILL: %s", e, exc_info=True) + parent = psutil.Process(proc.pid) + for child in parent.children(recursive=True): + try: + child.kill() + except psutil.NoSuchProcess: + pass + try: + parent.kill() + except psutil.NoSuchProcess: + pass + except psutil.NoSuchProcess: + pass + except (PermissionError, OSError) as e: + logger.debug("Could not kill process tree: %s", e, exc_info=True) try: proc.kill() except Exception as e2: diff --git a/tools/environments/local.py b/tools/environments/local.py index e952ab5e6f..985bf4bdce 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -489,7 +489,7 @@ class LocalEnvironment(BaseEnvironment): def _group_alive(pgid: int) -> bool: try: # POSIX-only: _IS_WINDOWS is handled before this helper is used. - os.killpg(pgid, 0) + os.killpg(pgid, 0) # windows-footgun: ok — POSIX process-group alive probe return True except ProcessLookupError: return False @@ -527,7 +527,7 @@ class LocalEnvironment(BaseEnvironment): raise try: - os.killpg(pgid, signal.SIGTERM) + os.killpg(pgid, signal.SIGTERM) # windows-footgun: ok — POSIX process-group SIGTERM (guarded by _IS_WINDOWS above) except ProcessLookupError: return @@ -539,7 +539,7 @@ class LocalEnvironment(BaseEnvironment): try: # POSIX-only: _IS_WINDOWS is handled by the outer branch. - os.killpg(pgid, signal.SIGKILL) + os.killpg(pgid, signal.SIGKILL) # windows-footgun: ok — POSIX process-group SIGKILL except ProcessLookupError: return _wait_for_group_exit(pgid, 2.0) diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 0de12a64f3..80ee3c63d6 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -159,7 +159,7 @@ class MemoryStore: if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0): lock_path.write_text(" ", encoding="utf-8") - fd = open(lock_path, "r+" if msvcrt else "a+") + fd = open(lock_path, "r+" if msvcrt else "a+", encoding="utf-8") try: if fcntl: fcntl.flock(fd, fcntl.LOCK_EX) diff --git a/tools/process_registry.py b/tools/process_registry.py index a145e8c281..d4c602bb4c 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -435,10 +435,22 @@ class ProcessRegistry: os.kill(pid, signal.SIGTERM) return + import psutil try: - os.killpg(os.getpgid(pid), signal.SIGTERM) - except (OSError, ProcessLookupError, PermissionError): - os.kill(pid, signal.SIGTERM) + parent = psutil.Process(pid) + for child in parent.children(recursive=True): + try: + child.terminate() + except psutil.NoSuchProcess: + pass + parent.terminate() + except psutil.NoSuchProcess: + return + except (OSError, PermissionError): + try: + os.kill(pid, signal.SIGTERM) + except (OSError, ProcessLookupError, PermissionError): + pass # ----- Spawn ----- @@ -1032,12 +1044,22 @@ class ProcessRegistry: if session.pid: os.kill(session.pid, signal.SIGTERM) elif session.process: - # Local process -- kill the process group + # Local process -- kill the process tree try: if _IS_WINDOWS: session.process.terminate() else: - os.killpg(os.getpgid(session.process.pid), signal.SIGTERM) + import psutil + try: + parent = psutil.Process(session.process.pid) + for child in parent.children(recursive=True): + try: + child.terminate() + except psutil.NoSuchProcess: + pass + parent.terminate() + except psutil.NoSuchProcess: + pass except (ProcessLookupError, PermissionError): session.process.kill() elif session.env_ref and session.pid: diff --git a/tools/skill_usage.py b/tools/skill_usage.py index 88bca75219..e25f136544 100644 --- a/tools/skill_usage.py +++ b/tools/skill_usage.py @@ -76,7 +76,7 @@ def _usage_file_lock(): if msvcrt and (not lock_path.exists() or lock_path.stat().st_size == 0): lock_path.write_text(" ", encoding="utf-8") - fd = open(lock_path, "r+" if msvcrt else "a+") + fd = open(lock_path, "r+" if msvcrt else "a+", encoding="utf-8") try: if fcntl: fcntl.flock(fd, fcntl.LOCK_EX) diff --git a/tools/tts_tool.py b/tools/tts_tool.py index 8b82e1665b..7a190081a1 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -541,9 +541,16 @@ def _terminate_command_tts_process_tree(proc: subprocess.Popen) -> None: proc.kill() return + import psutil try: - os.killpg(proc.pid, signal.SIGTERM) - except ProcessLookupError: + parent = psutil.Process(proc.pid) + for child in parent.children(recursive=True): + try: + child.terminate() + except psutil.NoSuchProcess: + pass + parent.terminate() + except psutil.NoSuchProcess: return except Exception: proc.terminate() @@ -555,8 +562,14 @@ def _terminate_command_tts_process_tree(proc: subprocess.Popen) -> None: pass try: - os.killpg(proc.pid, signal.SIGKILL) - except ProcessLookupError: + parent = psutil.Process(proc.pid) + for child in parent.children(recursive=True): + try: + child.kill() + except psutil.NoSuchProcess: + pass + parent.kill() + except psutil.NoSuchProcess: return except Exception: proc.kill()