From fab984c7f8a1240332729f509b3a0daca50bb0a6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 7 May 2026 18:39:38 -0700 Subject: [PATCH] execute_code: pass through Windows OS-essential env vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sandbox's env scrubbing was dropping SYSTEMROOT, WINDIR, COMSPEC, APPDATA, etc. On Windows this broke the child process before any RPC could happen: OSError: [WinError 10106] The requested service provider could not be loaded or initialized Python's socket module uses SYSTEMROOT to locate mswsock.dll during Winsock initialization. Without it, socket.socket(AF_INET, SOCK_STREAM) fails — and the existing loopback-TCP fallback for Windows couldn't work. Fix: add a small Windows-only allowlist (_WINDOWS_ESSENTIAL_ENV_VARS) matched by exact uppercase name, after the existing secret-substring block. The secret block still runs first, so the allowlist cannot be used to exfiltrate credentials. Also extract the env scrubber into a testable helper (_scrub_child_env) that takes is_windows as a parameter, so the logic can be unit-tested on any OS. Live Winsock smoke test verifies that a child spawned with the scrubbed env can now create an AF_INET socket on a real Windows host; the test is guarded by sys.platform == 'win32' so POSIX CI stays green. --- .../tools/test_code_execution_windows_env.py | 243 ++++++++++++++++++ tools/code_execution_tool.py | 107 ++++++-- 2 files changed, 327 insertions(+), 23 deletions(-) create mode 100644 tests/tools/test_code_execution_windows_env.py diff --git a/tests/tools/test_code_execution_windows_env.py b/tests/tools/test_code_execution_windows_env.py new file mode 100644 index 0000000000..1e1f1eb989 --- /dev/null +++ b/tests/tools/test_code_execution_windows_env.py @@ -0,0 +1,243 @@ +"""Tests for execute_code env scrubbing on Windows. + +On Windows the child process needs a small set of OS-essential env vars +(SYSTEMROOT, WINDIR, COMSPEC, ...) to run. Without SYSTEMROOT in particular, +``socket.socket(AF_INET, SOCK_STREAM)`` fails inside the sandbox with +WinError 10106 (Winsock can't locate mswsock.dll) and no tool call over +loopback TCP can ever succeed. + +These tests cover ``_scrub_child_env`` directly so they run on every OS +— the logic is conditional on a passed-in ``is_windows`` flag, not on +the host platform. We also keep a live Winsock smoke test that only runs +on a real Windows host. +""" + +import os +import socket +import subprocess +import sys +import textwrap +import unittest.mock as mock + +import pytest + +from tools.code_execution_tool import ( + _SAFE_ENV_PREFIXES, + _SECRET_SUBSTRINGS, + _WINDOWS_ESSENTIAL_ENV_VARS, + _scrub_child_env, +) + + +def _no_passthrough(_name): + return False + + +class TestWindowsEssentialAllowlist: + """The allowlist itself — contents, shape, and invariants.""" + + def test_contains_winsock_required_vars(self): + # Without SYSTEMROOT the child cannot initialize Winsock. + assert "SYSTEMROOT" in _WINDOWS_ESSENTIAL_ENV_VARS + + def test_contains_subprocess_required_vars(self): + # Without COMSPEC, subprocess can't resolve the default shell. + assert "COMSPEC" in _WINDOWS_ESSENTIAL_ENV_VARS + + def test_contains_user_profile_vars(self): + # os.path.expanduser("~") on Windows uses USERPROFILE. + assert "USERPROFILE" in _WINDOWS_ESSENTIAL_ENV_VARS + assert "APPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS + assert "LOCALAPPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS + + def test_contains_only_uppercase_names(self): + # Windows env var names are case-insensitive but we canonicalize to + # uppercase for the membership check (``k.upper() in _WINDOWS_...``). + for name in _WINDOWS_ESSENTIAL_ENV_VARS: + assert name == name.upper(), f"{name!r} should be uppercase" + + def test_no_overlap_with_secret_substrings(self): + # Sanity: none of the essential OS vars should look like secrets. + # If this ever fires, we'd have a precedence ordering bug (secrets + # are blocked *before* the essentials check). + for name in _WINDOWS_ESSENTIAL_ENV_VARS: + assert not any(s in name for s in _SECRET_SUBSTRINGS), ( + f"{name!r} looks secret-like — would be blocked before the " + "essentials allowlist can match" + ) + + +class TestScrubChildEnvWindows: + """Verify _scrub_child_env passes Windows essentials through when + is_windows=True and blocks them when is_windows=False (so POSIX hosts + don't inherit pointless Windows vars).""" + + def _sample_windows_env(self): + """A realistic subset of what os.environ looks like on Windows.""" + return { + "SYSTEMROOT": r"C:\Windows", + "SystemDrive": "C:", # Windows preserves native case + "WINDIR": r"C:\Windows", + "ComSpec": r"C:\Windows\System32\cmd.exe", + "PATHEXT": ".COM;.EXE;.BAT;.CMD;.PY", + "USERPROFILE": r"C:\Users\alice", + "APPDATA": r"C:\Users\alice\AppData\Roaming", + "LOCALAPPDATA": r"C:\Users\alice\AppData\Local", + "PATH": r"C:\Windows\System32;C:\Python311", + "HOME": r"C:\Users\alice", + "TEMP": r"C:\Users\alice\AppData\Local\Temp", + # Should still be blocked: + "OPENAI_API_KEY": "sk-secret", + "GITHUB_TOKEN": "ghp_secret", + "MY_PASSWORD": "hunter2", + # Not matched by any rule — should be dropped on both OSes: + "RANDOM_UNKNOWN_VAR": "value", + } + + def test_windows_essentials_passed_through_when_is_windows_true(self): + env = self._sample_windows_env() + scrubbed = _scrub_child_env(env, + is_passthrough=_no_passthrough, + is_windows=True) + + # Every essential var from the sample env should survive. + assert scrubbed["SYSTEMROOT"] == r"C:\Windows" + assert scrubbed["SystemDrive"] == "C:" # case preserved + assert scrubbed["WINDIR"] == r"C:\Windows" + assert scrubbed["ComSpec"] == r"C:\Windows\System32\cmd.exe" + assert scrubbed["PATHEXT"] == ".COM;.EXE;.BAT;.CMD;.PY" + assert scrubbed["USERPROFILE"] == r"C:\Users\alice" + assert scrubbed["APPDATA"].endswith("Roaming") + assert scrubbed["LOCALAPPDATA"].endswith("Local") + + # Safe-prefix vars still pass (baseline behavior). + assert "PATH" in scrubbed + assert "HOME" in scrubbed + assert "TEMP" in scrubbed + + def test_secrets_still_blocked_on_windows(self): + """The Windows allowlist must NOT defeat the secret-substring block. + + This is the key security invariant: essentials are allowed by + *exact name*, and the secret-substring block runs before the + essentials check anyway, so a variable named e.g. ``API_KEY`` can + never sneak through just because we added Windows support. + """ + env = self._sample_windows_env() + scrubbed = _scrub_child_env(env, + is_passthrough=_no_passthrough, + is_windows=True) + assert "OPENAI_API_KEY" not in scrubbed + assert "GITHUB_TOKEN" not in scrubbed + assert "MY_PASSWORD" not in scrubbed + + def test_unknown_vars_still_dropped_on_windows(self): + env = self._sample_windows_env() + scrubbed = _scrub_child_env(env, + is_passthrough=_no_passthrough, + is_windows=True) + assert "RANDOM_UNKNOWN_VAR" not in scrubbed + + def test_essentials_blocked_when_is_windows_false(self): + """On POSIX hosts, Windows-specific vars should not pass — they + have no meaning and could confuse child tooling.""" + env = self._sample_windows_env() + scrubbed = _scrub_child_env(env, + is_passthrough=_no_passthrough, + is_windows=False) + # Safe prefixes still match (PATH, HOME, TEMP). + assert "PATH" in scrubbed + assert "HOME" in scrubbed + assert "TEMP" in scrubbed + # But Windows OS vars should be dropped. + assert "SYSTEMROOT" not in scrubbed + assert "WINDIR" not in scrubbed + assert "ComSpec" not in scrubbed + assert "APPDATA" not in scrubbed + + def test_case_insensitive_essential_match(self): + """Windows env var names are case-insensitive at the OS level but + Python preserves whatever case os.environ reported. The scrubber + must normalize to uppercase for the membership check.""" + env = { + "SystemRoot": r"C:\Windows", # mixed case + "comspec": r"C:\Windows\System32\cmd.exe", # lowercase + "APPDATA": r"C:\Users\x\AppData\Roaming", # uppercase + } + scrubbed = _scrub_child_env(env, + is_passthrough=_no_passthrough, + is_windows=True) + assert "SystemRoot" in scrubbed + assert "comspec" in scrubbed + assert "APPDATA" in scrubbed + + +class TestScrubChildEnvPassthroughInteraction: + """The passthrough hook runs *before* the secret block, so a skill + can legitimately forward a third-party API key. The Windows + essentials addition must not interfere with that.""" + + def test_passthrough_wins_over_secret_block(self): + env = {"TENOR_API_KEY": "x", "PATH": "/bin"} + scrubbed = _scrub_child_env(env, + is_passthrough=lambda k: k == "TENOR_API_KEY", + is_windows=False) + assert scrubbed.get("TENOR_API_KEY") == "x" + assert scrubbed.get("PATH") == "/bin" + + def test_passthrough_still_works_on_windows(self): + env = { + "TENOR_API_KEY": "x", + "SYSTEMROOT": r"C:\Windows", + "OPENAI_API_KEY": "sk-secret", # not passthrough + } + scrubbed = _scrub_child_env( + env, + is_passthrough=lambda k: k == "TENOR_API_KEY", + is_windows=True, + ) + assert scrubbed.get("TENOR_API_KEY") == "x" + assert scrubbed.get("SYSTEMROOT") == r"C:\Windows" + assert "OPENAI_API_KEY" not in scrubbed + + +@pytest.mark.skipif( + sys.platform != "win32", + reason="Winsock-specific regression — only meaningful on Windows", +) +class TestWindowsSocketSmokeTest: + """Integration-ish smoke test: spawn a child Python with a scrubbed + env and confirm it can create an AF_INET socket. This is the + regression that motivated the fix — without SYSTEMROOT the child + hits WinError 10106 before any RPC is attempted.""" + + def test_child_can_create_socket_with_scrubbed_env(self): + scrubbed = _scrub_child_env(os.environ, is_passthrough=_no_passthrough) + + # Build a tiny child script that simply opens an AF_INET socket. + script = textwrap.dedent(""" + import socket, sys + try: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.close() + print("OK") + sys.exit(0) + except OSError as exc: + print(f"FAIL: {exc}") + sys.exit(1) + """).strip() + + result = subprocess.run( + [sys.executable, "-c", script], + env=scrubbed, + capture_output=True, + text=True, + timeout=15, + ) + assert result.returncode == 0, ( + f"Child failed to create socket with scrubbed env:\n" + f" stdout={result.stdout!r}\n" + f" stderr={result.stderr!r}\n" + f" scrubbed keys={sorted(scrubbed.keys())}" + ) + assert "OK" in result.stdout diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 8e9112056f..e115f5dc1f 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -73,6 +73,85 @@ DEFAULT_MAX_TOOL_CALLS = 50 MAX_STDOUT_BYTES = 50_000 # 50 KB MAX_STDERR_BYTES = 10_000 # 10 KB +# Environment variable scrubbing rules (shared between the local + remote +# backends). Secret-substring block is applied first; anything left must +# match either a safe prefix or, on Windows, an OS-essential name. +_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM", + "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME", + "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA", + "HERMES_") +_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL", + "PASSWD", "AUTH") + +# Windows-only: a handful of variables are required by the OS/CRT itself. +# Without them, even stdlib calls like ``socket.socket()`` fail with +# WinError 10106 (Winsock can't locate mswsock.dll) and ``subprocess`` +# can't resolve cmd.exe. These are well-known OS paths, not secrets, so +# we allow them through by exact name. The _SECRET_SUBSTRINGS block +# still runs as a safety net (none of these names match those substrings). +_WINDOWS_ESSENTIAL_ENV_VARS = frozenset({ + "SYSTEMROOT", # %SYSTEMROOT%\System32 — Winsock needs this + "SYSTEMDRIVE", # C: (or wherever Windows lives) + "WINDIR", # usually same as SYSTEMROOT + "COMSPEC", # cmd.exe path — subprocess shell=True needs it + "PATHEXT", # .COM;.EXE;.BAT;... — shell lookup + "OS", # "Windows_NT" — some tools gate on this + "PROCESSOR_ARCHITECTURE", + "NUMBER_OF_PROCESSORS", + "PUBLIC", # C:\Users\Public + "ALLUSERSPROFILE", # C:\ProgramData — some stdlib paths use it + "PROGRAMDATA", # C:\ProgramData + "PROGRAMFILES", + "PROGRAMFILES(X86)", + "PROGRAMW6432", + "APPDATA", # %USERPROFILE%\AppData\Roaming — Python uses it + "LOCALAPPDATA", # %USERPROFILE%\AppData\Local + "USERPROFILE", # C:\Users\ — Python's expanduser uses it + "USERDOMAIN", + "USERNAME", + "HOMEDRIVE", # C: + "HOMEPATH", # \Users\ + "COMPUTERNAME", +}) + + +def _scrub_child_env(source_env, is_passthrough=None, is_windows=None): + """Produce the scrubbed child-process env for execute_code. + + Rules (order matters): + 1. Passthrough vars (skill- or config-declared) always pass. + 2. Secret-substring names (KEY/TOKEN/etc.) are blocked. + 3. Names matching a safe prefix pass. + 4. On Windows, a small OS-essential allowlist passes by exact name + — without these the child can't even create a socket or spawn a + subprocess. + + Extracted into a helper so tests can exercise the logic without + spawning a subprocess. + """ + if is_passthrough is None: + try: + from tools.env_passthrough import is_env_passthrough as _ep + except Exception: + _ep = lambda _: False # noqa: E731 + is_passthrough = _ep + if is_windows is None: + is_windows = _IS_WINDOWS + + scrubbed = {} + for k, v in source_env.items(): + if is_passthrough(k): + scrubbed[k] = v + continue + if any(s in k.upper() for s in _SECRET_SUBSTRINGS): + continue + if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES): + scrubbed[k] = v + continue + if is_windows and k.upper() in _WINDOWS_ESSENTIAL_ENV_VARS: + scrubbed[k] = v + return scrubbed + def check_sandbox_requirements() -> bool: """Code execution sandbox requires a POSIX OS for Unix domain sockets.""" @@ -1079,29 +1158,11 @@ def execute_code( # generated scripts. The child accesses tools via RPC, not direct API. # Exception: env vars declared by loaded skills (via env_passthrough # registry) or explicitly allowed by the user in config.yaml - # (terminal.env_passthrough) are passed through. - _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM", - "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME", - "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA", - "HERMES_") - _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL", - "PASSWD", "AUTH") - try: - from tools.env_passthrough import is_env_passthrough as _is_passthrough - except Exception: - _is_passthrough = lambda _: False # noqa: E731 - child_env = {} - for k, v in os.environ.items(): - # Passthrough vars (skill-declared or user-configured) always pass. - if _is_passthrough(k): - child_env[k] = v - continue - # Block vars with secret-like names. - if any(s in k.upper() for s in _SECRET_SUBSTRINGS): - continue - # Allow vars with known safe prefixes. - if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES): - child_env[k] = v + # (terminal.env_passthrough) are passed through. On Windows, a small + # OS-essential allowlist (SYSTEMROOT, WINDIR, COMSPEC, ...) is also + # passed through — without those, the child can't create a socket + # or spawn a subprocess. See ``_scrub_child_env`` for the rules. + child_env = _scrub_child_env(os.environ) child_env["HERMES_RPC_SOCKET"] = rpc_endpoint child_env["PYTHONDONTWRITEBYTECODE"] = "1" # Ensure the hermes-agent root is importable in the sandbox so