mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-09 03:11:58 +00:00
The sandbox's env scrubbing was dropping SYSTEMROOT, WINDIR, COMSPEC,
APPDATA, etc. On Windows this broke the child process before any RPC
could happen:
OSError: [WinError 10106] The requested service provider could not
be loaded or initialized
Python's socket module uses SYSTEMROOT to locate mswsock.dll during
Winsock initialization. Without it, socket.socket(AF_INET, SOCK_STREAM)
fails — and the existing loopback-TCP fallback for Windows couldn't work.
Fix: add a small Windows-only allowlist (_WINDOWS_ESSENTIAL_ENV_VARS)
matched by exact uppercase name, after the existing secret-substring
block. The secret block still runs first, so the allowlist cannot be
used to exfiltrate credentials. Also extract the env scrubber into a
testable helper (_scrub_child_env) that takes is_windows as a parameter,
so the logic can be unit-tested on any OS.
Live Winsock smoke test verifies that a child spawned with the scrubbed
env can now create an AF_INET socket on a real Windows host; the test
is guarded by sys.platform == 'win32' so POSIX CI stays green.
243 lines
9.8 KiB
Python
243 lines
9.8 KiB
Python
"""Tests for execute_code env scrubbing on Windows.
|
|
|
|
On Windows the child process needs a small set of OS-essential env vars
|
|
(SYSTEMROOT, WINDIR, COMSPEC, ...) to run. Without SYSTEMROOT in particular,
|
|
``socket.socket(AF_INET, SOCK_STREAM)`` fails inside the sandbox with
|
|
WinError 10106 (Winsock can't locate mswsock.dll) and no tool call over
|
|
loopback TCP can ever succeed.
|
|
|
|
These tests cover ``_scrub_child_env`` directly so they run on every OS
|
|
— the logic is conditional on a passed-in ``is_windows`` flag, not on
|
|
the host platform. We also keep a live Winsock smoke test that only runs
|
|
on a real Windows host.
|
|
"""
|
|
|
|
import os
|
|
import socket
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
import unittest.mock as mock
|
|
|
|
import pytest
|
|
|
|
from tools.code_execution_tool import (
|
|
_SAFE_ENV_PREFIXES,
|
|
_SECRET_SUBSTRINGS,
|
|
_WINDOWS_ESSENTIAL_ENV_VARS,
|
|
_scrub_child_env,
|
|
)
|
|
|
|
|
|
def _no_passthrough(_name):
|
|
return False
|
|
|
|
|
|
class TestWindowsEssentialAllowlist:
|
|
"""The allowlist itself — contents, shape, and invariants."""
|
|
|
|
def test_contains_winsock_required_vars(self):
|
|
# Without SYSTEMROOT the child cannot initialize Winsock.
|
|
assert "SYSTEMROOT" in _WINDOWS_ESSENTIAL_ENV_VARS
|
|
|
|
def test_contains_subprocess_required_vars(self):
|
|
# Without COMSPEC, subprocess can't resolve the default shell.
|
|
assert "COMSPEC" in _WINDOWS_ESSENTIAL_ENV_VARS
|
|
|
|
def test_contains_user_profile_vars(self):
|
|
# os.path.expanduser("~") on Windows uses USERPROFILE.
|
|
assert "USERPROFILE" in _WINDOWS_ESSENTIAL_ENV_VARS
|
|
assert "APPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS
|
|
assert "LOCALAPPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS
|
|
|
|
def test_contains_only_uppercase_names(self):
|
|
# Windows env var names are case-insensitive but we canonicalize to
|
|
# uppercase for the membership check (``k.upper() in _WINDOWS_...``).
|
|
for name in _WINDOWS_ESSENTIAL_ENV_VARS:
|
|
assert name == name.upper(), f"{name!r} should be uppercase"
|
|
|
|
def test_no_overlap_with_secret_substrings(self):
|
|
# Sanity: none of the essential OS vars should look like secrets.
|
|
# If this ever fires, we'd have a precedence ordering bug (secrets
|
|
# are blocked *before* the essentials check).
|
|
for name in _WINDOWS_ESSENTIAL_ENV_VARS:
|
|
assert not any(s in name for s in _SECRET_SUBSTRINGS), (
|
|
f"{name!r} looks secret-like — would be blocked before the "
|
|
"essentials allowlist can match"
|
|
)
|
|
|
|
|
|
class TestScrubChildEnvWindows:
|
|
"""Verify _scrub_child_env passes Windows essentials through when
|
|
is_windows=True and blocks them when is_windows=False (so POSIX hosts
|
|
don't inherit pointless Windows vars)."""
|
|
|
|
def _sample_windows_env(self):
|
|
"""A realistic subset of what os.environ looks like on Windows."""
|
|
return {
|
|
"SYSTEMROOT": r"C:\Windows",
|
|
"SystemDrive": "C:", # Windows preserves native case
|
|
"WINDIR": r"C:\Windows",
|
|
"ComSpec": r"C:\Windows\System32\cmd.exe",
|
|
"PATHEXT": ".COM;.EXE;.BAT;.CMD;.PY",
|
|
"USERPROFILE": r"C:\Users\alice",
|
|
"APPDATA": r"C:\Users\alice\AppData\Roaming",
|
|
"LOCALAPPDATA": r"C:\Users\alice\AppData\Local",
|
|
"PATH": r"C:\Windows\System32;C:\Python311",
|
|
"HOME": r"C:\Users\alice",
|
|
"TEMP": r"C:\Users\alice\AppData\Local\Temp",
|
|
# Should still be blocked:
|
|
"OPENAI_API_KEY": "sk-secret",
|
|
"GITHUB_TOKEN": "ghp_secret",
|
|
"MY_PASSWORD": "hunter2",
|
|
# Not matched by any rule — should be dropped on both OSes:
|
|
"RANDOM_UNKNOWN_VAR": "value",
|
|
}
|
|
|
|
def test_windows_essentials_passed_through_when_is_windows_true(self):
|
|
env = self._sample_windows_env()
|
|
scrubbed = _scrub_child_env(env,
|
|
is_passthrough=_no_passthrough,
|
|
is_windows=True)
|
|
|
|
# Every essential var from the sample env should survive.
|
|
assert scrubbed["SYSTEMROOT"] == r"C:\Windows"
|
|
assert scrubbed["SystemDrive"] == "C:" # case preserved
|
|
assert scrubbed["WINDIR"] == r"C:\Windows"
|
|
assert scrubbed["ComSpec"] == r"C:\Windows\System32\cmd.exe"
|
|
assert scrubbed["PATHEXT"] == ".COM;.EXE;.BAT;.CMD;.PY"
|
|
assert scrubbed["USERPROFILE"] == r"C:\Users\alice"
|
|
assert scrubbed["APPDATA"].endswith("Roaming")
|
|
assert scrubbed["LOCALAPPDATA"].endswith("Local")
|
|
|
|
# Safe-prefix vars still pass (baseline behavior).
|
|
assert "PATH" in scrubbed
|
|
assert "HOME" in scrubbed
|
|
assert "TEMP" in scrubbed
|
|
|
|
def test_secrets_still_blocked_on_windows(self):
|
|
"""The Windows allowlist must NOT defeat the secret-substring block.
|
|
|
|
This is the key security invariant: essentials are allowed by
|
|
*exact name*, and the secret-substring block runs before the
|
|
essentials check anyway, so a variable named e.g. ``API_KEY`` can
|
|
never sneak through just because we added Windows support.
|
|
"""
|
|
env = self._sample_windows_env()
|
|
scrubbed = _scrub_child_env(env,
|
|
is_passthrough=_no_passthrough,
|
|
is_windows=True)
|
|
assert "OPENAI_API_KEY" not in scrubbed
|
|
assert "GITHUB_TOKEN" not in scrubbed
|
|
assert "MY_PASSWORD" not in scrubbed
|
|
|
|
def test_unknown_vars_still_dropped_on_windows(self):
|
|
env = self._sample_windows_env()
|
|
scrubbed = _scrub_child_env(env,
|
|
is_passthrough=_no_passthrough,
|
|
is_windows=True)
|
|
assert "RANDOM_UNKNOWN_VAR" not in scrubbed
|
|
|
|
def test_essentials_blocked_when_is_windows_false(self):
|
|
"""On POSIX hosts, Windows-specific vars should not pass — they
|
|
have no meaning and could confuse child tooling."""
|
|
env = self._sample_windows_env()
|
|
scrubbed = _scrub_child_env(env,
|
|
is_passthrough=_no_passthrough,
|
|
is_windows=False)
|
|
# Safe prefixes still match (PATH, HOME, TEMP).
|
|
assert "PATH" in scrubbed
|
|
assert "HOME" in scrubbed
|
|
assert "TEMP" in scrubbed
|
|
# But Windows OS vars should be dropped.
|
|
assert "SYSTEMROOT" not in scrubbed
|
|
assert "WINDIR" not in scrubbed
|
|
assert "ComSpec" not in scrubbed
|
|
assert "APPDATA" not in scrubbed
|
|
|
|
def test_case_insensitive_essential_match(self):
|
|
"""Windows env var names are case-insensitive at the OS level but
|
|
Python preserves whatever case os.environ reported. The scrubber
|
|
must normalize to uppercase for the membership check."""
|
|
env = {
|
|
"SystemRoot": r"C:\Windows", # mixed case
|
|
"comspec": r"C:\Windows\System32\cmd.exe", # lowercase
|
|
"APPDATA": r"C:\Users\x\AppData\Roaming", # uppercase
|
|
}
|
|
scrubbed = _scrub_child_env(env,
|
|
is_passthrough=_no_passthrough,
|
|
is_windows=True)
|
|
assert "SystemRoot" in scrubbed
|
|
assert "comspec" in scrubbed
|
|
assert "APPDATA" in scrubbed
|
|
|
|
|
|
class TestScrubChildEnvPassthroughInteraction:
|
|
"""The passthrough hook runs *before* the secret block, so a skill
|
|
can legitimately forward a third-party API key. The Windows
|
|
essentials addition must not interfere with that."""
|
|
|
|
def test_passthrough_wins_over_secret_block(self):
|
|
env = {"TENOR_API_KEY": "x", "PATH": "/bin"}
|
|
scrubbed = _scrub_child_env(env,
|
|
is_passthrough=lambda k: k == "TENOR_API_KEY",
|
|
is_windows=False)
|
|
assert scrubbed.get("TENOR_API_KEY") == "x"
|
|
assert scrubbed.get("PATH") == "/bin"
|
|
|
|
def test_passthrough_still_works_on_windows(self):
|
|
env = {
|
|
"TENOR_API_KEY": "x",
|
|
"SYSTEMROOT": r"C:\Windows",
|
|
"OPENAI_API_KEY": "sk-secret", # not passthrough
|
|
}
|
|
scrubbed = _scrub_child_env(
|
|
env,
|
|
is_passthrough=lambda k: k == "TENOR_API_KEY",
|
|
is_windows=True,
|
|
)
|
|
assert scrubbed.get("TENOR_API_KEY") == "x"
|
|
assert scrubbed.get("SYSTEMROOT") == r"C:\Windows"
|
|
assert "OPENAI_API_KEY" not in scrubbed
|
|
|
|
|
|
@pytest.mark.skipif(
|
|
sys.platform != "win32",
|
|
reason="Winsock-specific regression — only meaningful on Windows",
|
|
)
|
|
class TestWindowsSocketSmokeTest:
|
|
"""Integration-ish smoke test: spawn a child Python with a scrubbed
|
|
env and confirm it can create an AF_INET socket. This is the
|
|
regression that motivated the fix — without SYSTEMROOT the child
|
|
hits WinError 10106 before any RPC is attempted."""
|
|
|
|
def test_child_can_create_socket_with_scrubbed_env(self):
|
|
scrubbed = _scrub_child_env(os.environ, is_passthrough=_no_passthrough)
|
|
|
|
# Build a tiny child script that simply opens an AF_INET socket.
|
|
script = textwrap.dedent("""
|
|
import socket, sys
|
|
try:
|
|
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
s.close()
|
|
print("OK")
|
|
sys.exit(0)
|
|
except OSError as exc:
|
|
print(f"FAIL: {exc}")
|
|
sys.exit(1)
|
|
""").strip()
|
|
|
|
result = subprocess.run(
|
|
[sys.executable, "-c", script],
|
|
env=scrubbed,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=15,
|
|
)
|
|
assert result.returncode == 0, (
|
|
f"Child failed to create socket with scrubbed env:\n"
|
|
f" stdout={result.stdout!r}\n"
|
|
f" stderr={result.stderr!r}\n"
|
|
f" scrubbed keys={sorted(scrubbed.keys())}"
|
|
)
|
|
assert "OK" in result.stdout
|