mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-09 03:11:58 +00:00
execute_code: pass through Windows OS-essential env vars
The sandbox's env scrubbing was dropping SYSTEMROOT, WINDIR, COMSPEC,
APPDATA, etc. On Windows this broke the child process before any RPC
could happen:
OSError: [WinError 10106] The requested service provider could not
be loaded or initialized
Python's socket module uses SYSTEMROOT to locate mswsock.dll during
Winsock initialization. Without it, socket.socket(AF_INET, SOCK_STREAM)
fails — and the existing loopback-TCP fallback for Windows couldn't work.
Fix: add a small Windows-only allowlist (_WINDOWS_ESSENTIAL_ENV_VARS)
matched by exact uppercase name, after the existing secret-substring
block. The secret block still runs first, so the allowlist cannot be
used to exfiltrate credentials. Also extract the env scrubber into a
testable helper (_scrub_child_env) that takes is_windows as a parameter,
so the logic can be unit-tested on any OS.
Live Winsock smoke test verifies that a child spawned with the scrubbed
env can now create an AF_INET socket on a real Windows host; the test
is guarded by sys.platform == 'win32' so POSIX CI stays green.
This commit is contained in:
parent
a2efad6bea
commit
5c859e5716
2 changed files with 327 additions and 23 deletions
243
tests/tools/test_code_execution_windows_env.py
Normal file
243
tests/tools/test_code_execution_windows_env.py
Normal file
|
|
@ -0,0 +1,243 @@
|
|||
"""Tests for execute_code env scrubbing on Windows.
|
||||
|
||||
On Windows the child process needs a small set of OS-essential env vars
|
||||
(SYSTEMROOT, WINDIR, COMSPEC, ...) to run. Without SYSTEMROOT in particular,
|
||||
``socket.socket(AF_INET, SOCK_STREAM)`` fails inside the sandbox with
|
||||
WinError 10106 (Winsock can't locate mswsock.dll) and no tool call over
|
||||
loopback TCP can ever succeed.
|
||||
|
||||
These tests cover ``_scrub_child_env`` directly so they run on every OS
|
||||
— the logic is conditional on a passed-in ``is_windows`` flag, not on
|
||||
the host platform. We also keep a live Winsock smoke test that only runs
|
||||
on a real Windows host.
|
||||
"""
|
||||
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
import unittest.mock as mock
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.code_execution_tool import (
|
||||
_SAFE_ENV_PREFIXES,
|
||||
_SECRET_SUBSTRINGS,
|
||||
_WINDOWS_ESSENTIAL_ENV_VARS,
|
||||
_scrub_child_env,
|
||||
)
|
||||
|
||||
|
||||
def _no_passthrough(_name):
|
||||
return False
|
||||
|
||||
|
||||
class TestWindowsEssentialAllowlist:
|
||||
"""The allowlist itself — contents, shape, and invariants."""
|
||||
|
||||
def test_contains_winsock_required_vars(self):
|
||||
# Without SYSTEMROOT the child cannot initialize Winsock.
|
||||
assert "SYSTEMROOT" in _WINDOWS_ESSENTIAL_ENV_VARS
|
||||
|
||||
def test_contains_subprocess_required_vars(self):
|
||||
# Without COMSPEC, subprocess can't resolve the default shell.
|
||||
assert "COMSPEC" in _WINDOWS_ESSENTIAL_ENV_VARS
|
||||
|
||||
def test_contains_user_profile_vars(self):
|
||||
# os.path.expanduser("~") on Windows uses USERPROFILE.
|
||||
assert "USERPROFILE" in _WINDOWS_ESSENTIAL_ENV_VARS
|
||||
assert "APPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS
|
||||
assert "LOCALAPPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS
|
||||
|
||||
def test_contains_only_uppercase_names(self):
|
||||
# Windows env var names are case-insensitive but we canonicalize to
|
||||
# uppercase for the membership check (``k.upper() in _WINDOWS_...``).
|
||||
for name in _WINDOWS_ESSENTIAL_ENV_VARS:
|
||||
assert name == name.upper(), f"{name!r} should be uppercase"
|
||||
|
||||
def test_no_overlap_with_secret_substrings(self):
|
||||
# Sanity: none of the essential OS vars should look like secrets.
|
||||
# If this ever fires, we'd have a precedence ordering bug (secrets
|
||||
# are blocked *before* the essentials check).
|
||||
for name in _WINDOWS_ESSENTIAL_ENV_VARS:
|
||||
assert not any(s in name for s in _SECRET_SUBSTRINGS), (
|
||||
f"{name!r} looks secret-like — would be blocked before the "
|
||||
"essentials allowlist can match"
|
||||
)
|
||||
|
||||
|
||||
class TestScrubChildEnvWindows:
|
||||
"""Verify _scrub_child_env passes Windows essentials through when
|
||||
is_windows=True and blocks them when is_windows=False (so POSIX hosts
|
||||
don't inherit pointless Windows vars)."""
|
||||
|
||||
def _sample_windows_env(self):
|
||||
"""A realistic subset of what os.environ looks like on Windows."""
|
||||
return {
|
||||
"SYSTEMROOT": r"C:\Windows",
|
||||
"SystemDrive": "C:", # Windows preserves native case
|
||||
"WINDIR": r"C:\Windows",
|
||||
"ComSpec": r"C:\Windows\System32\cmd.exe",
|
||||
"PATHEXT": ".COM;.EXE;.BAT;.CMD;.PY",
|
||||
"USERPROFILE": r"C:\Users\alice",
|
||||
"APPDATA": r"C:\Users\alice\AppData\Roaming",
|
||||
"LOCALAPPDATA": r"C:\Users\alice\AppData\Local",
|
||||
"PATH": r"C:\Windows\System32;C:\Python311",
|
||||
"HOME": r"C:\Users\alice",
|
||||
"TEMP": r"C:\Users\alice\AppData\Local\Temp",
|
||||
# Should still be blocked:
|
||||
"OPENAI_API_KEY": "sk-secret",
|
||||
"GITHUB_TOKEN": "ghp_secret",
|
||||
"MY_PASSWORD": "hunter2",
|
||||
# Not matched by any rule — should be dropped on both OSes:
|
||||
"RANDOM_UNKNOWN_VAR": "value",
|
||||
}
|
||||
|
||||
def test_windows_essentials_passed_through_when_is_windows_true(self):
|
||||
env = self._sample_windows_env()
|
||||
scrubbed = _scrub_child_env(env,
|
||||
is_passthrough=_no_passthrough,
|
||||
is_windows=True)
|
||||
|
||||
# Every essential var from the sample env should survive.
|
||||
assert scrubbed["SYSTEMROOT"] == r"C:\Windows"
|
||||
assert scrubbed["SystemDrive"] == "C:" # case preserved
|
||||
assert scrubbed["WINDIR"] == r"C:\Windows"
|
||||
assert scrubbed["ComSpec"] == r"C:\Windows\System32\cmd.exe"
|
||||
assert scrubbed["PATHEXT"] == ".COM;.EXE;.BAT;.CMD;.PY"
|
||||
assert scrubbed["USERPROFILE"] == r"C:\Users\alice"
|
||||
assert scrubbed["APPDATA"].endswith("Roaming")
|
||||
assert scrubbed["LOCALAPPDATA"].endswith("Local")
|
||||
|
||||
# Safe-prefix vars still pass (baseline behavior).
|
||||
assert "PATH" in scrubbed
|
||||
assert "HOME" in scrubbed
|
||||
assert "TEMP" in scrubbed
|
||||
|
||||
def test_secrets_still_blocked_on_windows(self):
|
||||
"""The Windows allowlist must NOT defeat the secret-substring block.
|
||||
|
||||
This is the key security invariant: essentials are allowed by
|
||||
*exact name*, and the secret-substring block runs before the
|
||||
essentials check anyway, so a variable named e.g. ``API_KEY`` can
|
||||
never sneak through just because we added Windows support.
|
||||
"""
|
||||
env = self._sample_windows_env()
|
||||
scrubbed = _scrub_child_env(env,
|
||||
is_passthrough=_no_passthrough,
|
||||
is_windows=True)
|
||||
assert "OPENAI_API_KEY" not in scrubbed
|
||||
assert "GITHUB_TOKEN" not in scrubbed
|
||||
assert "MY_PASSWORD" not in scrubbed
|
||||
|
||||
def test_unknown_vars_still_dropped_on_windows(self):
|
||||
env = self._sample_windows_env()
|
||||
scrubbed = _scrub_child_env(env,
|
||||
is_passthrough=_no_passthrough,
|
||||
is_windows=True)
|
||||
assert "RANDOM_UNKNOWN_VAR" not in scrubbed
|
||||
|
||||
def test_essentials_blocked_when_is_windows_false(self):
|
||||
"""On POSIX hosts, Windows-specific vars should not pass — they
|
||||
have no meaning and could confuse child tooling."""
|
||||
env = self._sample_windows_env()
|
||||
scrubbed = _scrub_child_env(env,
|
||||
is_passthrough=_no_passthrough,
|
||||
is_windows=False)
|
||||
# Safe prefixes still match (PATH, HOME, TEMP).
|
||||
assert "PATH" in scrubbed
|
||||
assert "HOME" in scrubbed
|
||||
assert "TEMP" in scrubbed
|
||||
# But Windows OS vars should be dropped.
|
||||
assert "SYSTEMROOT" not in scrubbed
|
||||
assert "WINDIR" not in scrubbed
|
||||
assert "ComSpec" not in scrubbed
|
||||
assert "APPDATA" not in scrubbed
|
||||
|
||||
def test_case_insensitive_essential_match(self):
|
||||
"""Windows env var names are case-insensitive at the OS level but
|
||||
Python preserves whatever case os.environ reported. The scrubber
|
||||
must normalize to uppercase for the membership check."""
|
||||
env = {
|
||||
"SystemRoot": r"C:\Windows", # mixed case
|
||||
"comspec": r"C:\Windows\System32\cmd.exe", # lowercase
|
||||
"APPDATA": r"C:\Users\x\AppData\Roaming", # uppercase
|
||||
}
|
||||
scrubbed = _scrub_child_env(env,
|
||||
is_passthrough=_no_passthrough,
|
||||
is_windows=True)
|
||||
assert "SystemRoot" in scrubbed
|
||||
assert "comspec" in scrubbed
|
||||
assert "APPDATA" in scrubbed
|
||||
|
||||
|
||||
class TestScrubChildEnvPassthroughInteraction:
|
||||
"""The passthrough hook runs *before* the secret block, so a skill
|
||||
can legitimately forward a third-party API key. The Windows
|
||||
essentials addition must not interfere with that."""
|
||||
|
||||
def test_passthrough_wins_over_secret_block(self):
|
||||
env = {"TENOR_API_KEY": "x", "PATH": "/bin"}
|
||||
scrubbed = _scrub_child_env(env,
|
||||
is_passthrough=lambda k: k == "TENOR_API_KEY",
|
||||
is_windows=False)
|
||||
assert scrubbed.get("TENOR_API_KEY") == "x"
|
||||
assert scrubbed.get("PATH") == "/bin"
|
||||
|
||||
def test_passthrough_still_works_on_windows(self):
|
||||
env = {
|
||||
"TENOR_API_KEY": "x",
|
||||
"SYSTEMROOT": r"C:\Windows",
|
||||
"OPENAI_API_KEY": "sk-secret", # not passthrough
|
||||
}
|
||||
scrubbed = _scrub_child_env(
|
||||
env,
|
||||
is_passthrough=lambda k: k == "TENOR_API_KEY",
|
||||
is_windows=True,
|
||||
)
|
||||
assert scrubbed.get("TENOR_API_KEY") == "x"
|
||||
assert scrubbed.get("SYSTEMROOT") == r"C:\Windows"
|
||||
assert "OPENAI_API_KEY" not in scrubbed
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform != "win32",
|
||||
reason="Winsock-specific regression — only meaningful on Windows",
|
||||
)
|
||||
class TestWindowsSocketSmokeTest:
|
||||
"""Integration-ish smoke test: spawn a child Python with a scrubbed
|
||||
env and confirm it can create an AF_INET socket. This is the
|
||||
regression that motivated the fix — without SYSTEMROOT the child
|
||||
hits WinError 10106 before any RPC is attempted."""
|
||||
|
||||
def test_child_can_create_socket_with_scrubbed_env(self):
|
||||
scrubbed = _scrub_child_env(os.environ, is_passthrough=_no_passthrough)
|
||||
|
||||
# Build a tiny child script that simply opens an AF_INET socket.
|
||||
script = textwrap.dedent("""
|
||||
import socket, sys
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.close()
|
||||
print("OK")
|
||||
sys.exit(0)
|
||||
except OSError as exc:
|
||||
print(f"FAIL: {exc}")
|
||||
sys.exit(1)
|
||||
""").strip()
|
||||
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-c", script],
|
||||
env=scrubbed,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
assert result.returncode == 0, (
|
||||
f"Child failed to create socket with scrubbed env:\n"
|
||||
f" stdout={result.stdout!r}\n"
|
||||
f" stderr={result.stderr!r}\n"
|
||||
f" scrubbed keys={sorted(scrubbed.keys())}"
|
||||
)
|
||||
assert "OK" in result.stdout
|
||||
|
|
@ -73,6 +73,85 @@ DEFAULT_MAX_TOOL_CALLS = 50
|
|||
MAX_STDOUT_BYTES = 50_000 # 50 KB
|
||||
MAX_STDERR_BYTES = 10_000 # 10 KB
|
||||
|
||||
# Environment variable scrubbing rules (shared between the local + remote
|
||||
# backends). Secret-substring block is applied first; anything left must
|
||||
# match either a safe prefix or, on Windows, an OS-essential name.
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
||||
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
|
||||
"HERMES_")
|
||||
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
||||
"PASSWD", "AUTH")
|
||||
|
||||
# Windows-only: a handful of variables are required by the OS/CRT itself.
|
||||
# Without them, even stdlib calls like ``socket.socket()`` fail with
|
||||
# WinError 10106 (Winsock can't locate mswsock.dll) and ``subprocess``
|
||||
# can't resolve cmd.exe. These are well-known OS paths, not secrets, so
|
||||
# we allow them through by exact name. The _SECRET_SUBSTRINGS block
|
||||
# still runs as a safety net (none of these names match those substrings).
|
||||
_WINDOWS_ESSENTIAL_ENV_VARS = frozenset({
|
||||
"SYSTEMROOT", # %SYSTEMROOT%\System32 — Winsock needs this
|
||||
"SYSTEMDRIVE", # C: (or wherever Windows lives)
|
||||
"WINDIR", # usually same as SYSTEMROOT
|
||||
"COMSPEC", # cmd.exe path — subprocess shell=True needs it
|
||||
"PATHEXT", # .COM;.EXE;.BAT;... — shell lookup
|
||||
"OS", # "Windows_NT" — some tools gate on this
|
||||
"PROCESSOR_ARCHITECTURE",
|
||||
"NUMBER_OF_PROCESSORS",
|
||||
"PUBLIC", # C:\Users\Public
|
||||
"ALLUSERSPROFILE", # C:\ProgramData — some stdlib paths use it
|
||||
"PROGRAMDATA", # C:\ProgramData
|
||||
"PROGRAMFILES",
|
||||
"PROGRAMFILES(X86)",
|
||||
"PROGRAMW6432",
|
||||
"APPDATA", # %USERPROFILE%\AppData\Roaming — Python uses it
|
||||
"LOCALAPPDATA", # %USERPROFILE%\AppData\Local
|
||||
"USERPROFILE", # C:\Users\<name> — Python's expanduser uses it
|
||||
"USERDOMAIN",
|
||||
"USERNAME",
|
||||
"HOMEDRIVE", # C:
|
||||
"HOMEPATH", # \Users\<name>
|
||||
"COMPUTERNAME",
|
||||
})
|
||||
|
||||
|
||||
def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
|
||||
"""Produce the scrubbed child-process env for execute_code.
|
||||
|
||||
Rules (order matters):
|
||||
1. Passthrough vars (skill- or config-declared) always pass.
|
||||
2. Secret-substring names (KEY/TOKEN/etc.) are blocked.
|
||||
3. Names matching a safe prefix pass.
|
||||
4. On Windows, a small OS-essential allowlist passes by exact name
|
||||
— without these the child can't even create a socket or spawn a
|
||||
subprocess.
|
||||
|
||||
Extracted into a helper so tests can exercise the logic without
|
||||
spawning a subprocess.
|
||||
"""
|
||||
if is_passthrough is None:
|
||||
try:
|
||||
from tools.env_passthrough import is_env_passthrough as _ep
|
||||
except Exception:
|
||||
_ep = lambda _: False # noqa: E731
|
||||
is_passthrough = _ep
|
||||
if is_windows is None:
|
||||
is_windows = _IS_WINDOWS
|
||||
|
||||
scrubbed = {}
|
||||
for k, v in source_env.items():
|
||||
if is_passthrough(k):
|
||||
scrubbed[k] = v
|
||||
continue
|
||||
if any(s in k.upper() for s in _SECRET_SUBSTRINGS):
|
||||
continue
|
||||
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
||||
scrubbed[k] = v
|
||||
continue
|
||||
if is_windows and k.upper() in _WINDOWS_ESSENTIAL_ENV_VARS:
|
||||
scrubbed[k] = v
|
||||
return scrubbed
|
||||
|
||||
|
||||
def check_sandbox_requirements() -> bool:
|
||||
"""Code execution sandbox requires a POSIX OS for Unix domain sockets."""
|
||||
|
|
@ -1079,29 +1158,11 @@ def execute_code(
|
|||
# generated scripts. The child accesses tools via RPC, not direct API.
|
||||
# Exception: env vars declared by loaded skills (via env_passthrough
|
||||
# registry) or explicitly allowed by the user in config.yaml
|
||||
# (terminal.env_passthrough) are passed through.
|
||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
||||
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
|
||||
"HERMES_")
|
||||
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
||||
"PASSWD", "AUTH")
|
||||
try:
|
||||
from tools.env_passthrough import is_env_passthrough as _is_passthrough
|
||||
except Exception:
|
||||
_is_passthrough = lambda _: False # noqa: E731
|
||||
child_env = {}
|
||||
for k, v in os.environ.items():
|
||||
# Passthrough vars (skill-declared or user-configured) always pass.
|
||||
if _is_passthrough(k):
|
||||
child_env[k] = v
|
||||
continue
|
||||
# Block vars with secret-like names.
|
||||
if any(s in k.upper() for s in _SECRET_SUBSTRINGS):
|
||||
continue
|
||||
# Allow vars with known safe prefixes.
|
||||
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
||||
child_env[k] = v
|
||||
# (terminal.env_passthrough) are passed through. On Windows, a small
|
||||
# OS-essential allowlist (SYSTEMROOT, WINDIR, COMSPEC, ...) is also
|
||||
# passed through — without those, the child can't create a socket
|
||||
# or spawn a subprocess. See ``_scrub_child_env`` for the rules.
|
||||
child_env = _scrub_child_env(os.environ)
|
||||
child_env["HERMES_RPC_SOCKET"] = rpc_endpoint
|
||||
child_env["PYTHONDONTWRITEBYTECODE"] = "1"
|
||||
# Ensure the hermes-agent root is importable in the sandbox so
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue