hermes-agent/tests/tools/test_code_execution_windows_env.py

"""Tests for execute_code env scrubbing on Windows.

On Windows the child process needs a small set of OS-essential env vars
(SYSTEMROOT, WINDIR, COMSPEC, ...) to run.  Without SYSTEMROOT in particular,
``socket.socket(AF_INET, SOCK_STREAM)`` fails inside the sandbox with
WinError 10106 (Winsock can't locate mswsock.dll) and no tool call over
loopback TCP can ever succeed.

These tests cover ``_scrub_child_env`` directly so they run on every OS
— the logic is conditional on a passed-in ``is_windows`` flag, not on
the host platform.  We also keep a live Winsock smoke test that only runs
on a real Windows host.
"""

import os
import socket
import subprocess
import sys
import textwrap
import unittest.mock as mock

import pytest

from tools.code_execution_tool import (
    _SAFE_ENV_PREFIXES,
    _SECRET_SUBSTRINGS,
    _WINDOWS_ESSENTIAL_ENV_VARS,
    _scrub_child_env,
)


def _no_passthrough(_name):
    return False


class TestWindowsEssentialAllowlist:
    """The allowlist itself — contents, shape, and invariants."""

    def test_contains_winsock_required_vars(self):
        # Without SYSTEMROOT the child cannot initialize Winsock.
        assert "SYSTEMROOT" in _WINDOWS_ESSENTIAL_ENV_VARS

    def test_contains_subprocess_required_vars(self):
        # Without COMSPEC, subprocess can't resolve the default shell.
        assert "COMSPEC" in _WINDOWS_ESSENTIAL_ENV_VARS

    def test_contains_user_profile_vars(self):
        # os.path.expanduser("~") on Windows uses USERPROFILE.
        assert "USERPROFILE" in _WINDOWS_ESSENTIAL_ENV_VARS
        assert "APPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS
        assert "LOCALAPPDATA" in _WINDOWS_ESSENTIAL_ENV_VARS

    def test_contains_only_uppercase_names(self):
        # Windows env var names are case-insensitive but we canonicalize to
        # uppercase for the membership check (``k.upper() in _WINDOWS_...``).
        for name in _WINDOWS_ESSENTIAL_ENV_VARS:
            assert name == name.upper(), f"{name!r} should be uppercase"

    def test_no_overlap_with_secret_substrings(self):
        # Sanity: none of the essential OS vars should look like secrets.
        # If this ever fires, we'd have a precedence ordering bug (secrets
        # are blocked *before* the essentials check).
        for name in _WINDOWS_ESSENTIAL_ENV_VARS:
            assert not any(s in name for s in _SECRET_SUBSTRINGS), (
                f"{name!r} looks secret-like — would be blocked before the "
                "essentials allowlist can match"
            )


class TestScrubChildEnvWindows:
    """Verify _scrub_child_env passes Windows essentials through when
    is_windows=True and blocks them when is_windows=False (so POSIX hosts
    don't inherit pointless Windows vars)."""

    def _sample_windows_env(self):
        """A realistic subset of what os.environ looks like on Windows."""
        return {
            "SYSTEMROOT": r"C:\Windows",
            "SystemDrive": "C:",        # Windows preserves native case
            "WINDIR": r"C:\Windows",
            "ComSpec": r"C:\Windows\System32\cmd.exe",
            "PATHEXT": ".COM;.EXE;.BAT;.CMD;.PY",
            "USERPROFILE": r"C:\Users\alice",
            "APPDATA": r"C:\Users\alice\AppData\Roaming",
            "LOCALAPPDATA": r"C:\Users\alice\AppData\Local",
            "PATH": r"C:\Windows\System32;C:\Python311",
            "HOME": r"C:\Users\alice",
            "TEMP": r"C:\Users\alice\AppData\Local\Temp",
            # Should still be blocked:
            "OPENAI_API_KEY": "sk-secret",
            "GITHUB_TOKEN": "ghp_secret",
            "MY_PASSWORD": "hunter2",
            # Not matched by any rule — should be dropped on both OSes:
            "RANDOM_UNKNOWN_VAR": "value",
        }

    def test_windows_essentials_passed_through_when_is_windows_true(self):
        env = self._sample_windows_env()
        scrubbed = _scrub_child_env(env,
                                    is_passthrough=_no_passthrough,
                                    is_windows=True)

        # Every essential var from the sample env should survive.
        assert scrubbed["SYSTEMROOT"] == r"C:\Windows"
        assert scrubbed["SystemDrive"] == "C:"  # case preserved
        assert scrubbed["WINDIR"] == r"C:\Windows"
        assert scrubbed["ComSpec"] == r"C:\Windows\System32\cmd.exe"
        assert scrubbed["PATHEXT"] == ".COM;.EXE;.BAT;.CMD;.PY"
        assert scrubbed["USERPROFILE"] == r"C:\Users\alice"
        assert scrubbed["APPDATA"].endswith("Roaming")
        assert scrubbed["LOCALAPPDATA"].endswith("Local")

        # Safe-prefix vars still pass (baseline behavior).
        assert "PATH" in scrubbed
        assert "HOME" in scrubbed
        assert "TEMP" in scrubbed

    def test_secrets_still_blocked_on_windows(self):
        """The Windows allowlist must NOT defeat the secret-substring block.

        This is the key security invariant: essentials are allowed by
        *exact name*, and the secret-substring block runs before the
        essentials check anyway, so a variable named e.g. ``API_KEY`` can
        never sneak through just because we added Windows support.
        """
        env = self._sample_windows_env()
        scrubbed = _scrub_child_env(env,
                                    is_passthrough=_no_passthrough,
                                    is_windows=True)
        assert "OPENAI_API_KEY" not in scrubbed
        assert "GITHUB_TOKEN" not in scrubbed
        assert "MY_PASSWORD" not in scrubbed

    def test_unknown_vars_still_dropped_on_windows(self):
        env = self._sample_windows_env()
        scrubbed = _scrub_child_env(env,
                                    is_passthrough=_no_passthrough,
                                    is_windows=True)
        assert "RANDOM_UNKNOWN_VAR" not in scrubbed

    def test_essentials_blocked_when_is_windows_false(self):
        """On POSIX hosts, Windows-specific vars should not pass — they
        have no meaning and could confuse child tooling."""
        env = self._sample_windows_env()
        scrubbed = _scrub_child_env(env,
                                    is_passthrough=_no_passthrough,
                                    is_windows=False)
        # Safe prefixes still match (PATH, HOME, TEMP).
        assert "PATH" in scrubbed
        assert "HOME" in scrubbed
        assert "TEMP" in scrubbed
        # But Windows OS vars should be dropped.
        assert "SYSTEMROOT" not in scrubbed
        assert "WINDIR" not in scrubbed
        assert "ComSpec" not in scrubbed
        assert "APPDATA" not in scrubbed

    def test_case_insensitive_essential_match(self):
        """Windows env var names are case-insensitive at the OS level but
        Python preserves whatever case os.environ reported.  The scrubber
        must normalize to uppercase for the membership check."""
        env = {
            "SystemRoot": r"C:\Windows",       # mixed case
            "comspec": r"C:\Windows\System32\cmd.exe",  # lowercase
            "APPDATA": r"C:\Users\x\AppData\Roaming",   # uppercase
        }
        scrubbed = _scrub_child_env(env,
                                    is_passthrough=_no_passthrough,
                                    is_windows=True)
        assert "SystemRoot" in scrubbed
        assert "comspec" in scrubbed
        assert "APPDATA" in scrubbed


class TestScrubChildEnvPassthroughInteraction:
    """The passthrough hook runs *before* the secret block, so a skill
    can legitimately forward a third-party API key.  The Windows
    essentials addition must not interfere with that."""

    def test_passthrough_wins_over_secret_block(self):
        env = {"TENOR_API_KEY": "x", "PATH": "/bin"}
        scrubbed = _scrub_child_env(env,
                                    is_passthrough=lambda k: k == "TENOR_API_KEY",
                                    is_windows=False)
        assert scrubbed.get("TENOR_API_KEY") == "x"
        assert scrubbed.get("PATH") == "/bin"

    def test_passthrough_still_works_on_windows(self):
        env = {
            "TENOR_API_KEY": "x",
            "SYSTEMROOT": r"C:\Windows",
            "OPENAI_API_KEY": "sk-secret",  # not passthrough
        }
        scrubbed = _scrub_child_env(
            env,
            is_passthrough=lambda k: k == "TENOR_API_KEY",
            is_windows=True,
        )
        assert scrubbed.get("TENOR_API_KEY") == "x"
        assert scrubbed.get("SYSTEMROOT") == r"C:\Windows"
        assert "OPENAI_API_KEY" not in scrubbed


@pytest.mark.skipif(
    sys.platform != "win32",
    reason="Winsock-specific regression — only meaningful on Windows",
)
class TestWindowsSocketSmokeTest:
    """Integration-ish smoke test: spawn a child Python with a scrubbed
    env and confirm it can create an AF_INET socket.  This is the
    regression that motivated the fix — without SYSTEMROOT the child
    hits WinError 10106 before any RPC is attempted."""

    def test_child_can_create_socket_with_scrubbed_env(self):
        scrubbed = _scrub_child_env(os.environ, is_passthrough=_no_passthrough)

        # Build a tiny child script that simply opens an AF_INET socket.
        script = textwrap.dedent("""
            import socket, sys
            try:
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                s.close()
                print("OK")
                sys.exit(0)
            except OSError as exc:
                print(f"FAIL: {exc}")
                sys.exit(1)
        """).strip()

        result = subprocess.run(
            [sys.executable, "-c", script],
            env=scrubbed,
            capture_output=True,
            text=True,
            timeout=15,
        )
        assert result.returncode == 0, (
            f"Child failed to create socket with scrubbed env:\n"
            f"  stdout={result.stdout!r}\n"
            f"  stderr={result.stderr!r}\n"
            f"  scrubbed keys={sorted(scrubbed.keys())}"
        )
        assert "OK" in result.stdout