fix(windows): enable execute_code — stale AF_UNIX gate was blocking the tool

teknium1 noticed execute_code was missing from his enabled tools on Windows. Root cause: tools/code_execution_tool.py set ``SANDBOX_AVAILABLE = sys.platform != "win32"`` as a module-level constant, originally because the RPC transport required AF_UNIX. We added a loopback TCP fallback for the sandbox in commit eeb723fff (and covered it in the Windows TCP tests), but forgot to lift the availability gate, so execute_code was still invisible via the check_fn path on Windows.

- SANDBOX_AVAILABLE is now True unconditionally (it's still checked — a future platform could flip it off via monkeypatch/env if needed).
- The error message when disabled no longer mentions Windows specifically; it just says 'sandbox is unavailable in this environment'.
- test_windows_returns_error updated: it patches SANDBOX_AVAILABLE=False directly (which was always its real intent) and asserts on 'unavailable' instead of 'Windows'.

Tests: 171 code-execution + windows-compat tests pass, no regressions.
2026-05-09 03:11:58 +00:00 · 2026-05-07 18:17:31 -07:00 · 2026-05-07 18:17:31 -07:00 · 21efeb51bb
commit 21efeb51bb
parent 8f91d7bfa9
2 changed files with 15 additions and 5 deletions
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -774,11 +774,17 @@ class TestEnvVarFiltering(unittest.TestCase):
 class TestExecuteCodeEdgeCases(unittest.TestCase):

    def test_windows_returns_error(self):
-        """On Windows (or when SANDBOX_AVAILABLE is False), returns error JSON."""
+        """When SANDBOX_AVAILABLE is False (e.g. when the backend deems
+        the sandbox unusable for this environment), execute_code returns
+        an error JSON with a readable message pointing the caller at
+        regular tool calls.  Previously this was a Windows-only gate;
+        execute_code now works on Windows via loopback TCP, so the
+        error is only emitted when SANDBOX_AVAILABLE is explicitly
+        flipped off (e.g. for future platform-specific disables)."""
        with patch("tools.code_execution_tool.SANDBOX_AVAILABLE", False):
            result = json.loads(execute_code("print('hi')", task_id="test"))
            self.assertIn("error", result)
-            self.assertIn("Windows", result["error"])
+            self.assertIn("unavailable", result["error"].lower())

    def test_whitespace_only_code(self):
        result = json.loads(execute_code("   \n\t  ", task_id="test"))
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -47,10 +47,13 @@ import uuid
 _IS_WINDOWS = platform.system() == "Windows"
 from typing import Any, Dict, List, Optional

-# Availability gate: UDS requires a POSIX OS
+# Availability gate.  On Windows we fall back to loopback TCP for the
+# sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
+# ``_use_tcp_rpc`` in ``_execute_local`` below.  That makes execute_code
+# available on every platform Hermes itself runs on.
 logger = logging.getLogger(__name__)

-SANDBOX_AVAILABLE = sys.platform != "win32"
+SANDBOX_AVAILABLE = True

 # The 7 tools allowed inside the sandbox. The intersection of this list
 # and the session's enabled tools determines which stubs are generated.
@@ -971,7 +974,8 @@ def execute_code(
    """
    if not SANDBOX_AVAILABLE:
        return json.dumps({
-            "error": "execute_code is not available on Windows. Use normal tool calls instead."
+            "error": "execute_code sandbox is unavailable in this environment. "
+                     "Use normal tool calls (terminal, read_file, write_file, ...) instead."
        })

    if not code or not code.strip():