fix(windows): enable execute_code — stale AF_UNIX gate was blocking the tool

teknium1 noticed execute_code was missing from his enabled tools on Windows.
Root cause: tools/code_execution_tool.py set ``SANDBOX_AVAILABLE =
sys.platform != "win32"`` as a module-level constant, originally because
the RPC transport required AF_UNIX.  We added a loopback TCP fallback for
the sandbox in commit eeb723fff (and covered it in the Windows TCP tests),
but forgot to lift the availability gate.  So execute_code was still
hidden by the check_fn path on Windows.

- SANDBOX_AVAILABLE is now True unconditionally (it's still checked, so a
  future platform can flip it off via monkeypatch or an env override if needed).
- Error message when disabled no longer mentions Windows specifically,
  just says 'sandbox is unavailable in this environment'.
- test_windows_returns_error updated: patches SANDBOX_AVAILABLE=False
  directly (which was always its real intent) and asserts on 'unavailable'
  instead of 'Windows'.

Tests: 171 code-execution + windows-compat tests pass, no regressions.
This commit is contained in:
Teknium 2026-05-07 18:17:31 -07:00
parent 8f91d7bfa9
commit 21efeb51bb
2 changed files with 15 additions and 5 deletions

View file

@ -774,11 +774,17 @@ class TestEnvVarFiltering(unittest.TestCase):
class TestExecuteCodeEdgeCases(unittest.TestCase):
def test_windows_returns_error(self):
"""On Windows (or when SANDBOX_AVAILABLE is False), returns error JSON."""
"""When SANDBOX_AVAILABLE is False (e.g. when the backend deems
the sandbox unusable for this environment), execute_code returns
an error JSON with a readable message pointing the caller at
regular tool calls. Previously this was a Windows-only gate;
execute_code now works on Windows via loopback TCP, so the
error is only emitted when SANDBOX_AVAILABLE is explicitly
flipped off (e.g. for future platform-specific disables)."""
with patch("tools.code_execution_tool.SANDBOX_AVAILABLE", False):
result = json.loads(execute_code("print('hi')", task_id="test"))
self.assertIn("error", result)
self.assertIn("Windows", result["error"])
self.assertIn("unavailable", result["error"].lower())
def test_whitespace_only_code(self):
result = json.loads(execute_code(" \n\t ", task_id="test"))

View file

@ -47,10 +47,13 @@ import uuid
_IS_WINDOWS = platform.system() == "Windows"
from typing import Any, Dict, List, Optional
# Availability gate: UDS requires a POSIX OS
# Availability gate. On Windows we fall back to loopback TCP for the
# sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
# ``_use_tcp_rpc`` in ``_execute_local`` below. That makes execute_code
# available on every platform Hermes itself runs on.
logger = logging.getLogger(__name__)
SANDBOX_AVAILABLE = sys.platform != "win32"
SANDBOX_AVAILABLE = True
# The 7 tools allowed inside the sandbox. The intersection of this list
# and the session's enabled tools determines which stubs are generated.
@ -971,7 +974,8 @@ def execute_code(
"""
if not SANDBOX_AVAILABLE:
return json.dumps({
"error": "execute_code is not available on Windows. Use normal tool calls instead."
"error": "execute_code sandbox is unavailable in this environment. "
"Use normal tool calls (terminal, read_file, write_file, ...) instead."
})
if not code or not code.strip():