diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index a580604658..2d08265fb7 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -774,11 +774,17 @@ class TestEnvVarFiltering(unittest.TestCase):
 
 class TestExecuteCodeEdgeCases(unittest.TestCase):
     def test_windows_returns_error(self):
-        """On Windows (or when SANDBOX_AVAILABLE is False), returns error JSON."""
+        """When SANDBOX_AVAILABLE is False (e.g. when the backend deems
+        the sandbox unusable for this environment), execute_code returns
+        an error JSON with a readable message pointing the caller at
+        regular tool calls. Previously this was a Windows-only gate;
+        execute_code now works on Windows via loopback TCP, so the
+        error is only emitted when SANDBOX_AVAILABLE is explicitly
+        flipped off (e.g. for future platform-specific disables)."""
         with patch("tools.code_execution_tool.SANDBOX_AVAILABLE", False):
             result = json.loads(execute_code("print('hi')", task_id="test"))
         self.assertIn("error", result)
-        self.assertIn("Windows", result["error"])
+        self.assertIn("unavailable", result["error"].lower())
 
     def test_whitespace_only_code(self):
         result = json.loads(execute_code(" \n\t ", task_id="test"))
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 89d42484c2..8e9112056f 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -47,10 +47,13 @@ import uuid
 _IS_WINDOWS = platform.system() == "Windows"
 from typing import Any, Dict, List, Optional
 
-# Availability gate: UDS requires a POSIX OS
+# Availability gate. On Windows we fall back to loopback TCP for the
+# sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
+# ``_use_tcp_rpc`` in ``_execute_local`` below. That makes execute_code
+# available on every platform Hermes itself runs on.
 logger = logging.getLogger(__name__)
 
-SANDBOX_AVAILABLE = sys.platform != "win32"
+SANDBOX_AVAILABLE = True
 
 # The 7 tools allowed inside the sandbox. The intersection of this list
 # and the session's enabled tools determines which stubs are generated.
@@ -971,7 +974,8 @@ def execute_code(
     """
     if not SANDBOX_AVAILABLE:
         return json.dumps({
-            "error": "execute_code is not available on Windows. Use normal tool calls instead."
+            "error": "execute_code sandbox is unavailable in this environment. "
+                     "Use normal tool calls (terminal, read_file, write_file, ...) instead."
         })
 
     if not code or not code.strip():