feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

2026-04-27 01:11:40 +00:00 · 2026-03-09 20:36:51 -05:00 · 2026-03-09 20:36:51 -05:00 · f4d7e6a29e
commit f4d7e6a29e
parent 172a38c344
111 changed files with 11655 additions and 10200 deletions
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@ -31,7 +31,7 @@ import time
 import uuid

 _IS_WINDOWS = platform.system() == "Windows"
-from typing import Any, Dict, List, Optional
+from typing import Any

 # Availability gate: UDS requires a POSIX OS
 logger = logging.getLogger(__name__)
@ -40,21 +40,23 @@ SANDBOX_AVAILABLE = sys.platform != "win32"

 # The 7 tools allowed inside the sandbox. The intersection of this list
 # and the session's enabled tools determines which stubs are generated.
-SANDBOX_ALLOWED_TOOLS = frozenset([
-    "web_search",
-    "web_extract",
-    "read_file",
-    "write_file",
-    "search_files",
-    "patch",
-    "terminal",
-])
+SANDBOX_ALLOWED_TOOLS = frozenset(
+    [
+        "web_search",
+        "web_extract",
+        "read_file",
+        "write_file",
+        "search_files",
+        "patch",
+        "terminal",
+    ]
+)

 # Resource limit defaults (overridable via config.yaml → code_execution.*)
-DEFAULT_TIMEOUT = 300        # 5 minutes
+DEFAULT_TIMEOUT = 300  # 5 minutes
 DEFAULT_MAX_TOOL_CALLS = 50
-MAX_STDOUT_BYTES = 50_000    # 50 KB
-MAX_STDERR_BYTES = 10_000    # 10 KB
+MAX_STDOUT_BYTES = 50_000  # 50 KB
+MAX_STDERR_BYTES = 10_000  # 10 KB


 def check_sandbox_requirements() -> bool:
@ -114,7 +116,7 @@ _TOOL_STUBS = {
 }


-def generate_hermes_tools_module(enabled_tools: List[str]) -> str:
+def generate_hermes_tools_module(enabled_tools: list[str]) -> str:
    """
    Build the source code for the hermes_tools.py stub module.

@ -128,11 +130,7 @@ def generate_hermes_tools_module(enabled_tools: List[str]) -> str:
        if tool_name not in _TOOL_STUBS:
            continue
        func_name, sig, doc, args_expr = _TOOL_STUBS[tool_name]
-        stub_functions.append(
-            f"def {func_name}({sig}):\n"
-            f"    {doc}\n"
-            f"    return _call({func_name!r}, {args_expr})\n"
-        )
+        stub_functions.append(f"def {func_name}({sig}):\n    {doc}\n    return _call({func_name!r}, {args_expr})\n")
        export_names.append(func_name)

    header = '''\
@ -223,7 +221,7 @@ def _rpc_server_loop(
    server_sock: socket.socket,
    task_id: str,
    tool_call_log: list,
-    tool_call_counter: list,   # mutable [int] so the thread can increment
+    tool_call_counter: list,  # mutable [int] so the thread can increment
    max_tool_calls: int,
    allowed_tools: frozenset,
 ):
@ -243,7 +241,7 @@ def _rpc_server_loop(
        while True:
            try:
                chunk = conn.recv(65536)
-            except socket.timeout:
+            except TimeoutError:
                break
            if not chunk:
                break
@ -270,23 +268,22 @@ def _rpc_server_loop(
                # Enforce the allow-list
                if tool_name not in allowed_tools:
                    available = ", ".join(sorted(allowed_tools))
-                    resp = json.dumps({
-                        "error": (
-                            f"Tool '{tool_name}' is not available in execute_code. "
-                            f"Available: {available}"
-                        )
-                    })
+                    resp = json.dumps(
+                        {"error": (f"Tool '{tool_name}' is not available in execute_code. Available: {available}")}
+                    )
                    conn.sendall((resp + "\n").encode())
                    continue

                # Enforce tool call limit
                if tool_call_counter[0] >= max_tool_calls:
-                    resp = json.dumps({
-                        "error": (
-                            f"Tool call limit reached ({max_tool_calls}). "
-                            "No more tool calls allowed in this execution."
-                        )
-                    })
+                    resp = json.dumps(
+                        {
+                            "error": (
+                                f"Tool call limit reached ({max_tool_calls}). "
+                                "No more tool calls allowed in this execution."
+                            )
+                        }
+                    )
                    conn.sendall((resp + "\n").encode())
                    continue

@ -303,9 +300,7 @@ def _rpc_server_loop(
                    sys.stdout = open(os.devnull, "w")
                    sys.stderr = open(os.devnull, "w")
                    try:
-                        result = handle_function_call(
-                            tool_name, tool_args, task_id=task_id
-                        )
+                        result = handle_function_call(tool_name, tool_args, task_id=task_id)
                    finally:
                        sys.stdout.close()
                        sys.stderr.close()
@ -318,15 +313,17 @@ def _rpc_server_loop(

                # Log for observability
                args_preview = str(tool_args)[:80]
-                tool_call_log.append({
-                    "tool": tool_name,
-                    "args_preview": args_preview,
-                    "duration": round(call_duration, 2),
-                })
+                tool_call_log.append(
+                    {
+                        "tool": tool_name,
+                        "args_preview": args_preview,
+                        "duration": round(call_duration, 2),
+                    }
+                )

                conn.sendall((result + "\n").encode())

-    except socket.timeout:
+    except TimeoutError:
        pass
    except OSError:
        pass
@ -342,10 +339,11 @@ def _rpc_server_loop(
 # Main entry point
 # ---------------------------------------------------------------------------

+
 def execute_code(
    code: str,
-    task_id: Optional[str] = None,
-    enabled_tools: Optional[List[str]] = None,
+    task_id: str | None = None,
+    enabled_tools: list[str] | None = None,
 ) -> str:
    """
    Run a Python script in a sandboxed child process with RPC access
@ -361,9 +359,7 @@ def execute_code(
        JSON string with execution results.
    """
    if not SANDBOX_AVAILABLE:
-        return json.dumps({
-            "error": "execute_code is not available on Windows. Use normal tool calls instead."
-        })
+        return json.dumps({"error": "execute_code is not available on Windows. Use normal tool calls instead."})

    if not code or not code.strip():
        return json.dumps({"error": "No code provided."})
@ -397,9 +393,7 @@ def execute_code(

    try:
        # Write the auto-generated hermes_tools module
-        tools_src = generate_hermes_tools_module(
-            list(sandbox_tools) if enabled_tools else list(SANDBOX_ALLOWED_TOOLS)
-        )
+        tools_src = generate_hermes_tools_module(list(sandbox_tools) if enabled_tools else list(SANDBOX_ALLOWED_TOOLS))
        with open(os.path.join(tmpdir, "hermes_tools.py"), "w") as f:
            f.write(tools_src)

@ -415,8 +409,12 @@ def execute_code(
        rpc_thread = threading.Thread(
            target=_rpc_server_loop,
            args=(
-                server_sock, task_id, tool_call_log,
-                tool_call_counter, max_tool_calls, sandbox_tools,
+                server_sock,
+                task_id,
+                tool_call_log,
+                tool_call_counter,
+                max_tool_calls,
+                sandbox_tools,
            ),
            daemon=True,
        )
@ -426,11 +424,24 @@ def execute_code(
        # Build a minimal environment for the child. We intentionally exclude
        # API keys and tokens to prevent credential exfiltration from LLM-
        # generated scripts. The child accesses tools via RPC, not direct API.
-        _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
-                              "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
-                              "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
-        _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
-                              "PASSWD", "AUTH")
+        _SAFE_ENV_PREFIXES = (
+            "PATH",
+            "HOME",
+            "USER",
+            "LANG",
+            "LC_",
+            "TERM",
+            "TMPDIR",
+            "TMP",
+            "TEMP",
+            "SHELL",
+            "LOGNAME",
+            "XDG_",
+            "PYTHONPATH",
+            "VIRTUAL_ENV",
+            "CONDA",
+        )
+        _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL", "PASSWD", "AUTH")
        child_env = {}
        for k, v in os.environ.items():
            if any(s in k.upper() for s in _SECRET_SUBSTRINGS):
@ -515,7 +526,7 @@ def execute_code(
        rpc_thread.join(timeout=3)

        # Build response
-        result: Dict[str, Any] = {
+        result: dict[str, Any] = {
            "status": status,
            "output": stdout_text,
            "tool_calls_made": tool_call_counter[0],
@ -538,17 +549,21 @@ def execute_code(
    except Exception as exc:
        duration = round(time.monotonic() - exec_start, 2)
        logging.exception("execute_code failed")
-        return json.dumps({
-            "status": "error",
-            "error": str(exc),
-            "tool_calls_made": tool_call_counter[0],
-            "duration_seconds": duration,
-        }, ensure_ascii=False)
+        return json.dumps(
+            {
+                "status": "error",
+                "error": str(exc),
+                "tool_calls_made": tool_call_counter[0],
+                "duration_seconds": duration,
+            },
+            ensure_ascii=False,
+        )

    finally:
        # Cleanup temp dir and socket
        try:
            import shutil
+
            shutil.rmtree(tmpdir, ignore_errors=True)
        except Exception as e:
            logger.debug("Could not clean temp dir: %s", e)
@ -592,6 +607,7 @@ def _load_config() -> dict:
    """Load code_execution config from CLI_CONFIG if available."""
    try:
        from cli import CLI_CONFIG
+
        return CLI_CONFIG.get("code_execution", {})
    except Exception:
        return {}
@ -604,27 +620,37 @@ def _load_config() -> dict:
 # Per-tool documentation lines for the execute_code description.
 # Ordered to match the canonical display order.
 _TOOL_DOC_LINES = [
-    ("web_search",
-     "  web_search(query: str, limit: int = 5) -> dict\n"
-     "    Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
-    ("web_extract",
-     "  web_extract(urls: list[str]) -> dict\n"
-     "    Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"),
-    ("read_file",
-     "  read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
-     "    Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
-    ("write_file",
-     "  write_file(path: str, content: str) -> dict\n"
-     "    Always overwrites the entire file."),
-    ("search_files",
-     "  search_files(pattern: str, target=\"content\", path=\".\", file_glob=None, limit=50) -> dict\n"
-     "    target: \"content\" (search inside files) or \"files\" (find files by name). Returns {\"matches\": [...]}"),
-    ("patch",
-     "  patch(path: str, old_string: str, new_string: str, replace_all: bool = False) -> dict\n"
-     "    Replaces old_string with new_string in the file."),
-    ("terminal",
-     "  terminal(command: str, timeout=None, workdir=None) -> dict\n"
-     "    Foreground only (no background/pty). Returns {\"output\": \"...\", \"exit_code\": N}"),
+    (
+        "web_search",
+        "  web_search(query: str, limit: int = 5) -> dict\n"
+        '    Returns {"data": {"web": [{"url", "title", "description"}, ...]}}',
+    ),
+    (
+        "web_extract",
+        "  web_extract(urls: list[str]) -> dict\n"
+        '    Returns {"results": [{"url", "title", "content", "error"}, ...]} where content is markdown',
+    ),
+    (
+        "read_file",
+        "  read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
+        '    Lines are 1-indexed. Returns {"content": "...", "total_lines": N}',
+    ),
+    ("write_file", "  write_file(path: str, content: str) -> dict\n    Always overwrites the entire file."),
+    (
+        "search_files",
+        '  search_files(pattern: str, target="content", path=".", file_glob=None, limit=50) -> dict\n'
+        '    target: "content" (search inside files) or "files" (find files by name). Returns {"matches": [...]}',
+    ),
+    (
+        "patch",
+        "  patch(path: str, old_string: str, new_string: str, replace_all: bool = False) -> dict\n"
+        "    Replaces old_string with new_string in the file.",
+    ),
+    (
+        "terminal",
+        "  terminal(command: str, timeout=None, workdir=None) -> dict\n"
+        '    Foreground only (no background/pty). Returns {"output": "...", "exit_code": N}',
+    ),
 ]


@ -639,9 +665,7 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
        enabled_sandbox_tools = SANDBOX_ALLOWED_TOOLS

    # Build tool documentation lines for only the enabled tools
-    tool_lines = "\n".join(
-        doc for name, doc in _TOOL_DOC_LINES if name in enabled_sandbox_tools
-    )
+    tool_lines = "\n".join(doc for name, doc in _TOOL_DOC_LINES if name in enabled_sandbox_tools)

    # Build example import list from enabled tools
    import_examples = [n for n in ("web_search", "terminal") if n in enabled_sandbox_tools]
@ -702,8 +726,7 @@ registry.register(
    toolset="code_execution",
    schema=EXECUTE_CODE_SCHEMA,
    handler=lambda args, **kw: execute_code(
-        code=args.get("code", ""),
-        task_id=kw.get("task_id"),
-        enabled_tools=kw.get("enabled_tools")),
+        code=args.get("code", ""), task_id=kw.get("task_id"), enabled_tools=kw.get("enabled_tools")
+    ),
    check_fn=check_sandbox_requirements,
 )