diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 117b96d15..e691bedab 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -51,6 +51,7 @@ import subprocess import shutil import sys import asyncio +import tempfile import threading import time import requests @@ -644,17 +645,25 @@ def _find_agent_browser() -> str: """ Find the agent-browser CLI executable. + Checks in order: PATH, local node_modules/.bin/, npx fallback. + Returns: Path to agent-browser executable Raises: FileNotFoundError: If agent-browser is not installed """ - # Check if it's in PATH + # Check if it's in PATH (global install) which_result = shutil.which("agent-browser") if which_result: return which_result + # Check local node_modules/.bin/ (npm install in repo root) + repo_root = Path(__file__).parent.parent + local_bin = repo_root / "node_modules" / ".bin" / "agent-browser" + if local_bin.exists(): + return str(local_bin) + # Check common npx locations npx_path = shutil.which("npx") if npx_path: @@ -662,6 +671,7 @@ def _find_agent_browser() -> str: raise FileNotFoundError( "agent-browser CLI not found. Install it with: npm install -g agent-browser\n" + "Or run 'npm install' in the repo root to install locally.\n" "Or ensure npx is available in your PATH." ) @@ -708,12 +718,26 @@ def _run_browser_command( ] + args try: + # Give each task its own socket directory to prevent concurrency conflicts. + # Without this, parallel workers fight over the same default socket path, + # causing "Failed to create socket directory: Permission denied" errors. + task_socket_dir = os.path.join( + tempfile.gettempdir(), + f"agent-browser-{session_info['session_name']}" + ) + os.makedirs(task_socket_dir, exist_ok=True) + + browser_env = { + **os.environ, + "AGENT_BROWSER_SOCKET_DIR": task_socket_dir, + } + result = subprocess.run( cmd_parts, capture_output=True, text=True, timeout=timeout, - env={**os.environ} + env=browser_env, ) # Parse JSON output @@ -1487,6 +1511,13 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: except Exception as e: print(f"[browser_tool] Exception during BrowserBase session close: {e}", file=sys.stderr) + # Clean up per-task socket directory + session_name = session_info.get("session_name", "") + if session_name: + socket_dir = os.path.join(tempfile.gettempdir(), f"agent-browser-{session_name}") + if os.path.exists(socket_dir): + shutil.rmtree(socket_dir, ignore_errors=True) + del _active_sessions[task_id] if not os.getenv("HERMES_QUIET"): print(f"[browser_tool] Removed task {task_id} from active sessions", file=sys.stderr) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 93e250c86..a3b3c71e1 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -83,9 +83,9 @@ def _get_apptainer_cache_dir() -> Path: cache_path.mkdir(parents=True, exist_ok=True) return cache_path - # Use scratch dir parent for cache (one level up from sandboxes) + # Use user-specific subdirectory in scratch for cache scratch = _get_scratch_dir() - cache_path = scratch.parent / ".apptainer" + cache_path = scratch / ".apptainer" cache_path.mkdir(parents=True, exist_ok=True) return cache_path @@ -655,49 +655,37 @@ class _SingularityEnvironment: # Get or build SIF from docker:// URL (fast if already cached) self.image = _get_or_build_sif(image, self.executable) - # Get scratch directory for sandbox + # Get scratch directory for working files self.scratch_dir = _get_scratch_dir() - # Create unique sandbox directory + # Create unique ID for this environment self.sandbox_id = f"hermes-{uuid.uuid4().hex[:12]}" - self.sandbox_dir = self.scratch_dir / self.sandbox_id # Create a working directory that will be bound into the container + # This persists files across commands within the same task self.work_dir = self.scratch_dir / f"{self.sandbox_id}-work" self.work_dir.mkdir(parents=True, exist_ok=True) - # Build the sandbox - self._build_sandbox() - - def _build_sandbox(self): - """Build a writable sandbox from the container image (SIF or other).""" - try: - result = subprocess.run( - [self.executable, "build", "--sandbox", str(self.sandbox_dir), self.image], - capture_output=True, - text=True, - timeout=300 # 5 min timeout for building - ) - if result.returncode != 0: - raise RuntimeError(f"Failed to build sandbox: {result.stderr}") - - # Create /workspace directory inside the sandbox for bind mounting - workspace_in_sandbox = self.sandbox_dir / "workspace" - workspace_in_sandbox.mkdir(parents=True, exist_ok=True) - - except subprocess.TimeoutExpired: - shutil.rmtree(self.sandbox_dir, ignore_errors=True) - raise RuntimeError("Sandbox build timed out") + # No sandbox build needed! We use --writable-tmpfs which runs directly + # from the read-only SIF with an in-memory writable overlay. + # This is instant (vs minutes for sandbox build) and scales to any + # number of concurrent workers since they all share the same SIF. + print(f"[Singularity] Environment {self.sandbox_id} ready (tmpfs overlay, no build needed)", flush=True) def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict: """Execute a command in the Singularity container.""" cmd = [self.executable, "exec"] - # Isolation flags - contain but allow network - cmd.extend(["--contain", "--cleanenv"]) + # Isolation flags - contain filesystem, clean env, isolate PID namespace + # --contain: isolate filesystem (no host mounts except explicit binds) + # --cleanenv: don't inherit host environment variables + # --pid: isolate PID namespace (ps aux only shows container processes) + # --writable-tmpfs: RAM-backed writable overlay on top of read-only SIF + # (instant startup, no sandbox build, scales to 250+ concurrent workers) + cmd.extend(["--contain", "--cleanenv", "--pid", "--writable-tmpfs"]) # Bind the working directory into the container at /workspace - # This gives the container access to a large writable space + # This gives the container access to a large writable space on disk cmd.extend(["--bind", f"{self.work_dir}:/workspace"]) # Also bind it to /tmp inside container for pip cache etc. @@ -707,8 +695,8 @@ class _SingularityEnvironment: work_dir = cwd or self.cwd cmd.extend(["--pwd", work_dir]) - # Use writable sandbox - cmd.extend(["--writable", str(self.sandbox_dir)]) + # Use the SIF directly (read-only, shared by all workers) + cmd.append(str(self.image)) # Transform sudo commands if SUDO_PASSWORD is available exec_command = _transform_sudo_command(command) @@ -732,8 +720,7 @@ class _SingularityEnvironment: return {"output": f"Command timed out after {timeout or self.timeout}s", "returncode": 124} def cleanup(self): - """Clean up sandbox and working directory.""" - shutil.rmtree(self.sandbox_dir, ignore_errors=True) + """Clean up working directory (no sandbox to remove with tmpfs overlay).""" shutil.rmtree(self.work_dir, ignore_errors=True) def stop(self):