Add new environments and enhance tool context functionality

- Introduced new environments: Terminal Test Environment and SWE Environment, each with default configurations for testing and software engineering tasks.
- Added TerminalBench 2.0 evaluation environment with comprehensive setup for agentic LLMs, including task execution and verification.
- Enhanced ToolContext with methods for uploading and downloading files, ensuring binary-safe operations.
- Updated documentation across environments to reflect new features and usage instructions.
- Refactored existing environment configurations for consistency and clarity.
2026-06-15 09:21:36 +00:00 · 2026-02-10 19:39:05 +00:00 · 2026-02-10 19:39:05 +00:00 · 35ad3146a8
commit 35ad3146a8
parent e8343f2d87
18 changed files with 1428 additions and 19 deletions
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -39,19 +39,24 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
    # Create environment OUTSIDE locks so we don't block other rollouts
    # during slow Modal/Docker startup (~10s)
    if needs_creation:
+        from tools.terminal_tool import _task_env_overrides
+        
        config = _get_env_config()
        env_type = config["env_type"]
        
+        # Check per-task overrides (set by environments like TerminalBench2Env)
+        overrides = _task_env_overrides.get(task_id, {})
+        
        if env_type == "docker":
-            image = config["docker_image"]
+            image = overrides.get("docker_image") or config["docker_image"]
        elif env_type == "singularity":
-            image = config["singularity_image"]
+            image = overrides.get("singularity_image") or config["singularity_image"]
        elif env_type == "modal":
-            image = config["modal_image"]
+            image = overrides.get("modal_image") or config["modal_image"]
        else:
            image = ""
        
-        cwd = config["cwd"]
+        cwd = overrides.get("cwd") or config["cwd"]
        _check_disk_usage_warning()
        if not os.getenv("HERMES_QUIET"):
            print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)