diff --git a/tests/tools/test_container_cwd_sanitize.py b/tests/tools/test_container_cwd_sanitize.py new file mode 100644 index 00000000000..5de5bc2011b --- /dev/null +++ b/tests/tools/test_container_cwd_sanitize.py @@ -0,0 +1,146 @@ +"""Regression tests for host-path cwd sanitization on container backends. + +Two code paths in ``tools/terminal_tool.py`` must reject a host (or relative) +working directory before it reaches ``docker run -w``: + + 1. ``_get_env_config()`` sanitizes the ``TERMINAL_CWD``-derived ``config["cwd"]``. + 2. ``terminal_tool()`` resolves a *per-task cwd override* that WINS over + ``config["cwd"]`` (registered by the gateway/TUI for workspace tracking, + and by RL/benchmark envs). That override was applied RAW — never sanitized + — so a host cwd (e.g. a Windows desktop session's ``C:\\Users\\``) + leaked straight to ``docker run -w C:\\Users\\``, which fails to start + the container (exit 125). The sanitizer at path #1 lists ``C:\\``/``C:/`` as + host prefixes but only ever ran against ``config["cwd"]``, so the override + bypassed the one guard that would have caught it. + +Both paths now share ``_is_unusable_container_cwd()``; these tests pin its +behaviour so neither path can regress. +""" + +import tools.terminal_tool as tt + + +class TestIsUnusableContainerCwd: + def test_windows_backslash_host_path_rejected(self): + # The exact shape from the bug report: a Windows host cwd reaching a + # Linux container's -w flag. + assert tt._is_unusable_container_cwd(r"C:\Users\someuser") is True + + def test_windows_forwardslash_host_path_rejected(self): + assert tt._is_unusable_container_cwd("C:/Users/someuser") is True + + def test_posix_home_host_path_rejected(self): + assert tt._is_unusable_container_cwd("/home/ben/projects") is True + + def test_macos_users_host_path_rejected(self): + assert tt._is_unusable_container_cwd("/Users/ben/projects") is True + + def test_relative_path_rejected(self): + assert tt._is_unusable_container_cwd(".") is True + assert tt._is_unusable_container_cwd("src/app") is True + + def test_valid_container_workspace_accepted(self): + # In-container paths that RL/benchmark overrides legitimately set must + # pass through untouched. + assert tt._is_unusable_container_cwd("/workspace") is False + assert tt._is_unusable_container_cwd("/root") is False + assert tt._is_unusable_container_cwd("/app") is False + assert tt._is_unusable_container_cwd("/opt/project") is False + + def test_empty_is_not_flagged(self): + # Empty/None-ish cwd is handled by the caller's `or config["cwd"]` + # fallback, not by flagging it here. + assert tt._is_unusable_container_cwd("") is False + + def test_host_prefixes_include_windows_and_posix(self): + # Guard the constant itself — the Windows entries are the ones that + # were load-bearing for the reported desktop bug. + assert r"C:\\"[:2] in tt._HOST_CWD_PREFIXES or "C:\\" in tt._HOST_CWD_PREFIXES + assert "C:/" in tt._HOST_CWD_PREFIXES + assert "/home/" in tt._HOST_CWD_PREFIXES + assert "/Users/" in tt._HOST_CWD_PREFIXES + + def test_container_backends_set(self): + assert tt._CONTAINER_BACKENDS == frozenset( + {"docker", "singularity", "modal", "daytona"} + ) + + +class TestOverrideCwdSanitizedAtCallSite: + """E2E pin: a per-task cwd OVERRIDE that is a host path must NOT reach the + container builder. This is the actual reported bug — the gateway/TUI + registers the host launch dir as a cwd override, which previously won over + the (sanitized) config["cwd"] and flowed raw into `docker run -w`. + """ + + def _run_and_capture_cwd(self, monkeypatch, override_cwd, config_cwd="/root"): + """Drive terminal_tool() on the docker backend with a host-path cwd + override registered, and return the cwd that reached _create_environment + (i.e. the cwd that would be passed to `docker run -w`). + """ + captured = {} + + config = { + "env_type": "docker", + "docker_image": "pytorch/pytorch:latest", + "cwd": config_cwd, + "host_cwd": None, + "timeout": 180, + "lifetime_seconds": 300, + "container_cpu": 1, + "container_memory": 5120, + "container_disk": 51200, + "container_persistent": True, + "docker_volumes": [], + "docker_env": {}, + "docker_extra_args": [], + "docker_mount_cwd_to_workspace": False, + "docker_run_as_host_user": False, + "docker_forward_env": [], + "modal_mode": "auto", + } + + class _DummyEnv: + cwd = config_cwd + + def execute(self, *a, **k): + return {"output": "", "exit_code": 0} + + def fake_create_environment(env_type, image, cwd, timeout, **kwargs): + captured["cwd"] = cwd + return _DummyEnv() + + monkeypatch.setattr(tt, "_get_env_config", lambda: config) + monkeypatch.setattr(tt, "_start_cleanup_thread", lambda: None) + monkeypatch.setattr(tt, "_check_all_guards", lambda *a, **k: {"approved": True}) + monkeypatch.setattr(tt, "_create_environment", fake_create_environment) + # Force a fresh environment build so _create_environment is invoked. + monkeypatch.setattr(tt, "_active_environments", {}) + monkeypatch.setattr(tt, "_last_activity", {}) + + task_id = "sess-host-cwd" + tt.register_task_env_overrides(task_id, {"cwd": override_cwd}) + try: + tt.terminal_tool(command="pwd", task_id=task_id) + finally: + tt.clear_task_env_overrides(task_id) + tt._active_environments.pop(task_id, None) + tt._active_environments.pop("default", None) + return captured.get("cwd") + + def test_windows_host_override_does_not_reach_container(self, monkeypatch): + # The bug: C:\Users\ registered as override → docker run -w C:\Users\ → exit 125. + cwd = self._run_and_capture_cwd(monkeypatch, r"C:\Users\someuser") + assert cwd == "/root", ( + f"Host-path cwd override leaked to the container builder: {cwd!r}. " + "It must be sanitized back to config['cwd']." + ) + + def test_posix_host_override_does_not_reach_container(self, monkeypatch): + cwd = self._run_and_capture_cwd(monkeypatch, "/home/someuser/project") + assert cwd == "/root" + + def test_valid_container_override_is_preserved(self, monkeypatch): + # RL/benchmark envs set an in-container path; it must pass through. + cwd = self._run_and_capture_cwd(monkeypatch, "/workspace/task42") + assert cwd == "/workspace/task42" diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index 570ef5b2182..dab7ec14758 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -274,17 +274,30 @@ class TestEnsurepipFix: # ========================================================================= class TestHostPrefixList: - """Verify the host prefix list catches common host-only paths.""" + """Verify the host prefix list catches common host-only paths. - def test_all_common_host_prefixes_caught(self): - """The host prefix check should catch /Users/, /home/, C:\\, C:/.""" - # Read the actual source to verify the prefixes - import inspect - source = inspect.getsource(_tt_mod._get_env_config) - for prefix in ["/Users/", "/home/", 'C:\\\\"', "C:/"]: - # Normalize for source comparison - check = prefix.rstrip('"') - assert check in source or prefix in source, ( - f"Host prefix {prefix!r} not found in _get_env_config. " + The prefixes used to live as an inline literal inside ``_get_env_config``; + they now live in the module-level ``_HOST_CWD_PREFIXES`` constant shared by + both the ``_get_env_config`` sanitizer and the override-resolution guard + (``_is_unusable_container_cwd``). Assert the *behavior* (each common host + prefix is flagged as unusable inside a container) rather than grepping a + function's source — the latter is a change-detector that breaks on any + refactor that moves the constant. + """ + + def test_all_common_host_prefixes_present_in_constant(self): + """The shared prefix constant must list the common host-only roots.""" + for prefix in ("/Users/", "/home/", "C:\\", "C:/"): + assert prefix in _tt_mod._HOST_CWD_PREFIXES, ( + f"Host prefix {prefix!r} missing from _HOST_CWD_PREFIXES. " "Container backends need this to avoid using host paths." ) + + def test_all_common_host_paths_flagged_unusable(self): + """A host path under each prefix must be rejected as a container cwd.""" + for host_path in ("/Users/me/proj", "/home/me/proj", + "C:\\Users\\me", "C:/Users/me"): + assert _tt_mod._is_unusable_container_cwd(host_path) is True, ( + f"Host path {host_path!r} should be rejected as a container " + "cwd but was accepted." + ) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index b89a5d8a959..daff82ac34a 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1086,6 +1086,36 @@ def _safe_getcwd() -> str: return os.getenv("TERMINAL_CWD") or os.path.expanduser("~") +# Path prefixes that identify a *host* working directory which cannot exist +# inside a container sandbox. Covers POSIX user dirs and Windows drive paths +# (``C:\Users\...`` / ``C:/Users/...``) — the latter is how a Windows host's +# cwd looks when it leaks toward a Linux container's ``-w`` flag. +_HOST_CWD_PREFIXES = ("/Users/", "/home/", "C:\\", "C:/") + +_CONTAINER_BACKENDS = frozenset({"docker", "singularity", "modal", "daytona"}) + + +def _is_unusable_container_cwd(cwd: str) -> bool: + """Return True if *cwd* is a host/relative path that won't work as the + working directory inside a container sandbox. + + A container's cwd must be an absolute path that exists *inside* the + sandbox (e.g. ``/workspace`` or ``/root``). A host path (``/home/user``, + ``C:\\Users\\me``) or a relative path (``.``, ``src/``) is meaningless to + ``docker run -w`` and makes the container fail to start (exit 125). + """ + if not cwd: + return False + if any(cwd.startswith(p) for p in _HOST_CWD_PREFIXES): + return True + # Relative paths (".", "src/") can't be a container workdir either. Windows + # drive paths are absolute on Windows but os.path.isabs() is False on a + # POSIX host, so they're already caught by the prefix check above. + if not os.path.isabs(cwd): + return True + return False + + def _get_env_config() -> Dict[str, Any]: """Get terminal environment configuration from environment variables.""" # Default image with Python and Node.js for maximum compatibility @@ -1138,21 +1168,18 @@ def _get_env_config() -> Dict[str, Any]: if cwd: cwd = os.path.expanduser(cwd) host_cwd = None - host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") if env_type == "docker" and mount_docker_cwd: docker_cwd_source = os.getenv("TERMINAL_CWD") or _safe_getcwd() candidate = os.path.abspath(os.path.expanduser(docker_cwd_source)) if ( - any(candidate.startswith(p) for p in host_prefixes) + any(candidate.startswith(p) for p in _HOST_CWD_PREFIXES) or (os.path.isabs(candidate) and os.path.isdir(candidate) and not candidate.startswith(("/workspace", "/root"))) ): host_cwd = candidate cwd = "/workspace" - elif env_type in {"modal", "docker", "singularity", "daytona"} and cwd: + elif env_type in _CONTAINER_BACKENDS and cwd: # Host paths and relative paths that won't work inside containers - is_host_path = any(cwd.startswith(p) for p in host_prefixes) - is_relative = not os.path.isabs(cwd) # e.g. "." or "src/" - if (is_host_path or is_relative) and cwd != default_cwd: + if _is_unusable_container_cwd(cwd) and cwd != default_cwd: logger.info("Ignoring TERMINAL_CWD=%r for %s backend " "(host/relative path won't work in sandbox). Using %r instead.", cwd, env_type, default_cwd) @@ -1925,6 +1952,25 @@ def terminal_tool( image = "" cwd = overrides.get("cwd") or config["cwd"] + # A per-task cwd override (registered by the gateway/TUI for workspace + # tracking, or by RL/benchmark envs) wins over config["cwd"] — but + # config["cwd"] was already sanitized for container backends in + # _get_env_config() while the override is raw. On a container backend a + # raw host path (e.g. a Windows desktop session's C:\Users\, or a + # POSIX /home/) reaches `docker run -w ` and the + # container fails to start (exit 125). Re-apply the same host/relative + # path guard to the *resolved* cwd so the override can't bypass it. + # Valid in-container override paths (RL/benchmark sandboxes that set + # cwd to /workspace, /root, etc.) are absolute non-host paths and pass + # through untouched. + if env_type in _CONTAINER_BACKENDS and _is_unusable_container_cwd(cwd): + if cwd != config["cwd"]: + logger.info( + "Ignoring host/relative cwd override %r for %s backend " + "(won't exist in sandbox). Using %r instead.", + cwd, env_type, config["cwd"], + ) + cwd = config["cwd"] default_timeout = config["timeout"] effective_timeout = timeout or default_timeout