diff --git a/tests/tools/test_shared_container_task_id.py b/tests/tools/test_shared_container_task_id.py index ab599fa8557..3a66cde441e 100644 --- a/tests/tools/test_shared_container_task_id.py +++ b/tests/tools/test_shared_container_task_id.py @@ -105,3 +105,49 @@ def test_get_active_env_honours_rl_override(): terminal_tool.clear_task_env_overrides("rl-42") terminal_tool._active_environments.pop("default", None) terminal_tool._active_environments.pop("rl-42", None) + + +def test_cwd_only_override_collapses_to_default(): + """CWD-only overrides (ACP adapter workspace tracking) must NOT trigger + container isolation — they should collapse to the shared 'default' + container so all surfaces (TUI, gateway, dashboard) share one sandbox. + Regression for #37361.""" + terminal_tool.register_task_env_overrides( + "acp-session-abc", {"cwd": "/home/user/project"} + ) + try: + assert ( + terminal_tool._resolve_container_task_id("acp-session-abc") + == "default" + ) + finally: + terminal_tool.clear_task_env_overrides("acp-session-abc") + + +def test_cwd_plus_docker_image_keeps_own_id(): + """When overrides include both cwd AND docker_image, isolation must + still be honoured (RL/benchmark pattern with explicit cwd).""" + terminal_tool.register_task_env_overrides( + "rl-with-cwd", {"docker_image": "myimg:latest", "cwd": "/workspace"} + ) + try: + assert ( + terminal_tool._resolve_container_task_id("rl-with-cwd") + == "rl-with-cwd" + ) + finally: + terminal_tool.clear_task_env_overrides("rl-with-cwd") + + +def test_env_type_override_keeps_own_id(): + """env_type is an isolation key — must trigger per-task container.""" + terminal_tool.register_task_env_overrides( + "bench-env", {"env_type": "sandbox", "cwd": "/work"} + ) + try: + assert ( + terminal_tool._resolve_container_task_id("bench-env") + == "bench-env" + ) + finally: + terminal_tool.clear_task_env_overrides("bench-env") diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 3e81eff9f67..8d091705fc1 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1006,9 +1006,20 @@ def _resolve_container_task_id(task_id: Optional[str]) -> str: task_id, we honour it by returning the task_id unchanged -- those rollouts need their own isolated sandbox, which is the whole point of the override. + + CWD-only overrides (registered by the ACP adapter for workspace + tracking) are *not* isolation signals — they should not cause each + session to spin up its own container. Only overrides containing + backend-specific image keys or ``env_type`` trigger isolation. """ + _ISOLATION_KEYS = frozenset({ + "docker_image", "modal_image", "singularity_image", + "daytona_image", "env_type", + }) if task_id and task_id in _task_env_overrides: - return task_id + overrides = _task_env_overrides[task_id] + if set(overrides.keys()) & _ISOLATION_KEYS: + return task_id return "default"