From 329c33dac3d1dafe81ffa83e7194f7ff1e734c61 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 23:36:16 -0700 Subject: [PATCH] fix(terminal): read cwd overrides under raw task_id after container collapse PR #41822 collapsed CWD-only overrides to the shared 'default' container via _resolve_container_task_id, but three call sites kept routing the *env/override lookup* through that collapsed id: - the foreground exec path read _task_env_overrides[effective_task_id], yet register_task_env_overrides writes under the raw task_id, so a CWD-only override's cwd was silently dropped (env spun up at the wrong root, exit 126); - the get-or-create env lookup keyed solely on effective_task_id, so an env cached under the raw task_id was missed and duplicated; - register_task_env_overrides synced the new cwd onto the env under the collapsed id, missing a live env cached under the raw task_id. Container *identity* still collapses to 'default' (sharing preserved); only the per-session env/override *lookup* now prefers the raw task_id and falls back to the collapsed id. Fixes the 3 regressions in test_terminal_task_cwd.py left red by #41822. --- tools/terminal_tool.py | 48 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 8d091705fc1..3e17d2c865e 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -972,9 +972,14 @@ def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]): # while letting an explicit ACP cwd change win, as the client expects. new_cwd = overrides.get("cwd") if isinstance(new_cwd, str) and new_cwd.strip(): + # The live env is cached under the raw task_id for per-session surfaces + # (ACP/gateway/dashboard) and under the collapsed container id for + # isolation-keyed rollouts. Try the raw id first, then the container id, + # so a CWD-only override (which collapses to "default") still finds and + # updates the originating session's env. container_id = _resolve_container_task_id(task_id) with _env_lock: - env = _active_environments.get(container_id) + env = _active_environments.get(task_id) or _active_environments.get(container_id) if env is not None and getattr(env, "cwd", None) is not None: env.cwd = new_cwd @@ -1848,8 +1853,20 @@ def terminal_tool( effective_task_id = _resolve_container_task_id(task_id) # Check per-task overrides (set by environments like TerminalBench2Env) - # before falling back to global env var config - overrides = _task_env_overrides.get(effective_task_id, {}) + # before falling back to global env var config. + # + # Overrides are keyed by the *raw* task_id (that's the key + # ``register_task_env_overrides`` writes under), NOT by the collapsed + # container id. A CWD-only override collapses ``effective_task_id`` to + # ``"default"`` for container sharing, but its cwd must still be read + # back here under the originating task_id, or the override is silently + # dropped. Fall back to the collapsed id so isolation-keyed RL/benchmark + # overrides (registered under an id that equals their container id) keep + # resolving as before. + overrides = ( + (_task_env_overrides.get(task_id) if task_id else None) + or _task_env_overrides.get(effective_task_id, {}) + ) # Select image based on env type, with per-task override support if env_type == "docker": @@ -1898,9 +1915,18 @@ def terminal_tool( # task_id wait for the first one to finish creating the sandbox, # instead of each creating their own (wasting Modal resources). with _env_lock: - if effective_task_id in _active_environments: - _last_activity[effective_task_id] = time.time() - env = _active_environments[effective_task_id] + # Prefer the collapsed container id, but fall back to an env cached + # under the raw task_id. Per-session surfaces (ACP/gateway/dashboard) + # with a CWD-only override collapse to "default" for container + # sharing, yet an env may already be cached under the originating + # task_id; honor it instead of spawning a duplicate. + _existing_key = ( + effective_task_id if effective_task_id in _active_environments + else (task_id if task_id and task_id in _active_environments else None) + ) + if _existing_key is not None: + _last_activity[_existing_key] = time.time() + env = _active_environments[_existing_key] needs_creation = False else: needs_creation = True @@ -1915,9 +1941,13 @@ def terminal_tool( with task_lock: # Double-check after acquiring the per-task lock with _env_lock: - if effective_task_id in _active_environments: - _last_activity[effective_task_id] = time.time() - env = _active_environments[effective_task_id] + _existing_key = ( + effective_task_id if effective_task_id in _active_environments + else (task_id if task_id and task_id in _active_environments else None) + ) + if _existing_key is not None: + _last_activity[_existing_key] = time.time() + env = _active_environments[_existing_key] needs_creation = False if needs_creation: