From b2faeba182fbed7018ed0323f3cb668660b4768d Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sat, 27 Jun 2026 19:07:09 -0700 Subject: [PATCH] fix(file-ops): make preserved cwd reachable at write-time resolution (#26211) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Belt-and-suspenders on top of the cherry-picked cwd-preservation fix: - Proactively mirror every live terminal cwd into _last_known_cwd on each successful read, so the durable anchor survives even when the cleanup thread pops both _file_ops_cache and _active_environments before _get_file_ops' stale-cache save branch can fire. - Fall back to _last_known_cwd in _authoritative_workspace_root. write_file_tool resolves the path (via _resolve_path_for_task) BEFORE _get_file_ops rebuilds the env, so restoring only the rebuilt env's cwd was insufficient — the resolution that decides where the file lands runs first. This closes that gap. The local env's persisted _cwd_file can't serve this role: it's keyed by a random per-session uuid and deleted on cleanup (the same cleanup that triggers the bug). The in-memory _last_known_cwd registry is the durable anchor instead. Adds a real-IO E2E regression (TestSilentFileMisplacementE2E) exercising the actual write_file_tool path after env cleanup. --- tests/tools/test_file_tools.py | 138 +++++++++++++++++++++++++++++++++ tools/file_tools.py | 51 +++++++++++- 2 files changed, 188 insertions(+), 1 deletion(-) diff --git a/tests/tools/test_file_tools.py b/tests/tools/test_file_tools.py index b7e8a13d383..f99cf1d0ec3 100644 --- a/tests/tools/test_file_tools.py +++ b/tests/tools/test_file_tools.py @@ -615,3 +615,141 @@ class TestLastKnownCwd: # Should fall back to config default assert cwd_passed == "/config/default/path", \ f"Expected cwd='/config/default/path', got {cwd_passed!r}" + + @patch("tools.terminal_tool._active_environments", new_callable=dict) + @patch("tools.file_tools._file_ops_cache", new_callable=dict) + def test_live_cwd_read_mirrors_into_last_known_cwd(self, mock_cache, mock_active): + """Belt-and-suspenders (#26211): every successful live-cwd read records + the cwd in _last_known_cwd, so the durable anchor doesn't depend on the + cleanup-detection branch of _get_file_ops firing.""" + from tools.file_tools import _get_live_tracking_cwd, _last_known_cwd + + task_id = "default" + _last_known_cwd.pop(task_id, None) + + cached = MagicMock() + cached.env = MagicMock() + cached.env.cwd = "/Users/user/project" + cached.env.cwd_owner = "default" + mock_cache[task_id] = cached + + live = _get_live_tracking_cwd(task_id) + + assert live == "/Users/user/project" + # The read mirrored the live cwd into the durable registry. + assert _last_known_cwd.get(task_id) == "/Users/user/project" + _last_known_cwd.pop(task_id, None) + + @patch("tools.terminal_tool._active_environments", new_callable=dict) + @patch("tools.file_tools._file_ops_cache", new_callable=dict) + @patch("tools.terminal_tool._get_env_config") + @patch("tools.terminal_tool._create_environment") + def test_mirrored_cwd_survives_when_cache_already_cleared( + self, mock_create_env, mock_config, mock_cache, mock_active + ): + """The original save-old-cwd path only fires when _file_ops_cache still + holds the stale entry. If the cleanup thread popped BOTH dicts first, + _get_file_ops sees cached=None and never saves — but the proactive + mirror from an earlier live read already populated _last_known_cwd, so + the rebuilt env still restores the user's directory.""" + from tools.file_tools import ( + _get_file_ops, _get_live_tracking_cwd, _last_known_cwd, + ) + + task_id = "default" + _last_known_cwd.pop(task_id, None) + + # 1) Env is alive and the agent has cd'd into the project. A live read + # (happens on every relative-path resolution) mirrors the cwd. + cached = MagicMock() + cached.env = MagicMock() + cached.env.cwd = "/Users/user/project" + cached.env.cwd_owner = "default" + mock_cache[task_id] = cached + assert _get_live_tracking_cwd(task_id) == "/Users/user/project" + assert _last_known_cwd.get(task_id) == "/Users/user/project" + + # 2) Cleanup thread kills the env AND clears the cache before the next + # file write — so _get_file_ops' save-old-cwd branch never runs. + mock_cache.pop(task_id, None) + mock_active.clear() + + mock_env = MagicMock() + mock_env.cwd = "/Users/user/project" + mock_create_env.return_value = mock_env + mock_config.return_value = { + "env_type": "local", + "cwd": "/config/default/path", + "timeout": 30, + } + + _get_file_ops(task_id) + + create_call = mock_create_env.call_args + assert create_call is not None, "_create_environment was not called" + kwargs = create_call.kwargs if create_call.kwargs else {} + cwd_passed = kwargs.get("cwd", None) + if cwd_passed is None: + args = create_call.args if create_call.args else [] + if len(args) >= 3: + cwd_passed = args[2] + + # Rebuilt env restored the mirrored cwd, NOT the config default. + assert cwd_passed == "/Users/user/project", \ + f"Expected restored cwd='/Users/user/project', got {cwd_passed!r}" + _last_known_cwd.pop(task_id, None) + + +class TestSilentFileMisplacementE2E: + """Real-IO regression for #26211. + + Exercises the actual write_file_tool path against a temp filesystem: an + agent cd's into a project, the cleanup thread kills the env, and a later + relative-path write must land in the project dir (not the config default). + Mocks miss this because resolution (_resolve_path_for_task) runs BEFORE + _get_file_ops rebuilds the env — only the durable _last_known_cwd fallback + in _authoritative_workspace_root makes the resolved path correct. + """ + + def test_relative_write_after_env_cleanup_lands_in_user_cwd(self, tmp_path, monkeypatch): + import tools.terminal_tool as tt + import tools.file_tools as ft + + project = tmp_path / "project" + config_default = tmp_path / "config_default" + project.mkdir() + config_default.mkdir() + monkeypatch.delenv("TERMINAL_CWD", raising=False) + + _orig = tt._get_env_config + monkeypatch.setattr( + tt, "_get_env_config", + lambda: {**_orig(), "env_type": "local", "cwd": str(config_default)}, + ) + + task_id = "default" + ft._last_known_cwd.pop(task_id, None) + + # 1) Env alive; agent has cd'd into the project. A relative write + # while alive mirrors the live cwd into the durable registry. + fo = ft._get_file_ops(task_id) + fo.env.cwd = str(project) + fo.env.cwd_owner = "default" + ft.write_file_tool("alive.txt", "1\n", task_id) + assert (project / "alive.txt").exists() + + # 2) Cleanup thread kills the env AND clears the file_ops cache. + with tt._env_lock: + tt._active_environments.pop(task_id, None) + tt._last_activity.pop(task_id, None) + with ft._file_ops_lock: + ft._file_ops_cache.pop(task_id, None) + + # 3) The next relative write must still land in the project dir. + res = json.loads(ft.write_file_tool("report.txt", "hello\n", task_id)) + assert res.get("resolved_path") == str(project / "report.txt"), res + assert (project / "report.txt").exists(), "file should be in the user's cwd" + assert not (config_default / "report.txt").exists(), \ + "file silently misplaced into config default (the #26211 bug)" + + ft._last_known_cwd.pop(task_id, None) diff --git a/tools/file_tools.py b/tools/file_tools.py index 7f8e42202c4..cf47322b16f 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -204,10 +204,13 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None: env = getattr(cached, "env", None) live_cwd = _live_cwd_if_owned(env, task_id) if live_cwd: + _remember_last_known_cwd(container_key, live_cwd) return live_cwd # Legacy: a cache entry carrying its own cwd with no env to own it. if env is None and getattr(cached, "cwd", None): - return getattr(cached, "cwd", None) + legacy_cwd = getattr(cached, "cwd", None) + _remember_last_known_cwd(container_key, legacy_cwd) + return legacy_cwd try: from tools.terminal_tool import _active_environments, _env_lock @@ -216,6 +219,7 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None: env = _active_environments.get(container_key) or _active_environments.get(task_id) live_cwd = _live_cwd_if_owned(env, task_id) if live_cwd: + _remember_last_known_cwd(container_key, live_cwd) return live_cwd except Exception: pass @@ -240,6 +244,16 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None: live = _get_live_tracking_cwd(task_id) if live: return live + # When the terminal env was cleaned up mid-conversation, the live cwd is + # gone but the directory the agent navigated to is still recorded in the + # durable _last_known_cwd registry. Prefer it over the config/process + # fallback so a relative-path write resolved BEFORE the env is rebuilt + # still lands in the user's directory (root cause of #26211: write happens + # via _resolve_path_for_task -> here, which runs before _get_file_ops + # rebuilds the env). Keyed by the resolved container id, same as the save. + preserved = _last_known_cwd_for(task_id) + if preserved: + return preserved registered = _registered_task_cwd_override(task_id) if registered: return registered @@ -560,6 +574,41 @@ _file_ops_cache: dict = {} # terminal environment is cleaned up and rebuilt (root cause of #26211). _last_known_cwd: dict = {} + +def _remember_last_known_cwd(task_id: str, cwd: str | None) -> None: + """Mirror a live terminal cwd into the durable ``_last_known_cwd`` registry. + + Belt-and-suspenders for #26211: the cleanup thread can pop BOTH + ``_file_ops_cache`` and ``_active_environments`` before ``_get_file_ops`` + reaches its stale-cache detection branch, in which case the old cwd is + never saved and the rebuilt env falls back to the config default — exactly + the silent-misplacement bug. By recording the cwd on every successful live + read (which happens on every relative-path file resolution while the env is + alive), the durable anchor no longer depends on the cleanup-detection + branch firing, so it survives recreation regardless of pop ordering. + """ + if not cwd: + return + with _file_ops_lock: + if _last_known_cwd.get(task_id) != cwd: + _last_known_cwd[task_id] = cwd + + +def _last_known_cwd_for(task_id: str = "default") -> str | None: + """Read the durable last-known cwd for *task_id*, container-key aware. + + The registry is keyed by the resolved container id (the same key used by + the save sites in ``_get_file_ops`` / ``_get_live_tracking_cwd``), so look + up the resolved key first and fall back to the raw task id. + """ + try: + from tools.terminal_tool import _resolve_container_task_id + container_key = _resolve_container_task_id(task_id) + except Exception: + container_key = task_id + with _file_ops_lock: + return _last_known_cwd.get(container_key) or _last_known_cwd.get(task_id) + # Track files read per task to detect re-read loops and deduplicate reads. # Per task_id we store: # "last_key": the key of the most recent read/search call (or None)