fix(file-ops): make preserved cwd reachable at write-time resolution (#26211)

Belt-and-suspenders on top of the cherry-picked cwd-preservation fix:

- Proactively mirror every live terminal cwd into _last_known_cwd on each
  successful read, so the durable anchor survives even when the cleanup
  thread pops both _file_ops_cache and _active_environments before
  _get_file_ops' stale-cache save branch can fire.
- Fall back to _last_known_cwd in _authoritative_workspace_root. write_file_tool
  resolves the path (via _resolve_path_for_task) BEFORE _get_file_ops rebuilds
  the env, so restoring only the rebuilt env's cwd was insufficient — the
  resolution that decides where the file lands runs first. This closes that gap.

The local env's persisted _cwd_file can't serve this role: it's keyed by a
random per-session uuid and deleted on cleanup (the same cleanup that triggers
the bug). The in-memory _last_known_cwd registry is the durable anchor instead.

Adds a real-IO E2E regression (TestSilentFileMisplacementE2E) exercising the
actual write_file_tool path after env cleanup.
This commit is contained in:
teknium1 2026-06-27 19:07:09 -07:00 committed by Teknium
parent adeba1d7a8
commit b2faeba182
2 changed files with 188 additions and 1 deletions

View file

@ -615,3 +615,141 @@ class TestLastKnownCwd:
# Should fall back to config default
assert cwd_passed == "/config/default/path", \
f"Expected cwd='/config/default/path', got {cwd_passed!r}"
@patch("tools.terminal_tool._active_environments", new_callable=dict)
@patch("tools.file_tools._file_ops_cache", new_callable=dict)
def test_live_cwd_read_mirrors_into_last_known_cwd(self, mock_cache, mock_active):
"""Belt-and-suspenders (#26211): every successful live-cwd read records
the cwd in _last_known_cwd, so the durable anchor doesn't depend on the
cleanup-detection branch of _get_file_ops firing."""
from tools.file_tools import _get_live_tracking_cwd, _last_known_cwd
task_id = "default"
_last_known_cwd.pop(task_id, None)
cached = MagicMock()
cached.env = MagicMock()
cached.env.cwd = "/Users/user/project"
cached.env.cwd_owner = "default"
mock_cache[task_id] = cached
live = _get_live_tracking_cwd(task_id)
assert live == "/Users/user/project"
# The read mirrored the live cwd into the durable registry.
assert _last_known_cwd.get(task_id) == "/Users/user/project"
_last_known_cwd.pop(task_id, None)
@patch("tools.terminal_tool._active_environments", new_callable=dict)
@patch("tools.file_tools._file_ops_cache", new_callable=dict)
@patch("tools.terminal_tool._get_env_config")
@patch("tools.terminal_tool._create_environment")
def test_mirrored_cwd_survives_when_cache_already_cleared(
self, mock_create_env, mock_config, mock_cache, mock_active
):
"""The original save-old-cwd path only fires when _file_ops_cache still
holds the stale entry. If the cleanup thread popped BOTH dicts first,
_get_file_ops sees cached=None and never saves but the proactive
mirror from an earlier live read already populated _last_known_cwd, so
the rebuilt env still restores the user's directory."""
from tools.file_tools import (
_get_file_ops, _get_live_tracking_cwd, _last_known_cwd,
)
task_id = "default"
_last_known_cwd.pop(task_id, None)
# 1) Env is alive and the agent has cd'd into the project. A live read
# (happens on every relative-path resolution) mirrors the cwd.
cached = MagicMock()
cached.env = MagicMock()
cached.env.cwd = "/Users/user/project"
cached.env.cwd_owner = "default"
mock_cache[task_id] = cached
assert _get_live_tracking_cwd(task_id) == "/Users/user/project"
assert _last_known_cwd.get(task_id) == "/Users/user/project"
# 2) Cleanup thread kills the env AND clears the cache before the next
# file write — so _get_file_ops' save-old-cwd branch never runs.
mock_cache.pop(task_id, None)
mock_active.clear()
mock_env = MagicMock()
mock_env.cwd = "/Users/user/project"
mock_create_env.return_value = mock_env
mock_config.return_value = {
"env_type": "local",
"cwd": "/config/default/path",
"timeout": 30,
}
_get_file_ops(task_id)
create_call = mock_create_env.call_args
assert create_call is not None, "_create_environment was not called"
kwargs = create_call.kwargs if create_call.kwargs else {}
cwd_passed = kwargs.get("cwd", None)
if cwd_passed is None:
args = create_call.args if create_call.args else []
if len(args) >= 3:
cwd_passed = args[2]
# Rebuilt env restored the mirrored cwd, NOT the config default.
assert cwd_passed == "/Users/user/project", \
f"Expected restored cwd='/Users/user/project', got {cwd_passed!r}"
_last_known_cwd.pop(task_id, None)
class TestSilentFileMisplacementE2E:
"""Real-IO regression for #26211.
Exercises the actual write_file_tool path against a temp filesystem: an
agent cd's into a project, the cleanup thread kills the env, and a later
relative-path write must land in the project dir (not the config default).
Mocks miss this because resolution (_resolve_path_for_task) runs BEFORE
_get_file_ops rebuilds the env only the durable _last_known_cwd fallback
in _authoritative_workspace_root makes the resolved path correct.
"""
def test_relative_write_after_env_cleanup_lands_in_user_cwd(self, tmp_path, monkeypatch):
import tools.terminal_tool as tt
import tools.file_tools as ft
project = tmp_path / "project"
config_default = tmp_path / "config_default"
project.mkdir()
config_default.mkdir()
monkeypatch.delenv("TERMINAL_CWD", raising=False)
_orig = tt._get_env_config
monkeypatch.setattr(
tt, "_get_env_config",
lambda: {**_orig(), "env_type": "local", "cwd": str(config_default)},
)
task_id = "default"
ft._last_known_cwd.pop(task_id, None)
# 1) Env alive; agent has cd'd into the project. A relative write
# while alive mirrors the live cwd into the durable registry.
fo = ft._get_file_ops(task_id)
fo.env.cwd = str(project)
fo.env.cwd_owner = "default"
ft.write_file_tool("alive.txt", "1\n", task_id)
assert (project / "alive.txt").exists()
# 2) Cleanup thread kills the env AND clears the file_ops cache.
with tt._env_lock:
tt._active_environments.pop(task_id, None)
tt._last_activity.pop(task_id, None)
with ft._file_ops_lock:
ft._file_ops_cache.pop(task_id, None)
# 3) The next relative write must still land in the project dir.
res = json.loads(ft.write_file_tool("report.txt", "hello\n", task_id))
assert res.get("resolved_path") == str(project / "report.txt"), res
assert (project / "report.txt").exists(), "file should be in the user's cwd"
assert not (config_default / "report.txt").exists(), \
"file silently misplaced into config default (the #26211 bug)"
ft._last_known_cwd.pop(task_id, None)

View file

@ -204,10 +204,13 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
env = getattr(cached, "env", None)
live_cwd = _live_cwd_if_owned(env, task_id)
if live_cwd:
_remember_last_known_cwd(container_key, live_cwd)
return live_cwd
# Legacy: a cache entry carrying its own cwd with no env to own it.
if env is None and getattr(cached, "cwd", None):
return getattr(cached, "cwd", None)
legacy_cwd = getattr(cached, "cwd", None)
_remember_last_known_cwd(container_key, legacy_cwd)
return legacy_cwd
try:
from tools.terminal_tool import _active_environments, _env_lock
@ -216,6 +219,7 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
env = _active_environments.get(container_key) or _active_environments.get(task_id)
live_cwd = _live_cwd_if_owned(env, task_id)
if live_cwd:
_remember_last_known_cwd(container_key, live_cwd)
return live_cwd
except Exception:
pass
@ -240,6 +244,16 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None:
live = _get_live_tracking_cwd(task_id)
if live:
return live
# When the terminal env was cleaned up mid-conversation, the live cwd is
# gone but the directory the agent navigated to is still recorded in the
# durable _last_known_cwd registry. Prefer it over the config/process
# fallback so a relative-path write resolved BEFORE the env is rebuilt
# still lands in the user's directory (root cause of #26211: write happens
# via _resolve_path_for_task -> here, which runs before _get_file_ops
# rebuilds the env). Keyed by the resolved container id, same as the save.
preserved = _last_known_cwd_for(task_id)
if preserved:
return preserved
registered = _registered_task_cwd_override(task_id)
if registered:
return registered
@ -560,6 +574,41 @@ _file_ops_cache: dict = {}
# terminal environment is cleaned up and rebuilt (root cause of #26211).
_last_known_cwd: dict = {}
def _remember_last_known_cwd(task_id: str, cwd: str | None) -> None:
"""Mirror a live terminal cwd into the durable ``_last_known_cwd`` registry.
Belt-and-suspenders for #26211: the cleanup thread can pop BOTH
``_file_ops_cache`` and ``_active_environments`` before ``_get_file_ops``
reaches its stale-cache detection branch, in which case the old cwd is
never saved and the rebuilt env falls back to the config default exactly
the silent-misplacement bug. By recording the cwd on every successful live
read (which happens on every relative-path file resolution while the env is
alive), the durable anchor no longer depends on the cleanup-detection
branch firing, so it survives recreation regardless of pop ordering.
"""
if not cwd:
return
with _file_ops_lock:
if _last_known_cwd.get(task_id) != cwd:
_last_known_cwd[task_id] = cwd
def _last_known_cwd_for(task_id: str = "default") -> str | None:
"""Read the durable last-known cwd for *task_id*, container-key aware.
The registry is keyed by the resolved container id (the same key used by
the save sites in ``_get_file_ops`` / ``_get_live_tracking_cwd``), so look
up the resolved key first and fall back to the raw task id.
"""
try:
from tools.terminal_tool import _resolve_container_task_id
container_key = _resolve_container_task_id(task_id)
except Exception:
container_key = task_id
with _file_ops_lock:
return _last_known_cwd.get(container_key) or _last_known_cwd.get(task_id)
# Track files read per task to detect re-read loops and deduplicate reads.
# Per task_id we store:
# "last_key": the key of the most recent read/search call (or None)