mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(file-ops): make preserved cwd reachable at write-time resolution (#26211)
Belt-and-suspenders on top of the cherry-picked cwd-preservation fix: - Proactively mirror every live terminal cwd into _last_known_cwd on each successful read, so the durable anchor survives even when the cleanup thread pops both _file_ops_cache and _active_environments before _get_file_ops' stale-cache save branch can fire. - Fall back to _last_known_cwd in _authoritative_workspace_root. write_file_tool resolves the path (via _resolve_path_for_task) BEFORE _get_file_ops rebuilds the env, so restoring only the rebuilt env's cwd was insufficient — the resolution that decides where the file lands runs first. This closes that gap. The local env's persisted _cwd_file can't serve this role: it's keyed by a random per-session uuid and deleted on cleanup (the same cleanup that triggers the bug). The in-memory _last_known_cwd registry is the durable anchor instead. Adds a real-IO E2E regression (TestSilentFileMisplacementE2E) exercising the actual write_file_tool path after env cleanup.
This commit is contained in:
parent
adeba1d7a8
commit
b2faeba182
2 changed files with 188 additions and 1 deletions
|
|
@ -615,3 +615,141 @@ class TestLastKnownCwd:
|
|||
# Should fall back to config default
|
||||
assert cwd_passed == "/config/default/path", \
|
||||
f"Expected cwd='/config/default/path', got {cwd_passed!r}"
|
||||
|
||||
@patch("tools.terminal_tool._active_environments", new_callable=dict)
|
||||
@patch("tools.file_tools._file_ops_cache", new_callable=dict)
|
||||
def test_live_cwd_read_mirrors_into_last_known_cwd(self, mock_cache, mock_active):
|
||||
"""Belt-and-suspenders (#26211): every successful live-cwd read records
|
||||
the cwd in _last_known_cwd, so the durable anchor doesn't depend on the
|
||||
cleanup-detection branch of _get_file_ops firing."""
|
||||
from tools.file_tools import _get_live_tracking_cwd, _last_known_cwd
|
||||
|
||||
task_id = "default"
|
||||
_last_known_cwd.pop(task_id, None)
|
||||
|
||||
cached = MagicMock()
|
||||
cached.env = MagicMock()
|
||||
cached.env.cwd = "/Users/user/project"
|
||||
cached.env.cwd_owner = "default"
|
||||
mock_cache[task_id] = cached
|
||||
|
||||
live = _get_live_tracking_cwd(task_id)
|
||||
|
||||
assert live == "/Users/user/project"
|
||||
# The read mirrored the live cwd into the durable registry.
|
||||
assert _last_known_cwd.get(task_id) == "/Users/user/project"
|
||||
_last_known_cwd.pop(task_id, None)
|
||||
|
||||
@patch("tools.terminal_tool._active_environments", new_callable=dict)
|
||||
@patch("tools.file_tools._file_ops_cache", new_callable=dict)
|
||||
@patch("tools.terminal_tool._get_env_config")
|
||||
@patch("tools.terminal_tool._create_environment")
|
||||
def test_mirrored_cwd_survives_when_cache_already_cleared(
|
||||
self, mock_create_env, mock_config, mock_cache, mock_active
|
||||
):
|
||||
"""The original save-old-cwd path only fires when _file_ops_cache still
|
||||
holds the stale entry. If the cleanup thread popped BOTH dicts first,
|
||||
_get_file_ops sees cached=None and never saves — but the proactive
|
||||
mirror from an earlier live read already populated _last_known_cwd, so
|
||||
the rebuilt env still restores the user's directory."""
|
||||
from tools.file_tools import (
|
||||
_get_file_ops, _get_live_tracking_cwd, _last_known_cwd,
|
||||
)
|
||||
|
||||
task_id = "default"
|
||||
_last_known_cwd.pop(task_id, None)
|
||||
|
||||
# 1) Env is alive and the agent has cd'd into the project. A live read
|
||||
# (happens on every relative-path resolution) mirrors the cwd.
|
||||
cached = MagicMock()
|
||||
cached.env = MagicMock()
|
||||
cached.env.cwd = "/Users/user/project"
|
||||
cached.env.cwd_owner = "default"
|
||||
mock_cache[task_id] = cached
|
||||
assert _get_live_tracking_cwd(task_id) == "/Users/user/project"
|
||||
assert _last_known_cwd.get(task_id) == "/Users/user/project"
|
||||
|
||||
# 2) Cleanup thread kills the env AND clears the cache before the next
|
||||
# file write — so _get_file_ops' save-old-cwd branch never runs.
|
||||
mock_cache.pop(task_id, None)
|
||||
mock_active.clear()
|
||||
|
||||
mock_env = MagicMock()
|
||||
mock_env.cwd = "/Users/user/project"
|
||||
mock_create_env.return_value = mock_env
|
||||
mock_config.return_value = {
|
||||
"env_type": "local",
|
||||
"cwd": "/config/default/path",
|
||||
"timeout": 30,
|
||||
}
|
||||
|
||||
_get_file_ops(task_id)
|
||||
|
||||
create_call = mock_create_env.call_args
|
||||
assert create_call is not None, "_create_environment was not called"
|
||||
kwargs = create_call.kwargs if create_call.kwargs else {}
|
||||
cwd_passed = kwargs.get("cwd", None)
|
||||
if cwd_passed is None:
|
||||
args = create_call.args if create_call.args else []
|
||||
if len(args) >= 3:
|
||||
cwd_passed = args[2]
|
||||
|
||||
# Rebuilt env restored the mirrored cwd, NOT the config default.
|
||||
assert cwd_passed == "/Users/user/project", \
|
||||
f"Expected restored cwd='/Users/user/project', got {cwd_passed!r}"
|
||||
_last_known_cwd.pop(task_id, None)
|
||||
|
||||
|
||||
class TestSilentFileMisplacementE2E:
|
||||
"""Real-IO regression for #26211.
|
||||
|
||||
Exercises the actual write_file_tool path against a temp filesystem: an
|
||||
agent cd's into a project, the cleanup thread kills the env, and a later
|
||||
relative-path write must land in the project dir (not the config default).
|
||||
Mocks miss this because resolution (_resolve_path_for_task) runs BEFORE
|
||||
_get_file_ops rebuilds the env — only the durable _last_known_cwd fallback
|
||||
in _authoritative_workspace_root makes the resolved path correct.
|
||||
"""
|
||||
|
||||
def test_relative_write_after_env_cleanup_lands_in_user_cwd(self, tmp_path, monkeypatch):
|
||||
import tools.terminal_tool as tt
|
||||
import tools.file_tools as ft
|
||||
|
||||
project = tmp_path / "project"
|
||||
config_default = tmp_path / "config_default"
|
||||
project.mkdir()
|
||||
config_default.mkdir()
|
||||
monkeypatch.delenv("TERMINAL_CWD", raising=False)
|
||||
|
||||
_orig = tt._get_env_config
|
||||
monkeypatch.setattr(
|
||||
tt, "_get_env_config",
|
||||
lambda: {**_orig(), "env_type": "local", "cwd": str(config_default)},
|
||||
)
|
||||
|
||||
task_id = "default"
|
||||
ft._last_known_cwd.pop(task_id, None)
|
||||
|
||||
# 1) Env alive; agent has cd'd into the project. A relative write
|
||||
# while alive mirrors the live cwd into the durable registry.
|
||||
fo = ft._get_file_ops(task_id)
|
||||
fo.env.cwd = str(project)
|
||||
fo.env.cwd_owner = "default"
|
||||
ft.write_file_tool("alive.txt", "1\n", task_id)
|
||||
assert (project / "alive.txt").exists()
|
||||
|
||||
# 2) Cleanup thread kills the env AND clears the file_ops cache.
|
||||
with tt._env_lock:
|
||||
tt._active_environments.pop(task_id, None)
|
||||
tt._last_activity.pop(task_id, None)
|
||||
with ft._file_ops_lock:
|
||||
ft._file_ops_cache.pop(task_id, None)
|
||||
|
||||
# 3) The next relative write must still land in the project dir.
|
||||
res = json.loads(ft.write_file_tool("report.txt", "hello\n", task_id))
|
||||
assert res.get("resolved_path") == str(project / "report.txt"), res
|
||||
assert (project / "report.txt").exists(), "file should be in the user's cwd"
|
||||
assert not (config_default / "report.txt").exists(), \
|
||||
"file silently misplaced into config default (the #26211 bug)"
|
||||
|
||||
ft._last_known_cwd.pop(task_id, None)
|
||||
|
|
|
|||
|
|
@ -204,10 +204,13 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
|||
env = getattr(cached, "env", None)
|
||||
live_cwd = _live_cwd_if_owned(env, task_id)
|
||||
if live_cwd:
|
||||
_remember_last_known_cwd(container_key, live_cwd)
|
||||
return live_cwd
|
||||
# Legacy: a cache entry carrying its own cwd with no env to own it.
|
||||
if env is None and getattr(cached, "cwd", None):
|
||||
return getattr(cached, "cwd", None)
|
||||
legacy_cwd = getattr(cached, "cwd", None)
|
||||
_remember_last_known_cwd(container_key, legacy_cwd)
|
||||
return legacy_cwd
|
||||
|
||||
try:
|
||||
from tools.terminal_tool import _active_environments, _env_lock
|
||||
|
|
@ -216,6 +219,7 @@ def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
|||
env = _active_environments.get(container_key) or _active_environments.get(task_id)
|
||||
live_cwd = _live_cwd_if_owned(env, task_id)
|
||||
if live_cwd:
|
||||
_remember_last_known_cwd(container_key, live_cwd)
|
||||
return live_cwd
|
||||
except Exception:
|
||||
pass
|
||||
|
|
@ -240,6 +244,16 @@ def _authoritative_workspace_root(task_id: str = "default") -> str | None:
|
|||
live = _get_live_tracking_cwd(task_id)
|
||||
if live:
|
||||
return live
|
||||
# When the terminal env was cleaned up mid-conversation, the live cwd is
|
||||
# gone but the directory the agent navigated to is still recorded in the
|
||||
# durable _last_known_cwd registry. Prefer it over the config/process
|
||||
# fallback so a relative-path write resolved BEFORE the env is rebuilt
|
||||
# still lands in the user's directory (root cause of #26211: write happens
|
||||
# via _resolve_path_for_task -> here, which runs before _get_file_ops
|
||||
# rebuilds the env). Keyed by the resolved container id, same as the save.
|
||||
preserved = _last_known_cwd_for(task_id)
|
||||
if preserved:
|
||||
return preserved
|
||||
registered = _registered_task_cwd_override(task_id)
|
||||
if registered:
|
||||
return registered
|
||||
|
|
@ -560,6 +574,41 @@ _file_ops_cache: dict = {}
|
|||
# terminal environment is cleaned up and rebuilt (root cause of #26211).
|
||||
_last_known_cwd: dict = {}
|
||||
|
||||
|
||||
def _remember_last_known_cwd(task_id: str, cwd: str | None) -> None:
|
||||
"""Mirror a live terminal cwd into the durable ``_last_known_cwd`` registry.
|
||||
|
||||
Belt-and-suspenders for #26211: the cleanup thread can pop BOTH
|
||||
``_file_ops_cache`` and ``_active_environments`` before ``_get_file_ops``
|
||||
reaches its stale-cache detection branch, in which case the old cwd is
|
||||
never saved and the rebuilt env falls back to the config default — exactly
|
||||
the silent-misplacement bug. By recording the cwd on every successful live
|
||||
read (which happens on every relative-path file resolution while the env is
|
||||
alive), the durable anchor no longer depends on the cleanup-detection
|
||||
branch firing, so it survives recreation regardless of pop ordering.
|
||||
"""
|
||||
if not cwd:
|
||||
return
|
||||
with _file_ops_lock:
|
||||
if _last_known_cwd.get(task_id) != cwd:
|
||||
_last_known_cwd[task_id] = cwd
|
||||
|
||||
|
||||
def _last_known_cwd_for(task_id: str = "default") -> str | None:
|
||||
"""Read the durable last-known cwd for *task_id*, container-key aware.
|
||||
|
||||
The registry is keyed by the resolved container id (the same key used by
|
||||
the save sites in ``_get_file_ops`` / ``_get_live_tracking_cwd``), so look
|
||||
up the resolved key first and fall back to the raw task id.
|
||||
"""
|
||||
try:
|
||||
from tools.terminal_tool import _resolve_container_task_id
|
||||
container_key = _resolve_container_task_id(task_id)
|
||||
except Exception:
|
||||
container_key = task_id
|
||||
with _file_ops_lock:
|
||||
return _last_known_cwd.get(container_key) or _last_known_cwd.get(task_id)
|
||||
|
||||
# Track files read per task to detect re-read loops and deduplicate reads.
|
||||
# Per task_id we store:
|
||||
# "last_key": the key of the most recent read/search call (or None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue