diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 81d852ebfa4..b4b08d628f8 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -3037,6 +3037,46 @@ def complete_task( # Workspace / tmux cleanup # --------------------------------------------------------------------------- +def _is_managed_scratch_path(p: Path) -> bool: + """Return True iff *p* lives inside a kanban-managed scratch root. + + A managed root is one of: + + * ``HERMES_KANBAN_WORKSPACES_ROOT`` when set (worker-side override + injected by the dispatcher). + * The current kanban home's ``kanban/`` subtree, which covers both the + legacy default-board scratch root (``/kanban/workspaces``) + and per-board roots (``/kanban/boards//workspaces``). + + Used by :func:`_cleanup_workspace` to refuse to ``shutil.rmtree`` paths + outside Hermes-managed storage. A board ``default_workdir`` pointing at a + real source tree can otherwise pair with ``workspace_kind='scratch'`` and + cause task completion to delete user data (#28818). + """ + try: + p_abs = p.resolve(strict=False) + except OSError: + return False + roots: list[Path] = [] + override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip() + if override: + try: + roots.append(Path(override).expanduser().resolve(strict=False)) + except OSError: + pass + try: + roots.append((kanban_home() / "kanban").resolve(strict=False)) + except OSError: + pass + for root in roots: + try: + if p_abs.is_relative_to(root): + return True + except ValueError: + continue + return False + + def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None: """Remove a task's scratch workspace dir and kill its stale tmux session. @@ -3059,8 +3099,21 @@ def _cleanup_workspace(conn: sqlite3.Connection, task_id: str) -> None: import shutil wp = Path(path) if wp.is_dir(): - shutil.rmtree(wp, ignore_errors=True) - _log.debug("Removed scratch workspace: %s", wp) + # Containment guard (#28818): a board's ``default_workdir`` can + # pair ``workspace_kind='scratch'`` with a user-supplied path + # pointing at a real source tree. Without this check, task + # completion would unconditionally ``shutil.rmtree`` that path + # and silently delete the user's source data. + if _is_managed_scratch_path(wp): + shutil.rmtree(wp, ignore_errors=True) + _log.debug("Removed scratch workspace: %s", wp) + else: + _log.warning( + "Refusing to remove out-of-scratch workspace for task %s: %s " + "(workspace_kind='scratch' but path is outside any " + "kanban-managed workspaces root)", + task_id, wp, + ) # Also kill the tmux session for the worker that owned this task, # if the tmux session is now dead (worker process exited). _cleanup_worker_tmux(conn, task_id) diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index 25ef4e9f865..5ee08913420 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -1470,6 +1470,97 @@ def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path): assert str(ws) == target +# --------------------------------------------------------------------------- +# Scratch cleanup containment (#28818) +# --------------------------------------------------------------------------- + +def test_cleanup_workspace_removes_managed_scratch_dir(kanban_home): + """A scratch workspace under the kanban workspaces root is removed.""" + with kb.connect() as conn: + t = kb.create_task(conn, title="scratchy") + task = kb.get_task(conn, t) + ws = kb.resolve_workspace(task) + kb.set_workspace_path(conn, t, ws) + assert ws.is_dir() + kb.complete_task(conn, t, result="ok") + assert not ws.exists(), "Hermes-managed scratch dir should be cleaned up" + + +def test_cleanup_workspace_refuses_path_outside_scratch_root(kanban_home, tmp_path): + """A scratch task with a user path outside the workspaces root must NOT be deleted (#28818). + + Reproduces the data-loss vector where a board's ``default_workdir`` is set + to a real source directory; tasks created without an explicit + ``workspace_kind`` inherit ``scratch`` semantics, and the old cleanup path + would ``shutil.rmtree`` the user's source tree on task completion. + """ + real_source = tmp_path / "real-source" + real_source.mkdir() + (real_source / ".git").mkdir() + (real_source / "README.md").write_text("important", encoding="utf-8") + + with kb.connect() as conn: + t = kb.create_task(conn, title="ship") + # Simulate the bad state directly: workspace_kind='scratch' (default) + # but workspace_path pointing at the user's real source tree, which is + # exactly what board.default_workdir produces when the task is created + # without an explicit workspace_kind. + conn.execute( + "UPDATE tasks SET workspace_kind=?, workspace_path=? WHERE id=?", + ("scratch", str(real_source), t), + ) + conn.commit() + kb.complete_task(conn, t, result="ok") + + assert real_source.exists(), "User source tree must not be deleted by scratch cleanup" + assert (real_source / ".git").exists() + assert (real_source / "README.md").read_text(encoding="utf-8") == "important" + + +def test_cleanup_workspace_honors_workspaces_root_env_override(tmp_path, monkeypatch): + """``HERMES_KANBAN_WORKSPACES_ROOT`` extends the managed-scratch set. + + Worker subprocesses run with this env var injected by the dispatcher. The + cleanup containment check must treat paths under it as managed even when + they sit outside the active kanban home. + """ + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + workspaces_override = tmp_path / "ext-workspaces" + workspaces_override.mkdir() + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(workspaces_override)) + kb.init_db() + + with kb.connect() as conn: + t = kb.create_task(conn, title="ext") + scratch_dir = workspaces_override / t + scratch_dir.mkdir() + conn.execute( + "UPDATE tasks SET workspace_kind=?, workspace_path=? WHERE id=?", + ("scratch", str(scratch_dir), t), + ) + conn.commit() + kb.complete_task(conn, t, result="ok") + + assert not scratch_dir.exists(), "Override-root scratch dir should be cleaned up" + + +def test_is_managed_scratch_path_accepts_per_board_workspaces(kanban_home, tmp_path): + """Per-board scratch dirs under ``/kanban/boards//workspaces`` are managed.""" + board_scratch = kanban_home / "kanban" / "boards" / "my-board" / "workspaces" / "task-1" + board_scratch.mkdir(parents=True) + assert kb._is_managed_scratch_path(board_scratch) + + +def test_is_managed_scratch_path_rejects_real_source_tree(kanban_home, tmp_path): + """A path outside any managed root (e.g. a user's repo) is NOT managed.""" + real = tmp_path / "code" / "my-project" + real.mkdir(parents=True) + assert not kb._is_managed_scratch_path(real) + + # --------------------------------------------------------------------------- # Tenancy # ---------------------------------------------------------------------------