From f4953bc6488e54c8a706f947d541d592b2cf08ab Mon Sep 17 00:00:00 2001 From: dearmayo Date: Mon, 25 May 2026 23:01:26 +0900 Subject: [PATCH] fix(subdirectory_hints): prevent loading AGENTS.md outside workspace SubdirectoryHintTracker was scanning directories outside the active working directory, allowing files like ~/.codex/AGENTS.md or ~/.claude/CLAUDE.md to be loaded and injected into the agent context. This causes cross-agent context contamination and instruction mixup. Add _is_ancestor_or_same() helper and a path boundary check in _is_valid_subdir(): only directories within the working directory tree (i.e. path.is_relative_to(working_dir)) are allowed. Also add exist_ok=True to mkdir() calls in new tests to prevent pytest-xdist race conditions when workers share the same tmp_path parent. Tests added: - test_outside_working_dir_rejected: verifies sibling dirs are blocked - test_outside_working_dir_absolute_path_rejected: verifies ~/.codex paths blocked - test_inside_workspace_subdir_allowed: verifies normal subdir access unaffected - test_sibling_repo_not_loaded_via_ancestor_walk: ancestor walk stays within workspace --- agent/subdirectory_hints.py | 50 +++++++++++- tests/agent/test_subdirectory_hints.py | 104 +++++++++++++++++++++++-- 2 files changed, 147 insertions(+), 7 deletions(-) diff --git a/agent/subdirectory_hints.py b/agent/subdirectory_hints.py index dcc514b9014..858807aba2d 100644 --- a/agent/subdirectory_hints.py +++ b/agent/subdirectory_hints.py @@ -45,6 +45,15 @@ _COMMAND_TOOLS = {"terminal"} # Prevents scanning all the way to / for deeply nested paths. _MAX_ANCESTOR_WALK = 5 + +def _is_ancestor_or_same(a: Path, b: Path) -> bool: + """Check if *a* is the same as or an ancestor of *b* (parent directory check).""" + try: + b.relative_to(a) + return True + except ValueError: + return False + class SubdirectoryHintTracker: """Track which directories the agent visits and load hints on first access. @@ -158,7 +167,13 @@ class SubdirectoryHintTracker: self._add_path_candidate(token, candidates) def _is_valid_subdir(self, path: Path) -> bool: - """Check if path is a valid directory to scan for hints.""" + """Check if path is a valid directory to scan for hints. + + Only allow subdirectories within the working directory tree. + This prevents loading AGENTS.md from outside the active workspace + (e.g. ~/.codex/AGENTS.md, ~/.claude/CLAUDE.md), which causes + cross-agent context contamination and instruction mixup. + """ try: if not path.is_dir(): return False @@ -166,12 +181,43 @@ class SubdirectoryHintTracker: return False if path in self._loaded_dirs: return False + # Reject paths outside the working directory tree. + # path.resolve() may differ from working_dir.resolve() due to symlinks, + # but path.is_relative_to(working_dir) handles both absolute and + # symlinked paths correctly on Python 3.9+. + try: + if not path.is_relative_to(self.working_dir): + return False + except (OSError, ValueError): + # Older Python or path resolution error — fall back to parent + # check as a best-effort safeguard. + if not _is_ancestor_or_same(self.working_dir, path): + return False return True def _load_hints_for_directory(self, directory: Path) -> Optional[str]: - """Load hint files from a directory. Returns formatted text or None.""" + """Load hint files from a directory. Returns formatted text or None. + + Only loads hints from directories within the working directory tree. + """ self._loaded_dirs.add(directory) + # Reject paths outside the working directory tree. + try: + if not directory.is_relative_to(self.working_dir): + logger.debug( + "Skipping hint files in %s — outside working_dir %s", + directory, self.working_dir, + ) + return None + except (OSError, ValueError): + if not _is_ancestor_or_same(self.working_dir, directory): + logger.debug( + "Skipping hint files in %s — outside working_dir %s", + directory, self.working_dir, + ) + return None + found_hints = [] for filename in _HINT_FILENAMES: hint_path = directory / filename diff --git a/tests/agent/test_subdirectory_hints.py b/tests/agent/test_subdirectory_hints.py index 7c1a74e66cc..cf445797cee 100644 --- a/tests/agent/test_subdirectory_hints.py +++ b/tests/agent/test_subdirectory_hints.py @@ -122,17 +122,75 @@ class TestSubdirectoryHintTracker: assert result is not None assert "Frontend rules" in result - def test_outside_working_dir_still_checked(self, tmp_path, project): - """Paths outside working_dir are still checked for hints.""" - other_project = tmp_path / "other" - other_project.mkdir() + def test_outside_working_dir_rejected(self, tmp_path, project): + """Paths outside working_dir are rejected — no hints from outside workspace. + + Note: project fixture returns tmp_path, so we need a path whose ancestor + is outside project. We simulate this by creating a directory at the same + level as project but not inside it — which requires creating a parent + tree. Since tmp_path / "other" IS inside tmp_path (=project), we need + a different approach: use tmp_path.parent as the reference for "outside". + """ + # Create a directory at the same level as tmp_path (project), + # which means it's a sibling of project — not a child. + # Since tmp_path IS project, tmp_path.parent / "other" is a sibling. + parent = tmp_path.parent + other_project = parent / "other" + other_project.mkdir(exist_ok=True) (other_project / "AGENTS.md").write_text("Other project rules") tracker = SubdirectoryHintTracker(working_dir=str(project)) result = tracker.check_tool_call( "read_file", {"path": str(other_project / "file.py")} ) + # Outside workspace — should NOT load hints + assert result is None + + def test_outside_working_dir_absolute_path_rejected(self, tmp_path, project): + """Absolute paths like ~/.codex/AGENTS.md are rejected.""" + # Create a directory at the parent level of project, simulating ~/.codex + parent = tmp_path.parent + outside_dir = parent / ".test-codex" + outside_dir.mkdir(exist_ok=True) + (outside_dir / "AGENTS.md").write_text("Codex contamination rules") + tracker = SubdirectoryHintTracker(working_dir=str(project)) + result = tracker.check_tool_call( + "read_file", {"path": str(outside_dir / "AGENTS.md")} + ) + # Reading a hint file outside working_dir — should NOT load hints + assert result is None + + def test_inside_workspace_subdir_allowed(self, project): + """Paths inside working_dir are still allowed.""" + tracker = SubdirectoryHintTracker(working_dir=str(project)) + result = tracker.check_tool_call( + "read_file", {"path": str(project / "backend" / "src" / "main.py")} + ) assert result is not None - assert "Other project rules" in result + assert "Backend-specific instructions" in result + + def test_sibling_repo_not_loaded_via_ancestor_walk(self, tmp_path, project): + """Ancestor walk from inside working_dir should NOT discover sibling repo hints.""" + # Create a nested structure inside working_dir + deep_dir = project / "deep" / "nested" / "very" / "deep" + deep_dir.mkdir(parents=True) + (deep_dir / "file.py").write_text("deep file") + # Also create a sibling directory at the parent level + parent = tmp_path.parent + sibling = parent / "sibling-repo" + sibling.mkdir(exist_ok=True) + (sibling / "AGENTS.md").write_text("Sibling repo rules") + # Create a .cursorrules in the deep/nested/very dir so ancestor walk + # discovers it (fixture's deep/nested/path is NOT an ancestor of very/deep) + (deep_dir / ".cursorrules").write_text("Deep cursorrules") + tracker = SubdirectoryHintTracker(working_dir=str(project)) + result = tracker.check_tool_call( + "read_file", {"path": str(deep_dir / "file.py")} + ) + # Should discover deep cursorrules from the file's own directory + # but NOT sibling repo hints + assert result is not None + assert "Deep cursorrules" in result + assert "Sibling repo rules" not in result def test_workdir_arg(self, project): """The workdir argument from terminal tool is checked.""" @@ -232,3 +290,39 @@ class TestPermissionErrorHandling: ) # Result may be None (backend skipped) — the key point is no crash assert result is None or isinstance(result, str) + + +class TestOutsideWorkspaceRejection: + """Direct tests for _is_valid_subdir rejecting outside-workspace paths.""" + + def test_is_valid_subdir_rejects_outside_path(self, tmp_path, project): + """_is_valid_subdir should return False for paths outside working_dir. + + Note: tmp_path / "other" is inside tmp_path (=project), so we use + tmp_path.parent / "other" to create a true outside-path sibling. + """ + parent = tmp_path.parent + other_project = parent / "other" + other_project.mkdir(exist_ok=True) + tracker = SubdirectoryHintTracker(working_dir=str(project)) + assert tracker._is_valid_subdir(other_project) is False + + def test_is_valid_subdir_allows_inside_path(self, project): + """_is_valid_subdir should return True for paths inside working_dir.""" + tracker = SubdirectoryHintTracker(working_dir=str(project)) + backend = project / "backend" + assert tracker._is_valid_subdir(backend) is True + + def test_is_valid_subdir_rejects_parent_dir(self, tmp_path, project): + """_is_valid_subdir should reject parent directories outside working_dir.""" + parent = tmp_path.parent + tracker = SubdirectoryHintTracker(working_dir=str(project)) + assert tracker._is_valid_subdir(parent) is False + + def test_is_valid_subdir_rejects_sibling_dir(self, tmp_path, project): + """_is_valid_subdir should reject a sibling directory (simulating ~/.codex).""" + parent = tmp_path.parent + outside = parent / ".test-codex" + outside.mkdir(exist_ok=True) + tracker = SubdirectoryHintTracker(working_dir=str(project)) + assert tracker._is_valid_subdir(outside) is False