diff --git a/tests/tools/test_file_tools.py b/tests/tools/test_file_tools.py index f99cf1d0ec3..36918417938 100644 --- a/tests/tools/test_file_tools.py +++ b/tests/tools/test_file_tools.py @@ -275,6 +275,104 @@ class TestPatchHandler: assert "traversal" in result["error"].lower() +class TestPatchSensitivePathExtraction: + """Regression tests for patch_tool sensitive-path extraction. + + The sensitive path check relies on a regex that parses V4A patch + headers. These tests cover: + + 1. ``*** Move File:`` operations (previously missed — the regex only + matched Update/Add/Delete, so Move could target /etc/* without + hitting the check). + 2. ``***Keyword File:`` with no space after ``***`` (previously missed — + the regex required ``\\s+`` even though patch_parser accepts ``\\s*``). + 3. ``..`` traversal in Move headers (the Move endpoints run through the + same traversal rejection as the other V4A headers). + """ + + @patch("tools.file_tools._get_file_ops") + def test_patch_move_to_sensitive_dst_blocked(self, mock_get): + from tools.file_tools import patch_tool + patch_text = ( + "*** Begin Patch\n" + "*** Move File: /tmp/work.txt -> /etc/crontab\n" + "*** End Patch\n" + ) + result = json.loads(patch_tool(mode="patch", patch=patch_text)) + assert "error" in result + assert "sensitive" in result["error"].lower() + mock_get.assert_not_called() + + @patch("tools.file_tools._get_file_ops") + def test_patch_move_from_sensitive_src_blocked(self, mock_get): + from tools.file_tools import patch_tool + patch_text = ( + "*** Begin Patch\n" + "*** Move File: /etc/hosts -> /tmp/leak.txt\n" + "*** End Patch\n" + ) + result = json.loads(patch_tool(mode="patch", patch=patch_text)) + assert "error" in result + assert "sensitive" in result["error"].lower() + mock_get.assert_not_called() + + @patch("tools.file_tools._get_file_ops") + def test_patch_update_no_space_after_asterisks_blocked(self, mock_get): + """``***Update File:`` (no space after asterisks) must also be caught. + + patch_parser.py accepts this form (``\\s*`` in its regex), so the + sensitive path check must be at least as lenient or the check + is bypassed. + """ + from tools.file_tools import patch_tool + patch_text = ( + "*** Begin Patch\n" + "***Update File: /etc/resolv.conf\n" + "@@ @@\n" + "-old\n" + "+new\n" + "*** End Patch\n" + ) + result = json.loads(patch_tool(mode="patch", patch=patch_text)) + assert "error" in result + assert "sensitive" in result["error"].lower() + mock_get.assert_not_called() + + @patch("tools.file_tools._get_file_ops") + def test_patch_move_rejects_traversal_endpoint(self, mock_get): + """A Move endpoint with ``..`` traversal is rejected, same as the + Update/Add/Delete headers.""" + from tools.file_tools import patch_tool + patch_text = ( + "*** Begin Patch\n" + "*** Move File: /tmp/work.txt -> ../../../etc/shadow\n" + "*** End Patch\n" + ) + result = json.loads(patch_tool(mode="patch", patch=patch_text)) + assert "error" in result + assert "traversal" in result["error"].lower() + mock_get.assert_not_called() + + @patch("tools.file_tools._get_file_ops") + def test_patch_move_safe_paths_not_blocked(self, mock_get): + """Safe Move operations should still reach the file_ops dispatch.""" + mock_ops = MagicMock() + result_obj = MagicMock() + result_obj.to_dict.return_value = {"status": "ok"} + mock_ops.patch_v4a.return_value = result_obj + mock_get.return_value = mock_ops + + from tools.file_tools import patch_tool + patch_text = ( + "*** Begin Patch\n" + "*** Move File: /tmp/a.txt -> /tmp/b.txt\n" + "*** End Patch\n" + ) + result = json.loads(patch_tool(mode="patch", patch=patch_text)) + assert "error" not in result + mock_ops.patch_v4a.assert_called_once() + + class TestSearchHandler: @patch("tools.file_tools._get_file_ops") def test_search_calls_file_ops(self, mock_get): diff --git a/tools/file_tools.py b/tools/file_tools.py index 4d2b3340afb..a0b32ea39d4 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -1513,8 +1513,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, if mode == "patch" and patch: import re as _re from tools.path_security import has_traversal_component - for _m in _re.finditer(r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$', patch, _re.MULTILINE): - v4a_path = _m.group(1).strip() + def _reject_v4a_traversal(v4a_path: str) -> str | None: # V4A path headers come from patch CONTENT, not the explicit # ``path=`` arg — so they're more attacker-influenceable (skill # content, web extract, prompt injection). Reject ``..`` traversal @@ -1527,9 +1526,31 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, return tool_error( f"V4A patch header contains '..' traversal: {v4a_path!r}. " "Use the agent's cwd-relative path (no '..') or an absolute " - "path in '*** Update File:' / '*** Add File:' / '*** Delete File:' headers." + "path in '*** Update File:' / '*** Add File:' / " + "'*** Delete File:' / '*** Move File:' headers." ) + return None + + # ``\s*`` (not ``\s+``) after ``***`` matches patch_parser leniency: + # it accepts ``***Update File:`` with no space after the asterisks + # (patch_parser.py uses ``\*\*\*\s*Update\s+File:``). Requiring a space + # here let a no-space header parse + apply while skipping this check. + for _m in _re.finditer(r'^\*\*\*\s*(?:Update|Add|Delete)\s+File:\s*(.+)$', patch, _re.MULTILINE): + v4a_path = _m.group(1).strip() + _err = _reject_v4a_traversal(v4a_path) + if _err: + return _err _paths_to_check.append(v4a_path) + # ``*** Move File: src -> dst`` is a valid V4A op (patch_parser.py:114) + # but was never extracted, so a Move targeting /etc/crontab skipped the + # sensitive-path pre-check. Check BOTH endpoints, and run them through + # the same ``..`` traversal rejection as the other headers. + for _m in _re.finditer(r'^\*\*\*\s*Move\s+File:\s*(.+?)\s*->\s*(.+)$', patch, _re.MULTILINE): + for v4a_path in (_m.group(1).strip(), _m.group(2).strip()): + _err = _reject_v4a_traversal(v4a_path) + if _err: + return _err + _paths_to_check.append(v4a_path) for _p in _paths_to_check: sensitive_err = _check_sensitive_path(_p, task_id) if sensitive_err: