fix: reject read_file symlinks to blocking devices (#10133)

* fix: reject read_file symlinks to blocking devices

The read_file guard already refused direct device paths such as /dev/zero, but a workspace symlink resolving to one of those devices could still reach the shell-backed read path and hang on wc/head/sed. Keep the literal alias check and add a resolved-path pass so local symlinks to blocked device/fd endpoints are rejected before I/O.

Constraint: Preserve literal /dev/stdin handling before terminal-specific realpath resolution

Confidence: high

Scope-risk: narrow

Tested: pytest tests/tools/test_file_read_guards.py tests/tools/test_file_tools.py -q; python -m compileall tools/file_tools.py tests/tools/test_file_read_guards.py; git diff --check
Signed-off-by: WuKongAI-CMU <210765158+WuKongAI-CMU@users.noreply.github.com>

* Keep file guard tests off sensitive macOS temp paths

The branch now inherits a sensitive-path write guard from upstream main. On macOS, tempfile.mkdtemp() resolves under /private/var/folders, so the new write-path guard fired before the file read dedup assertions could exercise their intended behavior. The tests now create their scratch files inside the worktree temp checkout, outside those system-sensitive prefixes, without changing production behavior.

Constraint: Rebased branch must pass the expanded file read guard suite on macOS.

Rejected: Loosen the production sensitive-path prefix list | broader behavior change unrelated to this PR.

Confidence: high

Scope-risk: narrow

Tested: pytest tests/tools/test_file_read_guards.py -q

---------

Signed-off-by: WuKongAI-CMU <210765158+WuKongAI-CMU@users.noreply.github.com>
Co-authored-by: WuKongAI-CMU <210765158+WuKongAI-CMU@users.noreply.github.com>
This commit is contained in:
Peter 2026-05-25 08:07:38 -04:00 committed by GitHub
parent b7b8bec800
commit ee59ef1946
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 65 additions and 11 deletions

View file

@ -55,6 +55,11 @@ def _make_fake_ops(content="hello\n", total_lines=1, file_size=6):
return fake
def _make_safe_tempdir(prefix: str) -> str:
"""Create a temp dir outside macOS system-sensitive /private/var paths."""
return tempfile.mkdtemp(prefix=prefix, dir=os.getcwd())
# ---------------------------------------------------------------------------
# Device path blocking
# ---------------------------------------------------------------------------
@ -100,12 +105,48 @@ class TestDevicePathBlocking(unittest.TestCase):
self.assertFalse(_is_blocked_device("/tmp/test.py"))
self.assertFalse(_is_blocked_device("/home/user/.bashrc"))
def test_symlink_to_blocked_device_is_blocked(self):
with tempfile.TemporaryDirectory() as tmpdir:
link_path = os.path.join(tmpdir, "zero-link")
try:
os.symlink("/dev/zero", link_path)
except OSError as exc:
self.skipTest(f"symlink unavailable: {exc}")
self.assertTrue(_is_blocked_device(link_path))
def test_symlink_to_regular_file_not_blocked(self):
with tempfile.TemporaryDirectory() as tmpdir:
target_path = os.path.join(tmpdir, "regular.txt")
link_path = os.path.join(tmpdir, "regular-link")
with open(target_path, "w", encoding="utf-8") as handle:
handle.write("safe\n")
try:
os.symlink(target_path, link_path)
except OSError as exc:
self.skipTest(f"symlink unavailable: {exc}")
self.assertFalse(_is_blocked_device(link_path))
def test_read_file_tool_rejects_device(self):
"""read_file_tool returns an error without any file I/O."""
result = json.loads(read_file_tool("/dev/zero", task_id="dev_test"))
self.assertIn("error", result)
self.assertIn("device file", result["error"])
@patch("tools.file_tools._get_file_ops")
def test_read_file_tool_rejects_device_symlink_before_io(self, mock_ops):
with tempfile.TemporaryDirectory() as tmpdir:
link_path = os.path.join(tmpdir, "zero-link")
try:
os.symlink("/dev/zero", link_path)
except OSError as exc:
self.skipTest(f"symlink unavailable: {exc}")
result = json.loads(read_file_tool(link_path, task_id="dev_link_test"))
self.assertIn("error", result)
self.assertIn("device file", result["error"])
mock_ops.assert_not_called()
# ---------------------------------------------------------------------------
# Character-count limits
@ -166,7 +207,7 @@ class TestFileDedup(unittest.TestCase):
def setUp(self):
_read_tracker.clear()
self._tmpdir = tempfile.mkdtemp()
self._tmpdir = _make_safe_tempdir("hermes-dedup-")
self._tmpfile = os.path.join(self._tmpdir, "dedup_test.txt")
with open(self._tmpfile, "w") as f:
f.write("line one\nline two\n")
@ -631,7 +672,7 @@ class TestWriteInvalidatesDedup(unittest.TestCase):
def setUp(self):
_read_tracker.clear()
self._tmpdir = tempfile.mkdtemp()
self._tmpdir = _make_safe_tempdir("hermes-write-dedup-")
self._tmpfile = os.path.join(self._tmpdir, "write_dedup.txt")
with open(self._tmpfile, "w") as f:
f.write("original content\n")

View file

@ -127,15 +127,9 @@ def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
return p.resolve()
def _is_blocked_device(filepath: str) -> bool:
"""Return True if the path would hang the process (infinite output or blocking input).
Uses the *literal* path no symlink resolution because the model
specifies paths directly and realpath follows symlinks all the way
through (e.g. /dev/stdin /proc/self/fd/0 /dev/pts/0), defeating
the check.
"""
normalized = os.path.expanduser(filepath)
def _is_blocked_device_path(path: str) -> bool:
"""Return True for concrete device/fd paths that can hang reads."""
normalized = os.path.expanduser(path)
if normalized in _BLOCKED_DEVICE_PATHS:
return True
# /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
@ -152,6 +146,25 @@ def _is_blocked_device(filepath: str) -> bool:
return False
def _is_blocked_device(filepath: str) -> bool:
"""Return True if the path would hang the process (infinite output or blocking input).
Check the literal path first so aliases like /dev/stdin are caught before
they resolve to terminal-specific paths. Then check the resolved path so a
workspace symlink to /dev/zero cannot bypass the guard.
"""
normalized = os.path.expanduser(filepath)
if _is_blocked_device_path(normalized):
return True
try:
resolved = os.path.realpath(normalized)
except (OSError, ValueError):
return False
if resolved != normalized and _is_blocked_device_path(resolved):
return True
return False
# Paths that file tools should refuse to write to without going through the
# terminal tool's approval system. These match prefixes after os.path.realpath.
_SENSITIVE_PATH_PREFIXES = (