fix(file): reject read_file line-numbered writeback

This commit is contained in:
Brandon Zarnitz 2026-05-04 11:52:37 -04:00 committed by Teknium
parent a18bae65b9
commit 71274f264b
2 changed files with 74 additions and 3 deletions

View file

@ -91,6 +91,33 @@ class TestWriteFileHandler:
assert any("write_file expected denial" in r.getMessage() for r in caplog.records)
assert not any(r.levelno >= logging.ERROR for r in caplog.records)
@patch("tools.file_tools._get_file_ops")
def test_rejects_read_file_line_numbered_content(self, mock_get):
"""#19798 — do not persist read_file's LINE_NUM|CONTENT display format."""
from tools.file_tools import write_file_tool
content = " 1|setting: new_value\n 2|other: thing\n"
result = json.loads(write_file_tool("/tmp/config.yaml", content))
assert "error" in result
assert "line-number" in result["error"].lower()
mock_get.assert_not_called()
@patch("tools.file_tools._get_file_ops")
def test_allows_sparse_literal_pipe_content(self, mock_get):
"""A single literal N| line should not be treated as read_file output."""
mock_ops = MagicMock()
result_obj = MagicMock()
result_obj.to_dict.return_value = {"status": "ok", "path": "/tmp/out.txt", "bytes": 21}
mock_ops.write_file.return_value = result_obj
mock_get.return_value = mock_ops
from tools.file_tools import write_file_tool
result = json.loads(write_file_tool("/tmp/out.txt", "1|literal value\nplain line\n"))
assert result["status"] == "ok"
mock_ops.write_file.assert_called_once()
@patch("tools.file_tools._get_file_ops")
def test_unexpected_exception_still_logs_error(self, mock_get, caplog):
mock_get.side_effect = RuntimeError("boom")

View file

@ -657,6 +657,49 @@ def _is_internal_file_status_text(content: str) -> bool:
return False
def _looks_like_read_file_line_numbered_content(content: str) -> bool:
"""Return True for content dominated by read_file's ``LINE_NUM|CONTENT`` display.
``read_file`` intentionally returns line-numbered text to the model. If
that display format is echoed into ``write_file``, config/source files are
silently corrupted with prefixes like `` 1|``. We reject writes where the
non-empty lines are mostly consecutive read_file-style numbered lines, while
allowing sparse literal pipe content such as a single ``1|value`` line.
"""
if not isinstance(content, str):
return False
lines = [line for line in content.splitlines() if line.strip()]
if len(lines) < 2:
return False
numbered: list[int] = []
for line in lines:
stripped = line.lstrip()
prefix, sep, _rest = stripped.partition("|")
if sep and prefix.isdigit():
numbered.append(int(prefix))
if len(numbered) < 2:
return False
if len(numbered) / len(lines) < 0.6:
return False
consecutive_pairs = sum(
1 for prev, current in zip(numbered, numbered[1:])
if current == prev + 1
)
return consecutive_pairs >= len(numbered) - 1
def _is_internal_file_tool_content(content: str) -> bool:
"""Return True when content is file-tool display text, not intended file bytes."""
return (
_is_internal_file_status_text(content)
or _looks_like_read_file_line_numbered_content(content)
)
def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
"""Get or create ShellFileOperations for a terminal environment.
@ -1213,10 +1256,11 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
cross_warning = _check_cross_profile_path(path, task_id)
if cross_warning:
return tool_error(cross_warning)
if _is_internal_file_status_text(content):
if _is_internal_file_tool_content(content):
return tool_error(
"Refusing to write internal read_file status text as file content. "
"Re-read the file or reconstruct the intended file contents before writing."
"Refusing to write internal read_file display text as file content. "
"Strip read_file line-number prefixes or reconstruct the intended "
"file contents before writing."
)
try:
# Resolve once for the registry lock + stale check. Failures here