fix(agent): make a binary @file: reference actionable instead of a dead end

A binary @file: ref (PDF, docx, spreadsheet, …) expanded to a bare
"binary files are not supported" warning with no content. The model saw a
failure and gave up — e.g. a dropped PDF came back as a text note claiming the
type was unsupported, even though the file was staged on disk right next to it.

Inject an actionable content block instead: the path, mime type, size, and a
nudge to use its tools to read/convert/view the file (and explicitly not to tell
the user the type is unsupported). General across every binary type — not
PDF-specific. The file already resolves where the agent's tools run (local cwd
or the staged copy in a remote session workspace), so it can act on it directly.
This commit is contained in:
Brooklyn Nicholson 2026-06-09 19:16:46 -05:00
parent 29147afd63
commit 7ffc216bc0
2 changed files with 55 additions and 5 deletions

View file

@ -246,7 +246,14 @@ def _expand_file_reference(
if not path.is_file():
return f"{ref.raw}: path is not a file", None
if _is_binary_file(path):
return f"{ref.raw}: binary files are not supported", None
# A binary file can't be inlined as text, but it IS on disk (the agent's
# tools run where this resolves — the local cwd, or the staged copy in a
# remote session workspace). Returning a bare "not supported" warning
# with no content was a dead end: the model saw a failure and gave up
# (told the user the file type wasn't supported). Instead, hand it an
# actionable block — the path, type, size, and a nudge to use its tools —
# so it can read/convert/view the file itself.
return None, _binary_reference_block(ref, path)
text = path.read_text(encoding="utf-8")
if ref.line_start is not None:
@ -493,6 +500,30 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
return files[:limit]
def _human_bytes(n: int) -> str:
size = float(n)
for unit in ("B", "KB", "MB", "GB"):
if size < 1024 or unit == "GB":
return f"{int(size)} {unit}" if unit == "B" else f"{size:.1f} {unit}"
size /= 1024
return f"{size:.1f} GB"
def _binary_reference_block(ref: ContextReference, path: Path) -> str:
mime, _ = mimetypes.guess_type(path.name)
mime = mime or "application/octet-stream"
try:
size = _human_bytes(path.stat().st_size)
except OSError:
size = "unknown size"
return (
f"📎 {ref.raw} ({mime}, {size}) — binary file, not inlined as text. "
f"It is available on disk at `{path}`. Use your tools to work with it "
f"(read or convert it, extract its text, or view/render it as needed); "
f"do not tell the user the file type is unsupported."
)
def _file_metadata(path: Path) -> str:
if _is_binary_file(path):
return f"{path.stat().st_size} bytes"

View file

@ -192,21 +192,40 @@ def test_expand_git_diff_staged_and_log(sample_repo: Path):
assert "VALUE = 2" in result.message
def test_binary_and_missing_files_become_warnings(sample_repo: Path):
def test_missing_file_becomes_warning(sample_repo: Path):
from agent.context_references import preprocess_context_references
result = preprocess_context_references(
"Check @file:blob.bin and @file:nope.txt",
"Check @file:nope.txt",
cwd=sample_repo,
context_length=100_000,
)
assert result.expanded
assert len(result.warnings) == 2
assert "binary" in result.message.lower()
assert len(result.warnings) == 1
assert "not found" in result.message.lower()
def test_binary_file_yields_actionable_block_not_a_dead_warning(sample_repo: Path):
from agent.context_references import preprocess_context_references
result = preprocess_context_references(
"Check @file:blob.bin",
cwd=sample_repo,
context_length=100_000,
)
assert result.expanded
# The whole point: a binary attachment must NOT degrade into a discouraging
# warning that makes the model give up — it gets an actionable content block.
assert not result.warnings
assert "blob.bin" in result.message
assert "binary" in result.message.lower()
assert "not supported" not in result.message.lower()
# And it must point the agent at the file so it can act on it with tools.
assert str(sample_repo / "blob.bin") in result.message
def test_soft_budget_warns_and_hard_budget_refuses(sample_repo: Path):
from agent.context_references import preprocess_context_references