diff --git a/agent/context_references.py b/agent/context_references.py index dc03ccd740b..6307033d270 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -246,7 +246,14 @@ def _expand_file_reference( if not path.is_file(): return f"{ref.raw}: path is not a file", None if _is_binary_file(path): - return f"{ref.raw}: binary files are not supported", None + # A binary file can't be inlined as text, but it IS on disk (the agent's + # tools run where this resolves — the local cwd, or the staged copy in a + # remote session workspace). Returning a bare "not supported" warning + # with no content was a dead end: the model saw a failure and gave up + # (told the user the file type wasn't supported). Instead, hand it an + # actionable block — the path, type, size, and a nudge to use its tools — + # so it can read/convert/view the file itself. + return None, _binary_reference_block(ref, path) text = path.read_text(encoding="utf-8") if ref.line_start is not None: @@ -493,6 +500,30 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None: return files[:limit] +def _human_bytes(n: int) -> str: + size = float(n) + for unit in ("B", "KB", "MB", "GB"): + if size < 1024 or unit == "GB": + return f"{int(size)} {unit}" if unit == "B" else f"{size:.1f} {unit}" + size /= 1024 + return f"{size:.1f} GB" + + +def _binary_reference_block(ref: ContextReference, path: Path) -> str: + mime, _ = mimetypes.guess_type(path.name) + mime = mime or "application/octet-stream" + try: + size = _human_bytes(path.stat().st_size) + except OSError: + size = "unknown size" + return ( + f"📎 {ref.raw} ({mime}, {size}) — binary file, not inlined as text. " + f"It is available on disk at `{path}`. Use your tools to work with it " + f"(read or convert it, extract its text, or view/render it as needed); " + f"do not tell the user the file type is unsupported." + ) + + def _file_metadata(path: Path) -> str: if _is_binary_file(path): return f"{path.stat().st_size} bytes" diff --git a/tests/agent/test_context_references.py b/tests/agent/test_context_references.py index 02456d06494..1afd5ee20b2 100644 --- a/tests/agent/test_context_references.py +++ b/tests/agent/test_context_references.py @@ -192,21 +192,40 @@ def test_expand_git_diff_staged_and_log(sample_repo: Path): assert "VALUE = 2" in result.message -def test_binary_and_missing_files_become_warnings(sample_repo: Path): +def test_missing_file_becomes_warning(sample_repo: Path): from agent.context_references import preprocess_context_references result = preprocess_context_references( - "Check @file:blob.bin and @file:nope.txt", + "Check @file:nope.txt", cwd=sample_repo, context_length=100_000, ) assert result.expanded - assert len(result.warnings) == 2 - assert "binary" in result.message.lower() + assert len(result.warnings) == 1 assert "not found" in result.message.lower() +def test_binary_file_yields_actionable_block_not_a_dead_warning(sample_repo: Path): + from agent.context_references import preprocess_context_references + + result = preprocess_context_references( + "Check @file:blob.bin", + cwd=sample_repo, + context_length=100_000, + ) + + assert result.expanded + # The whole point: a binary attachment must NOT degrade into a discouraging + # warning that makes the model give up — it gets an actionable content block. + assert not result.warnings + assert "blob.bin" in result.message + assert "binary" in result.message.lower() + assert "not supported" not in result.message.lower() + # And it must point the agent at the file so it can act on it with tools. + assert str(sample_repo / "blob.bin") in result.message + + def test_soft_budget_warns_and_hard_budget_refuses(sample_repo: Path): from agent.context_references import preprocess_context_references