test(gateway): cover document context note for PDF/DOCX vs text

Pin the contract for _build_document_context_note: text documents confirm the
inlined content and record the path; binary documents (PDF/DOCX/XLSX/octet-
stream) tell the agent to extract the text itself and never instruct it to ask
the user to paste the contents.
This commit is contained in:
xxxigm 2026-06-11 16:51:12 +07:00 committed by Teknium
parent e7ae145ac4
commit 4e9be3ee32

View file

@ -0,0 +1,57 @@
"""Tests for the document context note prepended to user turns with attachments.
A user who attaches a PDF / DOCX in chat used to see the agent treat it as
"unreadable" because the context note told the model to "Ask the user what
they'd like you to do with it" — steering it away from extracting the text it
is perfectly capable of reading. These tests pin the contract:
- text documents: note confirms the (adapter-)inlined content + records path.
- binary documents (PDF/DOCX/): note tells the agent to extract the text
itself and never tells it to punt back to the user.
"""
import importlib
import pytest
gateway_run = importlib.import_module("gateway.run")
_build_document_context_note = gateway_run._build_document_context_note
class TestTextDocumentNote:
@pytest.mark.parametrize("mtype", ["text/plain", "text/markdown", "text/csv"])
def test_text_note_mentions_included_content_and_path(self, mtype):
note = _build_document_context_note("notes.txt", "/cache/doc_notes.txt", mtype)
assert "text document" in note
assert "notes.txt" in note
assert "/cache/doc_notes.txt" in note
assert "included below" in note
class TestBinaryDocumentNote:
@pytest.mark.parametrize(
"mtype",
[
"application/pdf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/octet-stream",
],
)
def test_binary_note_guides_extraction(self, mtype):
note = _build_document_context_note("contract.pdf", "/cache/doc_contract.pdf", mtype)
# Records the path so the agent can open it.
assert "/cache/doc_contract.pdf" in note
# Tells the agent to read it by extracting the text...
assert "extract" in note.lower()
# ...and does NOT steer it into punting back to the user (the bug).
assert "ask the user" not in note.lower()
assert "paste" in note.lower()
def test_binary_note_distinct_from_text_note(self):
text_note = _build_document_context_note("a.txt", "/c/a.txt", "text/plain")
pdf_note = _build_document_context_note("a.pdf", "/c/a.pdf", "application/pdf")
assert text_note != pdf_note
# The text path claims content is inlined; the binary path must not.
assert "included below" in text_note
assert "included below" not in pdf_note