mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
test(gateway): cover document context note for PDF/DOCX vs text
Pin the contract for _build_document_context_note: text documents confirm the inlined content and record the path; binary documents (PDF/DOCX/XLSX/octet-stream) tell the agent to extract the text itself and never instruct it to ask the user to paste the contents.
This commit is contained in:
parent
e7ae145ac4
commit
4e9be3ee32
1 changed files with 57 additions and 0 deletions
57
tests/gateway/test_document_context_note.py
Normal file
57
tests/gateway/test_document_context_note.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
"""Tests for the document context note prepended to user turns with attachments.
|
||||
|
||||
A user who attaches a PDF / DOCX in chat used to see the agent treat it as
|
||||
"unreadable" because the context note told the model to "Ask the user what
|
||||
they'd like you to do with it" — steering it away from extracting the text it
|
||||
is perfectly capable of reading. These tests pin the contract:
|
||||
|
||||
- text documents: note confirms the (adapter-)inlined content + records path.
|
||||
- binary documents (PDF/DOCX/…): note tells the agent to extract the text
|
||||
itself and never tells it to punt back to the user.
|
||||
"""
|
||||
|
||||
import importlib
|
||||
|
||||
import pytest
|
||||
|
||||
# Load the module under test through importlib and bind its private helper to
# a module-level name so the test classes below can call it directly.
# NOTE(review): presumably importlib is used instead of a plain import to
# control when/how gateway.run is loaded — confirm against the repo.
gateway_run = importlib.import_module("gateway.run")
|
||||
_build_document_context_note = gateway_run._build_document_context_note
|
||||
|
||||
|
||||
class TestTextDocumentNote:
    """Contract for the context note built for text attachments."""

    @pytest.mark.parametrize("mtype", ["text/plain", "text/markdown", "text/csv"])
    def test_text_note_mentions_included_content_and_path(self, mtype):
        """The note labels the file as a text document, names it, records the
        cached path, and states that the content is included below."""
        rendered = _build_document_context_note(
            "notes.txt", "/cache/doc_notes.txt", mtype
        )
        # Checked in order so the first missing fragment is the one reported.
        expected_fragments = (
            "text document",
            "notes.txt",
            "/cache/doc_notes.txt",
            "included below",
        )
        for fragment in expected_fragments:
            assert fragment in rendered
|
||||
|
||||
|
||||
class TestBinaryDocumentNote:
    """Contract for the context note built for binary attachments.

    Binary documents (PDF, DOCX, XLSX, generic octet-stream) are not inlined,
    so the note must point the agent at the saved file and tell it to extract
    the text itself rather than handing the task back to the user.
    """

    @pytest.mark.parametrize(
        "mtype",
        [
            "application/pdf",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/octet-stream",
        ],
    )
    def test_binary_note_guides_extraction(self, mtype):
        """The note records the path, asks for extraction, and never punts."""
        note = _build_document_context_note(
            "contract.pdf", "/cache/doc_contract.pdf", mtype
        )
        lowered = note.lower()
        # Records the path so the agent can open it.
        assert "/cache/doc_contract.pdf" in note
        # Tells the agent to read it by extracting the text...
        assert "extract" in lowered
        # ...and does NOT steer it into punting back to the user (the bug):
        # neither "ask the user ..." nor a request to paste the contents.
        assert "ask the user" not in lowered
        assert "paste" not in lowered

    def test_binary_note_distinct_from_text_note(self):
        """Text and binary notes differ; only the text note claims inlining."""
        text_note = _build_document_context_note("a.txt", "/c/a.txt", "text/plain")
        pdf_note = _build_document_context_note("a.pdf", "/c/a.pdf", "application/pdf")
        assert text_note != pdf_note
        # The text path claims content is inlined; the binary path must not.
        assert "included below" in text_note
        assert "included below" not in pdf_note
|
||||
Loading…
Add table
Add a link
Reference in a new issue