diff --git a/gateway/run.py b/gateway/run.py index 041a6efcd56..85267cc44ee 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1425,6 +1425,33 @@ def _build_media_placeholder(event) -> str: return "\n".join(parts) +def _build_document_context_note(display_name: str, agent_path: str, mtype: str) -> str: + """Context note prepended to a user turn when they attach a document. + + Text documents (``text/*``) have their content inlined upstream by the + platform adapter, so the note just confirms that and records the path. + + Binary documents (PDF, DOCX, XLSX, …) cannot be inlined as text. The note + must tell the agent to *extract* the text itself before answering — earlier + wording ("Ask the user what they'd like you to do with it") steered the + model into punting back to the user, which is why attached PDFs/DOCX looked + "unreadable" to the agent even though it has the tools to read them. + """ + if mtype.startswith("text/"): + return ( + f"[The user sent a text document: '{display_name}'. " + f"Its content has been included below. " + f"The file is also saved at: {agent_path}]" + ) + return ( + f"[The user sent a document: '{display_name}'. It is saved at: {agent_path}. " + f"Its text is not inlined here (it's a binary format such as PDF or DOCX). " + f"To read it, extract the document's text yourself — for example with the " + f"terminal tool or the ocr-and-documents skill — before answering, instead " + f"of asking the user to paste the contents.]" + ) + + def _format_duration(seconds: float) -> str: total = int(round(seconds)) if total < 0: @@ -7732,18 +7759,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew # cache directories are auto-mounted at /root/.hermes/cache/* by get_cache_directory_mounts(). agent_path = to_agent_visible_cache_path(path) - if mtype.startswith("text/"): - context_note = ( - f"[The user sent a text document: '{display_name}'. " - f"Its content has been included below. " - f"The file is also saved at: {agent_path}]" - ) - else: - context_note = ( - f"[The user sent a document: '{display_name}'. " - f"The file is saved at: {agent_path}. " - f"Ask the user what they'd like you to do with it.]" - ) + context_note = _build_document_context_note(display_name, agent_path, mtype) message_text = f"{context_note}\n\n{message_text}" if getattr(event, "reply_to_text", None) and event.reply_to_message_id: