wip: tool result fixes -- persistence

2026-04-25 00:51:20 +00:00 · 2026-04-07 22:21:27 -07:00 · 2026-04-07 22:21:27 -07:00 · 65e24c942e
commit 65e24c942e
parent 22d1bda185
11 changed files with 869 additions and 235 deletions
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@ -7,6 +7,7 @@ import logging
 import os
 import threading
 from pathlib import Path
+from tools.binary_extensions import has_binary_extension
 from tools.file_operations import ShellFileOperations
 from agent.redact import redact_sensitive_text

@ -290,11 +291,24 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
                ),
            })

+        # Resolve path once for all guards below
+        import pathlib as _pathlib
+        _resolved = _pathlib.Path(path).expanduser().resolve()
+
+        # ── Binary file guard ─────────────────────────────────────────
+        # Block binary files by extension (no I/O).
+        if has_binary_extension(str(_resolved)):
+            _ext = _resolved.suffix.lower()
+            return json.dumps({
+                "error": (
+                    f"Cannot read binary file '{path}' ({_ext}). "
+                    "Use vision_analyze for images, or terminal to inspect binary files."
+                ),
+            })
+
        # ── Hermes internal path guard ────────────────────────────────
        # Prevent prompt injection via catalog or hub metadata files.
-        import pathlib as _pathlib
        from hermes_constants import get_hermes_home as _get_hh
-        _resolved = _pathlib.Path(path).expanduser().resolve()
        _hermes_home = _get_hh().resolve()
        _blocked_dirs = [
            _hermes_home / "skills" / ".hub" / "index-cache",
@ -313,6 +327,27 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
            except ValueError:
                pass

+        # ── Pre-read file size guard ──────────────────────────────────
+        # Stat the file before reading.  If it's large and the model
+        # didn't request a narrow range, block and tell it to use
+        # offset/limit — cheaper than reading 200K chars then rejecting.
+        _PRE_READ_MAX_BYTES = 100_000
+        _NARROW_LIMIT = 200
+        try:
+            _fsize = os.path.getsize(str(_resolved))
+        except OSError:
+            _fsize = 0
+        if _fsize > _PRE_READ_MAX_BYTES and limit > _NARROW_LIMIT:
+            return json.dumps({
+                "error": (
+                    f"File is too large to read in full ({_fsize:,} bytes). "
+                    f"Use offset and limit parameters to read specific sections "
+                    f"(e.g. offset=1, limit=100 for the first 100 lines)."
+                ),
+                "path": path,
+                "file_size": _fsize,
+            }, ensure_ascii=False)
+
        # ── Dedup check ───────────────────────────────────────────────
        # If we already read this exact (path, offset, limit) and the
        # file hasn't been modified since, return a lightweight stub
@ -726,7 +761,7 @@ def _check_file_reqs():

 READ_FILE_SCHEMA = {
    "name": "read_file",
-    "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.",
+    "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 100KB will be rejected unless you specify a narrow range (limit <= 200). NOTE: Cannot read images or binary files — use vision_analyze for images.",
    "parameters": {
        "type": "object",
        "properties": {
@ -817,7 +852,7 @@ def _handle_search_files(args, **kw):
        output_mode=args.get("output_mode", "content"), context=args.get("context", 0), task_id=tid)


-registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖")
-registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️")
-registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧")
-registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎")
+registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=float('inf'))
+registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️", max_result_size_chars=100_000)
+registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧", max_result_size_chars=100_000)
+registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=20_000)