fix(tools): address PR review — remove _extract_raw_output, BudgetConfig everywhere, read_file hardening

- Remove _extract_raw_output: persist content verbatim (fixes size mismatch bug)
- Drop import aliases: import from budget_config directly, one canonical name
- BudgetConfig param on maybe_persist_tool_result and enforce_turn_budget
- read_file: limit=None signature, pre-read guard fires only when limit omitted (256KB)
- Unify binary extensions: file_operations.py imports from binary_extensions.py
- Exclude .pdf and .svg from binary set (text-based, agents may inspect)
- Remove redundant outer try/except in eval path (internal fallback handles it)
- Fix broken tests: update assertion strings for new persistence format
- Module-level constants: _PRE_READ_MAX_BYTES, _DEFAULT_READ_LIMIT
- Remove redundant pathlib import (Path already at module level)
- Update spec.md with IMPLEMENTED annotations and design decisions
This commit is contained in:
alt-glitch 2026-04-08 00:13:41 -07:00 committed by Teknium
parent 77c5bc9da9
commit bbcff8dcd0
8 changed files with 83 additions and 158 deletions

View file

@ -16,8 +16,8 @@ BINARY_EXTENSIONS = frozenset({
# Executables/binaries
".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".obj", ".lib",
".app", ".msi", ".deb", ".rpm",
# Documents (PDF is here; read_file excludes it at the call site)
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
# Documents (exclude .pdf — text-based, agents may want to inspect)
".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
".odt", ".ods", ".odp",
# Fonts
".ttf", ".otf", ".woff", ".woff2", ".eot",

View file

@ -33,6 +33,7 @@ from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from pathlib import Path
from hermes_constants import get_hermes_home
from tools.binary_extensions import BINARY_EXTENSIONS
# ---------------------------------------------------------------------------
@ -280,26 +281,6 @@ class FileOperations(ABC):
# Shell-based Implementation
# =============================================================================
# Binary file extensions (fast path check)
BINARY_EXTENSIONS = {
# Images
'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.tiff', '.tif',
'.svg', # SVG is text but often treated as binary
# Audio/Video
'.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv', '.flac', '.ogg', '.webm',
# Archives
'.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar',
# Documents
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
# Compiled/Binary
'.exe', '.dll', '.so', '.dylib', '.o', '.a', '.pyc', '.pyo', '.class',
'.wasm', '.bin',
# Fonts
'.ttf', '.otf', '.woff', '.woff2', '.eot',
# Other
'.db', '.sqlite', '.sqlite3',
}
# Image extensions (subset of binary that we can return as base64)
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico'}

View file

@ -26,6 +26,8 @@ _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
# Configurable via config.yaml: file_read_max_chars: 200000
# ---------------------------------------------------------------------------
_DEFAULT_MAX_READ_CHARS = 100_000
_PRE_READ_MAX_BYTES = 256_000 # reject full-file reads on files larger than this
_DEFAULT_READ_LIMIT = 500
_max_read_chars_cached: int | None = None
@ -277,7 +279,7 @@ def clear_file_ops_cache(task_id: str = None):
_file_ops_cache.clear()
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id: str = "default") -> str:
"""Read a file with pagination and line numbers."""
try:
# ── Device path guard ─────────────────────────────────────────
@ -291,9 +293,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
),
})
# Resolve path once for all guards below
import pathlib as _pathlib
_resolved = _pathlib.Path(path).expanduser().resolve()
_resolved = Path(path).expanduser().resolve()
# ── Binary file guard ─────────────────────────────────────────
# Block binary files by extension (no I/O).
@ -328,25 +328,26 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
pass
# ── Pre-read file size guard ──────────────────────────────────
# Stat the file before reading. If it's large and the model
# didn't request a narrow range, block and tell it to use
# offset/limit — cheaper than reading 200K chars then rejecting.
_PRE_READ_MAX_BYTES = 100_000
_NARROW_LIMIT = 200
try:
_fsize = os.path.getsize(str(_resolved))
except OSError:
_fsize = 0
if _fsize > _PRE_READ_MAX_BYTES and limit > _NARROW_LIMIT:
return json.dumps({
"error": (
f"File is too large to read in full ({_fsize:,} bytes). "
f"Use offset and limit parameters to read specific sections "
f"(e.g. offset=1, limit=100 for the first 100 lines)."
),
"path": path,
"file_size": _fsize,
}, ensure_ascii=False)
# Guard only when the caller omits limit; an explicit limit means
# the caller knows what slice it wants.
if limit is None:
try:
_fsize = os.path.getsize(str(_resolved))
except OSError:
_fsize = 0
if _fsize > _PRE_READ_MAX_BYTES:
return json.dumps({
"error": (
f"File is too large to read in full ({_fsize:,} bytes). "
f"Use offset and limit parameters to read specific sections "
f"(e.g. offset=1, limit=100 for the first 100 lines)."
),
"path": path,
"file_size": _fsize,
}, ensure_ascii=False)
if limit is None:
limit = _DEFAULT_READ_LIMIT
# ── Dedup check ───────────────────────────────────────────────
# If we already read this exact (path, offset, limit) and the
@ -761,7 +762,7 @@ def _check_file_reqs():
READ_FILE_SCHEMA = {
"name": "read_file",
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 100KB will be rejected unless you specify a narrow range (limit <= 200). NOTE: Cannot read images or binary files — use vision_analyze for images.",
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 256KB will be rejected unless you provide a limit parameter. NOTE: Cannot read images or binary files — use vision_analyze for images.",
"parameters": {
"type": "object",
"properties": {
@ -825,7 +826,7 @@ SEARCH_FILES_SCHEMA = {
def _handle_read_file(args, **kw):
tid = kw.get("task_id") or "default"
return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit", 500), task_id=tid)
return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit"), task_id=tid)
def _handle_write_file(args, **kw):

View file

@ -176,8 +176,8 @@ class ToolRegistry:
return entry.max_result_size_chars
if default is not None:
return default
from tools.tool_result_storage import DEFAULT_MAX_RESULT_SIZE_CHARS
return DEFAULT_MAX_RESULT_SIZE_CHARS
from tools.budget_config import DEFAULT_RESULT_SIZE_CHARS
return DEFAULT_RESULT_SIZE_CHARS
def get_all_tool_names(self) -> List[str]:
"""Return sorted list of all registered tool names."""

View file

@ -20,14 +20,13 @@ Defense against context-window overflow operates at three levels:
where many medium-sized results combine to overflow context.
"""
import json
import logging
import uuid
from tools.budget_config import (
DEFAULT_RESULT_SIZE_CHARS as DEFAULT_MAX_RESULT_SIZE_CHARS,
DEFAULT_TURN_BUDGET_CHARS as MAX_TURN_BUDGET_CHARS,
DEFAULT_PREVIEW_SIZE_CHARS as PREVIEW_SIZE_CHARS,
DEFAULT_PREVIEW_SIZE_CHARS,
BudgetConfig,
DEFAULT_BUDGET,
)
logger = logging.getLogger(__name__)
@ -38,7 +37,7 @@ HEREDOC_MARKER = "HERMES_PERSIST_EOF"
_BUDGET_TOOL_NAME = "__budget_enforcement__"
def generate_preview(content: str, max_chars: int = PREVIEW_SIZE_CHARS) -> tuple[str, bool]:
def generate_preview(content: str, max_chars: int = DEFAULT_PREVIEW_SIZE_CHARS) -> tuple[str, bool]:
"""Truncate at last newline within max_chars. Returns (preview, has_more)."""
if len(content) <= max_chars:
return content, False
@ -56,21 +55,6 @@ def _heredoc_marker(content: str) -> str:
return f"HERMES_PERSIST_{uuid.uuid4().hex[:8]}"
def _extract_raw_output(content: str) -> str:
"""Extract the 'output' field from JSON tool results for cleaner persistence.
Tool handlers return json.dumps({"output": ..., "exit_code": ...}) for the
API, but persisted files should contain readable text, not a JSON blob.
"""
try:
data = json.loads(content)
if isinstance(data, dict) and "output" in data:
return data["output"]
except (json.JSONDecodeError, TypeError):
pass
return content
def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
"""Write content into the sandbox via env.execute(). Returns True on success."""
marker = _heredoc_marker(content)
@ -113,8 +97,8 @@ def maybe_persist_tool_result(
tool_name: str,
tool_use_id: str,
env=None,
config: BudgetConfig = DEFAULT_BUDGET,
threshold: int | float | None = None,
preview_size: int = PREVIEW_SIZE_CHARS,
) -> str:
"""Layer 2: persist oversized result into the sandbox, return preview + path.
@ -127,32 +111,26 @@ def maybe_persist_tool_result(
tool_name: Name of the tool (used for threshold lookup).
tool_use_id: Unique ID for this tool call (used as filename).
env: The active BaseEnvironment instance, or None.
threshold: Override threshold; if None, looked up from registry.
preview_size: Max chars for the inline preview after persistence.
config: BudgetConfig controlling thresholds and preview size.
threshold: Explicit override; takes precedence over config resolution.
Returns:
Original content if small, or <persisted-output> replacement.
"""
if threshold is None:
from tools.registry import registry
threshold = registry.get_max_result_size(tool_name)
effective_threshold = threshold if threshold is not None else config.resolve_threshold(tool_name)
# Infinity means never persist (e.g. read_file)
if threshold == float("inf"):
if effective_threshold == float("inf"):
return content
if len(content) <= threshold:
if len(content) <= effective_threshold:
return content
remote_path = f"{STORAGE_DIR}/{tool_use_id}.txt"
# Write raw output (not JSON wrapper) so read_file returns readable text
file_content = _extract_raw_output(content)
preview, has_more = generate_preview(file_content, max_chars=preview_size)
preview, has_more = generate_preview(content, max_chars=config.preview_size)
# Try writing into the sandbox
if env is not None:
try:
if _write_to_sandbox(file_content, remote_path, env):
if _write_to_sandbox(content, remote_path, env):
logger.info(
"Persisted large tool result: %s (%s, %d chars -> %s)",
tool_name, tool_use_id, len(content), remote_path,
@ -161,7 +139,6 @@ def maybe_persist_tool_result(
except Exception as exc:
logger.warning("Sandbox write failed for %s: %s", tool_use_id, exc)
# Fallback: inline truncation (no sandbox available or write failed)
logger.info(
"Inline-truncating large tool result: %s (%d chars, no sandbox write)",
tool_name, len(content),
@ -176,8 +153,7 @@ def maybe_persist_tool_result(
def enforce_turn_budget(
tool_messages: list[dict],
env=None,
budget: int = MAX_TURN_BUDGET_CHARS,
preview_size: int = PREVIEW_SIZE_CHARS,
config: BudgetConfig = DEFAULT_BUDGET,
) -> list[dict]:
"""Layer 3: enforce aggregate budget across all tool results in a turn.
@ -196,14 +172,13 @@ def enforce_turn_budget(
if PERSISTED_OUTPUT_TAG not in content:
candidates.append((i, size))
if total_size <= budget:
if total_size <= config.turn_budget:
return tool_messages
# Sort candidates by size descending — persist largest first
candidates.sort(key=lambda x: x[1], reverse=True)
for idx, size in candidates:
if total_size <= budget:
if total_size <= config.turn_budget:
break
msg = tool_messages[idx]
content = msg["content"]
@ -214,8 +189,8 @@ def enforce_turn_budget(
tool_name=_BUDGET_TOOL_NAME,
tool_use_id=tool_use_id,
env=env,
config=config,
threshold=0,
preview_size=preview_size,
)
if replacement != content:
total_size -= size