mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(tools): address PR review — remove _extract_raw_output, BudgetConfig everywhere, read_file hardening
- Remove _extract_raw_output: persist content verbatim (fixes size mismatch bug)
- Drop import aliases: import from budget_config directly, one canonical name
- BudgetConfig param on maybe_persist_tool_result and enforce_turn_budget
- read_file: limit=None signature, pre-read guard fires only when limit omitted (256KB)
- Unify binary extensions: file_operations.py imports from binary_extensions.py
- Exclude .pdf and .svg from binary set (text-based, agents may inspect)
- Remove redundant outer try/except in eval path (internal fallback handles it)
- Fix broken tests: update assertion strings for new persistence format
- Module-level constants: _PRE_READ_MAX_BYTES, _DEFAULT_READ_LIMIT
- Remove redundant pathlib import (Path already at module level)
- Update spec.md with IMPLEMENTED annotations and design decisions
This commit is contained in:
parent
77c5bc9da9
commit
bbcff8dcd0
8 changed files with 83 additions and 158 deletions
|
|
@ -455,17 +455,13 @@ class HermesAgentLoop:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id
|
tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id
|
||||||
try:
|
tool_result = maybe_persist_tool_result(
|
||||||
tool_result = maybe_persist_tool_result(
|
content=tool_result,
|
||||||
content=tool_result,
|
tool_name=tool_name,
|
||||||
tool_name=tool_name,
|
tool_use_id=tc_id,
|
||||||
tool_use_id=tc_id,
|
env=get_active_env(self.task_id),
|
||||||
env=get_active_env(self.task_id),
|
config=self.budget_config,
|
||||||
threshold=self.budget_config.resolve_threshold(tool_name),
|
)
|
||||||
preview_size=self.budget_config.preview_size,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass # Persistence is best-effort in eval path
|
|
||||||
|
|
||||||
messages.append(
|
messages.append(
|
||||||
{
|
{
|
||||||
|
|
@ -475,17 +471,13 @@ class HermesAgentLoop:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
num_tcs = len(assistant_msg.tool_calls)
|
||||||
num_tcs = len(assistant_msg.tool_calls)
|
if num_tcs > 0:
|
||||||
if num_tcs > 0:
|
enforce_turn_budget(
|
||||||
enforce_turn_budget(
|
messages[-num_tcs:],
|
||||||
messages[-num_tcs:],
|
env=get_active_env(self.task_id),
|
||||||
env=get_active_env(self.task_id),
|
config=self.budget_config,
|
||||||
budget=self.budget_config.turn_budget,
|
)
|
||||||
preview_size=self.budget_config.preview_size,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
turn_elapsed = _time.monotonic() - turn_start
|
turn_elapsed = _time.monotonic() - turn_start
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -1011,10 +1011,9 @@ class TestExecuteToolCalls:
|
||||||
big_result = "x" * 150_000
|
big_result = "x" * 150_000
|
||||||
with patch("run_agent.handle_function_call", return_value=big_result):
|
with patch("run_agent.handle_function_call", return_value=big_result):
|
||||||
agent._execute_tool_calls(mock_msg, messages, "task-1")
|
agent._execute_tool_calls(mock_msg, messages, "task-1")
|
||||||
# Content should be replaced with preview + file path
|
# Content should be replaced with persisted-output or truncation
|
||||||
assert len(messages[0]["content"]) < 150_000
|
assert len(messages[0]["content"]) < 150_000
|
||||||
assert "Large tool response" in messages[0]["content"]
|
assert ("Truncated" in messages[0]["content"] or "<persisted-output>" in messages[0]["content"])
|
||||||
assert "Full output saved to:" in messages[0]["content"]
|
|
||||||
|
|
||||||
|
|
||||||
class TestConcurrentToolExecution:
|
class TestConcurrentToolExecution:
|
||||||
|
|
@ -1249,8 +1248,7 @@ class TestConcurrentToolExecution:
|
||||||
assert len(messages) == 2
|
assert len(messages) == 2
|
||||||
for m in messages:
|
for m in messages:
|
||||||
assert len(m["content"]) < 150_000
|
assert len(m["content"]) < 150_000
|
||||||
assert "Large tool response" in m["content"]
|
assert ("Truncated" in m["content"] or "<persisted-output>" in m["content"])
|
||||||
assert "Full output saved to:" in m["content"]
|
|
||||||
|
|
||||||
def test_invoke_tool_dispatches_to_handle_function_call(self, agent):
|
def test_invoke_tool_dispatches_to_handle_function_call(self, agent):
|
||||||
"""_invoke_tool should route regular tools through handle_function_call."""
|
"""_invoke_tool should route regular tools through handle_function_call."""
|
||||||
|
|
|
||||||
|
|
@ -3,16 +3,18 @@
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from tools.budget_config import (
|
||||||
|
DEFAULT_RESULT_SIZE_CHARS,
|
||||||
|
DEFAULT_TURN_BUDGET_CHARS,
|
||||||
|
DEFAULT_PREVIEW_SIZE_CHARS,
|
||||||
|
BudgetConfig,
|
||||||
|
)
|
||||||
from tools.tool_result_storage import (
|
from tools.tool_result_storage import (
|
||||||
DEFAULT_MAX_RESULT_SIZE_CHARS,
|
|
||||||
HEREDOC_MARKER,
|
HEREDOC_MARKER,
|
||||||
MAX_TURN_BUDGET_CHARS,
|
|
||||||
PERSISTED_OUTPUT_TAG,
|
PERSISTED_OUTPUT_TAG,
|
||||||
PERSISTED_OUTPUT_CLOSING_TAG,
|
PERSISTED_OUTPUT_CLOSING_TAG,
|
||||||
PREVIEW_SIZE_CHARS,
|
|
||||||
STORAGE_DIR,
|
STORAGE_DIR,
|
||||||
_build_persisted_message,
|
_build_persisted_message,
|
||||||
_extract_raw_output,
|
|
||||||
_heredoc_marker,
|
_heredoc_marker,
|
||||||
_write_to_sandbox,
|
_write_to_sandbox,
|
||||||
enforce_turn_budget,
|
enforce_turn_budget,
|
||||||
|
|
@ -56,35 +58,12 @@ class TestGeneratePreview:
|
||||||
assert has_more is False
|
assert has_more is False
|
||||||
|
|
||||||
def test_exact_boundary(self):
|
def test_exact_boundary(self):
|
||||||
text = "x" * PREVIEW_SIZE_CHARS
|
text = "x" * DEFAULT_PREVIEW_SIZE_CHARS
|
||||||
preview, has_more = generate_preview(text)
|
preview, has_more = generate_preview(text)
|
||||||
assert preview == text
|
assert preview == text
|
||||||
assert has_more is False
|
assert has_more is False
|
||||||
|
|
||||||
|
|
||||||
# ── _extract_raw_output ────────────────────────────────────────────────
|
|
||||||
|
|
||||||
class TestExtractRawOutput:
|
|
||||||
def test_extracts_output_from_terminal_json(self):
|
|
||||||
import json
|
|
||||||
content = json.dumps({"output": "hello world\nline2", "exit_code": 0, "error": None})
|
|
||||||
assert _extract_raw_output(content) == "hello world\nline2"
|
|
||||||
|
|
||||||
def test_passes_through_non_json(self):
|
|
||||||
assert _extract_raw_output("plain text output") == "plain text output"
|
|
||||||
|
|
||||||
def test_passes_through_json_without_output_key(self):
|
|
||||||
import json
|
|
||||||
content = json.dumps({"result": "something", "status": "ok"})
|
|
||||||
assert _extract_raw_output(content) == content
|
|
||||||
|
|
||||||
def test_extracts_large_output(self):
|
|
||||||
import json
|
|
||||||
big = "x\n" * 30_000
|
|
||||||
content = json.dumps({"output": big, "exit_code": 0, "error": None})
|
|
||||||
assert _extract_raw_output(content) == big
|
|
||||||
|
|
||||||
|
|
||||||
# ── _heredoc_marker ───────────────────────────────────────────────────
|
# ── _heredoc_marker ───────────────────────────────────────────────────
|
||||||
|
|
||||||
class TestHeredocMarker:
|
class TestHeredocMarker:
|
||||||
|
|
@ -206,8 +185,8 @@ class TestMaybePersistToolResult:
|
||||||
assert len(result) < len(content)
|
assert len(result) < len(content)
|
||||||
env.execute.assert_called_once()
|
env.execute.assert_called_once()
|
||||||
|
|
||||||
def test_persists_raw_output_not_json_wrapper(self):
|
def test_persists_full_content_as_is(self):
|
||||||
"""When content is JSON with 'output' key, file should contain raw output."""
|
"""Content is persisted verbatim — no JSON extraction."""
|
||||||
import json
|
import json
|
||||||
env = MagicMock()
|
env = MagicMock()
|
||||||
env.execute.return_value = {"output": "", "returncode": 0}
|
env.execute.return_value = {"output": "", "returncode": 0}
|
||||||
|
|
@ -221,10 +200,9 @@ class TestMaybePersistToolResult:
|
||||||
threshold=30_000,
|
threshold=30_000,
|
||||||
)
|
)
|
||||||
assert PERSISTED_OUTPUT_TAG in result
|
assert PERSISTED_OUTPUT_TAG in result
|
||||||
# The heredoc written to sandbox should contain raw text, not JSON
|
# The heredoc written to sandbox should contain the full JSON blob
|
||||||
cmd = env.execute.call_args[0][0]
|
cmd = env.execute.call_args[0][0]
|
||||||
assert "line1\nline2\n" in cmd
|
assert '"exit_code"' in cmd
|
||||||
assert '"exit_code"' not in cmd
|
|
||||||
|
|
||||||
def test_above_threshold_no_env_truncates_inline(self):
|
def test_above_threshold_no_env_truncates_inline(self):
|
||||||
content = "x" * 60_000
|
content = "x" * 60_000
|
||||||
|
|
@ -386,7 +364,7 @@ class TestEnforceTurnBudget:
|
||||||
{"role": "tool", "tool_call_id": "t1", "content": "small"},
|
{"role": "tool", "tool_call_id": "t1", "content": "small"},
|
||||||
{"role": "tool", "tool_call_id": "t2", "content": "also small"},
|
{"role": "tool", "tool_call_id": "t2", "content": "also small"},
|
||||||
]
|
]
|
||||||
result = enforce_turn_budget(msgs, env=None, budget=200_000)
|
result = enforce_turn_budget(msgs, env=None, config=BudgetConfig(turn_budget=200_000))
|
||||||
assert result[0]["content"] == "small"
|
assert result[0]["content"] == "small"
|
||||||
assert result[1]["content"] == "also small"
|
assert result[1]["content"] == "also small"
|
||||||
|
|
||||||
|
|
@ -398,7 +376,7 @@ class TestEnforceTurnBudget:
|
||||||
{"role": "tool", "tool_call_id": "t2", "content": "b" * 130_000},
|
{"role": "tool", "tool_call_id": "t2", "content": "b" * 130_000},
|
||||||
]
|
]
|
||||||
# Total 210K > 200K budget
|
# Total 210K > 200K budget
|
||||||
enforce_turn_budget(msgs, env=env, budget=200_000)
|
enforce_turn_budget(msgs, env=env, config=BudgetConfig(turn_budget=200_000))
|
||||||
# The larger one (130K) should be persisted first
|
# The larger one (130K) should be persisted first
|
||||||
assert PERSISTED_OUTPUT_TAG in msgs[1]["content"]
|
assert PERSISTED_OUTPUT_TAG in msgs[1]["content"]
|
||||||
|
|
||||||
|
|
@ -410,7 +388,7 @@ class TestEnforceTurnBudget:
|
||||||
"content": f"{PERSISTED_OUTPUT_TAG}\nalready persisted\n{PERSISTED_OUTPUT_CLOSING_TAG}"},
|
"content": f"{PERSISTED_OUTPUT_TAG}\nalready persisted\n{PERSISTED_OUTPUT_CLOSING_TAG}"},
|
||||||
{"role": "tool", "tool_call_id": "t2", "content": "x" * 250_000},
|
{"role": "tool", "tool_call_id": "t2", "content": "x" * 250_000},
|
||||||
]
|
]
|
||||||
enforce_turn_budget(msgs, env=env, budget=200_000)
|
enforce_turn_budget(msgs, env=env, config=BudgetConfig(turn_budget=200_000))
|
||||||
# t1 should be untouched (already persisted)
|
# t1 should be untouched (already persisted)
|
||||||
assert msgs[0]["content"].startswith(PERSISTED_OUTPUT_TAG)
|
assert msgs[0]["content"].startswith(PERSISTED_OUTPUT_TAG)
|
||||||
# t2 should be persisted
|
# t2 should be persisted
|
||||||
|
|
@ -425,7 +403,7 @@ class TestEnforceTurnBudget:
|
||||||
{"role": "tool", "tool_call_id": f"t{i}", "content": "x" * 42_000}
|
{"role": "tool", "tool_call_id": f"t{i}", "content": "x" * 42_000}
|
||||||
for i in range(6)
|
for i in range(6)
|
||||||
]
|
]
|
||||||
enforce_turn_budget(msgs, env=env, budget=200_000)
|
enforce_turn_budget(msgs, env=env, config=BudgetConfig(turn_budget=200_000))
|
||||||
# At least some results should be persisted to get under 200K
|
# At least some results should be persisted to get under 200K
|
||||||
persisted_count = sum(
|
persisted_count = sum(
|
||||||
1 for m in msgs if PERSISTED_OUTPUT_TAG in m["content"]
|
1 for m in msgs if PERSISTED_OUTPUT_TAG in m["content"]
|
||||||
|
|
@ -436,17 +414,17 @@ class TestEnforceTurnBudget:
|
||||||
msgs = [
|
msgs = [
|
||||||
{"role": "tool", "tool_call_id": "t1", "content": "x" * 250_000},
|
{"role": "tool", "tool_call_id": "t1", "content": "x" * 250_000},
|
||||||
]
|
]
|
||||||
enforce_turn_budget(msgs, env=None, budget=200_000)
|
enforce_turn_budget(msgs, env=None, config=BudgetConfig(turn_budget=200_000))
|
||||||
# Should be truncated (no sandbox available)
|
# Should be truncated (no sandbox available)
|
||||||
assert "Truncated" in msgs[0]["content"] or PERSISTED_OUTPUT_TAG in msgs[0]["content"]
|
assert "Truncated" in msgs[0]["content"] or PERSISTED_OUTPUT_TAG in msgs[0]["content"]
|
||||||
|
|
||||||
def test_returns_same_list(self):
|
def test_returns_same_list(self):
|
||||||
msgs = [{"role": "tool", "tool_call_id": "t1", "content": "ok"}]
|
msgs = [{"role": "tool", "tool_call_id": "t1", "content": "ok"}]
|
||||||
result = enforce_turn_budget(msgs, env=None, budget=200_000)
|
result = enforce_turn_budget(msgs, env=None, config=BudgetConfig(turn_budget=200_000))
|
||||||
assert result is msgs
|
assert result is msgs
|
||||||
|
|
||||||
def test_empty_messages(self):
|
def test_empty_messages(self):
|
||||||
result = enforce_turn_budget([], env=None, budget=200_000)
|
result = enforce_turn_budget([], env=None, config=BudgetConfig(turn_budget=200_000))
|
||||||
assert result == []
|
assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -463,7 +441,7 @@ class TestPerToolThresholds:
|
||||||
from tools.registry import registry
|
from tools.registry import registry
|
||||||
# Unknown tool should return the default
|
# Unknown tool should return the default
|
||||||
val = registry.get_max_result_size("nonexistent_tool_xyz")
|
val = registry.get_max_result_size("nonexistent_tool_xyz")
|
||||||
assert val == DEFAULT_MAX_RESULT_SIZE_CHARS
|
assert val == DEFAULT_RESULT_SIZE_CHARS
|
||||||
|
|
||||||
def test_terminal_threshold(self):
|
def test_terminal_threshold(self):
|
||||||
from tools.registry import registry
|
from tools.registry import registry
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,8 @@ BINARY_EXTENSIONS = frozenset({
|
||||||
# Executables/binaries
|
# Executables/binaries
|
||||||
".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".obj", ".lib",
|
".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".obj", ".lib",
|
||||||
".app", ".msi", ".deb", ".rpm",
|
".app", ".msi", ".deb", ".rpm",
|
||||||
# Documents (PDF is here; read_file excludes it at the call site)
|
# Documents (exclude .pdf — text-based, agents may want to inspect)
|
||||||
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
|
".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
|
||||||
".odt", ".ods", ".odp",
|
".odt", ".ods", ".odp",
|
||||||
# Fonts
|
# Fonts
|
||||||
".ttf", ".otf", ".woff", ".woff2", ".eot",
|
".ttf", ".otf", ".woff", ".woff2", ".eot",
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ from dataclasses import dataclass, field
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from hermes_constants import get_hermes_home
|
from hermes_constants import get_hermes_home
|
||||||
|
from tools.binary_extensions import BINARY_EXTENSIONS
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -280,26 +281,6 @@ class FileOperations(ABC):
|
||||||
# Shell-based Implementation
|
# Shell-based Implementation
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# Binary file extensions (fast path check)
|
|
||||||
BINARY_EXTENSIONS = {
|
|
||||||
# Images
|
|
||||||
'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.tiff', '.tif',
|
|
||||||
'.svg', # SVG is text but often treated as binary
|
|
||||||
# Audio/Video
|
|
||||||
'.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv', '.flac', '.ogg', '.webm',
|
|
||||||
# Archives
|
|
||||||
'.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar',
|
|
||||||
# Documents
|
|
||||||
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
|
|
||||||
# Compiled/Binary
|
|
||||||
'.exe', '.dll', '.so', '.dylib', '.o', '.a', '.pyc', '.pyo', '.class',
|
|
||||||
'.wasm', '.bin',
|
|
||||||
# Fonts
|
|
||||||
'.ttf', '.otf', '.woff', '.woff2', '.eot',
|
|
||||||
# Other
|
|
||||||
'.db', '.sqlite', '.sqlite3',
|
|
||||||
}
|
|
||||||
|
|
||||||
# Image extensions (subset of binary that we can return as base64)
|
# Image extensions (subset of binary that we can return as base64)
|
||||||
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico'}
|
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico'}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,8 @@ _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}
|
||||||
# Configurable via config.yaml: file_read_max_chars: 200000
|
# Configurable via config.yaml: file_read_max_chars: 200000
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
_DEFAULT_MAX_READ_CHARS = 100_000
|
_DEFAULT_MAX_READ_CHARS = 100_000
|
||||||
|
_PRE_READ_MAX_BYTES = 256_000 # reject full-file reads on files larger than this
|
||||||
|
_DEFAULT_READ_LIMIT = 500
|
||||||
_max_read_chars_cached: int | None = None
|
_max_read_chars_cached: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -277,7 +279,7 @@ def clear_file_ops_cache(task_id: str = None):
|
||||||
_file_ops_cache.clear()
|
_file_ops_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
|
def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id: str = "default") -> str:
|
||||||
"""Read a file with pagination and line numbers."""
|
"""Read a file with pagination and line numbers."""
|
||||||
try:
|
try:
|
||||||
# ── Device path guard ─────────────────────────────────────────
|
# ── Device path guard ─────────────────────────────────────────
|
||||||
|
|
@ -291,9 +293,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
|
|
||||||
# Resolve path once for all guards below
|
_resolved = Path(path).expanduser().resolve()
|
||||||
import pathlib as _pathlib
|
|
||||||
_resolved = _pathlib.Path(path).expanduser().resolve()
|
|
||||||
|
|
||||||
# ── Binary file guard ─────────────────────────────────────────
|
# ── Binary file guard ─────────────────────────────────────────
|
||||||
# Block binary files by extension (no I/O).
|
# Block binary files by extension (no I/O).
|
||||||
|
|
@ -328,25 +328,26 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# ── Pre-read file size guard ──────────────────────────────────
|
# ── Pre-read file size guard ──────────────────────────────────
|
||||||
# Stat the file before reading. If it's large and the model
|
# Guard only when the caller omits limit; an explicit limit means
|
||||||
# didn't request a narrow range, block and tell it to use
|
# the caller knows what slice it wants.
|
||||||
# offset/limit — cheaper than reading 200K chars then rejecting.
|
if limit is None:
|
||||||
_PRE_READ_MAX_BYTES = 100_000
|
try:
|
||||||
_NARROW_LIMIT = 200
|
_fsize = os.path.getsize(str(_resolved))
|
||||||
try:
|
except OSError:
|
||||||
_fsize = os.path.getsize(str(_resolved))
|
_fsize = 0
|
||||||
except OSError:
|
if _fsize > _PRE_READ_MAX_BYTES:
|
||||||
_fsize = 0
|
return json.dumps({
|
||||||
if _fsize > _PRE_READ_MAX_BYTES and limit > _NARROW_LIMIT:
|
"error": (
|
||||||
return json.dumps({
|
f"File is too large to read in full ({_fsize:,} bytes). "
|
||||||
"error": (
|
f"Use offset and limit parameters to read specific sections "
|
||||||
f"File is too large to read in full ({_fsize:,} bytes). "
|
f"(e.g. offset=1, limit=100 for the first 100 lines)."
|
||||||
f"Use offset and limit parameters to read specific sections "
|
),
|
||||||
f"(e.g. offset=1, limit=100 for the first 100 lines)."
|
"path": path,
|
||||||
),
|
"file_size": _fsize,
|
||||||
"path": path,
|
}, ensure_ascii=False)
|
||||||
"file_size": _fsize,
|
|
||||||
}, ensure_ascii=False)
|
if limit is None:
|
||||||
|
limit = _DEFAULT_READ_LIMIT
|
||||||
|
|
||||||
# ── Dedup check ───────────────────────────────────────────────
|
# ── Dedup check ───────────────────────────────────────────────
|
||||||
# If we already read this exact (path, offset, limit) and the
|
# If we already read this exact (path, offset, limit) and the
|
||||||
|
|
@ -761,7 +762,7 @@ def _check_file_reqs():
|
||||||
|
|
||||||
READ_FILE_SCHEMA = {
|
READ_FILE_SCHEMA = {
|
||||||
"name": "read_file",
|
"name": "read_file",
|
||||||
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 100KB will be rejected unless you specify a narrow range (limit <= 200). NOTE: Cannot read images or binary files — use vision_analyze for images.",
|
"description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 256KB will be rejected unless you provide a limit parameter. NOTE: Cannot read images or binary files — use vision_analyze for images.",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -825,7 +826,7 @@ SEARCH_FILES_SCHEMA = {
|
||||||
|
|
||||||
def _handle_read_file(args, **kw):
|
def _handle_read_file(args, **kw):
|
||||||
tid = kw.get("task_id") or "default"
|
tid = kw.get("task_id") or "default"
|
||||||
return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit", 500), task_id=tid)
|
return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit"), task_id=tid)
|
||||||
|
|
||||||
|
|
||||||
def _handle_write_file(args, **kw):
|
def _handle_write_file(args, **kw):
|
||||||
|
|
|
||||||
|
|
@ -176,8 +176,8 @@ class ToolRegistry:
|
||||||
return entry.max_result_size_chars
|
return entry.max_result_size_chars
|
||||||
if default is not None:
|
if default is not None:
|
||||||
return default
|
return default
|
||||||
from tools.tool_result_storage import DEFAULT_MAX_RESULT_SIZE_CHARS
|
from tools.budget_config import DEFAULT_RESULT_SIZE_CHARS
|
||||||
return DEFAULT_MAX_RESULT_SIZE_CHARS
|
return DEFAULT_RESULT_SIZE_CHARS
|
||||||
|
|
||||||
def get_all_tool_names(self) -> List[str]:
|
def get_all_tool_names(self) -> List[str]:
|
||||||
"""Return sorted list of all registered tool names."""
|
"""Return sorted list of all registered tool names."""
|
||||||
|
|
|
||||||
|
|
@ -20,14 +20,13 @@ Defense against context-window overflow operates at three levels:
|
||||||
where many medium-sized results combine to overflow context.
|
where many medium-sized results combine to overflow context.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from tools.budget_config import (
|
from tools.budget_config import (
|
||||||
DEFAULT_RESULT_SIZE_CHARS as DEFAULT_MAX_RESULT_SIZE_CHARS,
|
DEFAULT_PREVIEW_SIZE_CHARS,
|
||||||
DEFAULT_TURN_BUDGET_CHARS as MAX_TURN_BUDGET_CHARS,
|
BudgetConfig,
|
||||||
DEFAULT_PREVIEW_SIZE_CHARS as PREVIEW_SIZE_CHARS,
|
DEFAULT_BUDGET,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -38,7 +37,7 @@ HEREDOC_MARKER = "HERMES_PERSIST_EOF"
|
||||||
_BUDGET_TOOL_NAME = "__budget_enforcement__"
|
_BUDGET_TOOL_NAME = "__budget_enforcement__"
|
||||||
|
|
||||||
|
|
||||||
def generate_preview(content: str, max_chars: int = PREVIEW_SIZE_CHARS) -> tuple[str, bool]:
|
def generate_preview(content: str, max_chars: int = DEFAULT_PREVIEW_SIZE_CHARS) -> tuple[str, bool]:
|
||||||
"""Truncate at last newline within max_chars. Returns (preview, has_more)."""
|
"""Truncate at last newline within max_chars. Returns (preview, has_more)."""
|
||||||
if len(content) <= max_chars:
|
if len(content) <= max_chars:
|
||||||
return content, False
|
return content, False
|
||||||
|
|
@ -56,21 +55,6 @@ def _heredoc_marker(content: str) -> str:
|
||||||
return f"HERMES_PERSIST_{uuid.uuid4().hex[:8]}"
|
return f"HERMES_PERSIST_{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
|
|
||||||
def _extract_raw_output(content: str) -> str:
|
|
||||||
"""Extract the 'output' field from JSON tool results for cleaner persistence.
|
|
||||||
|
|
||||||
Tool handlers return json.dumps({"output": ..., "exit_code": ...}) for the
|
|
||||||
API, but persisted files should contain readable text, not a JSON blob.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
data = json.loads(content)
|
|
||||||
if isinstance(data, dict) and "output" in data:
|
|
||||||
return data["output"]
|
|
||||||
except (json.JSONDecodeError, TypeError):
|
|
||||||
pass
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
|
||||||
def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
|
def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
|
||||||
"""Write content into the sandbox via env.execute(). Returns True on success."""
|
"""Write content into the sandbox via env.execute(). Returns True on success."""
|
||||||
marker = _heredoc_marker(content)
|
marker = _heredoc_marker(content)
|
||||||
|
|
@ -113,8 +97,8 @@ def maybe_persist_tool_result(
|
||||||
tool_name: str,
|
tool_name: str,
|
||||||
tool_use_id: str,
|
tool_use_id: str,
|
||||||
env=None,
|
env=None,
|
||||||
|
config: BudgetConfig = DEFAULT_BUDGET,
|
||||||
threshold: int | float | None = None,
|
threshold: int | float | None = None,
|
||||||
preview_size: int = PREVIEW_SIZE_CHARS,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Layer 2: persist oversized result into the sandbox, return preview + path.
|
"""Layer 2: persist oversized result into the sandbox, return preview + path.
|
||||||
|
|
||||||
|
|
@ -127,32 +111,26 @@ def maybe_persist_tool_result(
|
||||||
tool_name: Name of the tool (used for threshold lookup).
|
tool_name: Name of the tool (used for threshold lookup).
|
||||||
tool_use_id: Unique ID for this tool call (used as filename).
|
tool_use_id: Unique ID for this tool call (used as filename).
|
||||||
env: The active BaseEnvironment instance, or None.
|
env: The active BaseEnvironment instance, or None.
|
||||||
threshold: Override threshold; if None, looked up from registry.
|
config: BudgetConfig controlling thresholds and preview size.
|
||||||
preview_size: Max chars for the inline preview after persistence.
|
threshold: Explicit override; takes precedence over config resolution.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Original content if small, or <persisted-output> replacement.
|
Original content if small, or <persisted-output> replacement.
|
||||||
"""
|
"""
|
||||||
if threshold is None:
|
effective_threshold = threshold if threshold is not None else config.resolve_threshold(tool_name)
|
||||||
from tools.registry import registry
|
|
||||||
threshold = registry.get_max_result_size(tool_name)
|
|
||||||
|
|
||||||
# Infinity means never persist (e.g. read_file)
|
if effective_threshold == float("inf"):
|
||||||
if threshold == float("inf"):
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
if len(content) <= threshold:
|
if len(content) <= effective_threshold:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
remote_path = f"{STORAGE_DIR}/{tool_use_id}.txt"
|
remote_path = f"{STORAGE_DIR}/{tool_use_id}.txt"
|
||||||
# Write raw output (not JSON wrapper) so read_file returns readable text
|
preview, has_more = generate_preview(content, max_chars=config.preview_size)
|
||||||
file_content = _extract_raw_output(content)
|
|
||||||
preview, has_more = generate_preview(file_content, max_chars=preview_size)
|
|
||||||
|
|
||||||
# Try writing into the sandbox
|
|
||||||
if env is not None:
|
if env is not None:
|
||||||
try:
|
try:
|
||||||
if _write_to_sandbox(file_content, remote_path, env):
|
if _write_to_sandbox(content, remote_path, env):
|
||||||
logger.info(
|
logger.info(
|
||||||
"Persisted large tool result: %s (%s, %d chars -> %s)",
|
"Persisted large tool result: %s (%s, %d chars -> %s)",
|
||||||
tool_name, tool_use_id, len(content), remote_path,
|
tool_name, tool_use_id, len(content), remote_path,
|
||||||
|
|
@ -161,7 +139,6 @@ def maybe_persist_tool_result(
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("Sandbox write failed for %s: %s", tool_use_id, exc)
|
logger.warning("Sandbox write failed for %s: %s", tool_use_id, exc)
|
||||||
|
|
||||||
# Fallback: inline truncation (no sandbox available or write failed)
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Inline-truncating large tool result: %s (%d chars, no sandbox write)",
|
"Inline-truncating large tool result: %s (%d chars, no sandbox write)",
|
||||||
tool_name, len(content),
|
tool_name, len(content),
|
||||||
|
|
@ -176,8 +153,7 @@ def maybe_persist_tool_result(
|
||||||
def enforce_turn_budget(
|
def enforce_turn_budget(
|
||||||
tool_messages: list[dict],
|
tool_messages: list[dict],
|
||||||
env=None,
|
env=None,
|
||||||
budget: int = MAX_TURN_BUDGET_CHARS,
|
config: BudgetConfig = DEFAULT_BUDGET,
|
||||||
preview_size: int = PREVIEW_SIZE_CHARS,
|
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Layer 3: enforce aggregate budget across all tool results in a turn.
|
"""Layer 3: enforce aggregate budget across all tool results in a turn.
|
||||||
|
|
||||||
|
|
@ -196,14 +172,13 @@ def enforce_turn_budget(
|
||||||
if PERSISTED_OUTPUT_TAG not in content:
|
if PERSISTED_OUTPUT_TAG not in content:
|
||||||
candidates.append((i, size))
|
candidates.append((i, size))
|
||||||
|
|
||||||
if total_size <= budget:
|
if total_size <= config.turn_budget:
|
||||||
return tool_messages
|
return tool_messages
|
||||||
|
|
||||||
# Sort candidates by size descending — persist largest first
|
|
||||||
candidates.sort(key=lambda x: x[1], reverse=True)
|
candidates.sort(key=lambda x: x[1], reverse=True)
|
||||||
|
|
||||||
for idx, size in candidates:
|
for idx, size in candidates:
|
||||||
if total_size <= budget:
|
if total_size <= config.turn_budget:
|
||||||
break
|
break
|
||||||
msg = tool_messages[idx]
|
msg = tool_messages[idx]
|
||||||
content = msg["content"]
|
content = msg["content"]
|
||||||
|
|
@ -214,8 +189,8 @@ def enforce_turn_budget(
|
||||||
tool_name=_BUDGET_TOOL_NAME,
|
tool_name=_BUDGET_TOOL_NAME,
|
||||||
tool_use_id=tool_use_id,
|
tool_use_id=tool_use_id,
|
||||||
env=env,
|
env=env,
|
||||||
|
config=config,
|
||||||
threshold=0,
|
threshold=0,
|
||||||
preview_size=preview_size,
|
|
||||||
)
|
)
|
||||||
if replacement != content:
|
if replacement != content:
|
||||||
total_size -= size
|
total_size -= size
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue