# =========================================================================
# Large tool result handler — save oversized output to temp file
# =========================================================================

# Threshold at which tool results are saved to a file instead of kept inline.
# 100K chars ≈ 25K tokens — generous for any reasonable output but prevents
# catastrophic context explosions.
_LARGE_RESULT_CHARS = 100_000

# How many characters of the original result to include as an inline preview
# so the model has immediate context about what the tool returned.
_LARGE_RESULT_PREVIEW_CHARS = 1_500


def _save_oversized_tool_result(function_name: str, function_result: str) -> str:
    """Replace oversized tool results with a file reference + preview.

    When a tool returns more than ``_LARGE_RESULT_CHARS`` characters, the full
    content is written to a uniquely named file under
    ``HERMES_HOME/cache/tool_responses/`` and the result sent to the model is
    replaced with:
      • a brief head preview (first ``_LARGE_RESULT_PREVIEW_CHARS`` chars)
      • the file path so the model can use ``read_file`` / ``search_files``

    Falls back to destructive truncation if the file write fails; any
    partially written file is removed in that case.

    Args:
        function_name: Name of the tool that produced the result. It is
            sanitized and used as the filename prefix.
        function_result: Raw tool output.

    Returns:
        ``function_result`` itself (same object) when it is within the
        threshold; otherwise the preview + file-reference message, or the
        truncated text with a failure note when saving was not possible.
    """
    original_len = len(function_result)
    if original_len <= _LARGE_RESULT_CHARS:
        return function_result

    # Imported here because it is only needed on the rare oversized path.
    import tempfile

    filepath = None
    try:
        response_dir = os.path.join(get_hermes_home(), "cache", "tool_responses")
        os.makedirs(response_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        # Sanitize tool name for use in filename
        safe_name = re.sub(r"[^\w\-]", "_", function_name)[:40]

        # mkstemp creates the file exclusively, so two results produced in
        # the same microsecond (e.g. concurrent calls to the same tool) can
        # never overwrite each other, which a plain open(path, "w") allowed.
        fd, filepath = tempfile.mkstemp(
            prefix=f"{safe_name}_{timestamp}_",
            suffix=".txt",
            dir=response_dir,
        )
        # newline="" disables newline translation so the saved file holds
        # exactly the characters the tool returned on every platform.
        with os.fdopen(fd, "w", encoding="utf-8", newline="") as f:
            f.write(function_result)

        preview = function_result[:_LARGE_RESULT_PREVIEW_CHARS]
        return (
            f"{preview}\n\n"
            f"[Large tool response: {original_len:,} characters total — "
            f"only the first {_LARGE_RESULT_PREVIEW_CHARS:,} shown above. "
            f"Full output saved to: {filepath}\n"
            f"Use read_file or search_files on that path to access the rest.]"
        )
    except Exception as exc:
        # Do not leave an empty or half-written file behind: the message
        # below tells the model the save failed, so nothing should remain.
        if filepath is not None:
            try:
                os.remove(filepath)
            except OSError:
                pass  # best-effort cleanup; the original error is reported below

        # Fall back to destructive truncation if file write fails
        logger.warning("Failed to save large tool result to file: %s", exc)
        return (
            function_result[:_LARGE_RESULT_CHARS]
            + f"\n\n[Truncated: tool response was {original_len:,} chars, "
            f"exceeding the {_LARGE_RESULT_CHARS:,} char limit. "
            f"File save failed: {exc}]"
        )
@@ -6051,15 +6113,8 @@ class AIAgent: except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") - # Truncate oversized results - MAX_TOOL_RESULT_CHARS = 100_000 - if len(function_result) > MAX_TOOL_RESULT_CHARS: - original_len = len(function_result) - function_result = ( - function_result[:MAX_TOOL_RESULT_CHARS] - + f"\n\n[Truncated: tool response was {original_len:,} chars, " - f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]" - ) + # Save oversized results to file instead of destructive truncation + function_result = _save_oversized_tool_result(name, function_result) # Append tool result message in order tool_msg = { @@ -6332,18 +6387,8 @@ class AIAgent: except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") - # Guard against tools returning absurdly large content that would - # blow up the context window. 100K chars ≈ 25K tokens — generous - # enough for any reasonable tool output but prevents catastrophic - # context explosions (e.g. accidental base64 image dumps). - MAX_TOOL_RESULT_CHARS = 100_000 - if len(function_result) > MAX_TOOL_RESULT_CHARS: - original_len = len(function_result) - function_result = ( - function_result[:MAX_TOOL_RESULT_CHARS] - + f"\n\n[Truncated: tool response was {original_len:,} chars, " - f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]" - ) + # Save oversized results to file instead of destructive truncation + function_result = _save_oversized_tool_result(function_name, function_result) tool_msg = { "role": "tool", diff --git a/tests/test_large_tool_result.py b/tests/test_large_tool_result.py new file mode 100644 index 00000000000..ef51f2fe500 --- /dev/null +++ b/tests/test_large_tool_result.py @@ -0,0 +1,162 @@ +"""Tests for _save_oversized_tool_result() — the large tool response handler. + +When a tool returns more than _LARGE_RESULT_CHARS characters, the full content +is saved to a file and the model receives a preview + file path instead. 
class TestSaveOversizedToolResult:
    """Unit tests for the large tool result handler.

    Every test that exercises the save path redirects ``HERMES_HOME`` into
    pytest's ``tmp_path`` so nothing is written outside the test sandbox.
    """

    # ------------------------------------------------------------------
    # Helpers (not collected by pytest: names do not start with "test")
    # ------------------------------------------------------------------

    @staticmethod
    def _use_hermes_home(tmp_path, monkeypatch) -> str:
        """Create ``tmp_path/.hermes``, export it as HERMES_HOME, return it."""
        home = tmp_path / ".hermes"
        home.mkdir(exist_ok=True)
        monkeypatch.setenv("HERMES_HOME", str(home))
        return str(home)

    @staticmethod
    def _saved_path(result: str) -> str:
        """Extract the saved-file path from a replacement message.

        Fails with a readable assertion (rather than ``AttributeError`` on
        ``None.group``) when the message does not contain a path.
        """
        match = re.search(r"Full output saved to: (.+?)\n", result)
        assert match, f"No file path found in result: {result[:300]}"
        return match.group(1)

    # ------------------------------------------------------------------
    # Pass-through behaviour
    # ------------------------------------------------------------------

    def test_small_result_returned_unchanged(self):
        """Results under the threshold pass through untouched."""
        small = "x" * 1000
        assert _save_oversized_tool_result("terminal", small) is small

    def test_exactly_at_threshold_returned_unchanged(self):
        """Results exactly at the threshold pass through."""
        exact = "y" * _LARGE_RESULT_CHARS
        assert _save_oversized_tool_result("terminal", exact) is exact

    def test_empty_result_passes_through(self):
        """Empty strings are not oversized."""
        assert _save_oversized_tool_result("terminal", "") == ""

    # ------------------------------------------------------------------
    # Save path
    # ------------------------------------------------------------------

    def test_oversized_result_saved_to_file(self, tmp_path, monkeypatch):
        """Results over the threshold are written to a file."""
        self._use_hermes_home(tmp_path, monkeypatch)

        big = "A" * (_LARGE_RESULT_CHARS + 500)
        result = _save_oversized_tool_result("terminal", big)

        # Should contain the preview
        assert result.startswith("A" * _LARGE_RESULT_PREVIEW_CHARS)
        # Should mention the file path
        assert "Full output saved to:" in result
        # Should mention original size
        assert f"{len(big):,}" in result

        # The referenced file must exist and hold the full content
        filepath = self._saved_path(result)
        assert os.path.isfile(filepath)
        with open(filepath, "r", encoding="utf-8") as f:
            saved = f.read()
        assert saved == big
        assert len(saved) == _LARGE_RESULT_CHARS + 500

    def test_file_placed_in_cache_tool_responses(self, tmp_path, monkeypatch):
        """Saved file lives under HERMES_HOME/cache/tool_responses/."""
        hermes_home = self._use_hermes_home(tmp_path, monkeypatch)

        big = "B" * (_LARGE_RESULT_CHARS + 1)
        result = _save_oversized_tool_result("web_search", big)

        filepath = self._saved_path(result)
        expected_dir = os.path.join(hermes_home, "cache", "tool_responses")
        assert filepath.startswith(expected_dir)

    def test_filename_contains_tool_name(self, tmp_path, monkeypatch):
        """The saved filename includes a sanitized version of the tool name."""
        self._use_hermes_home(tmp_path, monkeypatch)

        big = "C" * (_LARGE_RESULT_CHARS + 1)
        result = _save_oversized_tool_result("browser_navigate", big)

        filename = os.path.basename(self._saved_path(result))
        assert filename.startswith("browser_navigate_")
        assert filename.endswith(".txt")

    def test_tool_name_sanitized(self, tmp_path, monkeypatch):
        """Special characters in tool names are replaced in the filename."""
        self._use_hermes_home(tmp_path, monkeypatch)

        big = "D" * (_LARGE_RESULT_CHARS + 1)
        result = _save_oversized_tool_result("mcp:some/weird tool", big)

        filepath = self._saved_path(result)
        filename = os.path.basename(filepath)
        # A basename can never contain "/", so check the replacement itself:
        # ":", "/" and " " must each have become "_".
        assert filename.startswith("mcp_some_weird_tool_")
        assert ":" not in filename
        assert " " not in filename
        # The unsanitized "/" must not have created a nested directory.
        assert os.path.isfile(filepath)

    def test_preview_length_capped(self, tmp_path, monkeypatch):
        """The inline preview is capped at _LARGE_RESULT_PREVIEW_CHARS."""
        self._use_hermes_home(tmp_path, monkeypatch)

        # Head and tail use different characters so any tail content leaking
        # into the preview is detectable.
        head = "H" * _LARGE_RESULT_PREVIEW_CHARS
        big = head + "T" * (_LARGE_RESULT_CHARS + 5000)
        result = _save_oversized_tool_result("terminal", big)

        # The preview section is the content before the "[Large tool response:" marker
        marker_pos = result.index("[Large tool response:")
        preview_section = result[:marker_pos].rstrip()
        assert len(preview_section) == _LARGE_RESULT_PREVIEW_CHARS
        assert preview_section == head

    def test_guidance_message_mentions_tools(self, tmp_path, monkeypatch):
        """The replacement message tells the model how to access the file."""
        self._use_hermes_home(tmp_path, monkeypatch)

        big = "F" * (_LARGE_RESULT_CHARS + 1)
        result = _save_oversized_tool_result("terminal", big)

        assert "read_file" in result
        assert "search_files" in result

    def test_unicode_content_preserved(self, tmp_path, monkeypatch):
        """Unicode content is fully preserved in the saved file."""
        self._use_hermes_home(tmp_path, monkeypatch)

        # Mix of ASCII and multi-byte unicode, repeated until it exceeds
        # the threshold.
        unit = "Hello 世界! 🎉 " * 100
        big = unit * ((_LARGE_RESULT_CHARS // len(unit)) + 1)
        assert len(big) > _LARGE_RESULT_CHARS

        result = _save_oversized_tool_result("terminal", big)
        filepath = self._saved_path(result)

        with open(filepath, "r", encoding="utf-8") as f:
            saved = f.read()
        assert saved == big

    # ------------------------------------------------------------------
    # Fallback path
    # ------------------------------------------------------------------

    def test_fallback_on_write_failure(self, tmp_path, monkeypatch):
        """When file write fails, falls back to destructive truncation."""
        # Point HERMES_HOME to a path that will fail (file, not directory)
        bad_path = str(tmp_path / "not_a_dir.txt")
        with open(bad_path, "w") as f:
            f.write("I'm a file, not a directory")
        monkeypatch.setenv("HERMES_HOME", bad_path)

        big = "E" * (_LARGE_RESULT_CHARS + 50_000)
        result = _save_oversized_tool_result("terminal", big)

        # Exactly the first _LARGE_RESULT_CHARS characters are kept ...
        assert result.startswith("E" * _LARGE_RESULT_CHARS)
        assert not result[_LARGE_RESULT_CHARS:].startswith("E")
        # ... followed by a note that explains both the cut and the failure
        assert "Truncated" in result
        assert "File save failed" in result
        # No file reference may be advertised when nothing was saved
        assert "Full output saved to:" not in result
        # Should be truncated to approximately _LARGE_RESULT_CHARS + error msg
        assert len(result) < len(big)
"""Concurrent path should save oversized results to file.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) + (tmp_path / ".hermes").mkdir() tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) @@ -1244,7 +1249,8 @@ class TestConcurrentToolExecution: assert len(messages) == 2 for m in messages: assert len(m["content"]) < 150_000 - assert "Truncated" in m["content"] + assert "Large tool response" in m["content"] + assert "Full output saved to:" in m["content"] def test_invoke_tool_dispatches_to_handle_function_call(self, agent): """_invoke_tool should route regular tools through handle_function_call."""