"""Tool result persistence -- preserves large outputs instead of truncating.

Defense against context-window overflow operates at three levels:

1. **Per-tool output cap** (inside each tool): Tools like search_files
   pre-truncate their own output before returning. This is the first line
   of defense and the only one the tool author controls.

2. **Per-result persistence** (maybe_persist_tool_result): After a tool
   returns, if its output exceeds the tool's registered threshold
   (registry.get_max_result_size; resolved in this module through
   ``config.resolve_threshold``), the full output is written INTO THE
   SANDBOX temp dir (for example /tmp/hermes-results/{tool_use_id}.txt on
   standard Linux, or $TMPDIR/hermes-results/{tool_use_id}.txt on Termux)
   via env.execute(). The in-context content is replaced with a preview +
   file path reference. The model can read_file to access the full output
   on any backend.

3. **Per-turn aggregate budget** (enforce_turn_budget): After all tool
   results in a single assistant turn are collected, if the total exceeds
   MAX_TURN_BUDGET_CHARS (200K), the largest non-persisted results are
   spilled to disk until the aggregate is under budget. This catches cases
   where many medium-sized results combine to overflow context.
"""

import logging
import os
import shlex
import uuid

from tools.budget_config import (
    DEFAULT_PREVIEW_SIZE_CHARS,
    BudgetConfig,
    DEFAULT_BUDGET,
)

logger = logging.getLogger(__name__)

# Markers wrapped around a persisted-result replacement message.  They must be
# non-empty: enforce_turn_budget() uses the opening tag to recognise results
# that were already persisted, and an empty string is a substring of every
# string, which would make every result look "already persisted".
PERSISTED_OUTPUT_TAG = "<persisted-output>"
PERSISTED_OUTPUT_CLOSING_TAG = "</persisted-output>"

# Fallback storage directory when the environment cannot report a temp dir.
STORAGE_DIR = "/tmp/hermes-results"

# Preferred heredoc delimiter used when writing content through the shell.
HEREDOC_MARKER = "HERMES_PERSIST_EOF"

# Tool name passed to maybe_persist_tool_result() by the turn-budget layer.
_BUDGET_TOOL_NAME = "__budget_enforcement__"


def _resolve_storage_dir(env) -> str:
    """Return the best temp-backed storage dir for this environment.

    Uses ``env.get_temp_dir()`` when the environment exposes such a callable
    and it returns a non-empty path; otherwise falls back to ``STORAGE_DIR``.

    Args:
        env: The active environment object, or None.

    Returns:
        Directory path (no trailing slash) ending in ``hermes-results``.
    """
    get_temp_dir = getattr(env, "get_temp_dir", None) if env is not None else None
    if not callable(get_temp_dir):
        return STORAGE_DIR

    try:
        temp_dir = get_temp_dir()
        if isinstance(temp_dir, os.PathLike):
            temp_dir = os.fspath(temp_dir)
    except Exception as exc:  # best-effort lookup: never fail persistence here
        logger.debug("Could not resolve env temp dir: %s", exc)
        return STORAGE_DIR

    # Anything that is not a usable string path (None, "", bytes, a mock, ...)
    # falls back to the default rather than producing a garbage path.
    if not isinstance(temp_dir, str) or not temp_dir:
        return STORAGE_DIR

    # Stripping all trailing slashes maps "/" to "" so the result is
    # "/hermes-results" rather than "//hermes-results".
    return f"{temp_dir.rstrip('/')}/hermes-results"


def generate_preview(content: str, max_chars: int = DEFAULT_PREVIEW_SIZE_CHARS) -> tuple[str, bool]:
    """Truncate at last newline within max_chars. Returns (preview, has_more).

    The cut is moved back to the last newline only when that newline lies in
    the second half of the window, so a preview is never shrunk by more than
    half just to end on a line boundary.

    Args:
        content: Full text to preview.
        max_chars: Maximum preview length; negative values are treated as 0.

    Returns:
        ``(preview, has_more)`` where ``has_more`` is True when content was cut.
    """
    # A negative limit would otherwise slice from the end of the string and
    # return almost the whole content.
    max_chars = max(max_chars, 0)
    if len(content) <= max_chars:
        return content, False

    truncated = content[:max_chars]
    last_nl = truncated.rfind("\n")
    if last_nl > max_chars // 2:
        truncated = truncated[:last_nl + 1]
    return truncated, True


def _heredoc_marker(content: str) -> str:
    """Return a heredoc delimiter that doesn't collide with content.

    Prefers ``HEREDOC_MARKER``; if the content contains it, random suffixes
    are tried until the delimiter does not occur anywhere in the content.
    """
    marker = HEREDOC_MARKER
    while marker in content:
        marker = f"HERMES_PERSIST_{uuid.uuid4().hex[:8]}"
    return marker


def _safe_file_stem(tool_use_id) -> str:
    """Reduce a tool call id to a single safe filename component.

    Characters other than alphanumerics, ``-``, ``_`` and ``.`` are replaced
    with ``_`` so an id containing ``/`` cannot escape the storage directory.
    Typical ids (letters, digits, ``_``, ``-``) are returned unchanged.
    """
    stem = "".join(
        ch if (ch.isalnum() or ch in "-_.") else "_"
        for ch in str(tool_use_id)
    )
    return stem or "result"


def _is_persisted(content: str) -> bool:
    """Return True if content already carries the persisted-output marker."""
    # Guard against an empty tag, which would match every string.
    return bool(PERSISTED_OUTPUT_TAG) and PERSISTED_OUTPUT_TAG in content


def _write_to_sandbox(content: str, remote_path: str, env) -> bool:
    """Write content into the sandbox via env.execute().

    Builds a ``mkdir -p <dir> && cat > <file> << '<marker>'`` shell command
    with the content as a quoted heredoc (no shell expansion of the content).
    A newline is always emitted after the content, before the delimiter.

    Args:
        content: Text to store.
        remote_path: Destination file path inside the sandbox.
        env: Environment object exposing ``execute(cmd, timeout=...)`` that
            returns a mapping with a ``returncode`` entry.

    Returns:
        True on success (``returncode`` of 0), False otherwise.
    """
    marker = _heredoc_marker(content)
    storage_dir = os.path.dirname(remote_path)
    cmd = (
        f"mkdir -p {shlex.quote(storage_dir)} && cat > {shlex.quote(remote_path)} << '{marker}'\n"
        f"{content}\n"
        f"{marker}"
    )
    result = env.execute(cmd, timeout=30)
    # A missing returncode is treated as failure.
    return result.get("returncode", 1) == 0


def _build_persisted_message(
    preview: str,
    has_more: bool,
    original_size: int,
    file_path: str,
) -> str:
    """Build the persisted-output replacement block.

    Args:
        preview: Leading portion of the original output.
        has_more: Whether the preview was cut (adds a trailing ``...`` line).
        original_size: Length of the original output in characters.
        file_path: Sandbox path where the full output was saved.

    Returns:
        Message wrapped in PERSISTED_OUTPUT_TAG / PERSISTED_OUTPUT_CLOSING_TAG.
    """
    size_kb = original_size / 1024
    if size_kb >= 1024:
        size_str = f"{size_kb / 1024:.1f} MB"
    else:
        size_str = f"{size_kb:.1f} KB"

    parts = [
        f"{PERSISTED_OUTPUT_TAG}\n",
        f"This tool result was too large ({original_size:,} characters, {size_str}).\n",
        f"Full output saved to: {file_path}\n",
        "Use the read_file tool with offset and limit to access specific sections of this output.\n\n",
        f"Preview (first {len(preview)} chars):\n",
        preview,
    ]
    if has_more:
        parts.append("\n...")
    parts.append(f"\n{PERSISTED_OUTPUT_CLOSING_TAG}")
    return "".join(parts)


def maybe_persist_tool_result(
    content: str,
    tool_name: str,
    tool_use_id: str,
    env=None,
    config: BudgetConfig = DEFAULT_BUDGET,
    threshold: int | float | None = None,
) -> str:
    """Layer 2: persist oversized result into the sandbox, return preview + path.

    Writes via env.execute() so the file is accessible from any backend
    (local, Docker, SSH, Modal, Daytona). Falls back to inline truncation
    if write fails or no env is available.

    Args:
        content: Raw tool result string.
        tool_name: Name of the tool (used for threshold lookup).
        tool_use_id: Unique ID for this tool call (used as filename, after
            replacing characters that are unsafe in a filename).
        env: The active BaseEnvironment instance, or None.
        config: BudgetConfig controlling thresholds and preview size.
        threshold: Explicit override; takes precedence over config resolution.

    Returns:
        Original content if small, or persisted-output replacement, or an
        inline-truncated preview when the sandbox write was not possible.
    """
    if threshold is not None:
        effective_threshold = threshold
    else:
        effective_threshold = config.resolve_threshold(tool_name)

    # An infinite threshold disables persistence for this tool.
    if effective_threshold == float("inf"):
        return content
    if len(content) <= effective_threshold:
        return content

    storage_dir = _resolve_storage_dir(env)
    remote_path = f"{storage_dir}/{_safe_file_stem(tool_use_id)}.txt"
    preview, has_more = generate_preview(content, max_chars=config.preview_size)

    if env is not None:
        try:
            if _write_to_sandbox(content, remote_path, env):
                logger.info(
                    "Persisted large tool result: %s (%s, %d chars -> %s)",
                    tool_name, tool_use_id, len(content), remote_path,
                )
                return _build_persisted_message(preview, has_more, len(content), remote_path)
        except Exception as exc:  # any backend failure degrades to truncation
            logger.warning("Sandbox write failed for %s: %s", tool_use_id, exc)

    logger.info(
        "Inline-truncating large tool result: %s (%d chars, no sandbox write)",
        tool_name, len(content),
    )
    return (
        f"{preview}\n\n"
        f"[Truncated: tool response was {len(content):,} chars. "
        f"Full output could not be saved to sandbox.]"
    )


def enforce_turn_budget(
    tool_messages: list[dict],
    env=None,
    config: BudgetConfig = DEFAULT_BUDGET,
) -> list[dict]:
    """Layer 3: enforce aggregate budget across all tool results in a turn.

    If total chars exceed budget, persist the largest non-persisted results
    first (via sandbox write) until under budget. Already-persisted results
    are skipped, as are messages whose content is not a string. A replacement
    is only kept when it is shorter than the content it replaces.

    Mutates the list in-place and returns it.

    Args:
        tool_messages: Tool result messages; each is a dict with a "content"
            entry and optionally a "tool_call_id".
        env: The active environment used for sandbox writes, or None.
        config: BudgetConfig providing ``turn_budget`` and preview settings.

    Returns:
        The same ``tool_messages`` list.
    """
    candidates: list[tuple[int, int]] = []
    total_size = 0
    for idx, msg in enumerate(tool_messages):
        content = msg.get("content", "")
        if not isinstance(content, str):
            # None / structured content cannot be measured or persisted here.
            continue
        size = len(content)
        total_size += size
        if not _is_persisted(content):
            candidates.append((idx, size))

    if total_size <= config.turn_budget:
        return tool_messages

    # Largest results first: fewest spills needed to get under budget.
    candidates.sort(key=lambda item: item[1], reverse=True)

    for idx, size in candidates:
        if total_size <= config.turn_budget:
            break

        msg = tool_messages[idx]
        content = msg["content"]
        # `or` also covers a present-but-empty/None tool_call_id.
        tool_use_id = msg.get("tool_call_id") or f"budget_{idx}"

        replacement = maybe_persist_tool_result(
            content=content,
            tool_name=_BUDGET_TOOL_NAME,
            tool_use_id=tool_use_id,
            env=env,
            config=config,
            threshold=0,  # force persistence regardless of per-tool threshold
        )

        # For small results the header + preview can be longer than the
        # original; swapping it in would move the total away from the budget.
        if len(replacement) >= size:
            continue

        total_size += len(replacement) - size
        msg["content"] = replacement
        logger.info(
            "Budget enforcement: persisted tool result %s (%d chars)",
            tool_use_id, size,
        )

    return tool_messages