Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.
This commit is contained in:
teknium 2026-01-10 05:56:26 +00:00
parent 21f9e2df40
commit 4071ba29da
8 changed files with 572 additions and 111 deletions

View file

@ -139,6 +139,9 @@ async def process_content_with_llm(
to intelligently extract key information and create markdown summaries,
significantly reducing token usage while preserving all important information.
For very large content (>500k chars), uses chunked processing with synthesis.
For extremely large content (>2M chars), refuses to process entirely.
Args:
content (str): The raw content to process
url (str): The source URL (for context, optional)
@ -149,13 +152,25 @@ async def process_content_with_llm(
Returns:
Optional[str]: Processed markdown content, or None if content too short or processing fails
"""
# Size thresholds
MAX_CONTENT_SIZE = 2_000_000 # 2M chars - refuse entirely above this
CHUNK_THRESHOLD = 500_000 # 500k chars - use chunked processing above this
CHUNK_SIZE = 100_000 # 100k chars per chunk
MAX_OUTPUT_SIZE = 5000 # Hard cap on final output size
try:
# Skip processing if content is too short
if len(content) < min_length:
print(f"📏 Content too short ({len(content)} < {min_length} chars), skipping LLM processing")
return None
content_len = len(content)
print(f"🧠 Processing content with LLM ({len(content)} characters)")
# Refuse if content is absurdly large
if content_len > MAX_CONTENT_SIZE:
size_mb = content_len / 1_000_000
print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
return f"[Content too large to process: {size_mb:.1f}MB. Try using web_crawl with specific extraction instructions, or search for a more focused source.]"
# Skip processing if content is too short
if content_len < min_length:
print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
return None
# Create context information
context_info = []
@ -163,10 +178,83 @@ async def process_content_with_llm(
context_info.append(f"Title: {title}")
if url:
context_info.append(f"Source: {url}")
context_str = "\n".join(context_info) + "\n\n" if context_info else ""
# Simplified prompt for better quality markdown output
# Check if we need chunked processing
if content_len > CHUNK_THRESHOLD:
print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
return await _process_large_content_chunked(
content, context_str, model, CHUNK_SIZE, MAX_OUTPUT_SIZE
)
# Standard single-pass processing for normal content
print(f"🧠 Processing content with LLM ({content_len} characters)")
processed_content = await _call_summarizer_llm(content, context_str, model)
if processed_content:
# Enforce output cap
if len(processed_content) > MAX_OUTPUT_SIZE:
processed_content = processed_content[:MAX_OUTPUT_SIZE] + "\n\n[... summary truncated for context management ...]"
# Log compression metrics
processed_length = len(processed_content)
compression_ratio = processed_length / content_len if content_len > 0 else 1.0
print(f"✅ Content processed: {content_len}{processed_length} chars ({compression_ratio:.1%})")
return processed_content
except Exception as e:
print(f"❌ Error processing content with LLM: {str(e)}")
return f"[Failed to process content: {str(e)[:100]}. Content size: {len(content):,} chars]"
async def _call_summarizer_llm(
content: str,
context_str: str,
model: str,
max_tokens: int = 4000,
is_chunk: bool = False,
chunk_info: str = ""
) -> Optional[str]:
"""
Make a single LLM call to summarize content.
Args:
content: The content to summarize
context_str: Context information (title, URL)
model: Model to use
max_tokens: Maximum output tokens
is_chunk: Whether this is a chunk of a larger document
chunk_info: Information about chunk position (e.g., "Chunk 2/5")
Returns:
Summarized content or None on failure
"""
if is_chunk:
# Chunk-specific prompt - aware that this is partial content
system_prompt = """You are an expert content analyst processing a SECTION of a larger document. Your job is to extract and summarize the key information from THIS SECTION ONLY.
Important guidelines for chunk processing:
1. Do NOT write introductions or conclusions - this is a partial document
2. Focus on extracting ALL key facts, figures, data points, and insights from this section
3. Preserve important quotes, code snippets, and specific details verbatim
4. Use bullet points and structured formatting for easy synthesis later
5. Note any references to other sections (e.g., "as mentioned earlier", "see below") without trying to resolve them
Your output will be combined with summaries of other sections, so focus on thorough extraction rather than narrative flow."""
user_prompt = f"""Extract key information from this SECTION of a larger document:
{context_str}{chunk_info}
SECTION CONTENT:
{content}
Extract all important information from this section in a structured format. Focus on facts, data, insights, and key details. Do not add introductions or conclusions."""
else:
# Standard full-document prompt
system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
@ -183,49 +271,155 @@ Your goal is to preserve ALL important information while reducing length. Never
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
# Call the LLM asynchronously with retry logic for flaky API
max_retries = 6
retry_delay = 2 # Start with 2 seconds
last_error = None
# Call the LLM with retry logic
max_retries = 6
retry_delay = 2
last_error = None
for attempt in range(max_retries):
try:
response = await summarizer_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.1, # Low temperature for consistent extraction
max_tokens=4000 # Generous limit for comprehensive processing
)
break # Success, exit retry loop
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
else:
# All retries exhausted
raise last_error
for attempt in range(max_retries):
try:
response = await summarizer_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.1,
max_tokens=max_tokens
)
return response.choices[0].message.content.strip()
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60)
else:
raise last_error
return None
async def _process_large_content_chunked(
    content: str,
    context_str: str,
    model: str,
    chunk_size: int,
    max_output_size: int
) -> Optional[str]:
    """
    Process large content by chunking, summarizing each chunk in parallel,
    then synthesizing the summaries.

    Args:
        content: The large content to process
        context_str: Context information (title/URL lines, may be empty)
        model: Model to use
        chunk_size: Size of each chunk in characters
        max_output_size: Maximum final output size in characters

    Returns:
        Synthesized summary string (possibly a bracketed failure notice),
        truncated to max_output_size when necessary.
    """
    # Split content into fixed-size chunks; the last chunk may be shorter.
    chunks = []
    for i in range(0, len(content), chunk_size):
        chunk = content[i:i + chunk_size]
        chunks.append(chunk)
    print(f" 📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")

    async def summarize_chunk(chunk_idx: int, chunk_content: str) -> tuple[int, Optional[str]]:
        """Summarize a single chunk; returns (index, summary or None on failure)."""
        try:
            chunk_info = f"[Processing chunk {chunk_idx + 1} of {len(chunks)}]"
            summary = await _call_summarizer_llm(
                chunk_content,
                context_str,
                model,
                max_tokens=2000,
                is_chunk=True,
                chunk_info=chunk_info
            )
            if summary:
                print(f" ✅ Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,} → {len(summary):,} chars")
                return chunk_idx, summary
        except Exception as e:
            # A single failed chunk must not sink the whole document.
            print(f" ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
        return chunk_idx, None

    # Fan out all chunk summarizations concurrently.
    tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)]
    results = await asyncio.gather(*tasks)

    # Collect successful summaries in original document order.
    summaries = []
    for chunk_idx, summary in sorted(results, key=lambda x: x[0]):
        if summary:
            summaries.append(f"## Section {chunk_idx + 1}\n{summary}")

    if not summaries:
        print(f" ❌ All chunk summarizations failed")
        return "[Failed to process large content: all chunk summarizations failed]"

    print(f" 📊 Got {len(summaries)}/{len(chunks)} chunk summaries")

    # Only one chunk survived: no synthesis needed, just enforce the cap.
    if len(summaries) == 1:
        result = summaries[0]
        if len(result) > max_output_size:
            result = result[:max_output_size] + "\n\n[... truncated ...]"
        return result

    # Synthesize the per-section summaries into one final summary.
    print(f" 🔗 Synthesizing {len(summaries)} summaries...")
    combined_summaries = "\n\n---\n\n".join(summaries)
    synthesis_prompt = f"""You have been given summaries of different sections of a large document.
Synthesize these into ONE cohesive, comprehensive summary that:
1. Removes redundancy between sections
2. Preserves all key facts, figures, and actionable information
3. Is well-organized with clear structure
4. Is under {max_output_size} characters
{context_str}SECTION SUMMARIES:
{combined_summaries}
Create a single, unified markdown summary."""

    try:
        response = await summarizer_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
                {"role": "user", "content": synthesis_prompt}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        final_summary = response.choices[0].message.content.strip()

        # Enforce hard cap on the synthesized output.
        if len(final_summary) > max_output_size:
            final_summary = final_summary[:max_output_size] + "\n\n[... summary truncated for context management ...]"

        # Log compression metrics.
        original_len = len(content)
        final_len = len(final_summary)
        compression = final_len / original_len if original_len > 0 else 1.0
        print(f" ✅ Synthesis complete: {original_len:,} → {final_len:,} chars ({compression:.2%})")
        return final_summary
    except Exception as e:
        print(f" ⚠️ Synthesis failed: {str(e)[:100]}")
        # Fall back to concatenated summaries with truncation.
        fallback = "\n\n".join(summaries)
        if len(fallback) > max_output_size:
            fallback = fallback[:max_output_size] + "\n\n[... truncated due to synthesis failure ...]"
        return fallback
def clean_base64_images(text: str) -> str: