Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.
This commit is contained in:
teknium 2026-01-10 05:56:26 +00:00
parent 21f9e2df40
commit 4071ba29da
8 changed files with 572 additions and 111 deletions

View file

@ -139,6 +139,9 @@ async def process_content_with_llm(
to intelligently extract key information and create markdown summaries,
significantly reducing token usage while preserving all important information.
For very large content (>500k chars), uses chunked processing with synthesis.
For extremely large content (>2M chars), refuses to process entirely.
Args:
content (str): The raw content to process
url (str): The source URL (for context, optional)
@ -149,13 +152,25 @@ async def process_content_with_llm(
Returns:
Optional[str]: Processed markdown content, or None if content too short or processing fails
"""
# Size thresholds
MAX_CONTENT_SIZE = 2_000_000 # 2M chars - refuse entirely above this
CHUNK_THRESHOLD = 500_000 # 500k chars - use chunked processing above this
CHUNK_SIZE = 100_000 # 100k chars per chunk
MAX_OUTPUT_SIZE = 5000 # Hard cap on final output size
try:
# Skip processing if content is too short
if len(content) < min_length:
print(f"📏 Content too short ({len(content)} < {min_length} chars), skipping LLM processing")
return None
content_len = len(content)
print(f"🧠 Processing content with LLM ({len(content)} characters)")
# Refuse if content is absurdly large
if content_len > MAX_CONTENT_SIZE:
size_mb = content_len / 1_000_000
print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
return f"[Content too large to process: {size_mb:.1f}MB. Try using web_crawl with specific extraction instructions, or search for a more focused source.]"
# Skip processing if content is too short
if content_len < min_length:
print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
return None
# Create context information
context_info = []
@ -163,10 +178,83 @@ async def process_content_with_llm(
context_info.append(f"Title: {title}")
if url:
context_info.append(f"Source: {url}")
context_str = "\n".join(context_info) + "\n\n" if context_info else ""
# Simplified prompt for better quality markdown output
# Check if we need chunked processing
if content_len > CHUNK_THRESHOLD:
print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
return await _process_large_content_chunked(
content, context_str, model, CHUNK_SIZE, MAX_OUTPUT_SIZE
)
# Standard single-pass processing for normal content
print(f"🧠 Processing content with LLM ({content_len} characters)")
processed_content = await _call_summarizer_llm(content, context_str, model)
if processed_content:
# Enforce output cap
if len(processed_content) > MAX_OUTPUT_SIZE:
processed_content = processed_content[:MAX_OUTPUT_SIZE] + "\n\n[... summary truncated for context management ...]"
# Log compression metrics
processed_length = len(processed_content)
compression_ratio = processed_length / content_len if content_len > 0 else 1.0
print(f"✅ Content processed: {content_len}{processed_length} chars ({compression_ratio:.1%})")
return processed_content
except Exception as e:
print(f"❌ Error processing content with LLM: {str(e)}")
return f"[Failed to process content: {str(e)[:100]}. Content size: {len(content):,} chars]"
async def _call_summarizer_llm(
content: str,
context_str: str,
model: str,
max_tokens: int = 4000,
is_chunk: bool = False,
chunk_info: str = ""
) -> Optional[str]:
"""
Make a single LLM call to summarize content.
Args:
content: The content to summarize
context_str: Context information (title, URL)
model: Model to use
max_tokens: Maximum output tokens
is_chunk: Whether this is a chunk of a larger document
chunk_info: Information about chunk position (e.g., "Chunk 2/5")
Returns:
Summarized content or None on failure
"""
if is_chunk:
# Chunk-specific prompt - aware that this is partial content
system_prompt = """You are an expert content analyst processing a SECTION of a larger document. Your job is to extract and summarize the key information from THIS SECTION ONLY.
Important guidelines for chunk processing:
1. Do NOT write introductions or conclusions - this is a partial document
2. Focus on extracting ALL key facts, figures, data points, and insights from this section
3. Preserve important quotes, code snippets, and specific details verbatim
4. Use bullet points and structured formatting for easy synthesis later
5. Note any references to other sections (e.g., "as mentioned earlier", "see below") without trying to resolve them
Your output will be combined with summaries of other sections, so focus on thorough extraction rather than narrative flow."""
user_prompt = f"""Extract key information from this SECTION of a larger document:
{context_str}{chunk_info}
SECTION CONTENT:
{content}
Extract all important information from this section in a structured format. Focus on facts, data, insights, and key details. Do not add introductions or conclusions."""
else:
# Standard full-document prompt
system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
@ -183,49 +271,155 @@ Your goal is to preserve ALL important information while reducing length. Never
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
# Call the LLM asynchronously with retry logic for flaky API
max_retries = 6
retry_delay = 2 # Start with 2 seconds
last_error = None
# Call the LLM with retry logic
max_retries = 6
retry_delay = 2
last_error = None
for attempt in range(max_retries):
try:
response = await summarizer_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.1, # Low temperature for consistent extraction
max_tokens=4000 # Generous limit for comprehensive processing
)
break # Success, exit retry loop
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
else:
# All retries exhausted
raise last_error
for attempt in range(max_retries):
try:
response = await summarizer_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.1,
max_tokens=max_tokens
)
return response.choices[0].message.content.strip()
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
print(f" Retrying in {retry_delay}s...")
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60)
else:
raise last_error
return None
async def _process_large_content_chunked(
    content: str,
    context_str: str,
    model: str,
    chunk_size: int,
    max_output_size: int
) -> Optional[str]:
    """
    Process large content by chunking, summarizing each chunk in parallel,
    then synthesizing the summaries.

    Args:
        content: The large content to process
        context_str: Context information (title/URL lines, may be empty)
        model: Model to use
        chunk_size: Size of each chunk in characters
        max_output_size: Maximum final output size in characters

    Returns:
        Synthesized summary string (possibly a bracketed failure notice),
        truncated to max_output_size when necessary.
    """
    # Split content into fixed-size chunks; the last chunk may be shorter.
    chunks = []
    for i in range(0, len(content), chunk_size):
        chunk = content[i:i + chunk_size]
        chunks.append(chunk)
    print(f" 📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")

    async def summarize_chunk(chunk_idx: int, chunk_content: str) -> tuple[int, Optional[str]]:
        """Summarize a single chunk; returns (index, summary or None on failure)."""
        try:
            chunk_info = f"[Processing chunk {chunk_idx + 1} of {len(chunks)}]"
            summary = await _call_summarizer_llm(
                chunk_content,
                context_str,
                model,
                max_tokens=2000,
                is_chunk=True,
                chunk_info=chunk_info
            )
            if summary:
                print(f" ✅ Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,} → {len(summary):,} chars")
                return chunk_idx, summary
        except Exception as e:
            # A single failed chunk must not sink the whole document.
            print(f" ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
        return chunk_idx, None

    # Fan out all chunk summarizations concurrently.
    tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)]
    results = await asyncio.gather(*tasks)

    # Collect successful summaries in original document order.
    summaries = []
    for chunk_idx, summary in sorted(results, key=lambda x: x[0]):
        if summary:
            summaries.append(f"## Section {chunk_idx + 1}\n{summary}")

    if not summaries:
        print(f" ❌ All chunk summarizations failed")
        return "[Failed to process large content: all chunk summarizations failed]"

    print(f" 📊 Got {len(summaries)}/{len(chunks)} chunk summaries")

    # Only one chunk survived: no synthesis needed, just enforce the cap.
    if len(summaries) == 1:
        result = summaries[0]
        if len(result) > max_output_size:
            result = result[:max_output_size] + "\n\n[... truncated ...]"
        return result

    # Synthesize the per-section summaries into one final summary.
    print(f" 🔗 Synthesizing {len(summaries)} summaries...")
    combined_summaries = "\n\n---\n\n".join(summaries)
    synthesis_prompt = f"""You have been given summaries of different sections of a large document.
Synthesize these into ONE cohesive, comprehensive summary that:
1. Removes redundancy between sections
2. Preserves all key facts, figures, and actionable information
3. Is well-organized with clear structure
4. Is under {max_output_size} characters
{context_str}SECTION SUMMARIES:
{combined_summaries}
Create a single, unified markdown summary."""

    try:
        response = await summarizer_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
                {"role": "user", "content": synthesis_prompt}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        final_summary = response.choices[0].message.content.strip()

        # Enforce hard cap on the synthesized output.
        if len(final_summary) > max_output_size:
            final_summary = final_summary[:max_output_size] + "\n\n[... summary truncated for context management ...]"

        # Log compression metrics.
        original_len = len(content)
        final_len = len(final_summary)
        compression = final_len / original_len if original_len > 0 else 1.0
        print(f" ✅ Synthesis complete: {original_len:,} → {final_len:,} chars ({compression:.2%})")
        return final_summary
    except Exception as e:
        print(f" ⚠️ Synthesis failed: {str(e)[:100]}")
        # Fall back to concatenated summaries with truncation.
        fallback = "\n\n".join(summaries)
        if len(fallback) > max_output_size:
            fallback = fallback[:max_output_size] + "\n\n[... truncated due to synthesis failure ...]"
        return fallback
def clean_base64_images(text: str) -> str: