From de9c0edc515a1245e4a9a42cdb0f915132091836 Mon Sep 17 00:00:00 2001 From: teknium Date: Wed, 15 Oct 2025 18:07:06 +0000 Subject: [PATCH] some bugfixes --- batch_runner.py | 32 ++++++++++++++++++---- run_agent.py | 9 ++++--- run_datagen_images.sh | 12 +++++++++ tools/vision_tools.py | 62 +++++++++++++++++++++++++------------------ 4 files changed, 80 insertions(+), 35 deletions(-) create mode 100644 run_datagen_images.sh diff --git a/batch_runner.py b/batch_runner.py index fc0b9d8d3..4954126f9 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -86,13 +86,35 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i # Determine if tool call was successful is_success = True try: - # Try to parse as JSON and check for error field + # Try to parse as JSON and check for actual error values content_json = json.loads(content) if isinstance(content, str) else content - if isinstance(content_json, dict) and "error" in content_json: - is_success = False + + if isinstance(content_json, dict): + # Check if error field exists AND has a non-null value + if "error" in content_json and content_json["error"] is not None: + is_success = False + + # Special handling for terminal tool responses + # Terminal wraps its response in a "content" field + if "content" in content_json and isinstance(content_json["content"], dict): + inner_content = content_json["content"] + # Check for actual error (non-null error field or non-zero exit code) + has_error = (inner_content.get("error") is not None or + inner_content.get("exit_code", 0) != 0) + if has_error: + is_success = False + + # Check for "success": false pattern used by some tools + if content_json.get("success") is False: + is_success = False + except: - # If not JSON, check if content contains error indicators - if not content or "error" in content.lower(): + # If not JSON, check if content is empty or explicitly states an error + # Note: We avoid simple substring matching to prevent false positives + if not content: + is_success = False + # Only mark as failure if it explicitly starts with "Error:" or "ERROR:" + elif content.strip().lower().startswith("error:"): is_success = False # Update success/failure count diff --git a/run_agent.py b/run_agent.py index 443e33beb..e828d3e29 100644 --- a/run_agent.py +++ b/run_agent.py @@ -99,10 +99,11 @@ class AIAgent: format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%H:%M:%S' ) - # Also set OpenAI client logging to debug - logging.getLogger('openai').setLevel(logging.DEBUG) - logging.getLogger('httpx').setLevel(logging.DEBUG) - print("๐Ÿ” Verbose logging enabled") + # Keep OpenAI and httpx at INFO level to avoid massive base64 logs + # Even in verbose mode, we don't want to see full request/response bodies + logging.getLogger('openai').setLevel(logging.INFO) + logging.getLogger('httpx').setLevel(logging.WARNING) + print("๐Ÿ” Verbose logging enabled (OpenAI/httpx request bodies suppressed)") else: # Set logging to INFO level for important messages only logging.basicConfig( diff --git a/run_datagen_images.sh b/run_datagen_images.sh new file mode 100644 index 000000000..79e448ec6 --- /dev/null +++ b/run_datagen_images.sh @@ -0,0 +1,12 @@ +python batch_runner.py \ + --dataset_file="hermes-agent-imagen-data/hermes_agent_imagen_eval.jsonl" \ + --batch_size=10 \ + --run_name="imagen_eval_gpt5" \ + --distribution="image_gen" \ + --model="gpt-5" \ + --base_url="https://api.openai.com/v1" \ + --api_key="${OPENAI_API_KEY}" \ + --num_workers=4 \ + --max_turns=5 \ + --verbose \ + --ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt." \ No newline at end of file diff --git a/tools/vision_tools.py b/tools/vision_tools.py index c221f4926..20d7776ec 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -33,10 +33,10 @@ import asyncio import uuid import datetime import base64 -import requests from pathlib import Path from typing import Dict, Any, Optional from openai import AsyncOpenAI +import httpx # Use httpx for async HTTP requests # Initialize Nous Research API client for vision processing nous_client = AsyncOpenAI( @@ -131,9 +131,9 @@ def _validate_image_url(url: str) -> bool: return True # Allow all HTTP/HTTPS URLs for flexibility -def _download_image(image_url: str, destination: Path) -> Path: +async def _download_image(image_url: str, destination: Path) -> Path: """ - Download an image from a URL to a local destination. + Download an image from a URL to a local destination (async). Args: image_url (str): The URL of the image to download @@ -148,16 +148,17 @@ def _download_image(image_url: str, destination: Path) -> Path: # Create parent directories if they don't exist destination.parent.mkdir(parents=True, exist_ok=True) - # Download the image with appropriate headers - response = requests.get( - image_url, - timeout=30, - headers={"User-Agent": "hermes-agent-vision/1.0"}, - ) - response.raise_for_status() + # Download the image with appropriate headers using async httpx + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get( + image_url, + headers={"User-Agent": "hermes-agent-vision/1.0"}, + ) + response.raise_for_status() + + # Save the image content + destination.write_bytes(response.content) - # Save the image content - destination.write_bytes(response.content) return destination @@ -249,20 +250,21 @@ async def vision_analyze_tool( debug_call_data = { "parameters": { "image_url": image_url, - "user_prompt": user_prompt, + "user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt, "model": model }, "error": None, "success": False, "analysis_length": 0, - "model_used": model + "model_used": model, + "image_size_bytes": 0 } temp_image_path = None try: - print(f"๐Ÿ” Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}") - print(f"๐Ÿ“ User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}") + print(f"๐Ÿ” Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True) + print(f"๐Ÿ“ User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}", flush=True) # Validate image URL if not _validate_image_url(image_url): @@ -273,17 +275,25 @@ async def vision_analyze_tool( raise ValueError("NOUS_API_KEY environment variable not set") # Download the image to a temporary location - print(f"โฌ‡๏ธ Downloading image from URL...") + print(f"โฌ‡๏ธ Downloading image from URL...", flush=True) temp_dir = Path("./temp_vision_images") temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg" - _download_image(image_url, temp_image_path) - print(f"โœ… Image downloaded successfully") + await _download_image(image_url, temp_image_path) + + # Get image file size for logging + image_size_bytes = temp_image_path.stat().st_size + image_size_kb = image_size_bytes / 1024 + print(f"โœ… Image downloaded successfully ({image_size_kb:.1f} KB)", flush=True) # Convert image to base64 data URL - print(f"๐Ÿ”„ Converting image to base64...") + print(f"๐Ÿ”„ Converting image to base64...", flush=True) image_data_url = _image_to_base64_data_url(temp_image_path) - print(f"โœ… Image converted to base64 ({len(image_data_url)} characters)") + # Calculate size in KB for better readability + data_size_kb = len(image_data_url) / 1024 + print(f"โœ… Image converted to base64 ({data_size_kb:.1f} KB)", flush=True) + + debug_call_data["image_size_bytes"] = image_size_bytes # Use the prompt as provided (model_tools.py now handles full description formatting) comprehensive_prompt = user_prompt @@ -307,7 +317,7 @@ async def vision_analyze_tool( } ] - print(f"๐Ÿง  Processing image with {model}...") + print(f"๐Ÿง  Processing image with {model}...", flush=True) # Call the vision API response = await nous_client.chat.completions.create( @@ -321,7 +331,7 @@ async def vision_analyze_tool( analysis = response.choices[0].message.content.strip() analysis_length = len(analysis) - print(f"โœ… Image analysis completed ({analysis_length} characters)") + print(f"โœ… Image analysis completed ({analysis_length} characters)", flush=True) # Prepare successful response result = { @@ -340,7 +350,7 @@ async def vision_analyze_tool( except Exception as e: error_msg = f"Error analyzing image: {str(e)}" - print(f"โŒ {error_msg}") + print(f"โŒ {error_msg}", flush=True) # Prepare error response result = { @@ -359,9 +369,9 @@ async def vision_analyze_tool( if temp_image_path and temp_image_path.exists(): try: temp_image_path.unlink() - print(f"๐Ÿงน Cleaned up temporary image file") + print(f"๐Ÿงน Cleaned up temporary image file", flush=True) except Exception as cleanup_error: - print(f"โš ๏ธ Warning: Could not delete temporary file: {cleanup_error}") + print(f"โš ๏ธ Warning: Could not delete temporary file: {cleanup_error}", flush=True) def check_nous_api_key() -> bool: