From de9c0edc515a1245e4a9a42cdb0f915132091836 Mon Sep 17 00:00:00 2001
From: teknium <teknium@nousresearch.com>
Date: Wed, 15 Oct 2025 18:07:06 +0000
Subject: [PATCH] Fix tool error detection, suppress verbose HTTP body logging, make vision image download async

---
 batch_runner.py       | 32 ++++++++++++++++++----
 run_agent.py          |  9 ++++---
 run_datagen_images.sh | 12 +++++++++
 tools/vision_tools.py | 62 +++++++++++++++++++++++++------------------
 4 files changed, 80 insertions(+), 35 deletions(-)
 create mode 100644 run_datagen_images.sh

diff --git a/batch_runner.py b/batch_runner.py
index fc0b9d8d3..4954126f9 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -86,13 +86,35 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
             # Determine if tool call was successful
             is_success = True
             try:
-                # Try to parse as JSON and check for error field
+                # Try to parse as JSON and check for actual error values
                 content_json = json.loads(content) if isinstance(content, str) else content
-                if isinstance(content_json, dict) and "error" in content_json:
-                    is_success = False
+                
+                if isinstance(content_json, dict):
+                    # Check if error field exists AND has a non-null value
+                    if "error" in content_json and content_json["error"] is not None:
+                        is_success = False
+                    
+                    # Special handling for terminal tool responses
+                    # Terminal wraps its response in a "content" field
+                    if "content" in content_json and isinstance(content_json["content"], dict):
+                        inner_content = content_json["content"]
+                        # Check for actual error (non-null error field or non-zero exit code)
+                        has_error = (inner_content.get("error") is not None or 
+                                   inner_content.get("exit_code", 0) != 0)
+                        if has_error:
+                            is_success = False
+                    
+                    # Check for "success": false pattern used by some tools
+                    if content_json.get("success") is False:
+                        is_success = False
+                        
             except:
-                # If not JSON, check if content contains error indicators
-                if not content or "error" in content.lower():
+                # If not JSON, check if content is empty or explicitly states an error
+                # Note: We avoid simple substring matching to prevent false positives
+                if not content:
+                    is_success = False
+                # Only mark as failure if it starts with "error:" (case-insensitive, leading whitespace ignored)
+                elif content.strip().lower().startswith("error:"):
                     is_success = False
             
             # Update success/failure count
diff --git a/run_agent.py b/run_agent.py
index 443e33beb..e828d3e29 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -99,10 +99,11 @@ class AIAgent:
                 format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                 datefmt='%H:%M:%S'
             )
-            # Also set OpenAI client logging to debug
-            logging.getLogger('openai').setLevel(logging.DEBUG)
-            logging.getLogger('httpx').setLevel(logging.DEBUG)
-            print("🔍 Verbose logging enabled")
+            # Keep OpenAI at INFO and httpx at WARNING to avoid massive base64 logs
+            # Even in verbose mode, we don't want to see full request/response bodies
+            logging.getLogger('openai').setLevel(logging.INFO)
+            logging.getLogger('httpx').setLevel(logging.WARNING)
+            print("🔍 Verbose logging enabled (OpenAI/httpx request bodies suppressed)")
         else:
             # Set logging to INFO level for important messages only
             logging.basicConfig(
diff --git a/run_datagen_images.sh b/run_datagen_images.sh
new file mode 100644
index 000000000..79e448ec6
--- /dev/null
+++ b/run_datagen_images.sh
@@ -0,0 +1,12 @@
+python batch_runner.py \
+  --dataset_file="hermes-agent-imagen-data/hermes_agent_imagen_eval.jsonl" \
+  --batch_size=10 \
+  --run_name="imagen_eval_gpt5" \
+  --distribution="image_gen" \
+  --model="gpt-5" \
+  --base_url="https://api.openai.com/v1" \
+  --api_key="${OPENAI_API_KEY}" \
+  --num_workers=4 \
+  --max_turns=5 \
+  --verbose \
+  --ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt."
\ No newline at end of file
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index c221f4926..20d7776ec 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -33,10 +33,10 @@ import asyncio
 import uuid
 import datetime
 import base64
-import requests
 from pathlib import Path
 from typing import Dict, Any, Optional
 from openai import AsyncOpenAI
+import httpx  # Use httpx for async HTTP requests
 
 # Initialize Nous Research API client for vision processing
 nous_client = AsyncOpenAI(
@@ -131,9 +131,9 @@ def _validate_image_url(url: str) -> bool:
     return True  # Allow all HTTP/HTTPS URLs for flexibility
 
 
-def _download_image(image_url: str, destination: Path) -> Path:
+async def _download_image(image_url: str, destination: Path) -> Path:
     """
-    Download an image from a URL to a local destination.
+    Download an image from a URL to a local destination (async).
     
     Args:
         image_url (str): The URL of the image to download
@@ -148,16 +148,17 @@ def _download_image(image_url: str, destination: Path) -> Path:
     # Create parent directories if they don't exist
     destination.parent.mkdir(parents=True, exist_ok=True)
     
-    # Download the image with appropriate headers
-    response = requests.get(
-        image_url,
-        timeout=30,
-        headers={"User-Agent": "hermes-agent-vision/1.0"},
-    )
-    response.raise_for_status()
+    # Download the image with appropriate headers using async httpx
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        response = await client.get(
+            image_url,
+            headers={"User-Agent": "hermes-agent-vision/1.0"},
+        )
+        response.raise_for_status()
+        
+        # Save the image content
+        destination.write_bytes(response.content)
     
-    # Save the image content
-    destination.write_bytes(response.content)
     return destination
 
 
@@ -249,20 +250,21 @@ async def vision_analyze_tool(
     debug_call_data = {
         "parameters": {
             "image_url": image_url,
-            "user_prompt": user_prompt,
+            "user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt,
             "model": model
         },
         "error": None,
         "success": False,
         "analysis_length": 0,
-        "model_used": model
+        "model_used": model,
+        "image_size_bytes": 0
     }
     
     temp_image_path = None
     
     try:
-        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
-        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
+        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True)
+        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}", flush=True)
         
         # Validate image URL
         if not _validate_image_url(image_url):
@@ -273,17 +275,25 @@ async def vision_analyze_tool(
             raise ValueError("NOUS_API_KEY environment variable not set")
         
         # Download the image to a temporary location
-        print(f"⬇️  Downloading image from URL...")
+        print(f"⬇️  Downloading image from URL...", flush=True)
         temp_dir = Path("./temp_vision_images")
         temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
         
-        _download_image(image_url, temp_image_path)
-        print(f"✅ Image downloaded successfully")
+        await _download_image(image_url, temp_image_path)
+        
+        # Get image file size for logging
+        image_size_bytes = temp_image_path.stat().st_size
+        image_size_kb = image_size_bytes / 1024
+        print(f"✅ Image downloaded successfully ({image_size_kb:.1f} KB)", flush=True)
         
         # Convert image to base64 data URL
-        print(f"🔄 Converting image to base64...")
+        print(f"🔄 Converting image to base64...", flush=True)
         image_data_url = _image_to_base64_data_url(temp_image_path)
-        print(f"✅ Image converted to base64 ({len(image_data_url)} characters)")
+        # Calculate size in KB for better readability
+        data_size_kb = len(image_data_url) / 1024
+        print(f"✅ Image converted to base64 ({data_size_kb:.1f} KB)", flush=True)
+        
+        debug_call_data["image_size_bytes"] = image_size_bytes
         
         # Use the prompt as provided (model_tools.py now handles full description formatting)
         comprehensive_prompt = user_prompt
@@ -307,7 +317,7 @@ async def vision_analyze_tool(
             }
         ]
         
-        print(f"🧠 Processing image with {model}...")
+        print(f"🧠 Processing image with {model}...", flush=True)
         
         # Call the vision API
         response = await nous_client.chat.completions.create(
@@ -321,7 +331,7 @@ async def vision_analyze_tool(
         analysis = response.choices[0].message.content.strip()
         analysis_length = len(analysis)
         
-        print(f"✅ Image analysis completed ({analysis_length} characters)")
+        print(f"✅ Image analysis completed ({analysis_length} characters)", flush=True)
         
         # Prepare successful response
         result = {
@@ -340,7 +350,7 @@ async def vision_analyze_tool(
         
     except Exception as e:
         error_msg = f"Error analyzing image: {str(e)}"
-        print(f"❌ {error_msg}")
+        print(f"❌ {error_msg}", flush=True)
         
         # Prepare error response
         result = {
@@ -359,9 +369,9 @@ async def vision_analyze_tool(
         if temp_image_path and temp_image_path.exists():
             try:
                 temp_image_path.unlink()
-                print(f"🧹 Cleaned up temporary image file")
+                print(f"🧹 Cleaned up temporary image file", flush=True)
             except Exception as cleanup_error:
-                print(f"⚠️  Warning: Could not delete temporary file: {cleanup_error}")
+                print(f"⚠️  Warning: Could not delete temporary file: {cleanup_error}", flush=True)
 
 
 def check_nous_api_key() -> bool: