feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

2026-04-27 01:11:40 +00:00 · 2026-03-09 20:36:51 -05:00 · 2026-03-09 20:36:51 -05:00 · f4d7e6a29e
commit f4d7e6a29e
parent 172a38c344
111 changed files with 11655 additions and 10200 deletions
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@ -19,7 +19,7 @@ Features:
 Usage:
    from vision_tools import vision_analyze_tool
    import asyncio
-    
+
    # Analyze an image
    result = await vision_analyze_tool(
        image_url="https://example.com/image.jpg",
@ -33,11 +33,14 @@ import json
 import logging
 import os
 import uuid
+from collections.abc import Awaitable
 from pathlib import Path
-from typing import Any, Awaitable, Dict, Optional
+from typing import Any
 from urllib.parse import urlparse
+
 import httpx
 from openai import AsyncOpenAI
+
 from agent.auxiliary_client import get_vision_auxiliary_client
 from tools.debug_helpers import DebugSession

@ -55,7 +58,7 @@ if _aux_sync_client is not None:
        _async_kwargs["default_headers"] = {
            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
            "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
+            "X-OpenRouter-Categories": "productivity,cli-agent",
        }
    _aux_async_client = AsyncOpenAI(**_async_kwargs)

@ -65,10 +68,10 @@ _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
 def _validate_image_url(url: str) -> bool:
    """
    Basic validation of image URL format.
-    
+
    Args:
        url (str): The URL to validate
-        
+
    Returns:
        bool: True if URL appears to be valid, False otherwise
    """
@ -91,23 +94,22 @@ def _validate_image_url(url: str) -> bool:
 async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
    """
    Download an image from a URL to a local destination (async) with retry logic.
-    
+
    Args:
        image_url (str): The URL of the image to download
        destination (Path): The path where the image should be saved
        max_retries (int): Maximum number of retry attempts (default: 3)
-        
+
    Returns:
        Path: The path to the downloaded image
-        
+
    Raises:
        Exception: If download fails after all retries
    """
-    import asyncio
-    
+
    # Create parent directories if they don't exist
    destination.parent.mkdir(parents=True, exist_ok=True)
-    
+
    last_error = None
    for attempt in range(max_retries):
        try:
@ -122,10 +124,10 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
                    },
                )
                response.raise_for_status()
-                
+
                # Save the image content
                destination.write_bytes(response.content)
-            
+
            return destination
        except Exception as e:
            last_error = e
@ -141,56 +143,56 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
                    str(e)[:100],
                    exc_info=True,
                )
-    
+
    raise last_error


 def _determine_mime_type(image_path: Path) -> str:
    """
    Determine the MIME type of an image based on its file extension.
-    
+
    Args:
        image_path (Path): Path to the image file
-        
+
    Returns:
        str: The MIME type (defaults to image/jpeg if unknown)
    """
    extension = image_path.suffix.lower()
    mime_types = {
-        '.jpg': 'image/jpeg',
-        '.jpeg': 'image/jpeg',
-        '.png': 'image/png',
-        '.gif': 'image/gif',
-        '.bmp': 'image/bmp',
-        '.webp': 'image/webp',
-        '.svg': 'image/svg+xml'
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".gif": "image/gif",
+        ".bmp": "image/bmp",
+        ".webp": "image/webp",
+        ".svg": "image/svg+xml",
    }
-    return mime_types.get(extension, 'image/jpeg')
+    return mime_types.get(extension, "image/jpeg")


-def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None) -> str:
+def _image_to_base64_data_url(image_path: Path, mime_type: str | None = None) -> str:
    """
    Convert an image file to a base64-encoded data URL.
-    
+
    Args:
        image_path (Path): Path to the image file
        mime_type (Optional[str]): MIME type of the image (auto-detected if None)
-        
+
    Returns:
        str: Base64-encoded data URL (e.g., "data:image/jpeg;base64,...")
    """
    # Read the image as bytes
    data = image_path.read_bytes()
-    
+
    # Encode to base64
    encoded = base64.b64encode(data).decode("ascii")
-    
+
    # Determine MIME type
    mime = mime_type or _determine_mime_type(image_path)
-    
+
    # Create data URL
    data_url = f"data:{mime};base64,{encoded}"
-    
+
    return data_url


@ -201,31 +203,31 @@ async def vision_analyze_tool(
 ) -> str:
    """
    Analyze an image from a URL or local file path using vision AI.
-    
+
    This tool accepts either an HTTP/HTTPS URL or a local file path. For URLs,
    it downloads the image first. In both cases, the image is converted to base64
    and processed using Gemini 3 Flash Preview via OpenRouter API.
-    
+
    The user_prompt parameter is expected to be pre-formatted by the calling
    function (typically model_tools.py) to include both full description
    requests and specific questions.
-    
+
    Args:
        image_url (str): The URL or local file path of the image to analyze.
                         Accepts http://, https:// URLs or absolute/relative file paths.
        user_prompt (str): The pre-formatted prompt for the vision model
        model (str): The vision model to use (default: google/gemini-3-flash-preview)
-    
+
    Returns:
        str: JSON string containing the analysis results with the following structure:
             {
                 "success": bool,
                 "analysis": str (defaults to error message if None)
             }
-    
+
    Raises:
        Exception: If download fails, analysis fails, or API key is not set
-        
+
    Note:
        - For URLs, temporary images are stored in ./temp_vision_images/ and cleaned up
        - For local file paths, the file is used directly and NOT deleted
@ -235,36 +237,41 @@ async def vision_analyze_tool(
        "parameters": {
            "image_url": image_url,
            "user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt,
-            "model": model
+            "model": model,
        },
        "error": None,
        "success": False,
        "analysis_length": 0,
        "model_used": model,
-        "image_size_bytes": 0
+        "image_size_bytes": 0,
    }
-    
+
    temp_image_path = None
    # Track whether we should clean up the file after processing.
    # Local files (e.g. from the image cache) should NOT be deleted.
    should_cleanup = True
-    
+
    try:
        from tools.interrupt import is_interrupted
+
        if is_interrupted():
            return json.dumps({"success": False, "error": "Interrupted"})

        logger.info("Analyzing image: %s", image_url[:60])
        logger.info("User prompt: %s", user_prompt[:100])
-        
+
        # Check auxiliary vision client availability
        if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
-            return json.dumps({
-                "success": False,
-                "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
-                            "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
-            }, indent=2, ensure_ascii=False)
-        
+            return json.dumps(
+                {
+                    "success": False,
+                    "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
+                    "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.",
+                },
+                indent=2,
+                ensure_ascii=False,
+            )
+
        # Determine if this is a local file path or a remote URL
        local_path = Path(image_url)
        if local_path.is_file():
@ -280,50 +287,41 @@ async def vision_analyze_tool(
            await _download_image(image_url, temp_image_path)
            should_cleanup = True
        else:
-            raise ValueError(
-                "Invalid image source. Provide an HTTP/HTTPS URL or a valid local file path."
-            )
-        
+            raise ValueError("Invalid image source. Provide an HTTP/HTTPS URL or a valid local file path.")
+
        # Get image file size for logging
        image_size_bytes = temp_image_path.stat().st_size
        image_size_kb = image_size_bytes / 1024
        logger.info("Image ready (%.1f KB)", image_size_kb)
-        
+
        # Convert image to base64 data URL
        logger.info("Converting image to base64...")
        image_data_url = _image_to_base64_data_url(temp_image_path)
        # Calculate size in KB for better readability
        data_size_kb = len(image_data_url) / 1024
        logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
-        
+
        debug_call_data["image_size_bytes"] = image_size_bytes
-        
+
        # Use the prompt as provided (model_tools.py now handles full description formatting)
        comprehensive_prompt = user_prompt
-        
+
        # Prepare the message with base64-encoded image
        messages = [
            {
                "role": "user",
                "content": [
-                    {
-                        "type": "text",
-                        "text": comprehensive_prompt
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": image_data_url
-                        }
-                    }
-                ]
+                    {"type": "text", "text": comprehensive_prompt},
+                    {"type": "image_url", "image_url": {"url": image_data_url}},
+                ],
            }
        ]
-        
+
        logger.info("Processing image with %s...", model)
-        
+
        # Call the vision API
-        from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
+        from agent.auxiliary_client import auxiliary_max_tokens_param, get_auxiliary_extra_body
+
        _extra = get_auxiliary_extra_body()
        response = await _aux_async_client.chat.completions.create(
            model=model,
@ -332,44 +330,44 @@ async def vision_analyze_tool(
            **auxiliary_max_tokens_param(2000),
            **({} if not _extra else {"extra_body": _extra}),
        )
-        
+
        # Extract the analysis
        analysis = response.choices[0].message.content.strip()
        analysis_length = len(analysis)
-        
+
        logger.info("Image analysis completed (%s characters)", analysis_length)
-        
+
        # Prepare successful response
        result = {
            "success": True,
-            "analysis": analysis or "There was a problem with the request and the image could not be analyzed."
+            "analysis": analysis or "There was a problem with the request and the image could not be analyzed.",
        }
-        
+
        debug_call_data["success"] = True
        debug_call_data["analysis_length"] = analysis_length
-        
+
        # Log debug information
        _debug.log_call("vision_analyze_tool", debug_call_data)
        _debug.save()
-        
+
        return json.dumps(result, indent=2, ensure_ascii=False)
-        
+
    except Exception as e:
        error_msg = f"Error analyzing image: {str(e)}"
        logger.error("%s", error_msg, exc_info=True)
-        
+
        # Prepare error response
        result = {
            "success": False,
-            "analysis": "There was a problem with the request and the image could not be analyzed."
+            "analysis": "There was a problem with the request and the image could not be analyzed.",
        }
-        
+
        debug_call_data["error"] = error_msg
        _debug.log_call("vision_analyze_tool", debug_call_data)
        _debug.save()
-        
+
        return json.dumps(result, indent=2, ensure_ascii=False)
-    
+
    finally:
        # Clean up temporary image file (but NOT local/cached files)
        if should_cleanup and temp_image_path and temp_image_path.exists():
@ -377,9 +375,7 @@ async def vision_analyze_tool(
                temp_image_path.unlink()
                logger.debug("Cleaned up temporary image file")
            except Exception as cleanup_error:
-                logger.warning(
-                    "Could not delete temporary file: %s", cleanup_error, exc_info=True
-                )
+                logger.warning("Could not delete temporary file: %s", cleanup_error, exc_info=True)


 def check_vision_requirements() -> bool:
@ -387,10 +383,10 @@ def check_vision_requirements() -> bool:
    return _aux_async_client is not None


-def get_debug_session_info() -> Dict[str, Any]:
+def get_debug_session_info() -> dict[str, Any]:
    """
    Get information about the current debug session.
-    
+
    Returns:
        Dict[str, Any]: Dictionary containing debug session information
    """
@ -403,27 +399,27 @@ if __name__ == "__main__":
    """
    print("👁️ Vision Tools Module")
    print("=" * 40)
-    
+
    # Check if vision model is available
    api_available = check_vision_requirements()
-    
+
    if not api_available:
        print("❌ No auxiliary vision model available")
        print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
        exit(1)
    else:
        print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
-    
+
    print("🛠️ Vision tools ready for use!")
    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
-    
+
    # Show debug mode status
    if _debug.active:
        print(f"🐛 Debug mode ENABLED - Session ID: {_debug.session_id}")
        print(f"   Debug logs will be saved to: ./logs/vision_tools_debug_{_debug.session_id}.json")
    else:
        print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
-    
+
    print("\nBasic usage:")
    print("  from vision_tools import vision_analyze_tool")
    print("  import asyncio")
@ -435,14 +431,14 @@ if __name__ == "__main__":
    print("      )")
    print("      print(result)")
    print("  asyncio.run(main())")
-    
+
    print("\nExample prompts:")
    print("  - 'What architectural style is this building?'")
    print("  - 'Describe the emotions and mood in this image'")
    print("  - 'What text can you read in this image?'")
    print("  - 'Identify any safety hazards visible'")
    print("  - 'What products or brands are shown?'")
-    
+
    print("\nDebug mode:")
    print("  # Enable debug logging")
    print("  export VISION_TOOLS_DEBUG=true")
@ -461,30 +457,24 @@ VISION_ANALYZE_SCHEMA = {
    "parameters": {
        "type": "object",
        "properties": {
-            "image_url": {
-                "type": "string",
-                "description": "Image URL (http/https) or local file path to analyze."
-            },
+            "image_url": {"type": "string", "description": "Image URL (http/https) or local file path to analyze."},
            "question": {
                "type": "string",
-                "description": "Your specific question or request about the image to resolve. The AI will automatically provide a complete image description AND answer your specific question."
-            }
+                "description": "Your specific question or request about the image to resolve. The AI will automatically provide a complete image description AND answer your specific question.",
+            },
        },
-        "required": ["image_url", "question"]
-    }
+        "required": ["image_url", "question"],
+    },
 }


-def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
+def _handle_vision_analyze(args: dict[str, Any], **kw: Any) -> Awaitable[str]:
    image_url = args.get("image_url", "")
    question = args.get("question", "")
    full_prompt = (
-        "Fully describe and explain everything about this image, then answer the "
-        f"following question:\n\n{question}"
+        f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
    )
-    model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
-             or DEFAULT_VISION_MODEL
-             or "google/gemini-3-flash-preview")
+    model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
    return vision_analyze_tool(image_url, full_prompt, model)