feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

This commit is contained in:
Brooklyn Nicholson 2026-03-09 20:36:51 -05:00
parent 172a38c344
commit f4d7e6a29e
111 changed files with 11655 additions and 10200 deletions

View file

@ -19,7 +19,7 @@ Features:
Usage:
from vision_tools import vision_analyze_tool
import asyncio
# Analyze an image
result = await vision_analyze_tool(
image_url="https://example.com/image.jpg",
@ -33,11 +33,14 @@ import json
import logging
import os
import uuid
from collections.abc import Awaitable
from pathlib import Path
from typing import Any, Awaitable, Dict, Optional
from typing import Any
from urllib.parse import urlparse
import httpx
from openai import AsyncOpenAI
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession
@ -55,7 +58,7 @@ if _aux_sync_client is not None:
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
_aux_async_client = AsyncOpenAI(**_async_kwargs)
@ -65,10 +68,10 @@ _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
def _validate_image_url(url: str) -> bool:
"""
Basic validation of image URL format.
Args:
url (str): The URL to validate
Returns:
bool: True if URL appears to be valid, False otherwise
"""
@ -91,23 +94,22 @@ def _validate_image_url(url: str) -> bool:
async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
"""
Download an image from a URL to a local destination (async) with retry logic.
Args:
image_url (str): The URL of the image to download
destination (Path): The path where the image should be saved
max_retries (int): Maximum number of retry attempts (default: 3)
Returns:
Path: The path to the downloaded image
Raises:
Exception: If download fails after all retries
"""
import asyncio
# Create parent directories if they don't exist
destination.parent.mkdir(parents=True, exist_ok=True)
last_error = None
for attempt in range(max_retries):
try:
@ -122,10 +124,10 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
},
)
response.raise_for_status()
# Save the image content
destination.write_bytes(response.content)
return destination
except Exception as e:
last_error = e
@ -141,56 +143,56 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
str(e)[:100],
exc_info=True,
)
raise last_error
def _determine_mime_type(image_path: Path) -> str:
"""
Determine the MIME type of an image based on its file extension.
Args:
image_path (Path): Path to the image file
Returns:
str: The MIME type (defaults to image/jpeg if unknown)
"""
extension = image_path.suffix.lower()
mime_types = {
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.bmp': 'image/bmp',
'.webp': 'image/webp',
'.svg': 'image/svg+xml'
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".bmp": "image/bmp",
".webp": "image/webp",
".svg": "image/svg+xml",
}
return mime_types.get(extension, 'image/jpeg')
return mime_types.get(extension, "image/jpeg")
def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None) -> str:
def _image_to_base64_data_url(image_path: Path, mime_type: str | None = None) -> str:
"""
Convert an image file to a base64-encoded data URL.
Args:
image_path (Path): Path to the image file
mime_type (Optional[str]): MIME type of the image (auto-detected if None)
Returns:
str: Base64-encoded data URL (e.g., "data:image/jpeg;base64,...")
"""
# Read the image as bytes
data = image_path.read_bytes()
# Encode to base64
encoded = base64.b64encode(data).decode("ascii")
# Determine MIME type
mime = mime_type or _determine_mime_type(image_path)
# Create data URL
data_url = f"data:{mime};base64,{encoded}"
return data_url
@ -201,31 +203,31 @@ async def vision_analyze_tool(
) -> str:
"""
Analyze an image from a URL or local file path using vision AI.
This tool accepts either an HTTP/HTTPS URL or a local file path. For URLs,
it downloads the image first. In both cases, the image is converted to base64
and processed using Gemini 3 Flash Preview via OpenRouter API.
The user_prompt parameter is expected to be pre-formatted by the calling
function (typically model_tools.py) to include both full description
requests and specific questions.
Args:
image_url (str): The URL or local file path of the image to analyze.
Accepts http://, https:// URLs or absolute/relative file paths.
user_prompt (str): The pre-formatted prompt for the vision model
model (str): The vision model to use (default: google/gemini-3-flash-preview)
Returns:
str: JSON string containing the analysis results with the following structure:
{
"success": bool,
"analysis": str (defaults to error message if None)
}
Raises:
Exception: If download fails, analysis fails, or API key is not set
Note:
- For URLs, temporary images are stored in ./temp_vision_images/ and cleaned up
- For local file paths, the file is used directly and NOT deleted
@ -235,36 +237,41 @@ async def vision_analyze_tool(
"parameters": {
"image_url": image_url,
"user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt,
"model": model
"model": model,
},
"error": None,
"success": False,
"analysis_length": 0,
"model_used": model,
"image_size_bytes": 0
"image_size_bytes": 0,
}
temp_image_path = None
# Track whether we should clean up the file after processing.
# Local files (e.g. from the image cache) should NOT be deleted.
should_cleanup = True
try:
from tools.interrupt import is_interrupted
if is_interrupted():
return json.dumps({"success": False, "error": "Interrupted"})
logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100])
# Check auxiliary vision client availability
if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
return json.dumps(
{
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.",
},
indent=2,
ensure_ascii=False,
)
# Determine if this is a local file path or a remote URL
local_path = Path(image_url)
if local_path.is_file():
@ -280,50 +287,41 @@ async def vision_analyze_tool(
await _download_image(image_url, temp_image_path)
should_cleanup = True
else:
raise ValueError(
"Invalid image source. Provide an HTTP/HTTPS URL or a valid local file path."
)
raise ValueError("Invalid image source. Provide an HTTP/HTTPS URL or a valid local file path.")
# Get image file size for logging
image_size_bytes = temp_image_path.stat().st_size
image_size_kb = image_size_bytes / 1024
logger.info("Image ready (%.1f KB)", image_size_kb)
# Convert image to base64 data URL
logger.info("Converting image to base64...")
image_data_url = _image_to_base64_data_url(temp_image_path)
# Calculate size in KB for better readability
data_size_kb = len(image_data_url) / 1024
logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
debug_call_data["image_size_bytes"] = image_size_bytes
# Use the prompt as provided (model_tools.py now handles full description formatting)
comprehensive_prompt = user_prompt
# Prepare the message with base64-encoded image
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": comprehensive_prompt
},
{
"type": "image_url",
"image_url": {
"url": image_data_url
}
}
]
{"type": "text", "text": comprehensive_prompt},
{"type": "image_url", "image_url": {"url": image_data_url}},
],
}
]
logger.info("Processing image with %s...", model)
# Call the vision API
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
from agent.auxiliary_client import auxiliary_max_tokens_param, get_auxiliary_extra_body
_extra = get_auxiliary_extra_body()
response = await _aux_async_client.chat.completions.create(
model=model,
@ -332,44 +330,44 @@ async def vision_analyze_tool(
**auxiliary_max_tokens_param(2000),
**({} if not _extra else {"extra_body": _extra}),
)
# Extract the analysis
analysis = response.choices[0].message.content.strip()
analysis_length = len(analysis)
logger.info("Image analysis completed (%s characters)", analysis_length)
# Prepare successful response
result = {
"success": True,
"analysis": analysis or "There was a problem with the request and the image could not be analyzed."
"analysis": analysis or "There was a problem with the request and the image could not be analyzed.",
}
debug_call_data["success"] = True
debug_call_data["analysis_length"] = analysis_length
# Log debug information
_debug.log_call("vision_analyze_tool", debug_call_data)
_debug.save()
return json.dumps(result, indent=2, ensure_ascii=False)
except Exception as e:
error_msg = f"Error analyzing image: {str(e)}"
logger.error("%s", error_msg, exc_info=True)
# Prepare error response
result = {
"success": False,
"analysis": "There was a problem with the request and the image could not be analyzed."
"analysis": "There was a problem with the request and the image could not be analyzed.",
}
debug_call_data["error"] = error_msg
_debug.log_call("vision_analyze_tool", debug_call_data)
_debug.save()
return json.dumps(result, indent=2, ensure_ascii=False)
finally:
# Clean up temporary image file (but NOT local/cached files)
if should_cleanup and temp_image_path and temp_image_path.exists():
@ -377,9 +375,7 @@ async def vision_analyze_tool(
temp_image_path.unlink()
logger.debug("Cleaned up temporary image file")
except Exception as cleanup_error:
logger.warning(
"Could not delete temporary file: %s", cleanup_error, exc_info=True
)
logger.warning("Could not delete temporary file: %s", cleanup_error, exc_info=True)
def check_vision_requirements() -> bool:
@ -387,10 +383,10 @@ def check_vision_requirements() -> bool:
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
def get_debug_session_info() -> dict[str, Any]:
"""
Get information about the current debug session.
Returns:
Dict[str, Any]: Dictionary containing debug session information
"""
@ -403,27 +399,27 @@ if __name__ == "__main__":
"""
print("👁️ Vision Tools Module")
print("=" * 40)
# Check if vision model is available
api_available = check_vision_requirements()
if not api_available:
print("❌ No auxiliary vision model available")
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
exit(1)
else:
print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
# Show debug mode status
if _debug.active:
print(f"🐛 Debug mode ENABLED - Session ID: {_debug.session_id}")
print(f" Debug logs will be saved to: ./logs/vision_tools_debug_{_debug.session_id}.json")
else:
print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
print("\nBasic usage:")
print(" from vision_tools import vision_analyze_tool")
print(" import asyncio")
@ -435,14 +431,14 @@ if __name__ == "__main__":
print(" )")
print(" print(result)")
print(" asyncio.run(main())")
print("\nExample prompts:")
print(" - 'What architectural style is this building?'")
print(" - 'Describe the emotions and mood in this image'")
print(" - 'What text can you read in this image?'")
print(" - 'Identify any safety hazards visible'")
print(" - 'What products or brands are shown?'")
print("\nDebug mode:")
print(" # Enable debug logging")
print(" export VISION_TOOLS_DEBUG=true")
@ -461,30 +457,24 @@ VISION_ANALYZE_SCHEMA = {
"parameters": {
"type": "object",
"properties": {
"image_url": {
"type": "string",
"description": "Image URL (http/https) or local file path to analyze."
},
"image_url": {"type": "string", "description": "Image URL (http/https) or local file path to analyze."},
"question": {
"type": "string",
"description": "Your specific question or request about the image to resolve. The AI will automatically provide a complete image description AND answer your specific question."
}
"description": "Your specific question or request about the image to resolve. The AI will automatically provide a complete image description AND answer your specific question.",
},
},
"required": ["image_url", "question"]
}
"required": ["image_url", "question"],
},
}
def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
def _handle_vision_analyze(args: dict[str, Any], **kw: Any) -> Awaitable[str]:
image_url = args.get("image_url", "")
question = args.get("question", "")
full_prompt = (
"Fully describe and explain everything about this image, then answer the "
f"following question:\n\n{question}"
f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
)
model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
or DEFAULT_VISION_MODEL
or "google/gemini-3-flash-preview")
model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
return vision_analyze_tool(image_url, full_prompt, model)