Enhance image handling and analysis capabilities across platforms

- Updated the vision tool to accept both HTTP/HTTPS URLs and local file paths for image analysis.
- Implemented caching of user-uploaded images in local directories to ensure reliable access for the vision tool, addressing issues with ephemeral URLs.
- Enhanced platform adapters (Discord, Telegram, WhatsApp) to download and cache images, allowing for immediate analysis and enriched message context.
- Added a new method to auto-analyze images attached by users, enriching the conversation with detailed descriptions.
- Improved documentation for image handling processes and updated related functions for clarity and efficiency.
This commit is contained in:
teknium1 2026-02-15 16:10:50 -08:00
parent eb49936a60
commit 5404a8fcd8
7 changed files with 303 additions and 35 deletions

View file

@ -6,10 +6,13 @@ and implement the required methods.
"""
import asyncio
import os
import re
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
from enum import Enum
@ -20,6 +23,91 @@ from gateway.config import Platform, PlatformConfig
from gateway.session import SessionSource
# ---------------------------------------------------------------------------
# Image cache utilities
#
# When users send images on messaging platforms, we download them to a local
# cache directory so they can be analyzed by the vision tool (which accepts
# local file paths). This avoids issues with ephemeral platform URLs
# (e.g. Telegram file URLs expire after ~1 hour).
# ---------------------------------------------------------------------------
# Default location: ~/.hermes/image_cache/
IMAGE_CACHE_DIR = Path(os.path.expanduser("~/.hermes/image_cache"))
def get_image_cache_dir() -> Path:
"""Return the image cache directory, creating it if it doesn't exist."""
IMAGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
return IMAGE_CACHE_DIR
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
"""
Save raw image bytes to the cache and return the absolute file path.
Args:
data: Raw image bytes.
ext: File extension including the dot (e.g. ".jpg", ".png").
Returns:
Absolute path to the cached image file as a string.
"""
cache_dir = get_image_cache_dir()
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
filepath = cache_dir / filename
filepath.write_bytes(data)
return str(filepath)
async def cache_image_from_url(url: str, ext: str = ".jpg") -> str:
"""
Download an image from a URL and save it to the local cache.
Uses httpx for async download with a reasonable timeout.
Args:
url: The HTTP/HTTPS URL to download from.
ext: File extension including the dot (e.g. ".jpg", ".png").
Returns:
Absolute path to the cached image file as a string.
"""
import httpx
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
response = await client.get(
url,
headers={
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
"Accept": "image/*,*/*;q=0.8",
},
)
response.raise_for_status()
return cache_image_from_bytes(response.content, ext)
def cleanup_image_cache(max_age_hours: int = 24) -> int:
"""
Delete cached images older than *max_age_hours*.
Returns the number of files removed.
"""
import time
cache_dir = get_image_cache_dir()
cutoff = time.time() - (max_age_hours * 3600)
removed = 0
for f in cache_dir.iterdir():
if f.is_file() and f.stat().st_mtime < cutoff:
try:
f.unlink()
removed += 1
except OSError:
pass
return removed
class MessageType(Enum):
"""Types of incoming messages."""
TEXT = "text"