fix(vision): reject oversized images before API call, handle file:// URIs, improve 400 errors

Three fixes for vision_analyze returning a cryptic 400 "Invalid request data":

1. Pre-flight base64 size check — base64 encoding inflates data by ~33%, so a file as small as ~3.8 MB exceeds the 5 MB API limit once encoded. Reject early with a clear message instead of letting the provider return a generic 400.

2. Handle file:// URIs — strip the scheme and resolve the remainder as a local path. Previously, file:///path/to/image.png fell through to the "invalid image source" error because it matched neither the is_file() check nor the http(s) check.

3. Separate invalid_request errors from "does not support vision" errors so the user gets actionable guidance (resize/compress/retry) instead of a misleading "model does not support vision" message.

Closes #6677
2026-04-25 00:51:20 +00:00 · 2026-04-10 15:11:14 +10:00 · 2026-04-10 15:11:14 +10:00 · 4e56eacdce
commit 4e56eacdce
parent 1909877e6e
2 changed files with 154 additions and 4 deletions
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -342,7 +342,11 @@ async def vision_analyze_tool(
        logger.info("User prompt: %s", user_prompt[:100])
        
        # Determine if this is a local file path or a remote URL
-        local_path = Path(os.path.expanduser(image_url))
+        # Strip file:// scheme so file URIs resolve as local paths.
+        resolved_url = image_url
+        if resolved_url.startswith("file://"):
+            resolved_url = resolved_url[len("file://"):]
+        local_path = Path(os.path.expanduser(resolved_url))
        if local_path.is_file():
            # Local file path (e.g. from platform image cache) -- skip download
            logger.info("Using local image file: %s", image_url)
@@ -378,7 +382,19 @@ async def vision_analyze_tool(
        # Calculate size in KB for better readability
        data_size_kb = len(image_data_url) / 1024
        logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
-        
+
+        # Pre-flight size check: most vision APIs cap base64 payloads at 5 MB.
+        # Reject early with a clear message instead of a cryptic provider 400.
+        _MAX_BASE64_BYTES = 5 * 1024 * 1024  # 5 MB
+        # The data URL includes the header (e.g. "data:image/jpeg;base64,") which
+        # is negligible, but measure the full string to be safe.
+        if len(image_data_url) > _MAX_BASE64_BYTES:
+            raise ValueError(
+                f"Image too large for vision API: base64 payload is "
+                f"{len(image_data_url) / (1024 * 1024):.1f} MB (limit 5 MB). "
+                f"Resize or compress the image and try again."
+            )
+
        debug_call_data["image_size_bytes"] = image_size_bytes
        
        # Use the prompt as provided (model_tools.py now handles full description formatting)
@@ -471,14 +487,21 @@ async def vision_analyze_tool(
                f"API provider account and try again. Error: {e}"
            )
        elif any(hint in err_str for hint in (
-            "does not support", "not support image", "invalid_request",
-            "content_policy", "image_url", "multimodal",
+            "does not support", "not support image",
+            "content_policy", "multimodal",
            "unrecognized request argument", "image input",
        )):
            analysis = (
                f"{model} does not support vision or our request was not "
                f"accepted by the server. Error: {e}"
            )
+        elif "invalid_request" in err_str or "image_url" in err_str:
+            analysis = (
+                "The vision API rejected the image. This can happen when the "
+                "image is too large, in an unsupported format, or corrupted. "
+                "Try a smaller JPEG/PNG (under 3.5 MB) and retry. "
+                f"Error: {e}"
+            )
        else:
            analysis = (
                "There was a problem with the request and the image could not "