fix(vision): reject oversized images before API call, handle file:// URIs, improve 400 errors

Three fixes for vision_analyze returning cryptic 400 "Invalid request data": 1. Pre-flight base64 size check — base64 inflates data ~33%, so a 3.8 MB file exceeds the 5 MB API limit. Reject early with a clear message instead of letting the provider return a generic 400. 2. Handle file:// URIs — strip the scheme and resolve as a local path. Previously file:///path/to/image.png fell through to the "invalid image source" error since it matched neither is_file() nor http(s). 3. Separate invalid_request errors from "does not support vision" errors so the user gets actionable guidance (resize/compress/retry) instead of a misleading "model does not support vision" message. Closes #6677
2026-06-10 08:32:09 +00:00 · 2026-04-10 15:11:14 +10:00 · 2026-04-10 15:11:14 +10:00 · 4e56eacdce
commit 4e56eacdce
parent 1909877e6e
2 changed files with 154 additions and 4 deletions
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@ -533,6 +533,133 @@ class TestTildeExpansion:
        assert data["success"] is False


+# ---------------------------------------------------------------------------
+# file:// URI support
+# ---------------------------------------------------------------------------
+
+
+class TestFileUriSupport:
+    """Verify that file:// URIs resolve as local file paths."""
+
+    @pytest.mark.asyncio
+    async def test_file_uri_resolved_as_local_path(self, tmp_path):
+        """file:///absolute/path should be treated as a local file."""
+        img = tmp_path / "photo.png"
+        img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
+
+        mock_response = MagicMock()
+        mock_choice = MagicMock()
+        mock_choice.message.content = "A test image"
+        mock_response.choices = [mock_choice]
+
+        with (
+            patch(
+                "tools.vision_tools._image_to_base64_data_url",
+                return_value="data:image/png;base64,abc",
+            ),
+            patch(
+                "tools.vision_tools.async_call_llm",
+                new_callable=AsyncMock,
+                return_value=mock_response,
+            ),
+        ):
+            result = await vision_analyze_tool(
+                f"file://{img}", "describe this", "test/model"
+            )
+            data = json.loads(result)
+            assert data["success"] is True
+
+    @pytest.mark.asyncio
+    async def test_file_uri_nonexistent_gives_error(self, tmp_path):
+        """file:// pointing to a missing file should fail gracefully."""
+        result = await vision_analyze_tool(
+            f"file://{tmp_path}/nonexistent.png", "describe this", "test/model"
+        )
+        data = json.loads(result)
+        assert data["success"] is False
+
+
+# ---------------------------------------------------------------------------
+# Base64 size pre-flight check
+# ---------------------------------------------------------------------------
+
+
+class TestBase64SizeLimit:
+    """Verify that oversized images are rejected before hitting the API."""
+
+    @pytest.mark.asyncio
+    async def test_oversized_image_rejected_before_api_call(self, tmp_path):
+        """Images exceeding 5 MB base64 should fail with a clear size error."""
+        img = tmp_path / "huge.png"
+        img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (4 * 1024 * 1024))
+
+        with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
+            result = json.loads(await vision_analyze_tool(str(img), "describe this"))
+
+        assert result["success"] is False
+        assert "too large" in result["error"].lower()
+        mock_llm.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_small_image_not_rejected(self, tmp_path):
+        """Images well under the limit should pass the size check."""
+        img = tmp_path / "small.png"
+        img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 64)
+
+        mock_response = MagicMock()
+        mock_choice = MagicMock()
+        mock_choice.message.content = "Small image"
+        mock_response.choices = [mock_choice]
+
+        with (
+            patch(
+                "tools.vision_tools.async_call_llm",
+                new_callable=AsyncMock,
+                return_value=mock_response,
+            ),
+        ):
+            result = json.loads(await vision_analyze_tool(str(img), "describe this", "test/model"))
+
+        assert result["success"] is True
+
+
+# ---------------------------------------------------------------------------
+# Error classification for 400 responses
+# ---------------------------------------------------------------------------
+
+
+class TestErrorClassification:
+    """Verify that API 400 errors produce actionable guidance."""
+
+    @pytest.mark.asyncio
+    async def test_invalid_request_error_gives_image_guidance(self, tmp_path):
+        """An invalid_request_error from the API should mention image size/format."""
+        img = tmp_path / "test.png"
+        img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
+
+        api_error = Exception(
+            "Error code: 400 - {'type': 'error', 'error': "
+            "{'type': 'invalid_request_error', 'message': 'Invalid request data'}}"
+        )
+
+        with (
+            patch(
+                "tools.vision_tools._image_to_base64_data_url",
+                return_value="data:image/png;base64,abc",
+            ),
+            patch(
+                "tools.vision_tools.async_call_llm",
+                new_callable=AsyncMock,
+                side_effect=api_error,
+            ),
+        ):
+            result = json.loads(await vision_analyze_tool(str(img), "describe", "test/model"))
+
+        assert result["success"] is False
+        assert "rejected the image" in result["analysis"].lower()
+        assert "smaller" in result["analysis"].lower()
+
+
 class TestVisionRegistration:
    def test_vision_analyze_registered(self):
        from tools.registry import registry
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@ -342,7 +342,11 @@ async def vision_analyze_tool(
        logger.info("User prompt: %s", user_prompt[:100])
        
        # Determine if this is a local file path or a remote URL
-        local_path = Path(os.path.expanduser(image_url))
+        # Strip file:// scheme so file URIs resolve as local paths.
+        resolved_url = image_url
+        if resolved_url.startswith("file://"):
+            resolved_url = resolved_url[len("file://"):]
+        local_path = Path(os.path.expanduser(resolved_url))
        if local_path.is_file():
            # Local file path (e.g. from platform image cache) -- skip download
            logger.info("Using local image file: %s", image_url)
@ -378,7 +382,19 @@ async def vision_analyze_tool(
        # Calculate size in KB for better readability
        data_size_kb = len(image_data_url) / 1024
        logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
-        
+
+        # Pre-flight size check: most vision APIs cap base64 payloads at 5 MB.
+        # Reject early with a clear message instead of a cryptic provider 400.
+        _MAX_BASE64_BYTES = 5 * 1024 * 1024  # 5 MB
+        # The data URL includes the header (e.g. "data:image/jpeg;base64,") which
+        # is negligible, but measure the full string to be safe.
+        if len(image_data_url) > _MAX_BASE64_BYTES:
+            raise ValueError(
+                f"Image too large for vision API: base64 payload is "
+                f"{len(image_data_url) / (1024 * 1024):.1f} MB (limit 5 MB). "
+                f"Resize or compress the image and try again."
+            )
+
        debug_call_data["image_size_bytes"] = image_size_bytes
        
        # Use the prompt as provided (model_tools.py now handles full description formatting)
@ -471,14 +487,21 @@ async def vision_analyze_tool(
                f"API provider account and try again. Error: {e}"
            )
        elif any(hint in err_str for hint in (
-            "does not support", "not support image", "invalid_request",
-            "content_policy", "image_url", "multimodal",
+            "does not support", "not support image",
+            "content_policy", "multimodal",
            "unrecognized request argument", "image input",
        )):
            analysis = (
                f"{model} does not support vision or our request was not "
                f"accepted by the server. Error: {e}"
            )
+        elif "invalid_request" in err_str or "image_url" in err_str:
+            analysis = (
+                "The vision API rejected the image. This can happen when the "
+                "image is too large, in an unsupported format, or corrupted. "
+                "Try a smaller JPEG/PNG (under 3.5 MB) and retry. "
+                f"Error: {e}"
+            )
        else:
            analysis = (
                "There was a problem with the request and the image could not "