diff --git a/tests/tools/test_vision_tools.py b/tests/tools/test_vision_tools.py index 6612f0e89..b7b052baa 100644 --- a/tests/tools/test_vision_tools.py +++ b/tests/tools/test_vision_tools.py @@ -533,6 +533,133 @@ class TestTildeExpansion: assert data["success"] is False +# --------------------------------------------------------------------------- +# file:// URI support +# --------------------------------------------------------------------------- + + +class TestFileUriSupport: + """Verify that file:// URIs resolve as local file paths.""" + + @pytest.mark.asyncio + async def test_file_uri_resolved_as_local_path(self, tmp_path): + """file:///absolute/path should be treated as a local file.""" + img = tmp_path / "photo.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8) + + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "A test image" + mock_response.choices = [mock_choice] + + with ( + patch( + "tools.vision_tools._image_to_base64_data_url", + return_value="data:image/png;base64,abc", + ), + patch( + "tools.vision_tools.async_call_llm", + new_callable=AsyncMock, + return_value=mock_response, + ), + ): + result = await vision_analyze_tool( + f"file://{img}", "describe this", "test/model" + ) + data = json.loads(result) + assert data["success"] is True + + @pytest.mark.asyncio + async def test_file_uri_nonexistent_gives_error(self, tmp_path): + """file:// pointing to a missing file should fail gracefully.""" + result = await vision_analyze_tool( + f"file://{tmp_path}/nonexistent.png", "describe this", "test/model" + ) + data = json.loads(result) + assert data["success"] is False + + +# --------------------------------------------------------------------------- +# Base64 size pre-flight check +# --------------------------------------------------------------------------- + + +class TestBase64SizeLimit: + """Verify that oversized images are rejected before hitting the API.""" + + @pytest.mark.asyncio + async def test_oversized_image_rejected_before_api_call(self, tmp_path): + """Images exceeding 5 MB base64 should fail with a clear size error.""" + img = tmp_path / "huge.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (4 * 1024 * 1024)) + + with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm: + result = json.loads(await vision_analyze_tool(str(img), "describe this")) + + assert result["success"] is False + assert "too large" in result["error"].lower() + mock_llm.assert_not_awaited() + + @pytest.mark.asyncio + async def test_small_image_not_rejected(self, tmp_path): + """Images well under the limit should pass the size check.""" + img = tmp_path / "small.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 64) + + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "Small image" + mock_response.choices = [mock_choice] + + with ( + patch( + "tools.vision_tools.async_call_llm", + new_callable=AsyncMock, + return_value=mock_response, + ), + ): + result = json.loads(await vision_analyze_tool(str(img), "describe this", "test/model")) + + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Error classification for 400 responses +# --------------------------------------------------------------------------- + + +class TestErrorClassification: + """Verify that API 400 errors produce actionable guidance.""" + + @pytest.mark.asyncio + async def test_invalid_request_error_gives_image_guidance(self, tmp_path): + """An invalid_request_error from the API should mention image size/format.""" + img = tmp_path / "test.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8) + + api_error = Exception( + "Error code: 400 - {'type': 'error', 'error': " + "{'type': 'invalid_request_error', 'message': 'Invalid request data'}}" + ) + + with ( + patch( + "tools.vision_tools._image_to_base64_data_url", + return_value="data:image/png;base64,abc", + ), + patch( + "tools.vision_tools.async_call_llm", + new_callable=AsyncMock, + side_effect=api_error, + ), + ): + result = json.loads(await vision_analyze_tool(str(img), "describe", "test/model")) + + assert result["success"] is False + assert "rejected the image" in result["analysis"].lower() + assert "smaller" in result["analysis"].lower() + + class TestVisionRegistration: def test_vision_analyze_registered(self): from tools.registry import registry diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 4ae2f1164..df8fa68c8 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -342,7 +342,11 @@ async def vision_analyze_tool( logger.info("User prompt: %s", user_prompt[:100]) # Determine if this is a local file path or a remote URL - local_path = Path(os.path.expanduser(image_url)) + # Strip file:// scheme so file URIs resolve as local paths. + resolved_url = image_url + if resolved_url.startswith("file://"): + resolved_url = resolved_url[len("file://"):] + local_path = Path(os.path.expanduser(resolved_url)) if local_path.is_file(): # Local file path (e.g. from platform image cache) -- skip download logger.info("Using local image file: %s", image_url) @@ -378,7 +382,19 @@ async def vision_analyze_tool( # Calculate size in KB for better readability data_size_kb = len(image_data_url) / 1024 logger.info("Image converted to base64 (%.1f KB)", data_size_kb) - + + # Pre-flight size check: most vision APIs cap base64 payloads at 5 MB. + # Reject early with a clear message instead of a cryptic provider 400. + _MAX_BASE64_BYTES = 5 * 1024 * 1024 # 5 MB + # The data URL includes the header (e.g. "data:image/jpeg;base64,") which + # is negligible, but measure the full string to be safe. + if len(image_data_url) > _MAX_BASE64_BYTES: + raise ValueError( + f"Image too large for vision API: base64 payload is " + f"{len(image_data_url) / (1024 * 1024):.1f} MB (limit 5 MB). " + f"Resize or compress the image and try again." + ) + debug_call_data["image_size_bytes"] = image_size_bytes # Use the prompt as provided (model_tools.py now handles full description formatting) @@ -471,14 +487,21 @@ async def vision_analyze_tool( f"API provider account and try again. Error: {e}" ) elif any(hint in err_str for hint in ( - "does not support", "not support image", "invalid_request", - "content_policy", "image_url", "multimodal", + "does not support", "not support image", + "content_policy", "multimodal", "unrecognized request argument", "image input", )): analysis = ( f"{model} does not support vision or our request was not " f"accepted by the server. Error: {e}" ) + elif "invalid_request" in err_str or "image_url" in err_str: + analysis = ( + "The vision API rejected the image. This can happen when the " + "image is too large, in an unsupported format, or corrupted. " + "Try a smaller JPEG/PNG (under 3.5 MB) and retry. " + f"Error: {e}" + ) else: analysis = ( "There was a problem with the request and the image could not "