fix(vision): reject oversized images before API call, handle file:// URIs, improve 400 errors

Three fixes for vision_analyze returning cryptic 400 "Invalid request data":

1. Pre-flight base64 size check — base64 inflates data ~33%, so a 3.8 MB
   file exceeds the 5 MB API limit. Reject early with a clear message
   instead of letting the provider return a generic 400.

2. Handle file:// URIs — strip the scheme and resolve as a local path.
   Previously file:///path/to/image.png fell through to the "invalid
   image source" error since it matched neither is_file() nor http(s).

3. Separate invalid_request errors from "does not support vision" errors
   so the user gets actionable guidance (resize/compress/retry) instead
   of a misleading "model does not support vision" message.

Closes #6677
This commit is contained in:
Tranquil-Flow 2026-04-10 15:11:14 +10:00 committed by Teknium
parent 1909877e6e
commit 4e56eacdce
2 changed files with 154 additions and 4 deletions

View file

@ -533,6 +533,133 @@ class TestTildeExpansion:
assert data["success"] is False
# ---------------------------------------------------------------------------
# file:// URI support
# ---------------------------------------------------------------------------
class TestFileUriSupport:
"""Verify that file:// URIs resolve as local file paths."""
@pytest.mark.asyncio
async def test_file_uri_resolved_as_local_path(self, tmp_path):
"""file:///absolute/path should be treated as a local file."""
img = tmp_path / "photo.png"
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
mock_response = MagicMock()
mock_choice = MagicMock()
mock_choice.message.content = "A test image"
mock_response.choices = [mock_choice]
with (
patch(
"tools.vision_tools._image_to_base64_data_url",
return_value="data:image/png;base64,abc",
),
patch(
"tools.vision_tools.async_call_llm",
new_callable=AsyncMock,
return_value=mock_response,
),
):
result = await vision_analyze_tool(
f"file://{img}", "describe this", "test/model"
)
data = json.loads(result)
assert data["success"] is True
@pytest.mark.asyncio
async def test_file_uri_nonexistent_gives_error(self, tmp_path):
"""file:// pointing to a missing file should fail gracefully."""
result = await vision_analyze_tool(
f"file://{tmp_path}/nonexistent.png", "describe this", "test/model"
)
data = json.loads(result)
assert data["success"] is False
# ---------------------------------------------------------------------------
# Base64 size pre-flight check
# ---------------------------------------------------------------------------
class TestBase64SizeLimit:
"""Verify that oversized images are rejected before hitting the API."""
@pytest.mark.asyncio
async def test_oversized_image_rejected_before_api_call(self, tmp_path):
"""Images exceeding 5 MB base64 should fail with a clear size error."""
img = tmp_path / "huge.png"
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (4 * 1024 * 1024))
with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
result = json.loads(await vision_analyze_tool(str(img), "describe this"))
assert result["success"] is False
assert "too large" in result["error"].lower()
mock_llm.assert_not_awaited()
@pytest.mark.asyncio
async def test_small_image_not_rejected(self, tmp_path):
"""Images well under the limit should pass the size check."""
img = tmp_path / "small.png"
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 64)
mock_response = MagicMock()
mock_choice = MagicMock()
mock_choice.message.content = "Small image"
mock_response.choices = [mock_choice]
with (
patch(
"tools.vision_tools.async_call_llm",
new_callable=AsyncMock,
return_value=mock_response,
),
):
result = json.loads(await vision_analyze_tool(str(img), "describe this", "test/model"))
assert result["success"] is True
# ---------------------------------------------------------------------------
# Error classification for 400 responses
# ---------------------------------------------------------------------------
class TestErrorClassification:
"""Verify that API 400 errors produce actionable guidance."""
@pytest.mark.asyncio
async def test_invalid_request_error_gives_image_guidance(self, tmp_path):
"""An invalid_request_error from the API should mention image size/format."""
img = tmp_path / "test.png"
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
api_error = Exception(
"Error code: 400 - {'type': 'error', 'error': "
"{'type': 'invalid_request_error', 'message': 'Invalid request data'}}"
)
with (
patch(
"tools.vision_tools._image_to_base64_data_url",
return_value="data:image/png;base64,abc",
),
patch(
"tools.vision_tools.async_call_llm",
new_callable=AsyncMock,
side_effect=api_error,
),
):
result = json.loads(await vision_analyze_tool(str(img), "describe", "test/model"))
assert result["success"] is False
assert "rejected the image" in result["analysis"].lower()
assert "smaller" in result["analysis"].lower()
class TestVisionRegistration:
def test_vision_analyze_registered(self):
from tools.registry import registry

View file

@ -342,7 +342,11 @@ async def vision_analyze_tool(
logger.info("User prompt: %s", user_prompt[:100])
# Determine if this is a local file path or a remote URL
local_path = Path(os.path.expanduser(image_url))
# Strip file:// scheme so file URIs resolve as local paths.
resolved_url = image_url
if resolved_url.startswith("file://"):
resolved_url = resolved_url[len("file://"):]
local_path = Path(os.path.expanduser(resolved_url))
if local_path.is_file():
# Local file path (e.g. from platform image cache) -- skip download
logger.info("Using local image file: %s", image_url)
@ -378,7 +382,19 @@ async def vision_analyze_tool(
# Calculate size in KB for better readability
data_size_kb = len(image_data_url) / 1024
logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
# Pre-flight size check: most vision APIs cap base64 payloads at 5 MB.
# Reject early with a clear message instead of a cryptic provider 400.
_MAX_BASE64_BYTES = 5 * 1024 * 1024 # 5 MB
# The data URL includes the header (e.g. "data:image/jpeg;base64,") which
# is negligible, but measure the full string to be safe.
if len(image_data_url) > _MAX_BASE64_BYTES:
raise ValueError(
f"Image too large for vision API: base64 payload is "
f"{len(image_data_url) / (1024 * 1024):.1f} MB (limit 5 MB). "
f"Resize or compress the image and try again."
)
debug_call_data["image_size_bytes"] = image_size_bytes
# Use the prompt as provided (model_tools.py now handles full description formatting)
@ -471,14 +487,21 @@ async def vision_analyze_tool(
f"API provider account and try again. Error: {e}"
)
elif any(hint in err_str for hint in (
"does not support", "not support image", "invalid_request",
"content_policy", "image_url", "multimodal",
"does not support", "not support image",
"content_policy", "multimodal",
"unrecognized request argument", "image input",
)):
analysis = (
f"{model} does not support vision or our request was not "
f"accepted by the server. Error: {e}"
)
elif "invalid_request" in err_str or "image_url" in err_str:
analysis = (
"The vision API rejected the image. This can happen when the "
"image is too large, in an unsupported format, or corrupted. "
"Try a smaller JPEG/PNG (under 3.5 MB) and retry. "
f"Error: {e}"
)
else:
analysis = (
"There was a problem with the request and the image could not "