mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(vision): reject oversized images before API call, handle file:// URIs, improve 400 errors
Three fixes for vision_analyze returning cryptic 400 "Invalid request data": 1. Pre-flight base64 size check — base64 inflates data ~33%, so a 3.8 MB file exceeds the 5 MB API limit. Reject early with a clear message instead of letting the provider return a generic 400. 2. Handle file:// URIs — strip the scheme and resolve as a local path. Previously file:///path/to/image.png fell through to the "invalid image source" error since it matched neither is_file() nor http(s). 3. Separate invalid_request errors from "does not support vision" errors so the user gets actionable guidance (resize/compress/retry) instead of a misleading "model does not support vision" message. Closes #6677
This commit is contained in:
parent
1909877e6e
commit
4e56eacdce
2 changed files with 154 additions and 4 deletions
|
|
@ -533,6 +533,133 @@ class TestTildeExpansion:
|
|||
assert data["success"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# file:// URI support
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFileUriSupport:
|
||||
"""Verify that file:// URIs resolve as local file paths."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_uri_resolved_as_local_path(self, tmp_path):
|
||||
"""file:///absolute/path should be treated as a local file."""
|
||||
img = tmp_path / "photo.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message.content = "A test image"
|
||||
mock_response.choices = [mock_choice]
|
||||
|
||||
with (
|
||||
patch(
|
||||
"tools.vision_tools._image_to_base64_data_url",
|
||||
return_value="data:image/png;base64,abc",
|
||||
),
|
||||
patch(
|
||||
"tools.vision_tools.async_call_llm",
|
||||
new_callable=AsyncMock,
|
||||
return_value=mock_response,
|
||||
),
|
||||
):
|
||||
result = await vision_analyze_tool(
|
||||
f"file://{img}", "describe this", "test/model"
|
||||
)
|
||||
data = json.loads(result)
|
||||
assert data["success"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_uri_nonexistent_gives_error(self, tmp_path):
|
||||
"""file:// pointing to a missing file should fail gracefully."""
|
||||
result = await vision_analyze_tool(
|
||||
f"file://{tmp_path}/nonexistent.png", "describe this", "test/model"
|
||||
)
|
||||
data = json.loads(result)
|
||||
assert data["success"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Base64 size pre-flight check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBase64SizeLimit:
|
||||
"""Verify that oversized images are rejected before hitting the API."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_oversized_image_rejected_before_api_call(self, tmp_path):
|
||||
"""Images exceeding 5 MB base64 should fail with a clear size error."""
|
||||
img = tmp_path / "huge.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (4 * 1024 * 1024))
|
||||
|
||||
with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock) as mock_llm:
|
||||
result = json.loads(await vision_analyze_tool(str(img), "describe this"))
|
||||
|
||||
assert result["success"] is False
|
||||
assert "too large" in result["error"].lower()
|
||||
mock_llm.assert_not_awaited()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_small_image_not_rejected(self, tmp_path):
|
||||
"""Images well under the limit should pass the size check."""
|
||||
img = tmp_path / "small.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 64)
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_choice = MagicMock()
|
||||
mock_choice.message.content = "Small image"
|
||||
mock_response.choices = [mock_choice]
|
||||
|
||||
with (
|
||||
patch(
|
||||
"tools.vision_tools.async_call_llm",
|
||||
new_callable=AsyncMock,
|
||||
return_value=mock_response,
|
||||
),
|
||||
):
|
||||
result = json.loads(await vision_analyze_tool(str(img), "describe this", "test/model"))
|
||||
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Error classification for 400 responses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestErrorClassification:
|
||||
"""Verify that API 400 errors produce actionable guidance."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invalid_request_error_gives_image_guidance(self, tmp_path):
|
||||
"""An invalid_request_error from the API should mention image size/format."""
|
||||
img = tmp_path / "test.png"
|
||||
img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8)
|
||||
|
||||
api_error = Exception(
|
||||
"Error code: 400 - {'type': 'error', 'error': "
|
||||
"{'type': 'invalid_request_error', 'message': 'Invalid request data'}}"
|
||||
)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"tools.vision_tools._image_to_base64_data_url",
|
||||
return_value="data:image/png;base64,abc",
|
||||
),
|
||||
patch(
|
||||
"tools.vision_tools.async_call_llm",
|
||||
new_callable=AsyncMock,
|
||||
side_effect=api_error,
|
||||
),
|
||||
):
|
||||
result = json.loads(await vision_analyze_tool(str(img), "describe", "test/model"))
|
||||
|
||||
assert result["success"] is False
|
||||
assert "rejected the image" in result["analysis"].lower()
|
||||
assert "smaller" in result["analysis"].lower()
|
||||
|
||||
|
||||
class TestVisionRegistration:
|
||||
def test_vision_analyze_registered(self):
|
||||
from tools.registry import registry
|
||||
|
|
|
|||
|
|
@ -342,7 +342,11 @@ async def vision_analyze_tool(
|
|||
logger.info("User prompt: %s", user_prompt[:100])
|
||||
|
||||
# Determine if this is a local file path or a remote URL
|
||||
local_path = Path(os.path.expanduser(image_url))
|
||||
# Strip file:// scheme so file URIs resolve as local paths.
|
||||
resolved_url = image_url
|
||||
if resolved_url.startswith("file://"):
|
||||
resolved_url = resolved_url[len("file://"):]
|
||||
local_path = Path(os.path.expanduser(resolved_url))
|
||||
if local_path.is_file():
|
||||
# Local file path (e.g. from platform image cache) -- skip download
|
||||
logger.info("Using local image file: %s", image_url)
|
||||
|
|
@ -378,7 +382,19 @@ async def vision_analyze_tool(
|
|||
# Calculate size in KB for better readability
|
||||
data_size_kb = len(image_data_url) / 1024
|
||||
logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
|
||||
|
||||
|
||||
# Pre-flight size check: most vision APIs cap base64 payloads at 5 MB.
|
||||
# Reject early with a clear message instead of a cryptic provider 400.
|
||||
_MAX_BASE64_BYTES = 5 * 1024 * 1024 # 5 MB
|
||||
# The data URL includes the header (e.g. "data:image/jpeg;base64,") which
|
||||
# is negligible, but measure the full string to be safe.
|
||||
if len(image_data_url) > _MAX_BASE64_BYTES:
|
||||
raise ValueError(
|
||||
f"Image too large for vision API: base64 payload is "
|
||||
f"{len(image_data_url) / (1024 * 1024):.1f} MB (limit 5 MB). "
|
||||
f"Resize or compress the image and try again."
|
||||
)
|
||||
|
||||
debug_call_data["image_size_bytes"] = image_size_bytes
|
||||
|
||||
# Use the prompt as provided (model_tools.py now handles full description formatting)
|
||||
|
|
@ -471,14 +487,21 @@ async def vision_analyze_tool(
|
|||
f"API provider account and try again. Error: {e}"
|
||||
)
|
||||
elif any(hint in err_str for hint in (
|
||||
"does not support", "not support image", "invalid_request",
|
||||
"content_policy", "image_url", "multimodal",
|
||||
"does not support", "not support image",
|
||||
"content_policy", "multimodal",
|
||||
"unrecognized request argument", "image input",
|
||||
)):
|
||||
analysis = (
|
||||
f"{model} does not support vision or our request was not "
|
||||
f"accepted by the server. Error: {e}"
|
||||
)
|
||||
elif "invalid_request" in err_str or "image_url" in err_str:
|
||||
analysis = (
|
||||
"The vision API rejected the image. This can happen when the "
|
||||
"image is too large, in an unsupported format, or corrupted. "
|
||||
"Try a smaller JPEG/PNG (under 3.5 MB) and retry. "
|
||||
f"Error: {e}"
|
||||
)
|
||||
else:
|
||||
analysis = (
|
||||
"There was a problem with the request and the image could not "
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue