From 1909877e6edc4d946e63333b704bdae0549ad48c Mon Sep 17 00:00:00 2001 From: aaronagent <1115117931@qq.com> Date: Fri, 10 Apr 2026 12:13:42 +0800 Subject: [PATCH] fix: cap image download size at 50 MB, validate tool call parser fields vision_tools.py: _download_image() loads the full HTTP response body into memory via response.content (line 190) with no Content-Length check and no max file size limit. An attacker-hosted multi-gigabyte file can cause OOM. Add a 50 MB hard cap: reject the response early when its Content-Length header exceeds the cap, and verify the actual body size before writing to disk. hermes_parser.py: tc_data["name"] at line 57 raises KeyError when the LLM outputs a tool call JSON without a "name" field. The outer except catches it silently, causing the entire tool call to be lost with zero diagnostics. Add "name" field validation before constructing the ChatCompletionMessageToolCall. mistral_parser.py: tc["name"] at line 101 has the same KeyError issue in the pre-v11 format path. The fallback decoder (line 112) already checks "name" correctly, but the primary path does not. Add validation to match. 
Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../tool_call_parsers/hermes_parser.py | 2 ++ .../tool_call_parsers/mistral_parser.py | 2 ++ tools/vision_tools.py | 20 +++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/environments/tool_call_parsers/hermes_parser.py b/environments/tool_call_parsers/hermes_parser.py index c1902fd62..c6f911db0 100644 --- a/environments/tool_call_parsers/hermes_parser.py +++ b/environments/tool_call_parsers/hermes_parser.py @@ -49,6 +49,8 @@ class HermesToolCallParser(ToolCallParser): continue tc_data = json.loads(raw_json) + if "name" not in tc_data: + continue tool_calls.append( ChatCompletionMessageToolCall( id=f"call_{uuid.uuid4().hex[:8]}", diff --git a/environments/tool_call_parsers/mistral_parser.py b/environments/tool_call_parsers/mistral_parser.py index 50e98a6f8..a23684e87 100644 --- a/environments/tool_call_parsers/mistral_parser.py +++ b/environments/tool_call_parsers/mistral_parser.py @@ -89,6 +89,8 @@ class MistralToolCallParser(ToolCallParser): parsed = [parsed] for tc in parsed: + if "name" not in tc: + continue args = tc.get("arguments", {}) if isinstance(args, dict): args = json.dumps(args, ensure_ascii=False) diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 2223032c3..4ae2f1164 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -67,6 +67,10 @@ def _resolve_download_timeout() -> float: _VISION_DOWNLOAD_TIMEOUT = _resolve_download_timeout() +# Hard cap on downloaded image file size (50 MB). Prevents OOM from +# attacker-hosted multi-gigabyte files or decompression bombs. +_VISION_MAX_DOWNLOAD_BYTES = 50 * 1024 * 1024 + def _validate_image_url(url: str) -> bool: """ @@ -181,13 +185,25 @@ async def _download_image(image_url: str, destination: Path, max_retries: int = ) response.raise_for_status() + # Reject overly large images early via Content-Length header. 
+ cl = response.headers.get("content-length") + if cl and int(cl) > _VISION_MAX_DOWNLOAD_BYTES: + raise ValueError( + f"Image too large ({int(cl)} bytes, max {_VISION_MAX_DOWNLOAD_BYTES})" + ) + final_url = str(response.url) blocked = check_website_access(final_url) if blocked: raise PermissionError(blocked["message"]) - # Save the image content - destination.write_bytes(response.content) + # Save the image content (double-check actual size) + body = response.content + if len(body) > _VISION_MAX_DOWNLOAD_BYTES: + raise ValueError( + f"Image too large ({len(body)} bytes, max {_VISION_MAX_DOWNLOAD_BYTES})" + ) + destination.write_bytes(body) return destination except Exception as e: