mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(vision): auto-resize oversized images, increase default timeout, fix vision capability detection
Cherry-picked from PR #7749 by kshitijk4poor with modifications: - Raise hard image limit from 5 MB to 20 MB (matches most restrictive provider) - Send images at full resolution first; only auto-resize to 5 MB on API failure - Add _is_image_size_error() helper to detect size-related API rejections - Auto-resize uses Pillow (soft dep) with progressive downscale + JPEG quality reduction - Fix get_model_capabilities() to check modalities.input for vision support - Increase default vision timeout from 30s to 120s (matches hardcoded fallback intent) - Applied retry-with-resize to both vision_analyze_tool and browser_vision Closes #7740
This commit is contained in:
parent
06e1d9cdd4
commit
50bb4fe010
6 changed files with 399 additions and 25 deletions
|
|
@ -383,7 +383,14 @@ def get_model_capabilities(provider: str, model: str) -> Optional[ModelCapabilit
|
|||
|
||||
# Extract capability flags (default to False if missing)
|
||||
supports_tools = bool(entry.get("tool_call", False))
|
||||
supports_vision = bool(entry.get("attachment", False))
|
||||
# Vision: check both the `attachment` flag and `modalities.input` for "image".
|
||||
# Some models (e.g. gemma-4) list image in input modalities but not attachment.
|
||||
input_mods = entry.get("modalities", {})
|
||||
if isinstance(input_mods, dict):
|
||||
input_mods = input_mods.get("input", [])
|
||||
else:
|
||||
input_mods = []
|
||||
supports_vision = bool(entry.get("attachment", False)) or "image" in input_mods
|
||||
supports_reasoning = bool(entry.get("reasoning", False))
|
||||
|
||||
# Extract limits
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue