mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(kanban): attach images referenced in task bodies to worker vision (#34210)
Kanban workers now scan the task body for local image paths and http(s) image URLs and attach them to the worker's first user turn — matching the CLI/gateway behaviour for inbound images. Before, a user pasting `/home/me/screenshot.png` or `https://example.com/img.png` into a kanban task description had it sent to the model as plain text and the pixels were never seen. How it works: * agent/image_routing.py gains extract_image_refs(text) → (paths, urls) that mirrors gateway/platforms/base.py:extract_local_files (absolute / ~-relative paths, image extensions only, ignores fenced/inline code). * build_native_content_parts() accepts an optional image_urls= kwarg and emits passthrough image_url parts for remote URLs alongside the base64 data: URLs used for local paths. * cli.py (single-query/quiet branch — the path every dispatcher-spawned worker takes) detects HERMES_KANBAN_TASK, reads the task body via kanban_db.get_task, runs extract_image_refs, and threads the results into the existing image-routing decision (native vs text). Best-effort: enrichment failures never block worker startup. Tested: * tests/agent/test_image_routing.py — 22 new tests for extract_image_refs and URL pass-through in build_native_content_parts. * tests/hermes_cli/test_kanban_worker_image_extraction.py — 10 new tests driving real kanban_db round-trip (create task → read body → extract refs → build parts). * E2E: created a fake kanban task with a body referencing both a local PNG and an https URL; verified the worker pipeline produces a multimodal user turn with 1 text part + 2 image_url parts (data URL for the local file, passthrough URL for the remote).
This commit is contained in:
parent
1b1e30510a
commit
769ee86cd2
4 changed files with 610 additions and 20 deletions
|
|
@ -16,6 +16,7 @@ from agent.image_routing import (
|
|||
_supports_vision_override,
|
||||
build_native_content_parts,
|
||||
decide_image_input_mode,
|
||||
extract_image_refs,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -449,3 +450,190 @@ class TestLargeImageHandling:
|
|||
assert len(parts) == 2
|
||||
assert parts[0]["type"] == "text"
|
||||
assert parts[1]["type"] == "image_url"
|
||||
|
||||
|
||||
# ─── extract_image_refs ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestExtractImageRefs:
|
||||
"""Scan task body / inbound text for image paths and URLs (kanban worker
|
||||
enrichment, issue raised May 2026)."""
|
||||
|
||||
def test_empty_or_none_returns_empty(self):
|
||||
assert extract_image_refs("") == ([], [])
|
||||
assert extract_image_refs(None) == ([], []) # type: ignore[arg-type]
|
||||
|
||||
def test_finds_absolute_path(self, tmp_path: Path):
|
||||
img = tmp_path / "screenshot.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
body = f"Look at {img} and tell me what's wrong."
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == [str(img)]
|
||||
assert urls == []
|
||||
|
||||
def test_finds_home_relative_path(self, tmp_path: Path, monkeypatch):
|
||||
# Simulate ~/foo.png by pointing HOME at tmp_path and creating the file
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
img = tmp_path / "foo.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
paths, urls = extract_image_refs("see ~/foo.png please")
|
||||
assert paths == [str(img)]
|
||||
assert urls == []
|
||||
|
||||
def test_skips_nonexistent_paths(self, tmp_path: Path):
|
||||
# Path-shaped but no file on disk → skipped.
|
||||
body = f"What's at {tmp_path}/never_created.png ?"
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == []
|
||||
assert urls == []
|
||||
|
||||
def test_finds_http_image_url(self):
|
||||
body = "Check out https://example.com/photos/cat.png — cute right?"
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == []
|
||||
assert urls == ["https://example.com/photos/cat.png"]
|
||||
|
||||
def test_finds_https_url_with_query_string(self):
|
||||
body = "Diagram: https://cdn.example.com/img.jpeg?size=large&v=2 here"
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert urls == ["https://cdn.example.com/img.jpeg?size=large&v=2"]
|
||||
|
||||
def test_url_trailing_punctuation_stripped(self):
|
||||
# Prose punctuation right after the URL must not be part of the URL.
|
||||
body = "See https://example.com/a.png."
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert urls == ["https://example.com/a.png"]
|
||||
|
||||
def test_ignores_non_image_urls(self):
|
||||
body = "See https://example.com/page.html and https://x.com/y.pdf"
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert urls == []
|
||||
|
||||
def test_dedupes_paths_and_urls(self, tmp_path: Path):
|
||||
img = tmp_path / "dup.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
body = (
|
||||
f"First {img} then again {img}. "
|
||||
"Also https://example.com/x.png and https://example.com/x.png again."
|
||||
)
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == [str(img)]
|
||||
assert urls == ["https://example.com/x.png"]
|
||||
|
||||
def test_ignores_paths_in_fenced_code_block(self, tmp_path: Path):
|
||||
img = tmp_path / "real.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
body = (
|
||||
"Outside the block, attach this:\n"
|
||||
f"{img}\n"
|
||||
"But not these examples:\n"
|
||||
"```\n"
|
||||
f"some_other_image: /tmp/example.png\n"
|
||||
f"url: https://example.com/example.png\n"
|
||||
"```\n"
|
||||
)
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == [str(img)]
|
||||
assert urls == []
|
||||
|
||||
def test_ignores_paths_in_inline_code(self, tmp_path: Path):
|
||||
img = tmp_path / "real.jpg"
|
||||
img.write_bytes(_png_bytes())
|
||||
body = (
|
||||
f"Attach {img}, but ignore the example "
|
||||
"`https://example.com/skip.png` in backticks."
|
||||
)
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == [str(img)]
|
||||
assert urls == []
|
||||
|
||||
def test_does_not_match_paths_inside_urls(self, tmp_path: Path):
|
||||
# The lookbehind in the regex prevents matching the path-portion of
|
||||
# a URL as a local path. Only the URL should be detected.
|
||||
body = "Just the URL: https://example.com/some/dir/image.png"
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == []
|
||||
assert urls == ["https://example.com/some/dir/image.png"]
|
||||
|
||||
def test_mixed_paths_and_urls(self, tmp_path: Path):
|
||||
img = tmp_path / "local.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
body = (
|
||||
f"Compare local {img} against the design at "
|
||||
"https://example.com/design/v2.png — does it match?"
|
||||
)
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == [str(img)]
|
||||
assert urls == ["https://example.com/design/v2.png"]
|
||||
|
||||
def test_case_insensitive_extension(self, tmp_path: Path):
|
||||
img = tmp_path / "shouty.PNG"
|
||||
img.write_bytes(_png_bytes())
|
||||
body = f"see {img}"
|
||||
paths, urls = extract_image_refs(body)
|
||||
assert paths == [str(img)]
|
||||
|
||||
|
||||
# ─── build_native_content_parts with URLs ────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildNativeContentPartsURLs:
|
||||
"""URL pass-through support added so kanban task bodies (and other
|
||||
inbound surfaces) can route remote image URLs straight to the model."""
|
||||
|
||||
def test_url_only_no_local_paths(self):
|
||||
parts, skipped = build_native_content_parts(
|
||||
"what is this?",
|
||||
[],
|
||||
image_urls=["https://example.com/diagram.png"],
|
||||
)
|
||||
assert skipped == []
|
||||
assert len(parts) == 2
|
||||
assert parts[0]["type"] == "text"
|
||||
assert "[Image attached: https://example.com/diagram.png]" in parts[0]["text"]
|
||||
assert parts[0]["text"].startswith("what is this?")
|
||||
assert parts[1] == {
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "https://example.com/diagram.png"},
|
||||
}
|
||||
|
||||
def test_mixed_path_and_url(self, tmp_path: Path):
|
||||
img = tmp_path / "local.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
parts, skipped = build_native_content_parts(
|
||||
"compare these",
|
||||
[str(img)],
|
||||
image_urls=["https://example.com/remote.jpg"],
|
||||
)
|
||||
assert skipped == []
|
||||
# 1 text + 2 image parts (local data URL first, then remote URL).
|
||||
image_parts = [p for p in parts if p.get("type") == "image_url"]
|
||||
assert len(image_parts) == 2
|
||||
assert image_parts[0]["image_url"]["url"].startswith("data:image/png;base64,")
|
||||
assert image_parts[1]["image_url"]["url"] == "https://example.com/remote.jpg"
|
||||
text = parts[0]["text"]
|
||||
assert "[Image attached at:" in text
|
||||
assert "[Image attached: https://example.com/remote.jpg]" in text
|
||||
|
||||
def test_empty_url_list_is_no_op(self, tmp_path: Path):
|
||||
img = tmp_path / "x.png"
|
||||
img.write_bytes(_png_bytes())
|
||||
# image_urls=[] should behave the same as not passing it at all.
|
||||
parts_no_urls, _ = build_native_content_parts("hi", [str(img)])
|
||||
parts_empty_urls, _ = build_native_content_parts("hi", [str(img)], image_urls=[])
|
||||
assert parts_no_urls == parts_empty_urls
|
||||
|
||||
def test_blank_url_strings_are_dropped(self):
|
||||
parts, _ = build_native_content_parts(
|
||||
"x", [], image_urls=["", " ", "https://example.com/a.png"]
|
||||
)
|
||||
image_parts = [p for p in parts if p.get("type") == "image_url"]
|
||||
assert len(image_parts) == 1
|
||||
assert image_parts[0]["image_url"]["url"] == "https://example.com/a.png"
|
||||
|
||||
def test_url_only_inserts_default_prompt_when_text_empty(self):
|
||||
parts, _ = build_native_content_parts(
|
||||
"", [], image_urls=["https://example.com/a.png"]
|
||||
)
|
||||
assert parts[0]["type"] == "text"
|
||||
assert parts[0]["text"].startswith("What do you see in this image?")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue