From 769ee86cd2b346f6bffedd84ca9067fde2790eeb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 28 May 2026 17:50:42 -0700 Subject: [PATCH] feat(kanban): attach images referenced in task bodies to worker vision (#34210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kanban workers now scan the task body for local image paths and http(s) image URLs and attach them to the worker's first user turn — matching the CLI/gateway behaviour for inbound images. Before, a user pasting `/home/me/screenshot.png` or `https://example.com/img.png` into a kanban task description had it sent to the model as plain text and the pixels were never seen. How it works: * agent/image_routing.py gains extract_image_refs(text) → (paths, urls) that mirrors gateway/platforms/base.py:extract_local_files (absolute / ~-relative paths, image extensions only, ignores fenced/inline code). * build_native_content_parts() accepts an optional image_urls= kwarg and emits passthrough image_url parts for remote URLs alongside the base64 data: URLs used for local paths. * cli.py (single-query/quiet branch — the path every dispatcher-spawned worker takes) detects HERMES_KANBAN_TASK, reads the task body via kanban_db.get_task, runs extract_image_refs, and threads the results into the existing image-routing decision (native vs text). Best-effort: enrichment failures never block worker startup. Tested: * tests/agent/test_image_routing.py — 22 new tests for extract_image_refs and URL pass-through in build_native_content_parts. * tests/hermes_cli/test_kanban_worker_image_extraction.py — 10 new tests driving real kanban_db round-trip (create task → read body → extract refs → build parts). * E2E: created a fake kanban task with a body referencing both a local PNG and an https URL; verified the worker pipeline produces a multimodal user turn with 1 text part + 2 image_url parts (data URL for the local file, passthrough URL for the remote). --- agent/image_routing.py | 148 +++++++++-- cli.py | 56 ++++- tests/agent/test_image_routing.py | 188 ++++++++++++++ .../test_kanban_worker_image_extraction.py | 238 ++++++++++++++++++ 4 files changed, 610 insertions(+), 20 deletions(-) create mode 100644 tests/hermes_cli/test_kanban_worker_image_extraction.py diff --git a/agent/image_routing.py b/agent/image_routing.py index 37e1cbbf102..74b29af7cd8 100644 --- a/agent/image_routing.py +++ b/agent/image_routing.py @@ -37,6 +37,8 @@ from __future__ import annotations import base64 import logging import mimetypes +import os +import re from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -46,6 +48,102 @@ logger = logging.getLogger(__name__) _VALID_MODES = frozenset({"auto", "native", "text"}) +# Image extensions used by extract_image_refs(). Kept tight on purpose — we +# only auto-attach things the model can actually see. Documents/archives are +# excluded because the gateway's broader extract_local_files() also routes +# them differently (send_document), and we don't want to attach a PDF as a +# vision part. +_IMAGE_EXTS = ( + ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".heic", +) +_IMAGE_EXT_PATTERN = "|".join(e.lstrip(".") for e in _IMAGE_EXTS) + +# Absolute / home-relative local image path. Matches the same shape gateway's +# extract_local_files() uses: anchors to ``~/`` or ``/``, ignores matches inside +# URLs (the ``(?\"']+?\.(?:" + _IMAGE_EXT_PATTERN + r")(?:\?[^\s<>\"']*)?", + re.IGNORECASE, +) + + +def extract_image_refs(text: str) -> Tuple[List[str], List[str]]: + """Scan free-form text for image references the model should see. + + Returns ``(local_paths, urls)``: + + * ``local_paths`` — absolute (``/``) or home-relative (``~/``) paths + whose suffix is an image extension AND whose expanded form exists + on disk as a file. Order-preserving, deduplicated. + * ``urls`` — ``http(s)://…`` URLs whose path ends in an image + extension (a ``?query`` is allowed after the extension). + Order-preserving, deduplicated. + + Matches inside fenced code blocks (``` ``` ```) and inline backticks + (`` `…` ``) are skipped so that snippets pasted into a task body for + reference aren't mistaken for live attachments. This mirrors the + behaviour of ``gateway.platforms.base.BaseAdapter.extract_local_files``. + + Local paths are validated against the filesystem; URLs are not + (the provider fetches them at request time). + """ + if not isinstance(text, str) or not text: + return [], [] + + # Build spans covered by fenced code blocks and inline code so we can + # ignore references the author embedded purely as example text. + code_spans: list[tuple[int, int]] = [] + for m in re.finditer(r"```[^\n]*\n.*?```", text, re.DOTALL): + code_spans.append((m.start(), m.end())) + for m in re.finditer(r"`[^`\n]+`", text): + code_spans.append((m.start(), m.end())) + + def _in_code(pos: int) -> bool: + return any(s <= pos < e for s, e in code_spans) + + local_paths: list[str] = [] + seen_paths: set[str] = set() + for match in _LOCAL_IMAGE_PATH_RE.finditer(text): + if _in_code(match.start()): + continue + raw = match.group(0) + expanded = os.path.expanduser(raw) + try: + if not os.path.isfile(expanded): + continue + except OSError: + # ENAMETOOLONG / EINVAL on pathological inputs — skip rather than crash. + continue + if expanded in seen_paths: + continue + seen_paths.add(expanded) + local_paths.append(expanded) + + urls: list[str] = [] + seen_urls: set[str] = set() + for match in _IMAGE_URL_RE.finditer(text): + if _in_code(match.start()): + continue + url = match.group(0) + # Strip trailing punctuation that's almost certainly prose, not part + # of the URL (e.g. "see https://x.com/a.png." or "/a.png)"). + url = url.rstrip(".,;:!?)]>") + if url in seen_urls: + continue + seen_urls.add(url) + urls.append(url) + + return local_paths, urls + + # Strict YAML/JSON boolean coercion for capability overrides. # # ``bool("false")`` is True in Python because non-empty strings are truthy, so @@ -320,20 +418,29 @@ def _file_to_data_url(path: Path) -> Optional[str]: def build_native_content_parts( user_text: str, image_paths: List[str], + image_urls: Optional[List[str]] = None, ) -> Tuple[List[Dict[str, Any]], List[str]]: """Build an OpenAI-style ``content`` list for a user turn. Shape: [{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}, + {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}}, ...] - The local path of each successfully attached image is appended to the - text part as ``[Image attached at: ]``. The model still sees the - pixels via the ``image_url`` part (full native vision); the path note - just gives it a string handle so MCP/skill tools that take an image - path or URL argument can be invoked on the same image without an - extra round-trip. This parallels the text-mode hint produced by + Local paths are read from disk and embedded as base64 ``data:`` URLs. + Remote URLs (``http(s)://``) are passed through verbatim — the provider + fetches them server-side. The model still sees the pixels either way. + + For each successfully attached image, a hint is appended to the text + part: + + * local path → ``[Image attached at: ]`` + * URL → ``[Image attached: ]`` + + The hint gives the model a string handle so MCP/skill tools that take + an image path or URL argument can be invoked on the same image without + an extra round-trip. This parallels the text-mode hint produced by ``Runner._enrich_message_with_vision`` (``vision_analyze using image_url: ``) so behaviour is consistent across both image input modes. @@ -342,12 +449,14 @@ def build_native_content_parts( ceiling), the agent's retry loop transparently shrinks and retries once — see ``run_agent._try_shrink_image_parts_in_messages``. - Returns (content_parts, skipped_paths). Skipped paths are files that - couldn't be read from disk and are NOT advertised in the path hints. + Returns (content_parts, skipped). Skipped entries are local paths + that couldn't be read from disk; URLs are never skipped (they're + not validated here). """ skipped: List[str] = [] image_parts: List[Dict[str, Any]] = [] attached_paths: List[str] = [] + attached_urls: List[str] = [] for raw_path in image_paths: p = Path(raw_path) @@ -364,16 +473,26 @@ def build_native_content_parts( }) attached_paths.append(str(raw_path)) + for url in image_urls or []: + url = (url or "").strip() + if not url: + continue + image_parts.append({ + "type": "image_url", + "image_url": {"url": url}, + }) + attached_urls.append(url) + text = (user_text or "").strip() # If at least one image attached, build a single text part that combines - # the user's caption (or a neutral default) with one path hint per image. - if attached_paths: + # the user's caption (or a neutral default) with one hint per image. + if attached_paths or attached_urls: base_text = text or "What do you see in this image?" - path_hints = "\n".join( - f"[Image attached at: {p}]" for p in attached_paths - ) - combined_text = f"{base_text}\n\n{path_hints}" + hint_lines: List[str] = [] + hint_lines.extend(f"[Image attached at: {p}]" for p in attached_paths) + hint_lines.extend(f"[Image attached: {u}]" for u in attached_urls) + combined_text = f"{base_text}\n\n" + "\n".join(hint_lines) parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}] parts.extend(image_parts) return parts, skipped @@ -388,4 +507,5 @@ def build_native_content_parts( __all__ = [ "decide_image_input_mode", "build_native_content_parts", + "extract_image_refs", ] diff --git a/cli.py b/cli.py index aeffd8bad8a..28ae0a371d4 100644 --- a/cli.py +++ b/cli.py @@ -15125,13 +15125,50 @@ def main( # Handle single query mode if query or image: query, single_query_images = _collect_query_images(query, image) + # Kanban workers spawn with ``hermes chat -q "work kanban task "``; + # the actual task description lives in the task body. Mirror the + # gateway/CLI behaviour for inbound images by scanning the body for + # local image paths and http(s) image URLs and attaching them to the + # worker's first turn. Without this, users who paste a screenshot + # path or URL into a kanban task body never get it routed to the + # model's vision input. + single_query_image_urls: list[str] = [] + _kanban_task_id = os.environ.get("HERMES_KANBAN_TASK", "").strip() + if _kanban_task_id: + try: + from hermes_cli import kanban_db as _kb + from agent.image_routing import extract_image_refs as _extract_refs + + _conn = _kb.connect() + try: + _task = _kb.get_task(_conn, _kanban_task_id) + finally: + try: + _conn.close() + except Exception: + pass + _body = getattr(_task, "body", "") if _task is not None else "" + if _body: + _kb_paths, _kb_urls = _extract_refs(_body) + if _kb_paths: + # Dedupe against any --image the user already passed. + _seen = {str(p) for p in single_query_images} + for _p in _kb_paths: + if _p not in _seen: + _seen.add(_p) + single_query_images.append(Path(_p)) + if _kb_urls: + single_query_image_urls.extend(_kb_urls) + except Exception as _exc: + # Best-effort enrichment; never block worker startup on it. + logger.debug("kanban image-ref extraction failed: %s", _exc) if quiet: # Quiet mode: suppress banner, spinner, tool previews. # Only print the final response and parseable session info. cli.tool_progress_mode = "off" if cli._ensure_runtime_credentials(): effective_query: Any = query - if single_query_images: + if single_query_images or single_query_image_urls: # Honour the same image-routing decision used by the # interactive path. With a vision-capable model (incl. # custom-provider models declared via @@ -15160,19 +15197,26 @@ def main( _parts, _skipped = _build_parts( query if isinstance(query, str) else "", [str(p) for p in single_query_images], + image_urls=list(single_query_image_urls) or None, ) if any(p.get("type") == "image_url" for p in _parts): effective_query = _parts else: # All images unreadable — text fallback. + # ``_preprocess_images_with_vision`` only knows + # about local files; URLs would be lost there, + # so keep the original query text intact when + # only URLs were supplied. + if single_query_images: + effective_query = cli._preprocess_images_with_vision( + query, single_query_images, announce=False, + ) + except Exception: + if single_query_images: effective_query = cli._preprocess_images_with_vision( query, single_query_images, announce=False, ) - except Exception: - effective_query = cli._preprocess_images_with_vision( - query, single_query_images, announce=False, - ) - else: + elif single_query_images: effective_query = cli._preprocess_images_with_vision( query, single_query_images, diff --git a/tests/agent/test_image_routing.py b/tests/agent/test_image_routing.py index ddb11cba409..4ec5986ce77 100644 --- a/tests/agent/test_image_routing.py +++ b/tests/agent/test_image_routing.py @@ -16,6 +16,7 @@ from agent.image_routing import ( _supports_vision_override, build_native_content_parts, decide_image_input_mode, + extract_image_refs, ) @@ -449,3 +450,190 @@ class TestLargeImageHandling: assert len(parts) == 2 assert parts[0]["type"] == "text" assert parts[1]["type"] == "image_url" + + +# ─── extract_image_refs ────────────────────────────────────────────────────── + + +class TestExtractImageRefs: + """Scan task body / inbound text for image paths and URLs (kanban worker + enrichment, issue raised May 2026).""" + + def test_empty_or_none_returns_empty(self): + assert extract_image_refs("") == ([], []) + assert extract_image_refs(None) == ([], []) # type: ignore[arg-type] + + def test_finds_absolute_path(self, tmp_path: Path): + img = tmp_path / "screenshot.png" + img.write_bytes(_png_bytes()) + body = f"Look at {img} and tell me what's wrong." + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == [] + + def test_finds_home_relative_path(self, tmp_path: Path, monkeypatch): + # Simulate ~/foo.png by pointing HOME at tmp_path and creating the file + monkeypatch.setenv("HOME", str(tmp_path)) + img = tmp_path / "foo.png" + img.write_bytes(_png_bytes()) + paths, urls = extract_image_refs("see ~/foo.png please") + assert paths == [str(img)] + assert urls == [] + + def test_skips_nonexistent_paths(self, tmp_path: Path): + # Path-shaped but no file on disk → skipped. + body = f"What's at {tmp_path}/never_created.png ?" + paths, urls = extract_image_refs(body) + assert paths == [] + assert urls == [] + + def test_finds_http_image_url(self): + body = "Check out https://example.com/photos/cat.png — cute right?" + paths, urls = extract_image_refs(body) + assert paths == [] + assert urls == ["https://example.com/photos/cat.png"] + + def test_finds_https_url_with_query_string(self): + body = "Diagram: https://cdn.example.com/img.jpeg?size=large&v=2 here" + paths, urls = extract_image_refs(body) + assert urls == ["https://cdn.example.com/img.jpeg?size=large&v=2"] + + def test_url_trailing_punctuation_stripped(self): + # Prose punctuation right after the URL must not be part of the URL. + body = "See https://example.com/a.png." + paths, urls = extract_image_refs(body) + assert urls == ["https://example.com/a.png"] + + def test_ignores_non_image_urls(self): + body = "See https://example.com/page.html and https://x.com/y.pdf" + paths, urls = extract_image_refs(body) + assert urls == [] + + def test_dedupes_paths_and_urls(self, tmp_path: Path): + img = tmp_path / "dup.png" + img.write_bytes(_png_bytes()) + body = ( + f"First {img} then again {img}. " + "Also https://example.com/x.png and https://example.com/x.png again." + ) + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == ["https://example.com/x.png"] + + def test_ignores_paths_in_fenced_code_block(self, tmp_path: Path): + img = tmp_path / "real.png" + img.write_bytes(_png_bytes()) + body = ( + "Outside the block, attach this:\n" + f"{img}\n" + "But not these examples:\n" + "```\n" + f"some_other_image: /tmp/example.png\n" + f"url: https://example.com/example.png\n" + "```\n" + ) + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == [] + + def test_ignores_paths_in_inline_code(self, tmp_path: Path): + img = tmp_path / "real.jpg" + img.write_bytes(_png_bytes()) + body = ( + f"Attach {img}, but ignore the example " + "`https://example.com/skip.png` in backticks." + ) + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == [] + + def test_does_not_match_paths_inside_urls(self, tmp_path: Path): + # The lookbehind in the regex prevents matching the path-portion of + # a URL as a local path. Only the URL should be detected. + body = "Just the URL: https://example.com/some/dir/image.png" + paths, urls = extract_image_refs(body) + assert paths == [] + assert urls == ["https://example.com/some/dir/image.png"] + + def test_mixed_paths_and_urls(self, tmp_path: Path): + img = tmp_path / "local.png" + img.write_bytes(_png_bytes()) + body = ( + f"Compare local {img} against the design at " + "https://example.com/design/v2.png — does it match?" + ) + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == ["https://example.com/design/v2.png"] + + def test_case_insensitive_extension(self, tmp_path: Path): + img = tmp_path / "shouty.PNG" + img.write_bytes(_png_bytes()) + body = f"see {img}" + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + + +# ─── build_native_content_parts with URLs ──────────────────────────────────── + + +class TestBuildNativeContentPartsURLs: + """URL pass-through support added so kanban task bodies (and other + inbound surfaces) can route remote image URLs straight to the model.""" + + def test_url_only_no_local_paths(self): + parts, skipped = build_native_content_parts( + "what is this?", + [], + image_urls=["https://example.com/diagram.png"], + ) + assert skipped == [] + assert len(parts) == 2 + assert parts[0]["type"] == "text" + assert "[Image attached: https://example.com/diagram.png]" in parts[0]["text"] + assert parts[0]["text"].startswith("what is this?") + assert parts[1] == { + "type": "image_url", + "image_url": {"url": "https://example.com/diagram.png"}, + } + + def test_mixed_path_and_url(self, tmp_path: Path): + img = tmp_path / "local.png" + img.write_bytes(_png_bytes()) + parts, skipped = build_native_content_parts( + "compare these", + [str(img)], + image_urls=["https://example.com/remote.jpg"], + ) + assert skipped == [] + # 1 text + 2 image parts (local data URL first, then remote URL). + image_parts = [p for p in parts if p.get("type") == "image_url"] + assert len(image_parts) == 2 + assert image_parts[0]["image_url"]["url"].startswith("data:image/png;base64,") + assert image_parts[1]["image_url"]["url"] == "https://example.com/remote.jpg" + text = parts[0]["text"] + assert "[Image attached at:" in text + assert "[Image attached: https://example.com/remote.jpg]" in text + + def test_empty_url_list_is_no_op(self, tmp_path: Path): + img = tmp_path / "x.png" + img.write_bytes(_png_bytes()) + # image_urls=[] should behave the same as not passing it at all. + parts_no_urls, _ = build_native_content_parts("hi", [str(img)]) + parts_empty_urls, _ = build_native_content_parts("hi", [str(img)], image_urls=[]) + assert parts_no_urls == parts_empty_urls + + def test_blank_url_strings_are_dropped(self): + parts, _ = build_native_content_parts( + "x", [], image_urls=["", " ", "https://example.com/a.png"] + ) + image_parts = [p for p in parts if p.get("type") == "image_url"] + assert len(image_parts) == 1 + assert image_parts[0]["image_url"]["url"] == "https://example.com/a.png" + + def test_url_only_inserts_default_prompt_when_text_empty(self): + parts, _ = build_native_content_parts( + "", [], image_urls=["https://example.com/a.png"] + ) + assert parts[0]["type"] == "text" + assert parts[0]["text"].startswith("What do you see in this image?") diff --git a/tests/hermes_cli/test_kanban_worker_image_extraction.py b/tests/hermes_cli/test_kanban_worker_image_extraction.py new file mode 100644 index 00000000000..c0724a2904d --- /dev/null +++ b/tests/hermes_cli/test_kanban_worker_image_extraction.py @@ -0,0 +1,238 @@ +"""Worker-side image enrichment for kanban tasks. + +When a kanban task body contains a local image path or an ``http(s)://`` +image URL, the worker must surface that image to the model on its first +user turn — matching the CLI/gateway behaviour for inbound images. + +The dispatcher spawns the worker as +``hermes -p chat -q "work kanban task "``. The task body +itself never appears in argv; the worker has to read it from the kanban +DB during startup. These tests cover the round-trip: + + task body → kanban_db.get_task → extract_image_refs → + build_native_content_parts → multimodal user turn +""" +from __future__ import annotations + +import base64 +from pathlib import Path + +import pytest + +from hermes_cli import kanban_db as kb +from agent.image_routing import ( + build_native_content_parts, + extract_image_refs, +) + + +# Tiny 1×1 transparent PNG used to back any path the tests stick into a +# task body. extract_image_refs validates the path exists on disk, so the +# byte content has to be a real readable file (any image bytes will do). +_PNG = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABpfZFQAAAAABJRU5ErkJggg==" +) + + +@pytest.fixture +def kanban_home(tmp_path: Path, monkeypatch): + """Isolated HERMES_HOME with a fresh kanban DB for each test.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +def _add_task_with_body(body: str, *, title: str = "Look at this") -> str: + conn = kb.connect() + try: + task_id = kb.create_task( + conn, + title=title, + body=body, + assignee="worker-a", + tenant=None, + ) + finally: + conn.close() + return task_id + + +def _read_body(task_id: str) -> str: + conn = kb.connect() + try: + task = kb.get_task(conn, task_id) + return (task.body if task is not None else "") or "" + finally: + conn.close() + + +class TestExtractFromTaskBody: + """Read a real kanban task body and run it through extract_image_refs.""" + + def test_local_path_in_body_round_trips(self, kanban_home, tmp_path): + img = tmp_path / "screenshot.png" + img.write_bytes(_PNG) + tid = _add_task_with_body( + f"Please review the screenshot at {img} and confirm " + "the alignment is right." + ) + + body = _read_body(tid) + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == [] + + def test_url_in_body_round_trips(self, kanban_home): + tid = _add_task_with_body( + "The design lives at https://example.com/mock/v3.png — " + "make the implementation match it." + ) + + body = _read_body(tid) + paths, urls = extract_image_refs(body) + assert paths == [] + assert urls == ["https://example.com/mock/v3.png"] + + def test_mixed_path_and_url_in_body(self, kanban_home, tmp_path): + img = tmp_path / "current.png" + img.write_bytes(_PNG) + tid = _add_task_with_body( + f"Compare the current screenshot {img} against the design at " + "https://example.com/target.png and write a diff." + ) + + body = _read_body(tid) + paths, urls = extract_image_refs(body) + assert paths == [str(img)] + assert urls == ["https://example.com/target.png"] + + def test_body_without_images_yields_nothing(self, kanban_home): + tid = _add_task_with_body( + "Refactor the auth module to use the new session helper." + ) + + body = _read_body(tid) + paths, urls = extract_image_refs(body) + assert paths == [] + assert urls == [] + + def test_empty_body_is_safe(self, kanban_home): + tid = _add_task_with_body("") + + body = _read_body(tid) + paths, urls = extract_image_refs(body) + assert paths == [] + assert urls == [] + + +class TestBuildPartsFromTaskBody: + """Verify the full pipeline produces a multimodal user turn.""" + + def test_local_path_becomes_native_image_part(self, kanban_home, tmp_path): + img = tmp_path / "design.png" + img.write_bytes(_PNG) + tid = _add_task_with_body(f"Check out {img} — what's broken?") + body = _read_body(tid) + paths, urls = extract_image_refs(body) + + # Mirrors the cli.py wiring: pass the worker's literal -q argument + # (the dispatcher uses ``"work kanban task "``) plus the + # extracted refs through build_native_content_parts. + parts, skipped = build_native_content_parts( + f"work kanban task {tid}", + paths, + image_urls=urls or None, + ) + + assert skipped == [] + # text part + one image_url part + assert len(parts) == 2 + assert parts[0]["type"] == "text" + assert parts[0]["text"].startswith(f"work kanban task {tid}") + assert f"[Image attached at: {img}]" in parts[0]["text"] + assert parts[1]["type"] == "image_url" + assert parts[1]["image_url"]["url"].startswith("data:image/png;base64,") + + def test_url_becomes_image_url_part(self, kanban_home): + tid = _add_task_with_body( + "Reference: https://example.com/target.jpg — match it." + ) + body = _read_body(tid) + paths, urls = extract_image_refs(body) + + parts, skipped = build_native_content_parts( + f"work kanban task {tid}", + paths, + image_urls=urls or None, + ) + + assert skipped == [] + assert len(parts) == 2 + assert parts[0]["type"] == "text" + assert "[Image attached: https://example.com/target.jpg]" in parts[0]["text"] + assert parts[1] == { + "type": "image_url", + "image_url": {"url": "https://example.com/target.jpg"}, + } + + def test_body_with_both_yields_two_image_parts(self, kanban_home, tmp_path): + img = tmp_path / "local.png" + img.write_bytes(_PNG) + tid = _add_task_with_body( + f"Diff {img} vs https://example.com/target.png — explain it." + ) + body = _read_body(tid) + paths, urls = extract_image_refs(body) + + parts, skipped = build_native_content_parts( + f"work kanban task {tid}", + paths, + image_urls=urls or None, + ) + + assert skipped == [] + image_parts = [p for p in parts if p.get("type") == "image_url"] + assert len(image_parts) == 2 + # Local file is embedded as a data URL; remote URL passes through. + assert image_parts[0]["image_url"]["url"].startswith("data:image/png;base64,") + assert image_parts[1]["image_url"]["url"] == "https://example.com/target.png" + + def test_body_with_no_images_leaves_query_untouched(self, kanban_home): + tid = _add_task_with_body( + "Rewrite the README intro paragraph to focus on use cases." + ) + body = _read_body(tid) + paths, urls = extract_image_refs(body) + + parts, skipped = build_native_content_parts( + f"work kanban task {tid}", + paths, + image_urls=urls or None, + ) + + # No images → plain text-only return (single part, no list mutation). + assert skipped == [] + assert len(parts) == 1 + assert parts[0]["type"] == "text" + assert parts[0]["text"] == f"work kanban task {tid}" + + def test_code_block_example_is_not_attached(self, kanban_home, tmp_path): + # Only the real image outside the fenced code block should attach. + real = tmp_path / "real.png" + real.write_bytes(_PNG) + tid = _add_task_with_body( + f"Real screenshot:\n{real}\n\n" + "Example we DON'T want attached:\n" + "```\n" + "image: /tmp/example_only.png\n" + "url: https://example.com/example.png\n" + "```\n" + ) + body = _read_body(tid) + paths, urls = extract_image_refs(body) + + assert paths == [str(real)] + assert urls == []