feat(kanban): attach images referenced in task bodies to worker vision (#34210)

Kanban workers now scan the task body for local image paths and
http(s) image URLs and attach them to the worker's first user turn —
matching the CLI/gateway behaviour for inbound images. Before, a
user pasting `/home/me/screenshot.png` or `https://example.com/img.png`
into a kanban task description had it sent to the model as plain
text and the pixels were never seen.

How it works:
* agent/image_routing.py gains extract_image_refs(text) → (paths, urls)
  that mirrors gateway/platforms/base.py:extract_local_files (absolute /
  ~-relative paths, image extensions only, ignores fenced/inline code).
* build_native_content_parts() accepts an optional image_urls= kwarg
  and emits passthrough image_url parts for remote URLs alongside the
  base64 data: URLs used for local paths.
* cli.py (single-query/quiet branch — the path every dispatcher-spawned
  worker takes) detects HERMES_KANBAN_TASK, reads the task body via
  kanban_db.get_task, runs extract_image_refs, and threads the results
  into the existing image-routing decision (native vs text). Best-effort:
  enrichment failures never block worker startup.

Tested:
* tests/agent/test_image_routing.py — 22 new tests for extract_image_refs
  and URL pass-through in build_native_content_parts.
* tests/hermes_cli/test_kanban_worker_image_extraction.py — 10 new tests
  driving real kanban_db round-trip (create task → read body → extract
  refs → build parts).
* E2E: created a fake kanban task with a body referencing both a local
  PNG and an https URL; verified the worker pipeline produces a
  multimodal user turn with 1 text part + 2 image_url parts (data URL
  for the local file, passthrough URL for the remote).
This commit is contained in:
Teknium 2026-05-28 17:50:42 -07:00 committed by GitHub
parent 1b1e30510a
commit 769ee86cd2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 610 additions and 20 deletions

View file

@ -37,6 +37,8 @@ from __future__ import annotations
import base64
import logging
import mimetypes
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@ -46,6 +48,102 @@ logger = logging.getLogger(__name__)
_VALID_MODES = frozenset({"auto", "native", "text"})

# Image extensions used by extract_image_refs(). Kept tight on purpose — we
# only auto-attach things the model can actually see. Documents/archives are
# excluded because the gateway's broader extract_local_files() also routes
# them differently (send_document), and we don't want to attach a PDF as a
# vision part.
_IMAGE_EXTS = (
    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".tif", ".heic",
)
_IMAGE_EXT_PATTERN = "|".join(e.lstrip(".") for e in _IMAGE_EXTS)

# Absolute / home-relative local image path. Matches the same shape gateway's
# extract_local_files() uses: anchors to ``~/`` or ``/``, ignores matches inside
# URLs (the ``(?<![/:\w.])`` lookbehind), and case-insensitive on the extension.
_LOCAL_IMAGE_PATH_RE = re.compile(
    r"(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:" + _IMAGE_EXT_PATTERN + r")\b",
    re.IGNORECASE,
)

# http(s) URL ending in an image extension (optionally followed by a
# query string). Case-insensitive on the extension. Strict ``http(s)://``
# scheme so we don't accidentally grab ``file://`` URLs or other shapes.
#
# The negative lookahead after the extension is what makes "ending in" true:
# the body of the URL is matched lazily, so without it the match would stop at
# the FIRST ``.<ext>`` substring and turn ``https://www.giftshop.com/`` into
# ``https://www.gif`` or ``https://x.com/index.png.html`` into ``.../index.png``.
# A lone trailing ``.`` (sentence punctuation) is still allowed; only a dot
# followed by a word character (a further extension / host label) is rejected.
_IMAGE_URL_RE = re.compile(
    r"https?://[^\s<>\"']+?\.(?:" + _IMAGE_EXT_PATTERN + r")"
    r"(?![\w%/~+=&@-]|\.\w)"
    r"(?:\?[^\s<>\"']*)?",
    re.IGNORECASE,
)

# Markdown code regions whose contents are treated as example text, not as
# live attachments: fenced blocks first, then single-line inline code.
_FENCED_CODE_RE = re.compile(r"```[^\n]*\n.*?```", re.DOTALL)
_INLINE_CODE_RE = re.compile(r"`[^`\n]+`")


def extract_image_refs(text: str) -> Tuple[List[str], List[str]]:
    """Scan free-form text for image references the model should see.

    Args:
        text: Arbitrary text (e.g. a kanban task body). Non-string or empty
            input is tolerated and yields two empty lists.

    Returns:
        ``(local_paths, urls)``:

        * ``local_paths``: absolute (``/``) or home-relative (``~/``) paths
          whose suffix is an image extension AND whose ``~``-expanded form
          exists on disk as a regular file. The expanded form is returned.
          Order-preserving, deduplicated on the expanded path.
        * ``urls``: ``http(s)://`` URLs whose path ends in an image extension
          (a ``?query`` is allowed after the extension). Trailing prose
          punctuation is stripped. Order-preserving, deduplicated.

    References whose match starts inside a fenced code block or inline
    backticks are skipped so that snippets pasted for reference aren't
    mistaken for live attachments. Local paths are validated against the
    filesystem; URLs are returned without any network check.
    """
    if not isinstance(text, str) or not text:
        return [], []

    # Character spans covered by fenced code blocks and inline code.
    code_spans = [
        m.span()
        for pattern in (_FENCED_CODE_RE, _INLINE_CODE_RE)
        for m in pattern.finditer(text)
    ]

    def _in_code(pos: int) -> bool:
        return any(start <= pos < end for start, end in code_spans)

    local_paths: List[str] = []
    seen_paths = set()
    for match in _LOCAL_IMAGE_PATH_RE.finditer(text):
        if _in_code(match.start()):
            continue
        expanded = os.path.expanduser(match.group(0))
        try:
            if not os.path.isfile(expanded):
                continue
        except OSError:
            # ENAMETOOLONG / EINVAL on pathological inputs — skip rather than crash.
            continue
        if expanded in seen_paths:
            continue
        seen_paths.add(expanded)
        local_paths.append(expanded)

    urls: List[str] = []
    seen_urls = set()
    for match in _IMAGE_URL_RE.finditer(text):
        if _in_code(match.start()):
            continue
        # Strip trailing punctuation that's almost certainly prose, not part
        # of the URL (it can only have been swallowed by the query-string
        # part, e.g. "…/a.png?v=2)." ).
        url = match.group(0).rstrip(".,;:!?)]>")
        if url in seen_urls:
            continue
        seen_urls.add(url)
        urls.append(url)

    return local_paths, urls
# Strict YAML/JSON boolean coercion for capability overrides.
#
# ``bool("false")`` is True in Python because non-empty strings are truthy, so
@ -320,20 +418,29 @@ def _file_to_data_url(path: Path) -> Optional[str]:
def build_native_content_parts(
user_text: str,
image_paths: List[str],
image_urls: Optional[List[str]] = None,
) -> Tuple[List[Dict[str, Any]], List[str]]:
"""Build an OpenAI-style ``content`` list for a user turn.
Shape:
[{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
{"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
...]
The local path of each successfully attached image is appended to the
text part as ``[Image attached at: <path>]``. The model still sees the
pixels via the ``image_url`` part (full native vision); the path note
just gives it a string handle so MCP/skill tools that take an image
path or URL argument can be invoked on the same image without an
extra round-trip. This parallels the text-mode hint produced by
Local paths are read from disk and embedded as base64 ``data:`` URLs.
Remote URLs (``http(s)://``) are passed through verbatim; the provider
fetches them server-side. The model still sees the pixels either way.
For each successfully attached image, a hint is appended to the text
part:
* local path ``[Image attached at: <path>]``
* URL ``[Image attached: <url>]``
The hint gives the model a string handle so MCP/skill tools that take
an image path or URL argument can be invoked on the same image without
an extra round-trip. This parallels the text-mode hint produced by
``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
<path>``) so behaviour is consistent across both image input modes.
@ -342,12 +449,14 @@ def build_native_content_parts(
ceiling), the agent's retry loop transparently shrinks and retries
once (see ``run_agent._try_shrink_image_parts_in_messages``).
Returns (content_parts, skipped_paths). Skipped paths are files that
couldn't be read from disk and are NOT advertised in the path hints.
Returns (content_parts, skipped). Skipped entries are local paths
that couldn't be read from disk; URLs are never skipped (they're
not validated here).
"""
skipped: List[str] = []
image_parts: List[Dict[str, Any]] = []
attached_paths: List[str] = []
attached_urls: List[str] = []
for raw_path in image_paths:
p = Path(raw_path)
@ -364,16 +473,26 @@ def build_native_content_parts(
})
attached_paths.append(str(raw_path))
for url in image_urls or []:
url = (url or "").strip()
if not url:
continue
image_parts.append({
"type": "image_url",
"image_url": {"url": url},
})
attached_urls.append(url)
text = (user_text or "").strip()
# If at least one image attached, build a single text part that combines
# the user's caption (or a neutral default) with one path hint per image.
if attached_paths:
# the user's caption (or a neutral default) with one hint per image.
if attached_paths or attached_urls:
base_text = text or "What do you see in this image?"
path_hints = "\n".join(
f"[Image attached at: {p}]" for p in attached_paths
)
combined_text = f"{base_text}\n\n{path_hints}"
hint_lines: List[str] = []
hint_lines.extend(f"[Image attached at: {p}]" for p in attached_paths)
hint_lines.extend(f"[Image attached: {u}]" for u in attached_urls)
combined_text = f"{base_text}\n\n" + "\n".join(hint_lines)
parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}]
parts.extend(image_parts)
return parts, skipped
@ -388,4 +507,5 @@ def build_native_content_parts(
__all__ = [
"decide_image_input_mode",
"build_native_content_parts",
"extract_image_refs",
]

56
cli.py
View file

@ -15125,13 +15125,50 @@ def main(
# Handle single query mode
if query or image:
query, single_query_images = _collect_query_images(query, image)
# Kanban workers spawn with ``hermes chat -q "work kanban task <id>"``;
# the actual task description lives in the task body. Mirror the
# gateway/CLI behaviour for inbound images by scanning the body for
# local image paths and http(s) image URLs and attaching them to the
# worker's first turn. Without this, users who paste a screenshot
# path or URL into a kanban task body never get it routed to the
# model's vision input.
single_query_image_urls: list[str] = []
_kanban_task_id = os.environ.get("HERMES_KANBAN_TASK", "").strip()
if _kanban_task_id:
try:
from hermes_cli import kanban_db as _kb
from agent.image_routing import extract_image_refs as _extract_refs
_conn = _kb.connect()
try:
_task = _kb.get_task(_conn, _kanban_task_id)
finally:
try:
_conn.close()
except Exception:
pass
_body = getattr(_task, "body", "") if _task is not None else ""
if _body:
_kb_paths, _kb_urls = _extract_refs(_body)
if _kb_paths:
# Dedupe against any --image the user already passed.
_seen = {str(p) for p in single_query_images}
for _p in _kb_paths:
if _p not in _seen:
_seen.add(_p)
single_query_images.append(Path(_p))
if _kb_urls:
single_query_image_urls.extend(_kb_urls)
except Exception as _exc:
# Best-effort enrichment; never block worker startup on it.
logger.debug("kanban image-ref extraction failed: %s", _exc)
if quiet:
# Quiet mode: suppress banner, spinner, tool previews.
# Only print the final response and parseable session info.
cli.tool_progress_mode = "off"
if cli._ensure_runtime_credentials():
effective_query: Any = query
if single_query_images:
if single_query_images or single_query_image_urls:
# Honour the same image-routing decision used by the
# interactive path. With a vision-capable model (incl.
# custom-provider models declared via
@ -15160,19 +15197,26 @@ def main(
_parts, _skipped = _build_parts(
query if isinstance(query, str) else "",
[str(p) for p in single_query_images],
image_urls=list(single_query_image_urls) or None,
)
if any(p.get("type") == "image_url" for p in _parts):
effective_query = _parts
else:
# All images unreadable — text fallback.
# ``_preprocess_images_with_vision`` only knows
# about local files; URLs would be lost there,
# so keep the original query text intact when
# only URLs were supplied.
if single_query_images:
effective_query = cli._preprocess_images_with_vision(
query, single_query_images, announce=False,
)
except Exception:
if single_query_images:
effective_query = cli._preprocess_images_with_vision(
query, single_query_images, announce=False,
)
except Exception:
effective_query = cli._preprocess_images_with_vision(
query, single_query_images, announce=False,
)
else:
elif single_query_images:
effective_query = cli._preprocess_images_with_vision(
query,
single_query_images,

View file

@ -16,6 +16,7 @@ from agent.image_routing import (
_supports_vision_override,
build_native_content_parts,
decide_image_input_mode,
extract_image_refs,
)
@ -449,3 +450,190 @@ class TestLargeImageHandling:
assert len(parts) == 2
assert parts[0]["type"] == "text"
assert parts[1]["type"] == "image_url"
# ─── extract_image_refs ──────────────────────────────────────────────────────
class TestExtractImageRefs:
    """Checks for ``extract_image_refs``: which local paths and URLs are
    picked out of free-form text (kanban worker enrichment, issue raised
    May 2026)."""

    def test_empty_or_none_returns_empty(self):
        for blank in ("", None):
            assert extract_image_refs(blank) == ([], [])  # type: ignore[arg-type]

    def test_finds_absolute_path(self, tmp_path: Path):
        shot = tmp_path / "screenshot.png"
        shot.write_bytes(_png_bytes())
        found = extract_image_refs(f"Look at {shot} and tell me what's wrong.")
        assert found == ([str(shot)], [])

    def test_finds_home_relative_path(self, tmp_path: Path, monkeypatch):
        # Point HOME at tmp_path so that ``~/foo.png`` resolves to a real file.
        monkeypatch.setenv("HOME", str(tmp_path))
        target = tmp_path / "foo.png"
        target.write_bytes(_png_bytes())
        found = extract_image_refs("see ~/foo.png please")
        assert found == ([str(target)], [])

    def test_skips_nonexistent_paths(self, tmp_path: Path):
        # Looks like a path, but nothing was ever written there.
        found = extract_image_refs(f"What's at {tmp_path}/never_created.png ?")
        assert found == ([], [])

    def test_finds_http_image_url(self):
        found = extract_image_refs(
            "Check out https://example.com/photos/cat.png — cute right?"
        )
        assert found == ([], ["https://example.com/photos/cat.png"])

    def test_finds_https_url_with_query_string(self):
        _, urls = extract_image_refs(
            "Diagram: https://cdn.example.com/img.jpeg?size=large&v=2 here"
        )
        assert urls == ["https://cdn.example.com/img.jpeg?size=large&v=2"]

    def test_url_trailing_punctuation_stripped(self):
        # The sentence-ending full stop must not end up in the URL.
        _, urls = extract_image_refs("See https://example.com/a.png.")
        assert urls == ["https://example.com/a.png"]

    def test_ignores_non_image_urls(self):
        _, urls = extract_image_refs(
            "See https://example.com/page.html and https://x.com/y.pdf"
        )
        assert urls == []

    def test_dedupes_paths_and_urls(self, tmp_path: Path):
        dup = tmp_path / "dup.png"
        dup.write_bytes(_png_bytes())
        text = (
            f"First {dup} then again {dup}. "
            "Also https://example.com/x.png and https://example.com/x.png again."
        )
        assert extract_image_refs(text) == ([str(dup)], ["https://example.com/x.png"])

    def test_ignores_paths_in_fenced_code_block(self, tmp_path: Path):
        real = tmp_path / "real.png"
        real.write_bytes(_png_bytes())
        lines = [
            "Outside the block, attach this:",
            str(real),
            "But not these examples:",
            "```",
            "some_other_image: /tmp/example.png",
            "url: https://example.com/example.png",
            "```",
        ]
        found = extract_image_refs("\n".join(lines) + "\n")
        assert found == ([str(real)], [])

    def test_ignores_paths_in_inline_code(self, tmp_path: Path):
        real = tmp_path / "real.jpg"
        real.write_bytes(_png_bytes())
        text = (
            f"Attach {real}, but ignore the example "
            "`https://example.com/skip.png` in backticks."
        )
        assert extract_image_refs(text) == ([str(real)], [])

    def test_does_not_match_paths_inside_urls(self, tmp_path: Path):
        # The path portion of a URL must not be reported as a local path;
        # only the URL itself is expected back.
        found = extract_image_refs(
            "Just the URL: https://example.com/some/dir/image.png"
        )
        assert found == ([], ["https://example.com/some/dir/image.png"])

    def test_mixed_paths_and_urls(self, tmp_path: Path):
        local = tmp_path / "local.png"
        local.write_bytes(_png_bytes())
        text = (
            f"Compare local {local} against the design at "
            "https://example.com/design/v2.png — does it match?"
        )
        assert extract_image_refs(text) == (
            [str(local)],
            ["https://example.com/design/v2.png"],
        )

    def test_case_insensitive_extension(self, tmp_path: Path):
        loud = tmp_path / "shouty.PNG"
        loud.write_bytes(_png_bytes())
        paths, _ = extract_image_refs(f"see {loud}")
        assert paths == [str(loud)]
# ─── build_native_content_parts with URLs ────────────────────────────────────
class TestBuildNativeContentPartsURLs:
    """``image_urls=`` pass-through in ``build_native_content_parts``: remote
    image URLs are emitted as ``image_url`` parts alongside local files."""

    def test_url_only_no_local_paths(self):
        remote = "https://example.com/diagram.png"
        parts, skipped = build_native_content_parts(
            "what is this?", [], image_urls=[remote]
        )
        assert skipped == []
        assert len(parts) == 2
        text_part, image_part = parts
        assert text_part["type"] == "text"
        assert "[Image attached: https://example.com/diagram.png]" in text_part["text"]
        assert text_part["text"].startswith("what is this?")
        assert image_part == {"type": "image_url", "image_url": {"url": remote}}

    def test_mixed_path_and_url(self, tmp_path: Path):
        local = tmp_path / "local.png"
        local.write_bytes(_png_bytes())
        parts, skipped = build_native_content_parts(
            "compare these",
            [str(local)],
            image_urls=["https://example.com/remote.jpg"],
        )
        assert skipped == []
        # Expect the local data URL first, then the remote URL.
        attached = [
            part["image_url"]["url"]
            for part in parts
            if part.get("type") == "image_url"
        ]
        assert len(attached) == 2
        assert attached[0].startswith("data:image/png;base64,")
        assert attached[1] == "https://example.com/remote.jpg"
        caption = parts[0]["text"]
        assert "[Image attached at:" in caption
        assert "[Image attached: https://example.com/remote.jpg]" in caption

    def test_empty_url_list_is_no_op(self, tmp_path: Path):
        local = tmp_path / "x.png"
        local.write_bytes(_png_bytes())
        # Passing an empty list must give the same parts as omitting the kwarg.
        without_kwarg, _ = build_native_content_parts("hi", [str(local)])
        with_empty, _ = build_native_content_parts("hi", [str(local)], image_urls=[])
        assert without_kwarg == with_empty

    def test_blank_url_strings_are_dropped(self):
        parts, _ = build_native_content_parts(
            "x", [], image_urls=["", "   ", "https://example.com/a.png"]
        )
        attached = [part for part in parts if part.get("type") == "image_url"]
        assert len(attached) == 1
        assert attached[0]["image_url"]["url"] == "https://example.com/a.png"

    def test_url_only_inserts_default_prompt_when_text_empty(self):
        parts, _ = build_native_content_parts(
            "", [], image_urls=["https://example.com/a.png"]
        )
        first = parts[0]
        assert first["type"] == "text"
        assert first["text"].startswith("What do you see in this image?")

View file

@ -0,0 +1,238 @@
"""Worker-side image enrichment for kanban tasks.
When a kanban task body contains a local image path or an ``http(s)://``
image URL, the worker must surface that image to the model on its first
user turn, matching the CLI/gateway behaviour for inbound images.
The dispatcher spawns the worker as
``hermes -p <profile> chat -q "work kanban task <id>"``. The task body
itself never appears in argv; the worker has to read it from the kanban
DB during startup. These tests cover the round-trip:
task body → kanban_db.get_task → extract_image_refs
→ build_native_content_parts → multimodal user turn
"""
from __future__ import annotations
import base64
from pathlib import Path
import pytest
from hermes_cli import kanban_db as kb
from agent.image_routing import (
build_native_content_parts,
extract_image_refs,
)
# Tiny 1×1 transparent PNG used to back any path the tests stick into a
# task body. extract_image_refs validates the path exists on disk, so the
# byte content has to be a real readable file (any image bytes will do).
# Decoded once at import time from the base64 literal below; tests write
# these bytes to temporary files.
_PNG = base64.b64decode(
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGNgYGBgAAAABQABpfZFQAAAAABJRU5ErkJggg=="
)
@pytest.fixture
def kanban_home(tmp_path: Path, monkeypatch):
    """Provide a per-test ``.hermes`` directory with an initialised kanban DB.

    Creates ``<tmp_path>/.hermes``, exports it as ``HERMES_HOME``, redirects
    ``Path.home()`` to ``tmp_path``, then calls ``kb.init_db()``. Returns the
    ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()
    return hermes_dir
def _add_task_with_body(body: str, *, title: str = "Look at this") -> str:
    """Create a kanban task carrying *body* and return the new task id.

    The task is assigned to ``worker-a`` with no tenant; the connection is
    closed whether or not creation succeeds.
    """
    db = kb.connect()
    try:
        return kb.create_task(
            db,
            title=title,
            body=body,
            assignee="worker-a",
            tenant=None,
        )
    finally:
        db.close()
def _read_body(task_id: str) -> str:
    """Fetch the stored body of *task_id* from the kanban DB.

    Returns ``""`` when the task is missing or its body is empty/``None``.
    The connection is always closed.
    """
    db = kb.connect()
    try:
        row = kb.get_task(db, task_id)
        if row is None:
            return ""
        return row.body or ""
    finally:
        db.close()
class TestExtractFromTaskBody:
    """Store a body in a real kanban task, read it back, and check what
    ``extract_image_refs`` finds in it."""

    def test_local_path_in_body_round_trips(self, kanban_home, tmp_path):
        shot = tmp_path / "screenshot.png"
        shot.write_bytes(_PNG)
        task_id = _add_task_with_body(
            f"Please review the screenshot at {shot} and confirm "
            "the alignment is right."
        )
        found = extract_image_refs(_read_body(task_id))
        assert found == ([str(shot)], [])

    def test_url_in_body_round_trips(self, kanban_home):
        task_id = _add_task_with_body(
            "The design lives at https://example.com/mock/v3.png — "
            "make the implementation match it."
        )
        found = extract_image_refs(_read_body(task_id))
        assert found == ([], ["https://example.com/mock/v3.png"])

    def test_mixed_path_and_url_in_body(self, kanban_home, tmp_path):
        current = tmp_path / "current.png"
        current.write_bytes(_PNG)
        task_id = _add_task_with_body(
            f"Compare the current screenshot {current} against the design at "
            "https://example.com/target.png and write a diff."
        )
        found = extract_image_refs(_read_body(task_id))
        assert found == ([str(current)], ["https://example.com/target.png"])

    def test_body_without_images_yields_nothing(self, kanban_home):
        task_id = _add_task_with_body(
            "Refactor the auth module to use the new session helper."
        )
        assert extract_image_refs(_read_body(task_id)) == ([], [])

    def test_empty_body_is_safe(self, kanban_home):
        task_id = _add_task_with_body("")
        assert extract_image_refs(_read_body(task_id)) == ([], [])
class TestBuildPartsFromTaskBody:
    """End-to-end: task body → extracted refs → content parts for the
    worker's first user turn."""

    @staticmethod
    def _parts_for(body_text):
        """Store *body_text* as a task, read it back, and build content parts.

        Mirrors the cli.py wiring: the user text is the worker's literal
        ``-q`` argument (``"work kanban task <id>"``) and the refs extracted
        from the stored body are passed through
        ``build_native_content_parts``. Returns ``(task_id, parts, skipped)``.
        """
        task_id = _add_task_with_body(body_text)
        paths, urls = extract_image_refs(_read_body(task_id))
        parts, skipped = build_native_content_parts(
            f"work kanban task {task_id}",
            paths,
            image_urls=urls or None,
        )
        return task_id, parts, skipped

    def test_local_path_becomes_native_image_part(self, kanban_home, tmp_path):
        design = tmp_path / "design.png"
        design.write_bytes(_PNG)
        task_id, parts, skipped = self._parts_for(
            f"Check out {design} — what's broken?"
        )
        assert skipped == []
        # Exactly one text part followed by one image_url part.
        assert len(parts) == 2
        text_part, image_part = parts
        assert text_part["type"] == "text"
        assert text_part["text"].startswith(f"work kanban task {task_id}")
        assert f"[Image attached at: {design}]" in text_part["text"]
        assert image_part["type"] == "image_url"
        assert image_part["image_url"]["url"].startswith("data:image/png;base64,")

    def test_url_becomes_image_url_part(self, kanban_home):
        _, parts, skipped = self._parts_for(
            "Reference: https://example.com/target.jpg — match it."
        )
        assert skipped == []
        assert len(parts) == 2
        text_part, image_part = parts
        assert text_part["type"] == "text"
        assert "[Image attached: https://example.com/target.jpg]" in text_part["text"]
        assert image_part == {
            "type": "image_url",
            "image_url": {"url": "https://example.com/target.jpg"},
        }

    def test_body_with_both_yields_two_image_parts(self, kanban_home, tmp_path):
        local = tmp_path / "local.png"
        local.write_bytes(_PNG)
        _, parts, skipped = self._parts_for(
            f"Diff {local} vs https://example.com/target.png — explain it."
        )
        assert skipped == []
        attached = [
            part["image_url"]["url"]
            for part in parts
            if part.get("type") == "image_url"
        ]
        assert len(attached) == 2
        # The local file arrives as a data URL; the remote URL is untouched.
        assert attached[0].startswith("data:image/png;base64,")
        assert attached[1] == "https://example.com/target.png"

    def test_body_with_no_images_leaves_query_untouched(self, kanban_home):
        task_id, parts, skipped = self._parts_for(
            "Rewrite the README intro paragraph to focus on use cases."
        )
        # Nothing to attach, so a single plain text part is expected.
        assert skipped == []
        assert len(parts) == 1
        only = parts[0]
        assert only["type"] == "text"
        assert only["text"] == f"work kanban task {task_id}"

    def test_code_block_example_is_not_attached(self, kanban_home, tmp_path):
        # The image outside the fence is real; the fenced ones are examples.
        real = tmp_path / "real.png"
        real.write_bytes(_PNG)
        task_id = _add_task_with_body(
            f"Real screenshot:\n{real}\n\n"
            "Example we DON'T want attached:\n"
            "```\n"
            "image: /tmp/example_only.png\n"
            "url: https://example.com/example.png\n"
            "```\n"
        )
        found = extract_image_refs(_read_body(task_id))
        assert found == ([str(real)], [])