mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
Merge pull request #52303 from NousResearch/bb/pets-gen-qa
feat(pets): quality-first OpenRouter chain, stronger atlas gates, global pet-gen notifications
This commit is contained in:
commit
0c442fa1d3
17 changed files with 918 additions and 194 deletions
|
|
@ -127,6 +127,22 @@ def _near_key_mask(image, key: tuple[int, int, int], tol: int = 48):
|
|||
)
|
||||
|
||||
|
||||
def _defringe(rgba):
    """Shave the 1px antialiased edge ring left after keying.

    Chroma keying can't catch the antialiased band where the sprite meets the
    backdrop — those pixels are a key/sprite blend, too far from the key to be
    removed, so they ring the cutout in magenta/green. Erode the alpha by one
    pixel (a 3x3 min filter) to drop that contaminated ring; the sprite's own
    thick dark outline keeps the silhouette intact. Built on C-level filters, no
    per-pixel Python.

    Pixels whose eroded alpha reaches 0 also get their RGB zeroed, so the
    contaminated key/sprite blend does not survive as hidden colour residue
    underneath fully transparent pixels.

    Args:
        rgba: Image with an alpha band (callers in this file pass RGBA). It is
            modified in place.

    Returns:
        The same image object that was passed in, after erosion.
    """
    from PIL import ImageFilter

    eroded = rgba.getchannel("A").filter(ImageFilter.MinFilter(3))
    rgba.putalpha(eroded)
    # putalpha() rewrites only the alpha band: the shaved ring would otherwise
    # keep its magenta/green RGB under alpha 0. Fill exactly those pixels with
    # fully-zero RGBA, using a mask that is 255 where the eroded alpha is 0.
    vanished = eroded.point(lambda a: 255 if a == 0 else 0)
    rgba.paste((0, 0, 0, 0), (0, 0, rgba.width, rgba.height), vanished)
    return rgba
|
||||
|
||||
|
||||
def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, threshold: float = 90.0):
|
||||
"""Return *image* (RGBA) with its flat background keyed out to transparent.
|
||||
|
||||
|
|
@ -163,7 +179,8 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None,
|
|||
near = _near_key_mask(rgba, key) # L mask, 255 where near key
|
||||
opaque = rgba.getchannel("A").point(lambda a: 255 if a > _ALPHA_FLOOR else 0)
|
||||
remove_mask = ImageChops.darker(near, opaque)
|
||||
return Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, remove_mask)
|
||||
keyed = Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, remove_mask)
|
||||
return _defringe(keyed)
|
||||
|
||||
visited = bytearray(w * h)
|
||||
# Mark removals in a flat mask and apply them in one C composite at the end —
|
||||
|
|
@ -209,7 +226,7 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None,
|
|||
# One C-level composite instead of millions of per-pixel writes: paint the
|
||||
# flooded pixels to (0,0,0,0) wherever the mask is set.
|
||||
mask = Image.frombytes("L", (w, h), bytes(remove)).point(lambda v: 255 if v else 0)
|
||||
return Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, mask)
|
||||
return _defringe(Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, mask))
|
||||
|
||||
|
||||
def _repair_internal_alpha_holes(image):
|
||||
|
|
@ -369,6 +386,279 @@ def _drop_side_bleed(image):
|
|||
return rgba
|
||||
|
||||
|
||||
def _erase_long_axis_lines(image):
    """Remove thin slot-spanning guide/floor/divider lines.

    Gemini will sometimes satisfy "baseline" / "cell" language by drawing
    literal horizontal floors or vertical panel dividers. They survive chroma
    keying and connect otherwise clean poses. Drop only *thin* rows/columns that
    span nearly the whole slot; thick sprite body rows are left alone.

    A row (column) counts as spanning when at least 85% of its pixels have alpha
    above ``_ALPHA_FLOOR``. A run of adjacent spanning rows (columns) is erased
    only when the run is at most 4px thick.

    Args:
        image: Source image. It is converted to RGBA and copied; the argument
            itself is not modified.

    Returns:
        A new RGBA image in which the thin spanning lines are ``(0, 0, 0, 0)``.
    """
    rgba = image.convert("RGBA").copy()
    w, h = rgba.size
    # Threshold alpha once at C level and read it back as a flat row-major byte
    # string (one byte per pixel, 255 = opaque). Counting with bytes.count
    # replaces one Python-level getpixel() call per pixel per axis.
    opaque = rgba.getchannel("A").point(lambda a: 255 if a > _ALPHA_FLOOR else 0).tobytes()

    def _thin_groups(indices: list[int]) -> list[tuple[int, int]]:
        """Collapse sorted indices into ``(start, stop)`` runs no thicker than 4."""
        groups: list[tuple[int, int]] = []
        start: int | None = None
        prev: int | None = None
        for idx in indices:
            if prev is not None and idx == prev + 1:
                prev = idx
                continue
            # The run just ended: keep it only when it is thin enough to be a line.
            if start is not None and prev is not None and prev - start + 1 <= 4:
                groups.append((start, prev + 1))
            start = prev = idx
        if start is not None and prev is not None and prev - start + 1 <= 4:
            groups.append((start, prev + 1))
        return groups

    wide_rows = [y for y in range(h) if opaque.count(b"\xff", y * w, (y + 1) * w) >= w * 0.85]
    # Column x is every w-th byte starting at offset x.
    tall_cols = [x for x in range(w) if opaque[x::w].count(b"\xff") >= h * 0.85]

    for top, bottom in _thin_groups(wide_rows):
        rgba.paste((0, 0, 0, 0), (0, top, w, bottom))
    for left, right in _thin_groups(tall_cols):
        rgba.paste((0, 0, 0, 0), (left, 0, right, h))
    return rgba
|
||||
|
||||
|
||||
def _component_boxes(image) -> list[tuple[tuple[int, int, int, int], int]]:
    """Connected opaque components as ``[(bbox, mass)]``.

    A full ML segmenter would be overkill here: after chroma keying, "the pet" is
    the dominant connected alpha component inside each known slot. Tiny detached
    sparkles, tears, UI dots, and neighbour slivers are separate components.

    Pixels are opaque when their alpha exceeds ``_ALPHA_FLOOR`` and are joined
    through 4-neighbour adjacency. The search is limited to the image's bounding
    box; returned boxes are in full-image coordinates with exclusive right and
    bottom edges, and ``mass`` is the component's pixel count.
    """
    from collections import deque

    rgba = image.convert("RGBA")
    bounds = rgba.getbbox()
    if bounds is None:
        return []

    off_x, off_y, end_x, end_y = bounds
    span_w = end_x - off_x
    span_h = end_y - off_y
    px = rgba.getchannel("A").load()
    # One flag per pixel of the bounding box, row-major.
    seen = bytearray(span_w * span_h)
    steps = ((1, 0), (-1, 0), (0, 1), (0, -1))
    components: list[tuple[tuple[int, int, int, int], int]] = []

    for seed in range(span_w * span_h):
        if seen[seed]:
            continue
        seen[seed] = 1
        seed_y, seed_x = divmod(seed, span_w)
        if px[off_x + seed_x, off_y + seed_y] <= _ALPHA_FLOOR:
            continue

        # Breadth-first flood fill from this unvisited opaque seed.
        pending: deque[tuple[int, int]] = deque([(seed_x, seed_y)])
        lo_x = hi_x = seed_x
        lo_y = hi_y = seed_y
        mass = 0
        while pending:
            cx, cy = pending.popleft()
            mass += 1
            lo_x = min(lo_x, cx)
            hi_x = max(hi_x, cx)
            lo_y = min(lo_y, cy)
            hi_y = max(hi_y, cy)
            for dx, dy in steps:
                nx = cx + dx
                ny = cy + dy
                if not (0 <= nx < span_w and 0 <= ny < span_h):
                    continue
                flat = ny * span_w + nx
                if seen[flat]:
                    continue
                # Mark transparent neighbours too so they are never re-examined.
                seen[flat] = 1
                if px[off_x + nx, off_y + ny] > _ALPHA_FLOOR:
                    pending.append((nx, ny))

        components.append(((off_x + lo_x, off_y + lo_y, off_x + hi_x + 1, off_y + hi_y + 1), mass))
    return components
|
||||
|
||||
|
||||
def _isolate_slot_subject(image):
    """Keep the slot's real subject; drop detached effects/noise.

    Slot-spanning guide lines are erased first. The heaviest connected component
    is then treated as the body; any other component is kept only when it is
    massive enough and horizontally aligned with that body.

    Returns:
        A new RGBA image holding only the kept components, or the line-erased
        image unchanged when it has no opaque component at all.
    """
    from PIL import Image

    cleaned = _erase_long_axis_lines(image)
    components = _component_boxes(cleaned)
    if not components:
        return cleaned

    body_box, body_mass = max(components, key=lambda item: item[1])
    body_left, _body_top, body_right, _body_bottom = body_box
    body_width = max(1, body_right - body_left)

    def _is_accessory(box: tuple[int, int, int, int], mass: int) -> bool:
        # Keep meaningful attached-looking accessories such as halos; drop
        # sparkles/tears/noise that don't overlap the body column.
        if mass < max(24, body_mass * 0.035):
            return False
        left, _top, right, _bottom = box
        shared = max(0, min(right, body_right) - max(left, body_left))
        mid_x = (left + right) / 2
        hugging = (body_left - body_width * 0.25) <= mid_x <= (body_right + body_width * 0.25)
        return shared >= body_width * 0.3 or hugging

    kept = [box for box, mass in components if box == body_box or _is_accessory(box, mass)]

    canvas = Image.new("RGBA", cleaned.size, (0, 0, 0, 0))
    for box in kept:
        canvas.alpha_composite(cleaned.crop(box), (box[0], box[1]))
    return canvas
|
||||
|
||||
|
||||
def _has_slot_padding(image) -> bool:
    """True when content has empty room on all four slot edges.

    The required margin scales with the slot: 2.5% of the width clamped to
    4..12px horizontally, and 2% of the height clamped to 4..16px vertically.
    An image with no bounding box (nothing drawn) returns False.
    """
    content = image.getbbox()
    if content is None:
        return False

    slot_w, slot_h = image.size
    margin_x = max(4, min(12, round(slot_w * 0.025)))
    margin_y = max(4, min(16, round(slot_h * 0.02)))

    # Free space on each side of the content box.
    gap_left, gap_top = content[0], content[1]
    gap_right, gap_bottom = slot_w - content[2], slot_h - content[3]
    return min(gap_left, gap_right) >= margin_x and min(gap_top, gap_bottom) >= margin_y
|
||||
|
||||
|
||||
def _slot_bounds(width: int, frame_count: int) -> list[tuple[int, int]]:
    """Split ``width`` into ``frame_count`` adjacent ``(left, right)`` columns.

    Edges are rounded from the exact fractional positions, so neighbouring slots
    share an edge and the last slot ends at ``width``. A non-positive
    ``frame_count`` yields an empty list.
    """
    bounds: list[tuple[int, int]] = []
    for index in range(frame_count):
        left = round(index * width / frame_count)
        right = round((index + 1) * width / frame_count)
        bounds.append((left, right))
    return bounds
|
||||
|
||||
|
||||
def _group_component_rows(boxes: list[tuple[int, int, int, int]]) -> list[list[tuple[int, int, int, int]]]:
    """Group component boxes into visual rows, then sort left→right.

    Boxes are visited top to bottom by vertical centre. A box joins the first
    existing row whose running mean centre is within tolerance (55% of the median
    box height, at least 12px); otherwise it opens a new row. Rows come back
    ordered top to bottom, each sorted by horizontal centre.
    """
    if not boxes:
        return []

    def _mid_y(box: tuple[int, int, int, int]) -> float:
        return (box[1] + box[3]) / 2

    def _mid_x(box: tuple[int, int, int, int]) -> float:
        return (box[0] + box[2]) / 2

    box_heights = sorted(max(1, bottom - top) for _left, top, _right, bottom in boxes)
    tolerance = max(12, box_heights[len(box_heights) // 2] * 0.55)

    rows: list[list[tuple[int, int, int, int]]] = []
    row_centers: list[float] = []
    for box in sorted(boxes, key=_mid_y):
        mid = _mid_y(box)
        home = next((i for i, center in enumerate(row_centers) if abs(mid - center) <= tolerance), None)
        if home is None:
            rows.append([box])
            row_centers.append(mid)
            continue
        rows[home].append(box)
        # Re-centre the row on the mean of its members after each addition.
        row_centers[home] = sum(_mid_y(member) for member in rows[home]) / len(rows[home])

    # Stable sort of row indices by centre keeps ties in creation order.
    top_down = sorted(range(len(rows)), key=row_centers.__getitem__)
    arranged = [rows[i] for i in top_down]
    for row in arranged:
        row.sort(key=_mid_x)
    return arranged
|
||||
|
||||
|
||||
def _merge_related_boxes(boxes: list[tuple[int, int, int, int]]) -> list[tuple[int, int, int, int]]:
    """Merge disconnected parts that clearly belong to one subject.

    Capes, tails, horns, and held props sometimes key as separate components.
    Merge components on the same visual row when their vertical spans overlap and
    the horizontal gap is tiny compared with the component size. Do not bridge the
    much larger gaps between separate poses.

    Two boxes merge when their vertical overlap is at least 45% of the shorter
    box and the horizontal gap is at most ``max(14, 22% of the narrower box)``.
    Passes repeat until one completes without a merge, so a grown box can absorb
    further neighbours.
    """
    current = list(boxes)
    while True:
        merged_any = False
        absorbed: set[int] = set()
        next_round: list[tuple[int, int, int, int]] = []

        for i, seed in enumerate(current):
            if i in absorbed:
                continue
            left, top, right, bottom = seed
            for j in range(i + 1, len(current)):
                if j in absorbed:
                    continue
                o_left, o_top, o_right, o_bottom = current[j]
                shared_h = max(0, min(bottom, o_bottom) - max(top, o_top))
                shorter = max(1, min(bottom - top, o_bottom - o_top))
                gap = max(0, max(left, o_left) - min(right, o_right))
                narrower = max(1, min(right - left, o_right - o_left))
                if shared_h < shorter * 0.45 or gap > max(14, narrower * 0.22):
                    continue
                # Grow the seed box; later candidates are compared to the union.
                left, top = min(left, o_left), min(top, o_top)
                right, bottom = max(right, o_right), max(bottom, o_bottom)
                absorbed.add(j)
                merged_any = True
            next_round.append((left, top, right, bottom))

        current = next_round
        if not merged_any:
            return current
|
||||
|
||||
|
||||
def _component_crops(strip, frame_count: int, *, require_padding: bool = False) -> list | None:
    """Extract frame subjects as connected non-background objects.

    This is the robust path for models that ignore "one horizontal row" and emit a
    2D sprite grid. We count real opaque subject components, discard tiny
    detached effects, sort in reading order, and return exactly *frame_count*
    frames. Slot slicing is only a fallback when object detection can't satisfy
    the contract.

    Args:
        strip: Background-keyed strip image to segment.
        frame_count: Number of frames that must be produced.
        require_padding: When true, give up if any chosen subject box sits closer
            to an image edge than the minimum margin.

    Returns:
        A list of RGBA frames, or ``None`` when neither the strip as given nor
        the strip with long guide lines erased yields enough subjects.
    """
    from PIL import Image

    def _extract(source) -> list | None:
        components = _component_boxes(source)
        if not components:
            return None

        heaviest = max(mass for _box, mass in components)
        mass_floor = max(64, heaviest * 0.12)
        subjects = _merge_related_boxes([box for box, mass in components if mass >= mass_floor])
        if len(subjects) < frame_count:
            return None

        rows = _group_component_rows(subjects)
        # Reading order: rows top to bottom, boxes left to right within a row.
        picked = [box for row in rows for box in row][:frame_count]
        if len(picked) < frame_count:
            return None

        if require_padding:
            edge_x = max(4, min(12, round(source.width * 0.01)))
            edge_y = max(4, min(16, round(source.height * 0.015)))
            crowded = any(
                left < edge_x or top < edge_y or source.width - right < edge_x or source.height - bottom < edge_y
                for left, top, right, bottom in picked
            )
            if crowded:
                return None

        stacked = len(rows) > 1
        frames = []
        for left, top, right, bottom in picked:
            pad_x = max(8, round((right - left) * 0.08))
            pad_y = max(8, round((bottom - top) * 0.08))
            if stacked:
                window = (
                    max(0, left - pad_x),
                    max(0, top - pad_y),
                    min(source.width, right + pad_x),
                    min(source.height, bottom + pad_y),
                )
            elif frame_count == 1:
                window = (0, 0, source.width, source.height)
            else:
                # Preserve vertical motion for true one-row strips (jumping,
                # bobbing) while still narrowing X around the object.
                window = (max(0, left - pad_x), 0, min(source.width, right + pad_x), source.height)

            canvas = Image.new("RGBA", (window[2] - window[0], window[3] - window[1]), (0, 0, 0, 0))
            canvas.alpha_composite(source.crop((left, top, right, bottom)), (left - window[0], top - window[1]))
            # The global component pass already chose the subject box. Do not run
            # another component filter here: capes/tails can be legitimate
            # disconnected lobes inside the chosen subject box.
            frames.append(canvas)
        return frames

    direct = _extract(strip)
    if direct:
        return direct
    # Retry once with slot-spanning floor/divider lines removed.
    return _extract(_erase_long_axis_lines(strip))
|
||||
|
||||
|
||||
def _sever_expected_gutters(strip, frame_count: int):
|
||||
"""Cut thin vertical gutters at expected frame boundaries before labeling.
|
||||
|
||||
|
|
@ -397,17 +687,23 @@ def _sever_expected_gutters(strip, frame_count: int):
|
|||
return out
|
||||
|
||||
|
||||
def _slot_crops(strip, frame_count: int) -> list:
|
||||
def _slot_crops(strip, frame_count: int, *, require_padding: bool = False) -> list | None:
|
||||
"""Slice *strip* into *frame_count* uniform columns (one coordinate space).
|
||||
|
||||
Equal-width columns keep every frame in a single shared coordinate frame, so
|
||||
a later union-crop + shared placement (:func:`normalize_cells`) preserves the
|
||||
row's real motion without the per-frame re-centering that makes a pet visibly
|
||||
slide. Neighbour side-bleed is trimmed per column.
|
||||
slide. Each slot is cleaned independently so detached effects, floors,
|
||||
dividers, and neighbour slivers do not become "frames".
|
||||
"""
|
||||
w0 = max(1, strip.width // frame_count)
|
||||
h = strip.height
|
||||
return [_drop_side_bleed(strip.crop((i * w0, 0, i * w0 + w0, h))) for i in range(frame_count)]
|
||||
frames = []
|
||||
for left, right in _slot_bounds(strip.width, frame_count):
|
||||
slot = _drop_side_bleed(_isolate_slot_subject(strip.crop((left, 0, right, h))))
|
||||
if require_padding and not _has_slot_padding(slot):
|
||||
return None
|
||||
frames.append(slot)
|
||||
return frames
|
||||
|
||||
|
||||
def _content_runs(profile: list[int], *, threshold: int = 2) -> list[tuple[int, int]]:
|
||||
|
|
@ -465,6 +761,52 @@ def _frame_x_ranges(strip, frame_count: int) -> list[tuple[int, int]] | None:
|
|||
return [(l, r) for l, r in groups]
|
||||
|
||||
|
||||
def _significant_subject_boxes(image) -> list[tuple[int, int, int, int]]:
    """Bounding boxes of the image's substantial subjects, with related parts merged.

    A component is substantial when its mass is at least ``max(32, 12% of the
    heaviest component)``. Returns an empty list when the image has no opaque
    component.
    """
    components = _component_boxes(image)
    if not components:
        return []

    heaviest = max(mass for _box, mass in components)
    mass_floor = max(32, heaviest * 0.12)
    substantial = [box for box, mass in components if mass >= mass_floor]
    return _merge_related_boxes(substantial)
|
||||
|
||||
|
||||
def _validate_extracted_frames(frames: list, frame_count: int) -> None:
    """Reject rows where one "frame" is really multiple poses.

    A bad provider roll can collapse a strip into tiny repeated poses. If we let
    that through, normalization sees a huge motion envelope and shrinks the
    entire pet to postage-stamp size. Catch the row here so hatch can regenerate
    it instead of saving a technically non-empty but visually broken atlas.

    Raises:
        ValueError: When the frame count is wrong, a frame is empty, a frame
            holds three or more separated subjects, or a frame is a width
            outlier against the row median without being proportionally taller.
    """
    if len(frames) != frame_count:
        raise ValueError(f"expected {frame_count} frames, got {len(frames)}")

    extents = []
    for index, frame in enumerate(frames):
        extent = frame.getbbox()
        if extent is None:
            raise ValueError(f"frame {index} is empty")
        if len(_significant_subject_boxes(frame)) >= 3:
            raise ValueError(f"frame {index} contains multiple separated subjects")
        extents.append(extent)

    # A single frame has no siblings to be an outlier against.
    if frame_count <= 1:
        return

    sizes = [(right - left, bottom - top) for left, top, right, bottom in extents]
    middle = len(sizes) // 2
    median_w = max(1, sorted(width for width, _height in sizes)[middle])
    median_h = max(1, sorted(height for _width, height in sizes)[middle])
    width_limit = max(median_w * 3.0, median_w + 96)
    height_limit = median_h * 1.6

    for index, (width, height) in enumerate(sizes):
        # A legitimate wing/arm can be wider than the median pose. A frame that is
        # several times wider while not proportionally taller is usually multiple
        # mini-poses packed into one accepted frame.
        if width > width_limit and height <= height_limit:
            raise ValueError(f"frame {index} is a multi-pose width outlier")
|
||||
|
||||
|
||||
def extract_strip_frames(
|
||||
strip,
|
||||
frame_count: int,
|
||||
|
|
@ -475,15 +817,15 @@ def extract_strip_frames(
|
|||
) -> list:
|
||||
"""Turn one generated row strip into *frame_count* frames.
|
||||
|
||||
The background is keyed out, thin connecting bridges at the expected
|
||||
boundaries are severed, then the strip is sliced at its empty chroma gutters
|
||||
(:func:`_frame_x_ranges`) — the plain "find each object, make a frame" cut
|
||||
that works once poses are spaced apart (which generation now enforces).
|
||||
The background is keyed out, then strict extraction treats the requested
|
||||
frame count as the source of truth: slice known equal slots, isolate the real
|
||||
subject in each slot, and require empty padding on X and Y. Empty chroma
|
||||
gutters are only a lenient salvage fallback.
|
||||
|
||||
Each frame is cropped at full cell height so tall ears / halos are never
|
||||
clipped; :func:`_drop_side_bleed` trims any faint neighbour sliver. When the
|
||||
poses are touching (fewer gutters than frames) ``components`` raises and
|
||||
``auto`` falls back to equal-width slots.
|
||||
clipped; detached effects and neighbour slivers are dropped per slot. When a
|
||||
pose does not have required space around it, ``components`` raises and
|
||||
``auto`` falls back to best-effort slicing.
|
||||
|
||||
*fit* (default) fits+centers each frame into a 192x208 cell — the standalone
|
||||
contract for callers that don't normalize. Hatching passes ``fit=False`` to
|
||||
|
|
@ -500,28 +842,38 @@ def extract_strip_frames(
|
|||
|
||||
strip = remove_background(strip, chroma_key=chroma_key)
|
||||
|
||||
# Prefer the real gutters as-is: when poses are already spaced (generation
|
||||
# enforces this), slicing the strip untouched keeps each pose's own bounds and
|
||||
# never cuts through an unevenly-placed silhouette. Only fall back to severing
|
||||
# the expected boundaries when gaps alone can't separate the row — i.e. poses
|
||||
# are bridged by a shared shadow/glow/1px line and read as one blob.
|
||||
source = strip
|
||||
ranges = _frame_x_ranges(source, frame_count)
|
||||
if ranges is None:
|
||||
source = _sever_expected_gutters(strip, frame_count)
|
||||
ranges = _frame_x_ranges(source, frame_count)
|
||||
|
||||
if ranges is None:
|
||||
# Strict path: count actual non-background subjects first. This handles both
|
||||
# the intended one-row strip and model-cheated 2D grids without ever stacking
|
||||
# two visual rows into one frame.
|
||||
frames = _component_crops(strip, frame_count, require_padding=True)
|
||||
if frames is None:
|
||||
frames = _slot_crops(strip, frame_count, require_padding=True)
|
||||
if frames is None:
|
||||
if method == "components":
|
||||
raise ValueError(f"could not segment {frame_count} sprites from strip")
|
||||
frames = _slot_crops(source, frame_count)
|
||||
else:
|
||||
h = source.height
|
||||
pad = max(2, min(16, round((source.width / max(1, frame_count)) * 0.04)))
|
||||
frames = [
|
||||
_drop_side_bleed(source.crop((max(0, left - pad), 0, min(source.width, right + pad), h)))
|
||||
for left, right in ranges
|
||||
]
|
||||
raise ValueError(f"could not segment {frame_count} padded sprites from strip")
|
||||
|
||||
# Lenient salvage for the final attempt: prefer real gutters when they
|
||||
# exist, then sever expected boundaries, then fall back to raw slots. Still
|
||||
# try object extraction first, just without edge-padding enforcement, so
|
||||
# cached/borderline model rolls can be inspected without stacking a 2D grid.
|
||||
frames = _component_crops(strip, frame_count, require_padding=False)
|
||||
if frames is None:
|
||||
source = strip
|
||||
ranges = _frame_x_ranges(source, frame_count)
|
||||
if ranges is None:
|
||||
source = _sever_expected_gutters(strip, frame_count)
|
||||
ranges = _frame_x_ranges(source, frame_count)
|
||||
|
||||
if ranges is None:
|
||||
frames = _slot_crops(source, frame_count, require_padding=False) or []
|
||||
else:
|
||||
h = source.height
|
||||
pad = max(2, min(16, round((source.width / max(1, frame_count)) * 0.04)))
|
||||
frames = [
|
||||
_drop_side_bleed(_isolate_slot_subject(source.crop((max(0, left - pad), 0, min(source.width, right + pad), h))))
|
||||
for left, right in ranges
|
||||
]
|
||||
_validate_extracted_frames(frames, frame_count)
|
||||
return [_fit_to_cell(f) for f in frames] if fit else frames
|
||||
|
||||
|
||||
|
|
@ -746,22 +1098,72 @@ def validate_atlas(atlas) -> dict:
|
|||
return {"ok": False, "width": atlas.width, "height": atlas.height, "errors": errors, "warnings": warnings, "filled_states": []}
|
||||
|
||||
filled_states: list[str] = []
|
||||
cell_boxes_by_state: dict[str, list[tuple[int, int, int, int]]] = {}
|
||||
for state, row, count in ROW_SPECS:
|
||||
row_pixels = 0
|
||||
boxes: list[tuple[int, int, int, int]] = []
|
||||
for col in range(count):
|
||||
left = col * CELL_WIDTH
|
||||
top = row * CELL_HEIGHT
|
||||
cell = atlas.crop((left, top, left + CELL_WIDTH, top + CELL_HEIGHT))
|
||||
nonblank = sum(cell.getchannel("A").histogram()[1:])
|
||||
row_pixels += nonblank
|
||||
bbox = cell.getbbox()
|
||||
if bbox is not None:
|
||||
boxes.append(bbox)
|
||||
if row_pixels > 0:
|
||||
filled_states.append(state)
|
||||
cell_boxes_by_state[state] = boxes
|
||||
else:
|
||||
warnings.append(f"state '{state}' has no frames")
|
||||
|
||||
if not filled_states:
|
||||
errors.append("atlas is empty — no state produced any frames")
|
||||
|
||||
# A visually valid pet must occupy the cell. A single bad row can otherwise
|
||||
# poison global normalization and shrink every state to a tiny postage stamp
|
||||
# while still passing the old "non-empty cells" check.
|
||||
all_widths = sorted(
|
||||
right - left
|
||||
for boxes in cell_boxes_by_state.values()
|
||||
for left, _top, right, _bottom in boxes
|
||||
)
|
||||
all_heights = sorted(
|
||||
bottom - top
|
||||
for boxes in cell_boxes_by_state.values()
|
||||
for _left, top, _right, bottom in boxes
|
||||
)
|
||||
global_med_w = 0
|
||||
global_med_h = 0
|
||||
if all_widths and all_heights:
|
||||
global_med_w = all_widths[len(all_widths) // 2]
|
||||
median_h = all_heights[len(all_heights) // 2]
|
||||
global_med_h = median_h
|
||||
min_h = max(56, round(CELL_HEIGHT * 0.28))
|
||||
if median_h < min_h:
|
||||
errors.append(f"atlas sprites are too small after normalization (median frame height {median_h}px)")
|
||||
|
||||
for state, boxes in cell_boxes_by_state.items():
|
||||
if len(boxes) <= 1:
|
||||
continue
|
||||
widths = sorted(right - left for left, _top, right, _bottom in boxes)
|
||||
heights = sorted(bottom - top for _left, top, _right, bottom in boxes)
|
||||
med_w = max(1, widths[len(widths) // 2])
|
||||
med_h = max(1, heights[len(heights) // 2])
|
||||
max_w = widths[-1]
|
||||
max_h = heights[-1]
|
||||
if max_w > max(med_w * 3.0, med_w + 96) and max_h <= med_h * 1.6:
|
||||
errors.append(f"state '{state}' contains a multi-pose frame outlier")
|
||||
# Per-state collapse guard: one malformed row (tiny slivers / chopped
|
||||
# fragments) should not pass because other rows are healthy.
|
||||
if global_med_w and global_med_h:
|
||||
min_state_w = max(32, round(global_med_w * 0.42))
|
||||
min_state_h = max(40, round(global_med_h * 0.50))
|
||||
if med_w < min_state_w or med_h < min_state_h:
|
||||
errors.append(
|
||||
f"state '{state}' appears collapsed (median {med_w}x{med_h}px, global median {global_med_w}x{global_med_h}px)"
|
||||
)
|
||||
|
||||
# Transparent pixels must carry zero RGB (no halo residue).
|
||||
data = atlas.tobytes()
|
||||
residue = 0
|
||||
|
|
|
|||
|
|
@ -14,29 +14,40 @@ producing an ungrounded, drifting pet.
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Providers that can ground generation on a reference image.
|
||||
# openrouter / nous reach Gemini Flash Image (and friends) over the
|
||||
# OpenRouter-compatible chat-completions image protocol, which accepts
|
||||
# reference images for grounding. Nous Portal proxies OpenRouter, so both
|
||||
# qualify.
|
||||
_REF_CAPABLE = ("openai", "openai-codex", "krea", "openrouter", "nous")
|
||||
# Providers that can ground generation on a reference image, in preference order
|
||||
# (Nous Portal → OpenAI → OpenRouter → …). OpenRouter/Nous run a quality-first
|
||||
# model chain and may fall back depending on account access and endpoint behavior,
|
||||
# so fidelity can vary by configured backend + model availability.
|
||||
_REF_CAPABLE = ("nous", "openai", "openai-codex", "openrouter", "krea")
|
||||
|
||||
# Friendly label + one-line speed/quality note per reference-capable provider,
|
||||
# surfaced in the desktop pet-gen picker so users can trade speed for fidelity.
|
||||
_PROVIDER_META: dict[str, dict[str, str]] = {
|
||||
"nous": {"label": "Nous Portal", "note": "Fast, balanced quality"},
|
||||
"openrouter": {"label": "OpenRouter", "note": "Fastest — Gemini Flash Image"},
|
||||
"openai": {"label": "OpenAI", "note": "Highest fidelity, slower"},
|
||||
"openai-codex": {"label": "OpenAI (Codex)", "note": "Highest fidelity, slower"},
|
||||
"krea": {"label": "Krea", "note": "Stylized, style-reference grounding"},
|
||||
# Friendly display label per reference-capable provider, surfaced in the desktop
|
||||
# pet-gen picker.
|
||||
_PROVIDER_LABELS: dict[str, str] = {
|
||||
"nous": "Nous Portal",
|
||||
"openrouter": "OpenRouter",
|
||||
"openai": "OpenAI",
|
||||
"openai-codex": "OpenAI (Codex)",
|
||||
"krea": "Krea",
|
||||
}
|
||||
|
||||
|
||||
def _forced_provider_from_env() -> str | None:
    """Optional QA override to force a pet-gen backend.

    `HERMES_PET_IMAGE_PROVIDER=<name>` (e.g. `openrouter`) bypasses the normal
    active/default provider resolution for pet generation only. The value is
    trimmed and lower-cased before matching; unset, empty, or unknown values are
    ignored so existing users are unaffected.

    Returns:
        The normalized provider name when it is listed in ``_REF_CAPABLE``,
        otherwise ``None``.
    """
    raw = os.environ.get("HERMES_PET_IMAGE_PROVIDER", "")
    candidate = raw.strip().lower()
    if candidate in _REF_CAPABLE:
        return candidate
    return None
|
||||
|
||||
|
||||
class GenerationError(RuntimeError):
    """Image generation failed for any reason: no provider, API error, or IO."""
|
||||
|
||||
|
|
@ -71,6 +82,14 @@ def resolve_provider(*, require_references: bool = True, prefer: str | None = No
|
|||
_discover()
|
||||
from agent.image_gen_registry import get_active_provider, get_provider
|
||||
|
||||
# QA override: force one provider for pet-gen iteration regardless of the
|
||||
# globally active image_gen backend.
|
||||
forced = _forced_provider_from_env()
|
||||
if forced:
|
||||
chosen = get_provider(forced)
|
||||
if chosen is not None and chosen.is_available():
|
||||
return SpriteProvider(name=forced, provider=chosen, supports_references=True)
|
||||
|
||||
# An explicit user pick wins when it's reference-capable and has credentials;
|
||||
# otherwise we ignore it and fall through to the normal resolution.
|
||||
if prefer:
|
||||
|
|
@ -110,10 +129,11 @@ def resolve_provider(*, require_references: bool = True, prefer: str | None = No
|
|||
def list_sprite_providers() -> list[dict]:
|
||||
"""The reference-capable providers available to pick for pet generation.
|
||||
|
||||
Returns ``[{name, label, note, default}]`` for every ref-capable provider the
|
||||
user actually has credentials for, marking the one :func:`resolve_provider`
|
||||
would choose with no explicit preference. Empty when none is configured (the
|
||||
picker hides itself). Best-effort: discovery hiccups yield an empty list.
|
||||
Returns ``[{name, label, default}]`` for every ref-capable provider the user
|
||||
actually has credentials for, in preference order, marking the one
|
||||
:func:`resolve_provider` would choose with no explicit preference. Empty when
|
||||
none is configured (the picker hides itself). Best-effort: discovery hiccups
|
||||
yield an empty list.
|
||||
"""
|
||||
_discover()
|
||||
from agent.image_gen_registry import get_provider
|
||||
|
|
@ -128,12 +148,10 @@ def list_sprite_providers() -> list[dict]:
|
|||
provider = get_provider(name)
|
||||
if provider is None or not provider.is_available():
|
||||
continue
|
||||
meta = _PROVIDER_META.get(name, {})
|
||||
out.append(
|
||||
{
|
||||
"name": name,
|
||||
"label": meta.get("label", name),
|
||||
"note": meta.get("note", ""),
|
||||
"label": _PROVIDER_LABELS.get(name, name),
|
||||
"default": name == default_name,
|
||||
}
|
||||
)
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ _MAX_PARALLEL_GENERATIONS = 4
|
|||
# How many times to (re)generate a single row before accepting a best-effort
|
||||
# slice. Early attempts demand clean per-pose gutters; the last is lenient so a
|
||||
# stubborn row still yields frames instead of dropping out entirely.
|
||||
_ROW_GEN_ATTEMPTS = 2
|
||||
_ROW_GEN_ATTEMPTS = 3
|
||||
_MIN_FILLED_STATES = 6
|
||||
_REQUIRED_STATES = frozenset({"idle", "running-right", "waving"})
|
||||
|
||||
|
|
|
|||
|
|
@ -63,12 +63,14 @@ _STYLE_HINTS: dict[str, str] = {
|
|||
}
|
||||
|
||||
_BACKGROUND = (
|
||||
"Center one full-body character on a flat, uniform, high-contrast chroma-key "
|
||||
"background (prefer pure hot magenta #FF00FF unless that color appears on "
|
||||
"the character). The background must completely surround the character: one "
|
||||
"even color with NO gradient, vignette, texture, pattern, scenery, shadow, "
|
||||
"ground line, frame, or border, so it keys out cleanly. The background color "
|
||||
"must not appear anywhere on the character itself. No text, no labels."
|
||||
"Center the character on a SINGLE flat, uniform, high-contrast chroma-key "
|
||||
"background — pure hot magenta #FF00FF (only if magenta appears on the "
|
||||
"character, use pure green #00FF00 instead). The background is ONE continuous "
|
||||
"even color that completely surrounds the character with NO gradient, "
|
||||
"vignette, texture, pattern, scenery, shadow, ground line, frame, border, "
|
||||
"panel, comic cell, gutter line, grid, or divider of any kind, so it keys out "
|
||||
"cleanly. The background color must not appear anywhere on the character. "
|
||||
"No text, no labels, no speech bubbles, no UI."
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -149,8 +151,12 @@ def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str |
|
|||
f"(same species, face, colors, markings, proportions, and props), "
|
||||
"preserving the same emotional tone/mood (e.g., scary stays scary, cute stays cute), "
|
||||
f"draw a single WIDE horizontal strip of {frame_count} animation frames showing {action}. "
|
||||
f"LAYOUT: split the wide strip into {frame_count} equal vertical cells, one "
|
||||
"pose centered in each cell. "
|
||||
f"LAYOUT: arrange {frame_count} poses in ONE horizontal row at equal spacing, "
|
||||
"each pose centered in its own imaginary equal region. Draw NO panel borders, "
|
||||
"NO comic cells, NO boxes, NO vertical divider/gutter lines, NO grid, NO frame "
|
||||
"outlines between poses — the backdrop is one unbroken flat field behind all of them. "
|
||||
"Fill the WHOLE strip with the SAME single flat chroma-key color as the attached "
|
||||
"reference image's background (identical hue in every frame, no per-pose color shifts). "
|
||||
f"SPACING (critical): draw each pose at a consistent, healthy, clearly "
|
||||
f"visible size (roughly {pose_px}px wide on a {_ASSUMED_STRIP_WIDTH}px "
|
||||
f"strip) — do NOT shrink it tiny — but keep its ENTIRE silhouette "
|
||||
|
|
@ -166,8 +172,9 @@ def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str |
|
|||
# so only the action moves — this is what stops the loop sliding/pulsing.
|
||||
"REGISTRATION (critical): the character is the SAME height and SAME width "
|
||||
"in every frame, drawn at the SAME scale, centered over the SAME point, "
|
||||
"with all feet resting on ONE shared horizontal ground line across the "
|
||||
"whole strip. Keep the body's center, size, and stance fixed frame to "
|
||||
"with all feet aligned to the SAME invisible horizontal baseline across the "
|
||||
"whole strip — this baseline is conceptual ONLY: draw NO ground line, floor, "
|
||||
"platform, horizon, or contact shadow beneath the feet. Keep the body's center, size, and stance fixed frame to "
|
||||
"frame — ONLY the limbs/features the action needs may move. Capes, cloaks, "
|
||||
"bags, and scarves stay in the SAME place and shape every frame (no "
|
||||
"swinging, flowing, or drifting) unless the action itself requires it. No "
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ import { Check, ChevronDown } from '@/lib/icons'
|
|||
import { $petGenProvider, $petGenProviders, setPetGenProvider } from '@/store/pet-generate'
|
||||
|
||||
// Image-backend picker for pet generation — the composer's model-pill pattern:
|
||||
// a quiet trigger + a dropdown of options, each with a one-line speed/quality
|
||||
// note. Hidden unless there are 2+ reference-capable backends (nothing to pick).
|
||||
// a quiet trigger + a dropdown of options. No per-option notes: every backend
|
||||
// resolves to the same faithful OpenAI image model, so there's no tradeoff to
|
||||
// describe. Hidden unless there are 2+ reference-capable backends (nothing to pick).
|
||||
export function ProviderPicker() {
|
||||
const providers = useStore($petGenProviders)
|
||||
const picked = useStore($petGenProvider)
|
||||
|
|
@ -32,19 +33,16 @@ export function ProviderPicker() {
|
|||
</DropdownMenuTrigger>
|
||||
{/* The picker lives inside the pet-gen Dialog (z-130) and portals to body,
|
||||
so lift its menu above the dialog or it opens behind it. */}
|
||||
<DropdownMenuContent align="start" className="z-[140] w-56">
|
||||
<DropdownMenuContent align="start" className="z-[140]">
|
||||
{providers.map(provider => (
|
||||
<DropdownMenuItem
|
||||
className="flex-col items-start gap-0.5"
|
||||
className="flex items-center gap-1.5"
|
||||
key={provider.name}
|
||||
// Picking the default clears the override (no need to pin it).
|
||||
onSelect={() => setPetGenProvider(provider.default ? '' : provider.name)}
|
||||
>
|
||||
<span className="flex w-full items-center gap-1.5">
|
||||
<span className="min-w-0 flex-1 truncate font-medium text-foreground">{provider.label}</span>
|
||||
{provider.name === current?.name && <Check className="size-3.5 text-primary" />}
|
||||
</span>
|
||||
{provider.note && <span className="text-[0.6875rem] text-(--ui-text-tertiary)">{provider.note}</span>}
|
||||
<span className="min-w-0 flex-1 truncate font-medium text-foreground">{provider.label}</span>
|
||||
{provider.name === current?.name && <Check className="size-3.5 text-primary" />}
|
||||
</DropdownMenuItem>
|
||||
))}
|
||||
</DropdownMenuContent>
|
||||
|
|
|
|||
|
|
@ -62,10 +62,11 @@ export function PetGenerateOverlay() {
|
|||
|
||||
// The footer banner narrates the dialog's async state: the failure reason on a
|
||||
// dead-end error, else the "you can close this, we'll notify you" reassurance
|
||||
// while a generate/hatch runs in the background.
|
||||
// while a generate/hatch runs in the background. On step 1, show a neutral ETA.
|
||||
const working = status === 'generating' || status === 'hatching'
|
||||
const errored = status === 'error' && drafts.length === 0
|
||||
const banner = errored ? error || copy.genericError : working ? copy.backgroundHint : undefined
|
||||
const stepOne = status === 'idle' || status === 'ready'
|
||||
const banner = errored ? error || copy.genericError : working ? copy.backgroundHint : stepOne ? copy.slowProviderHint : undefined
|
||||
|
||||
return (
|
||||
<Dialog onOpenChange={handleOpenChange} open={open}>
|
||||
|
|
|
|||
|
|
@ -798,6 +798,7 @@ export const en: Translations = {
|
|||
namePlaceholder: 'Name your pet',
|
||||
staleBackend: 'Update Hermes to generate pets.',
|
||||
backgroundHint: 'You can close this — Hermes will notify you when it’s done.',
|
||||
slowProviderHint: 'This can take up to 5 minutes',
|
||||
genericError: 'Generation failed — try again or pick a suggestion.',
|
||||
referenceImageTooLarge: 'Reference image is too large. Use one under 16 MB.',
|
||||
referenceImageInvalid: 'Could not read that reference image. Try a PNG, JPG, WebP, or GIF.',
|
||||
|
|
|
|||
|
|
@ -916,6 +916,7 @@ export const ja = defineLocale({
|
|||
namePlaceholder: 'ペットに名前を付ける',
|
||||
staleBackend: 'ペットを生成するには Hermes を更新してください。',
|
||||
backgroundHint: 'このウィンドウは閉じても大丈夫です。完了したら Hermes が通知します。',
|
||||
slowProviderHint: 'これには最大5分かかることがあります。',
|
||||
genericError: '生成に失敗しました。もう一度試すか、候補を選んでください。',
|
||||
referenceImageTooLarge: '参照画像が大きすぎます。16 MB 未満の画像を使ってください。',
|
||||
referenceImageInvalid: '参照画像を読み込めませんでした。PNG/JPG/WebP/GIF を試してください。',
|
||||
|
|
|
|||
|
|
@ -702,6 +702,7 @@ export interface Translations {
|
|||
namePlaceholder: string
|
||||
staleBackend: string
|
||||
backgroundHint: string
|
||||
slowProviderHint: string
|
||||
genericError: string
|
||||
referenceImageTooLarge: string
|
||||
referenceImageInvalid: string
|
||||
|
|
|
|||
|
|
@ -888,6 +888,7 @@ export const zhHant = defineLocale({
|
|||
namePlaceholder: '為寵物命名',
|
||||
staleBackend: '請更新 Hermes 以生成寵物。',
|
||||
backgroundHint: '你可以關閉此視窗——完成後 Hermes 會通知你。',
|
||||
slowProviderHint: '這可能最多需要 5 分鐘。',
|
||||
genericError: '生成失敗——請重試或選一個建議。',
|
||||
referenceImageTooLarge: '參考圖片過大。請使用小於 16 MB 的圖片。',
|
||||
referenceImageInvalid: '無法讀取該參考圖片。請嘗試 PNG、JPG、WebP 或 GIF。',
|
||||
|
|
|
|||
|
|
@ -986,6 +986,7 @@ export const zh: Translations = {
|
|||
namePlaceholder: '给宠物起个名字',
|
||||
staleBackend: '请更新 Hermes 以生成宠物。',
|
||||
backgroundHint: '你可以关闭此窗口——完成后 Hermes 会通知你。',
|
||||
slowProviderHint: '这可能最多需要 5 分钟。',
|
||||
genericError: '生成失败——请重试或选择一个建议。',
|
||||
referenceImageTooLarge: '参考图过大。请使用小于 16 MB 的图片。',
|
||||
referenceImageInvalid: '无法读取该参考图。请尝试 PNG、JPG、WebP 或 GIF。',
|
||||
|
|
|
|||
|
|
@ -96,6 +96,19 @@ describe('dispatchNativeNotification focus gating', () => {
|
|||
dispatchNativeNotification({ kind: 'approval', sessionId: 'on-screen', title: 'approve' })
|
||||
expect(notify).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('fires a global completion notification while away with no active session (pet gen)', () => {
|
||||
setActiveSessionId(null)
|
||||
dispatchNativeNotification({ global: true, kind: 'backgroundDone', title: 'Your pet hatched' })
|
||||
expect(notify).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('suppresses a global notification when the window is focused', () => {
|
||||
setWindowState({ focused: true, hidden: false })
|
||||
setActiveSessionId(null)
|
||||
dispatchNativeNotification({ global: true, kind: 'backgroundDone', title: 'Your pet hatched' })
|
||||
expect(notify).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
describe('dispatchNativeNotification preferences', () => {
|
||||
|
|
|
|||
|
|
@ -113,7 +113,15 @@ function isBackgrounded(): boolean {
|
|||
return typeof document.hasFocus === 'function' && !document.hasFocus()
|
||||
}
|
||||
|
||||
function shouldFire(kind: NativeNotificationKind, sessionId?: null | string): boolean {
|
||||
function shouldFire(kind: NativeNotificationKind, sessionId?: null | string, global = false): boolean {
|
||||
// Global notifications aren't tied to a chat session (e.g. pet generation,
|
||||
// which runs from the command center with no active conversation). They fire
|
||||
// whenever the user is away, with no session-match requirement — otherwise a
|
||||
// background run started without an open session would be silently dropped.
|
||||
if (global) {
|
||||
return isBackgrounded()
|
||||
}
|
||||
|
||||
// Attention kinds break through for an off-screen session even while focused.
|
||||
if (ATTENTION_KINDS.has(kind)) {
|
||||
return isBackgrounded() || (Boolean(sessionId) && sessionId !== $activeSessionId.get())
|
||||
|
|
@ -134,6 +142,12 @@ export interface NativeNotificationInput {
|
|||
title: string
|
||||
body?: string
|
||||
sessionId?: null | string
|
||||
/**
|
||||
* Not tied to a chat session (e.g. pet generation). Fires whenever the user
|
||||
* is away, bypassing the session-match gate that completion kinds normally
|
||||
* require.
|
||||
*/
|
||||
global?: boolean
|
||||
silent?: boolean
|
||||
actions?: NativeNotificationAction[]
|
||||
}
|
||||
|
|
@ -145,11 +159,11 @@ export function dispatchNativeNotification(input: NativeNotificationInput): void
|
|||
return
|
||||
}
|
||||
|
||||
if (!shouldFire(input.kind, input.sessionId)) {
|
||||
if (!shouldFire(input.kind, input.sessionId, input.global)) {
|
||||
return
|
||||
}
|
||||
|
||||
if (throttled(`${input.kind}:${input.sessionId ?? ''}`, Date.now())) {
|
||||
if (throttled(`${input.kind}:${input.sessionId ?? (input.global ? 'global' : '')}`, Date.now())) {
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,6 @@ import { dispatchNativeNotification } from '@/store/native-notifications'
|
|||
import { notify } from '@/store/notifications'
|
||||
import { type PetInfo } from '@/store/pet'
|
||||
import { applyAdoptedPet, type GatewayRequest } from '@/store/pet-gallery'
|
||||
import { $activeSessionId } from '@/store/session'
|
||||
|
||||
/**
|
||||
* Feature store for the "generate a pet" flow (Cmd-K → Pets → Generate).
|
||||
*
|
||||
|
|
@ -111,8 +109,6 @@ export const $petGenAvailable = atom<boolean | null>(null)
|
|||
export interface PetGenProvider {
|
||||
name: string
|
||||
label: string
|
||||
/** One-line speed/quality tradeoff note. */
|
||||
note: string
|
||||
/** Whether this is the backend's default pick (no override needed). */
|
||||
default: boolean
|
||||
}
|
||||
|
|
@ -227,7 +223,10 @@ function notifyPetGenDone(title: string, message: string, kind: 'error' | 'succe
|
|||
}
|
||||
|
||||
notify({ kind, title, message, action: { label: 'View', onClick: openPetGenerate } })
|
||||
dispatchNativeNotification({ kind: 'backgroundDone', title, body: message, sessionId: $activeSessionId.get() })
|
||||
// Pet generation isn't tied to a chat session — mark it global so the OS
|
||||
// notification fires whenever the user is away, even with no active session
|
||||
// (the common case: generating from the command center with no conversation).
|
||||
dispatchNativeNotification({ kind: 'backgroundDone', title, body: message, global: true })
|
||||
}
|
||||
|
||||
interface GenerateOptions {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
Both OpenRouter and the Nous Portal inference endpoint speak the same
|
||||
OpenAI-style ``/chat/completions`` image-generation protocol: send
|
||||
``modalities: ["image", "text"]`` with an image-output model (e.g.
|
||||
``google/gemini-2.5-flash-image``), pass reference images as ``image_url``
|
||||
``google/gemini-3-pro-image``), pass reference images as ``image_url``
|
||||
content parts for grounding, and read the generated images back from
|
||||
``choices[0].message.images[].image_url.url`` (a ``data:image/...;base64`` URI).
|
||||
|
||||
|
|
@ -40,10 +40,17 @@ from agent.image_gen_provider import (
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default image-output model. Gemini 2.5 Flash Image ("nano-banana") is GA on
|
||||
# OpenRouter, accepts reference images for grounding, and honors
|
||||
# ``image_config.aspect_ratio``.
|
||||
DEFAULT_MODEL = "google/gemini-2.5-flash-image"
|
||||
# Quality-first model chain for OpenRouter-compatible endpoints.
|
||||
#
|
||||
# Default behavior (no env/config override): try the highest-fidelity OpenAI
|
||||
# image model first, then fall back to Gemini 3 Pro Image if the OpenAI model
|
||||
# is access-gated / unavailable / times out on this endpoint.
|
||||
#
|
||||
# Explicit override (OPENROUTER_IMAGE_MODEL or image_gen.<provider>.model):
|
||||
# use exactly that model (no auto fallback), so power users keep full control.
|
||||
DEFAULT_MODEL = "openai/gpt-5.4-image-2"
|
||||
_FALLBACK_MODEL = "google/gemini-3-pro-image"
|
||||
_DEFAULT_MODEL_CHAIN = (DEFAULT_MODEL, _FALLBACK_MODEL)
|
||||
|
||||
# Semantic aspect ratio (the image_gen contract) → OpenRouter's image_config
|
||||
# aspect_ratio strings.
|
||||
|
|
@ -121,6 +128,43 @@ def _extract_images(payload: Dict[str, Any]) -> List[str]:
|
|||
return out
|
||||
|
||||
|
||||
def _access_error_hint(
|
||||
display: str, model_id: str, env_var: str, status: int, err_msg: str
|
||||
) -> Optional[str]:
|
||||
"""A targeted hint when an access-gated OpenAI image model can't be reached.
|
||||
|
||||
Some OpenAI image models on OpenRouter need account enablement / BYOK, so the
|
||||
failure isn't a missing key (the key is valid) — the *model* is unreachable.
|
||||
The generic "check your key" message is misleading there, so we detect that
|
||||
case and point the user at the real fix. Returns one actionable line, or
|
||||
``None`` when this isn't the access-gated case.
|
||||
"""
|
||||
if not model_id.startswith("openai/"):
|
||||
return None
|
||||
low = (err_msg or "").lower()
|
||||
gated = status in (402, 403, 404) or any(
|
||||
s in low for s in ("no endpoints", "no allowed", "not a valid model", "data policy")
|
||||
)
|
||||
if not gated:
|
||||
return None
|
||||
return (
|
||||
f"{display} can't reach image model '{model_id}' ({status}) — enable OpenAI "
|
||||
f"image access in your {display} account, or set {env_var}={_FALLBACK_MODEL}."
|
||||
)
|
||||
|
||||
|
||||
def _dedupe_models(models: list[str]) -> list[str]:
|
||||
out: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for model in models:
|
||||
m = (model or "").strip()
|
||||
if not m or m in seen:
|
||||
continue
|
||||
seen.add(m)
|
||||
out.append(m)
|
||||
return out
|
||||
|
||||
|
||||
class OpenRouterCompatImageProvider(ImageGenProvider):
|
||||
"""Image generation over an OpenRouter-compatible chat-completions endpoint.
|
||||
|
||||
|
|
@ -180,9 +224,14 @@ class OpenRouterCompatImageProvider(ImageGenProvider):
|
|||
return [
|
||||
{
|
||||
"id": DEFAULT_MODEL,
|
||||
"display": "Gemini 2.5 Flash Image (nano-banana)",
|
||||
"strengths": "Reference-grounded edits; aspect-ratio control",
|
||||
}
|
||||
"display": "OpenAI GPT-5.4 Image 2",
|
||||
"strengths": "Highest fidelity; best prompt adherence; slower on OpenRouter",
|
||||
},
|
||||
{
|
||||
"id": _FALLBACK_MODEL,
|
||||
"display": "Gemini 3 Pro Image",
|
||||
"strengths": "Fast, reliable fallback with good layout adherence",
|
||||
},
|
||||
]
|
||||
|
||||
def default_model(self) -> Optional[str]:
|
||||
|
|
@ -193,16 +242,24 @@ class OpenRouterCompatImageProvider(ImageGenProvider):
|
|||
|
||||
def _resolve_model(self) -> str:
|
||||
"""Pick the image model: env override → config → :data:`DEFAULT_MODEL`."""
|
||||
return self._resolve_model_chain()[0]
|
||||
|
||||
def _resolve_model_chain(self) -> list[str]:
|
||||
"""Ordered model attempts for this request.
|
||||
|
||||
Explicit user/model config means "use this exact model", so no fallback.
|
||||
Without overrides we run the quality-first default chain.
|
||||
"""
|
||||
env_override = os.environ.get(self._model_env_var, "").strip()
|
||||
if env_override:
|
||||
return env_override
|
||||
return [env_override]
|
||||
cfg = _load_image_gen_config()
|
||||
scoped = cfg.get(self._config_key) if isinstance(cfg.get(self._config_key), dict) else {}
|
||||
if isinstance(scoped, dict):
|
||||
value = scoped.get("model")
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip()
|
||||
return DEFAULT_MODEL
|
||||
return [value.strip()]
|
||||
return _dedupe_models(list(_DEFAULT_MODEL_CHAIN))
|
||||
|
||||
def generate(
|
||||
self,
|
||||
|
|
@ -237,7 +294,7 @@ class OpenRouterCompatImageProvider(ImageGenProvider):
|
|||
aspect_ratio=aspect_ratio,
|
||||
)
|
||||
|
||||
model_id = self._resolve_model()
|
||||
model_chain = self._resolve_model_chain()
|
||||
aspect = resolve_aspect_ratio(aspect_ratio)
|
||||
or_aspect = _ASPECT_RATIOS.get(aspect, "1:1")
|
||||
|
||||
|
|
@ -258,12 +315,6 @@ class OpenRouterCompatImageProvider(ImageGenProvider):
|
|||
if part:
|
||||
content.append({"type": "image_url", "image_url": {"url": part}})
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model_id,
|
||||
"modalities": ["image", "text"],
|
||||
"messages": [{"role": "user", "content": content}],
|
||||
"image_config": {"aspect_ratio": or_aspect},
|
||||
}
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
|
|
@ -271,102 +322,145 @@ class OpenRouterCompatImageProvider(ImageGenProvider):
|
|||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-Title": "Hermes Agent",
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{base_url}/chat/completions",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except requests.HTTPError as exc:
|
||||
resp = exc.response
|
||||
status = resp.status_code if resp is not None else 0
|
||||
last_error: Optional[Dict[str, Any]] = None
|
||||
for i, model_id in enumerate(model_chain):
|
||||
payload: Dict[str, Any] = {
|
||||
"model": model_id,
|
||||
"modalities": ["image", "text"],
|
||||
"messages": [{"role": "user", "content": content}],
|
||||
"image_config": {"aspect_ratio": or_aspect},
|
||||
}
|
||||
is_last = i == len(model_chain) - 1
|
||||
try:
|
||||
err_msg = resp.json().get("error", {}).get("message", resp.text[:300])
|
||||
except Exception: # noqa: BLE001
|
||||
err_msg = resp.text[:300] if resp is not None else str(exc)
|
||||
logger.error("%s image gen failed (%d): %s", self._name, status, err_msg)
|
||||
return error_response(
|
||||
error=f"{self._display} image generation failed ({status}): {err_msg}",
|
||||
error_type="api_error",
|
||||
provider=self._name,
|
||||
response = requests.post(
|
||||
f"{base_url}/chat/completions",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except requests.HTTPError as exc:
|
||||
resp = exc.response
|
||||
status = resp.status_code if resp is not None else 0
|
||||
try:
|
||||
err_msg = resp.json().get("error", {}).get("message", resp.text[:300])
|
||||
except Exception: # noqa: BLE001
|
||||
err_msg = resp.text[:300] if resp is not None else str(exc)
|
||||
logger.error("%s image gen failed (%d) on %s: %s", self._name, status, model_id, err_msg)
|
||||
hint = _access_error_hint(self._display, model_id, self._model_env_var, status, err_msg)
|
||||
if hint and not is_last:
|
||||
logger.info(
|
||||
"%s model %s unavailable; retrying with fallback %s",
|
||||
self._name,
|
||||
model_id,
|
||||
model_chain[i + 1],
|
||||
)
|
||||
continue
|
||||
last_error = error_response(
|
||||
error=hint or f"{self._display} image generation failed ({status}): {err_msg}",
|
||||
error_type="model_access" if hint else "api_error",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
return last_error
|
||||
except requests.Timeout:
|
||||
if not is_last:
|
||||
logger.info(
|
||||
"%s model %s timed out; retrying with fallback %s",
|
||||
self._name,
|
||||
model_id,
|
||||
model_chain[i + 1],
|
||||
)
|
||||
continue
|
||||
return error_response(
|
||||
error=f"{self._display} image generation timed out "
|
||||
f"({int(_REQUEST_TIMEOUT)}s)",
|
||||
error_type="timeout",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.ConnectionError as exc:
|
||||
return error_response(
|
||||
error=f"{self._display} connection error: {exc}",
|
||||
error_type="connection_error",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
try:
|
||||
result = response.json()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return error_response(
|
||||
error=f"{self._display} returned invalid JSON: {exc}",
|
||||
error_type="invalid_response",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
images = _extract_images(result)
|
||||
if not images:
|
||||
if not is_last:
|
||||
logger.info(
|
||||
"%s model %s returned no image; retrying with fallback %s",
|
||||
self._name,
|
||||
model_id,
|
||||
model_chain[i + 1],
|
||||
)
|
||||
continue
|
||||
# A response with text but no image usually means the model didn't
|
||||
# honor image output (wrong model or modalities); surface that.
|
||||
return error_response(
|
||||
error=(
|
||||
f"{self._display} returned no image. Ensure the model "
|
||||
f"'{model_id}' supports image output."
|
||||
),
|
||||
error_type="empty_response",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
first = images[0]
|
||||
try:
|
||||
if first.startswith("data:"):
|
||||
b64 = first.split(",", 1)[1] if "," in first else ""
|
||||
saved_path = save_b64_image(b64, prefix=f"{self._name}_gen")
|
||||
else:
|
||||
saved_path = save_url_image(first, prefix=f"{self._name}_gen")
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return error_response(
|
||||
error=f"Could not save generated image: {exc}",
|
||||
error_type="io_error",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
return success_response(
|
||||
image=str(saved_path),
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.Timeout:
|
||||
return error_response(
|
||||
error=f"{self._display} image generation timed out "
|
||||
f"({int(_REQUEST_TIMEOUT)}s)",
|
||||
error_type="timeout",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.ConnectionError as exc:
|
||||
return error_response(
|
||||
error=f"{self._display} connection error: {exc}",
|
||||
error_type="connection_error",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
try:
|
||||
result = response.json()
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return error_response(
|
||||
error=f"{self._display} returned invalid JSON: {exc}",
|
||||
error_type="invalid_response",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
images = _extract_images(result)
|
||||
if not images:
|
||||
# A response with text but no image usually means the model didn't
|
||||
# honor image output (wrong model or modalities); surface that.
|
||||
return error_response(
|
||||
error=(
|
||||
f"{self._display} returned no image. Ensure the model "
|
||||
f"'{model_id}' supports image output."
|
||||
),
|
||||
error_type="empty_response",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
first = images[0]
|
||||
try:
|
||||
if first.startswith("data:"):
|
||||
b64 = first.split(",", 1)[1] if "," in first else ""
|
||||
saved_path = save_b64_image(b64, prefix=f"{self._name}_gen")
|
||||
else:
|
||||
saved_path = save_url_image(first, prefix=f"{self._name}_gen")
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return error_response(
|
||||
error=f"Could not save generated image: {exc}",
|
||||
error_type="io_error",
|
||||
provider=self._name,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
return success_response(
|
||||
image=str(saved_path),
|
||||
model=model_id,
|
||||
return last_error or error_response(
|
||||
error=f"{self._display} image generation failed after trying all candidate models.",
|
||||
error_type="api_error",
|
||||
provider=self._name,
|
||||
model=model_chain[-1] if model_chain else "",
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
provider=self._name,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,21 @@ def test_extract_strip_frames_keys_out_solid_background():
|
|||
assert frames[0].getpixel((0, 0))[3] == 0
|
||||
|
||||
|
||||
def test_remove_background_defringes_antialiased_edge():
|
||||
# The contaminated antialiased ring where sprite meets backdrop survives the
|
||||
# key (it's a blend, too far from pure magenta). Defringe shaves that 1px ring:
|
||||
# the keyed silhouette comes back eroded ~1px on every side, core intact.
|
||||
img = Image.new("RGBA", (200, 200), (255, 0, 255, 255))
|
||||
draw = ImageDraw.Draw(img)
|
||||
draw.rectangle((50, 50, 149, 149), fill=(40, 200, 60, 255)) # 100x100 green
|
||||
keyed = atlas.remove_background(img)
|
||||
bbox = keyed.getbbox()
|
||||
assert bbox is not None
|
||||
w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
|
||||
assert 96 <= w <= 99 and 96 <= h <= 99 # ~1px shaved per side
|
||||
assert keyed.getpixel((100, 100))[3] > 0 # core intact
|
||||
|
||||
|
||||
def test_remove_background_clears_trapped_chroma_pocket():
|
||||
# Green body enclosing a magenta pocket (the "pink between the arm" case):
|
||||
# the pocket isn't border-reachable, so it must be cleared by interior seeding.
|
||||
|
|
@ -106,6 +121,47 @@ def test_extract_strip_frames_drops_small_side_lobes_from_adjacent_frames():
|
|||
assert right_edge_mass == 0
|
||||
|
||||
|
||||
def test_extract_strip_frames_drops_detached_slot_effects():
|
||||
img = Image.new("RGBA", (atlas.CELL_WIDTH, atlas.CELL_HEIGHT), (0, 0, 0, 0))
|
||||
draw = ImageDraw.Draw(img)
|
||||
draw.ellipse((72, 54, 148, 172), fill=(70, 190, 70, 255)) # subject
|
||||
draw.polygon([(10, 76), (16, 84), (24, 78), (18, 88)], fill=(255, 255, 160, 255)) # sparkle
|
||||
|
||||
frame = atlas.extract_strip_frames(img, 1, method="components", fit=False)[0]
|
||||
bbox = frame.getbbox()
|
||||
assert bbox is not None
|
||||
assert bbox[0] > 40 # detached sparkle was removed
|
||||
|
||||
|
||||
def test_extract_strip_frames_requires_slot_padding_in_strict_mode():
|
||||
img = Image.new("RGBA", (atlas.CELL_WIDTH * 2, atlas.CELL_HEIGHT), (0, 0, 0, 0))
|
||||
draw = ImageDraw.Draw(img)
|
||||
# Frame 0 touches the top edge; strict mode should reject the row so the
|
||||
# caller regenerates instead of accepting a clipped pet frame.
|
||||
draw.rectangle((40, 0, 120, 130), fill=(70, 190, 70, 255))
|
||||
draw.rectangle((atlas.CELL_WIDTH + 40, 40, atlas.CELL_WIDTH + 120, 170), fill=(70, 190, 70, 255))
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
atlas.extract_strip_frames(img, 2, method="components", fit=False)
|
||||
|
||||
|
||||
def test_extract_strip_frames_rejects_multi_pose_frame_outlier():
|
||||
frames = []
|
||||
for _ in range(3):
|
||||
frame = Image.new("RGBA", (atlas.CELL_WIDTH, atlas.CELL_HEIGHT), (0, 0, 0, 0))
|
||||
ImageDraw.Draw(frame).rectangle((82, 120, 108, 178), fill=(220, 240, 255, 255))
|
||||
frames.append(frame)
|
||||
|
||||
bad = Image.new("RGBA", (atlas.CELL_WIDTH, atlas.CELL_HEIGHT), (0, 0, 0, 0))
|
||||
draw = ImageDraw.Draw(bad)
|
||||
for x in (10, 50, 90, 130, 166):
|
||||
draw.rectangle((x, 124, x + 12, 172), fill=(220, 240, 255, 255))
|
||||
frames.append(bad)
|
||||
|
||||
with pytest.raises(ValueError, match="multiple separated subjects"):
|
||||
atlas._validate_extracted_frames(frames, 4)
|
||||
|
||||
|
||||
def test_extract_strip_frames_uses_real_gutters_when_spacing_is_uneven():
|
||||
# gpt-image often returns a square chroma strip whose poses are separated but
|
||||
# not laid out on exact equal-width slots. Equal slot slicing would include
|
||||
|
|
@ -183,6 +239,35 @@ def test_validate_atlas_rejects_rgb_residue():
|
|||
assert any("residue" in e for e in result["errors"])
|
||||
|
||||
|
||||
def test_validate_atlas_rejects_postage_stamp_sprite():
|
||||
sheet = Image.new("RGBA", (atlas.ATLAS_WIDTH, atlas.ATLAS_HEIGHT), (0, 0, 0, 0))
|
||||
frame = Image.new("RGBA", (atlas.CELL_WIDTH, atlas.CELL_HEIGHT), (0, 0, 0, 0))
|
||||
ImageDraw.Draw(frame).rectangle((86, 174, 106, 201), fill=(220, 240, 255, 255))
|
||||
|
||||
for _state, row, count in atlas.ROW_SPECS:
|
||||
for col in range(count):
|
||||
sheet.alpha_composite(frame, (col * atlas.CELL_WIDTH, row * atlas.CELL_HEIGHT))
|
||||
|
||||
result = atlas.validate_atlas(sheet)
|
||||
|
||||
assert not result["ok"]
|
||||
assert any("too small" in e for e in result["errors"])
|
||||
|
||||
|
||||
def test_validate_atlas_rejects_one_collapsed_state_row():
|
||||
frames = _frames_for_all_states()
|
||||
tiny = Image.new("RGBA", (atlas.CELL_WIDTH, atlas.CELL_HEIGHT), (0, 0, 0, 0))
|
||||
draw = ImageDraw.Draw(tiny)
|
||||
draw.rectangle((90, 150, 106, 199), fill=(220, 240, 255, 255))
|
||||
frames["failed"] = [tiny.copy() for _ in range(atlas.FRAME_COUNTS["failed"])]
|
||||
|
||||
sheet = atlas.compose_atlas(frames)
|
||||
result = atlas.validate_atlas(sheet)
|
||||
|
||||
assert not result["ok"]
|
||||
assert any("appears collapsed" in e and "failed" in e for e in result["errors"])
|
||||
|
||||
|
||||
def test_validate_atlas_warns_on_empty_state():
|
||||
frames = _frames_for_all_states()
|
||||
frames["jumping"] = []
|
||||
|
|
@ -463,9 +548,12 @@ def test_list_sprite_providers_marks_default(monkeypatch):
|
|||
listed = imagegen.list_sprite_providers()
|
||||
names = {p["name"] for p in listed}
|
||||
assert names == {"openai", "nous"}
|
||||
# Every entry carries display metadata, and exactly one is the default.
|
||||
assert all(p["label"] and "note" in p for p in listed)
|
||||
# Every entry carries a display label (no quality note — all backends are equal).
|
||||
assert all(p["label"] for p in listed)
|
||||
assert all("note" not in p for p in listed)
|
||||
assert [p["name"] for p in listed if p["default"]] == ["openai"]
|
||||
# Listed in preference order: Nous Portal before OpenAI.
|
||||
assert [p["name"] for p in listed] == ["nous", "openai"]
|
||||
|
||||
|
||||
def test_generate_retries_without_transparent_background(monkeypatch, tmp_path):
|
||||
|
|
|
|||
|
|
@ -99,11 +99,22 @@ class TestProviderClass:
|
|||
|
||||
with patch("plugins.image_gen.openrouter._load_image_gen_config", return_value={}):
|
||||
assert _openrouter().default_model() == DEFAULT_MODEL
|
||||
assert DEFAULT_MODEL == "google/gemini-2.5-flash-image"
|
||||
# Default must be an image-output model id (provider/model form).
|
||||
assert "/" in DEFAULT_MODEL and "image" in DEFAULT_MODEL
|
||||
|
||||
def test_default_chain_prefers_quality_then_fallback(self):
    """With an empty config the resolved chain equals the default chain.

    The first entry must be an ``openai/`` model and the last entry must be
    ``_FALLBACK_MODEL``.
    """
    from plugins.image_gen.openrouter import _DEFAULT_MODEL_CHAIN, _FALLBACK_MODEL

    empty_config = patch("plugins.image_gen.openrouter._load_image_gen_config", return_value={})
    with empty_config:
        resolved = _openrouter()._resolve_model_chain()

    assert resolved == list(_DEFAULT_MODEL_CHAIN)
    head, tail = resolved[0], resolved[-1]
    assert head.startswith("openai/")
    assert tail == _FALLBACK_MODEL
|
||||
|
||||
def test_model_env_override(self, monkeypatch):
    """``OPENROUTER_IMAGE_MODEL`` pins the resolved model and a one-entry chain."""
    pinned = "black-forest-labs/flux.2-pro"
    monkeypatch.setenv("OPENROUTER_IMAGE_MODEL", pinned)

    single_model = _openrouter()._resolve_model()
    assert single_model == pinned

    chain = _openrouter()._resolve_model_chain()
    assert chain == [pinned]
|
||||
|
||||
def test_model_config_override(self):
|
||||
cfg = {"openrouter": {"model": "google/gemini-3.1-flash-image-preview"}}
|
||||
|
|
@@ -153,6 +164,30 @@ class TestHelpers:
|
|||
|
||||
assert _extract_images({"choices": [{"message": {"content": "no image"}}]}) == []
|
||||
|
||||
def test_access_error_hint_for_gated_openai_model(self):
    """A 404 "No endpoints found" on an OpenAI image model yields a hint.

    The hint must name the model, the override environment variable and the
    fallback model, and must fit on one short line.
    """
    from plugins.image_gen.openrouter import _FALLBACK_MODEL, _access_error_hint

    model_id = "openai/gpt-5.4-image-2"
    env_var = "OPENROUTER_IMAGE_MODEL"
    hint = _access_error_hint("OpenRouter", model_id, env_var, 404, "No endpoints found")

    assert hint is not None
    for fragment in (model_id, env_var, _FALLBACK_MODEL):
        assert fragment in hint
    # Stays a single line under the humanizer's 200-char truncation.
    assert "\n" not in hint
    assert len(hint) <= 200
|
||||
|
||||
def test_access_error_hint_ignores_non_openai_models(self):
    """A 404 on a ``google/`` model id produces no access hint."""
    from plugins.image_gen.openrouter import _access_error_hint

    hint = _access_error_hint("OpenRouter", "google/gemini-3-pro-image", "X", 404, "boom")
    assert hint is None
|
||||
|
||||
def test_access_error_hint_ignores_unrelated_errors(self):
    """An OpenAI model id alone is not enough to trigger the access hint."""
    from plugins.image_gen.openrouter import _access_error_hint

    # A 500 "server error" on an openai model carries no access signal → no hint.
    hint = _access_error_hint("OpenRouter", "openai/gpt-5.4-image-2", "X", 500, "server error")
    assert hint is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate()
|
||||
|
|
@@ -260,10 +295,11 @@ class TestGenerate:
|
|||
resp.raise_for_status.side_effect = req_lib.HTTPError(response=resp)
|
||||
|
||||
with patch(_RUNTIME, return_value=_runtime_ok()), \
|
||||
patch("requests.post", return_value=resp):
|
||||
patch("requests.post", return_value=resp) as mock_post:
|
||||
result = _openrouter().generate(prompt="a pet")
|
||||
assert result["success"] is False
|
||||
assert result["error_type"] == "api_error"
|
||||
assert mock_post.call_count == 1
|
||||
|
||||
def test_timeout(self):
|
||||
import requests as req_lib
|
||||
|
|
@@ -274,6 +310,55 @@ class TestGenerate:
|
|||
assert result["success"] is False
|
||||
assert result["error_type"] == "timeout"
|
||||
|
||||
def test_access_gated_model_surfaces_hint(self, monkeypatch):
    """A 404 on an explicitly selected OpenAI image model reports ``model_access``.

    The error must carry the actionable access hint rather than the misleading
    generic 'check your key' message, and only one request may be made.
    """
    import requests as req_lib

    monkeypatch.setenv("OPENROUTER_IMAGE_MODEL", "openai/gpt-5.4-image-2")

    # Fake a "No endpoints found" 404 whose raise_for_status() raises HTTPError.
    not_found = MagicMock(
        status_code=404,
        text="No endpoints found for openai/gpt-5.4-image-2",
    )
    not_found.json.return_value = {"error": {"message": "No endpoints found"}}
    not_found.raise_for_status.side_effect = req_lib.HTTPError(response=not_found)

    with patch(_RUNTIME, return_value=_runtime_ok()):
        with patch("requests.post", return_value=not_found) as mock_post:
            outcome = _openrouter().generate(prompt="a pet")

    assert outcome["success"] is False
    assert outcome["error_type"] == "model_access"
    assert "OpenAI image access" in outcome["error"]
    # Explicit override: no auto-fallback chain, so exactly one POST.
    assert mock_post.call_count == 1
|
||||
|
||||
def test_access_gated_default_model_falls_back_to_gemini(self):
    """A 404 on the default model triggers one retry with ``_FALLBACK_MODEL``.

    The first POST returns a "No endpoints found" 404 and the second returns
    an image. The call must succeed, report the fallback model, and have
    requested the default model first and the fallback second.
    """
    import requests as req_lib

    from plugins.image_gen.openrouter import DEFAULT_MODEL, _FALLBACK_MODEL

    # First response: the default model is access-gated.
    gated = MagicMock(status_code=404, text=f"No endpoints found for {DEFAULT_MODEL}")
    gated.json.return_value = {"error": {"message": "No endpoints found"}}
    gated.raise_for_status.side_effect = req_lib.HTTPError(response=gated)

    saved_path = Path("/tmp/openrouter_gen_fallback.png")
    with patch(_RUNTIME, return_value=_runtime_ok()), \
            patch("requests.post", side_effect=[gated, _mock_chat_response([_PNG_DATA_URI])]) as mock_post, \
            patch("plugins.image_gen.openrouter.save_b64_image", return_value=saved_path):
        outcome = _openrouter().generate(prompt="a pet")

    assert outcome["success"] is True
    assert outcome["model"] == _FALLBACK_MODEL
    assert outcome["image"] == "/tmp/openrouter_gen_fallback.png"
    assert mock_post.call_count == 2

    first_call, second_call = mock_post.call_args_list[0], mock_post.call_args_list[1]
    assert first_call.kwargs["json"]["model"] == DEFAULT_MODEL
    assert second_call.kwargs["json"]["model"] == _FALLBACK_MODEL
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registration + pet integration
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue