diff --git a/agent/pet/generate/atlas.py b/agent/pet/generate/atlas.py
index 8559ddb530d..2d316110e73 100644
--- a/agent/pet/generate/atlas.py
+++ b/agent/pet/generate/atlas.py
@@ -141,6 +141,8 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None,
"""
from collections import deque
+ from PIL import Image, ImageChops
+
rgba = image.convert("RGBA")
if _has_transparency(rgba):
return _repair_internal_alpha_holes(rgba)
@@ -153,7 +155,21 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None,
r, g, b, a = px[x, y]
return a > _ALPHA_FLOOR and _color_distance(r, g, b, key) <= threshold
+ # Fast path for strongly-saturated chroma keys (our normal sprite prompts use
+ # hot magenta): remove all near-key opaque pixels with C-level channel ops.
+ # This clears both border-connected backdrop and enclosed triangular pockets
+ # between connected limbs/capes, without a Python flood over ~1.5M pixels.
+ if max(key) - min(key) >= 120:
+ near = _near_key_mask(rgba, key) # L mask, 255 where near key
+ opaque = rgba.getchannel("A").point(lambda a: 255 if a > _ALPHA_FLOOR else 0)
+ remove_mask = ImageChops.darker(near, opaque)
+ return Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, remove_mask)
+
visited = bytearray(w * h)
+ # Mark removals in a flat mask and apply them in one C composite at the end —
+ # writing `px[x, y] = (0,0,0,0)` per pixel was ~3M PixelAccess calls (84% of
+ # the whole pipeline) and pegged a core in pure Python, stalling the gateway.
+ remove = bytearray(w * h)
queue: deque[tuple[int, int]] = deque()
# Seed from every border pixel that looks like background.
@@ -181,7 +197,7 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None,
while queue:
x, y = queue.popleft()
- px[x, y] = (0, 0, 0, 0)
+ remove[y * w + x] = 1
for nx, ny in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
if 0 <= nx < w and 0 <= ny < h:
idx = ny * w + nx
@@ -189,7 +205,11 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None,
visited[idx] = 1
if _is_bg(nx, ny):
queue.append((nx, ny))
- return rgba
+
+ # One C-level composite instead of millions of per-pixel writes: paint the
+ # flooded pixels to (0,0,0,0) wherever the mask is set.
+ mask = Image.frombytes("L", (w, h), bytes(remove)).point(lambda v: 255 if v else 0)
+ return Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, mask)
def _repair_internal_alpha_holes(image):
@@ -298,9 +318,13 @@ def _fit_to_cell(image):
max_h = CELL_HEIGHT - _CELL_PAD
scale = min(max_w / sprite.width, max_h / sprite.height, 1.0)
if scale != 1.0:
+ # NEAREST, not LANCZOS: the generated "pixel art" has hard edges, and any
+ # interpolating resample anti-aliases them into a blurry, washed-out
+ # sprite once the renderer upscales the cell. Crisp blocky downscale reads
+ # as real pixel art.
sprite = sprite.resize(
(max(1, round(sprite.width * scale)), max(1, round(sprite.height * scale))),
- Image.Resampling.LANCZOS,
+ Image.Resampling.NEAREST,
)
left = (CELL_WIDTH - sprite.width) // 2
top = (CELL_HEIGHT - sprite.height) // 2
@@ -324,23 +348,13 @@ def _drop_side_bleed(image):
w, h = rgba.size
profile = _column_profile(rgba) # mean alpha per column (fast C resize)
- segments: list[tuple[int, int, int]] = [] # (left, right, mass)
- start = mass = 0
- started = False
- for x, v in enumerate(profile + [0]):
- if v > 2:
- if not started:
- start, mass, started = x, 0, True
- mass += v
- elif started:
- segments.append((start, x, mass))
- started = False
-
- if len(segments) < 2:
+ runs = _content_runs(profile)
+ if len(runs) < 2:
return rgba
- keep_mass = max(m for _, _, m in segments) * _SIDE_LOBE_RATIO
- keep = [(l, r) for l, r, m in segments if m >= keep_mass]
- if len(keep) == len(segments):
+ masses = [sum(profile[l:r]) for l, r in runs]
+ keep_mass = max(masses) * _SIDE_LOBE_RATIO
+ keep = [run for run, m in zip(runs, masses) if m >= keep_mass]
+ if len(keep) == len(runs):
return rgba
# Zero every column band that isn't a kept segment (box paste, not per-pixel).
@@ -355,53 +369,6 @@ def _drop_side_bleed(image):
return rgba
-def _connected_components(image) -> list[dict]:
- """Flood-fill the alpha mask into connected blobs (4-connectivity)."""
- alpha = image.getchannel("A")
- w, h = image.size
- data = alpha.tobytes()
- visited = bytearray(w * h)
- out: list[dict] = []
-
- for start, a in enumerate(data):
- if a <= _ALPHA_FLOOR or visited[start]:
- continue
- stack = [start]
- visited[start] = 1
- pixels: list[int] = []
- min_x = w
- min_y = h
- max_x = 0
- max_y = 0
- while stack:
- cur = stack.pop()
- pixels.append(cur)
- x = cur % w
- y = cur // w
- min_x = min(min_x, x)
- min_y = min(min_y, y)
- max_x = max(max_x, x)
- max_y = max(max_y, y)
- for nb, ok in (
- (cur - 1, x > 0),
- (cur + 1, x + 1 < w),
- (cur - w, y > 0),
- (cur + w, y + 1 < h),
- ):
- if ok and not visited[nb] and data[nb] > _ALPHA_FLOOR:
- visited[nb] = 1
- stack.append(nb)
- out.append(
- {
- "pixels": pixels,
- "area": len(pixels),
- "bbox": (min_x, min_y, max_x + 1, max_y + 1),
- "center_x": (min_x + max_x + 1) / 2,
- }
- )
- return out
-
-
def _sever_expected_gutters(strip, frame_count: int):
"""Cut thin vertical gutters at expected frame boundaries before labeling.
@@ -418,7 +385,7 @@ def _sever_expected_gutters(strip, frame_count: int):
out = strip.copy()
px = out.load()
slot = out.width / frame_count
- half = max(2, min(8, round(slot * 0.02)))
+ half = max(3, min(18, round(slot * 0.06)))
for i in range(1, frame_count):
x = round(i * slot)
left = max(0, x - half)
@@ -430,21 +397,6 @@ def _sever_expected_gutters(strip, frame_count: int):
return out
-def _segmentable(strip, frame_count: int) -> bool:
- """True if the (gutter-severed) strip yields ≥ *frame_count* distinct blobs.
-
- Used only as a quality gate: a row that can't show this many separable poses
- is a bad generation (caller retries / falls back), never silently sliced into
- merged frames.
- """
- components = _connected_components(strip)
- if not components:
- return False
- largest = max(c["area"] for c in components)
- seed_threshold = max(120, largest * 0.20)
- return sum(1 for c in components if c["area"] >= seed_threshold) >= frame_count
-
-
def _slot_crops(strip, frame_count: int) -> list:
"""Slice *strip* into *frame_count* uniform columns (one coordinate space).
@@ -458,6 +410,61 @@ def _slot_crops(strip, frame_count: int) -> list:
return [_drop_side_bleed(strip.crop((i * w0, 0, i * w0 + w0, h))) for i in range(frame_count)]
+def _content_runs(profile: list[int], *, threshold: int = 2) -> list[tuple[int, int]]:
+    """Return the half-open ``(start, stop)`` spans where *profile* exceeds *threshold*.
+
+    *profile* is a per-column value list (callers in this module pass the
+    output of ``_column_profile``). Columns at or below *threshold* act as
+    separators, so each maximal stretch of above-threshold columns becomes one
+    span; ``stop`` is the index of the first column after the stretch.
+    """
+    spans: list[tuple[int, int]] = []
+    opened_at: int | None = None
+    # The appended 0 is a sentinel column: for any non-negative threshold it
+    # closes a run that reaches the right edge of the profile.
+    for col, value in enumerate([*profile, 0]):
+        above = value > threshold
+        if above and opened_at is None:
+            opened_at = col
+        elif not above and opened_at is not None:
+            spans.append((opened_at, col))
+            opened_at = None
+    return spans
+
+
+def _frame_x_ranges(strip, frame_count: int) -> list[tuple[int, int]] | None:
+    """Per-frame ``(left, right)`` column ranges from the row's empty gutters.
+
+    The column profile of *strip* is split into content runs (see
+    :func:`_content_runs`), then reconciled against *frame_count*:
+
+    * spans == frames → one span per frame.
+    * spans > frames → merge across the smallest gaps. A detached halo/ear sits
+      a tiny gap from its body, while the inter-pose gutter is the big gap that
+      survives — so over-segmentation repairs itself by collapsing only the
+      small internal gaps.
+    * spans < frames → poses are touching; not separable by gutters, so ``None``
+      is returned and the caller decides (``components`` raises, ``auto`` falls
+      back to even slots).
+
+    Returns ``None`` as well when the strip has no content at all or when
+    *frame_count* is less than 1. Ranges are half-open and span content only;
+    vertical extent is left to the caller.
+    """
+    # A non-positive frame count can never be satisfied. Bail out here: the
+    # merge loop below would otherwise shrink to a single group and then call
+    # ``min()`` on an empty range, raising an opaque ValueError.
+    if frame_count < 1:
+        return None
+
+    profile = _column_profile(strip)
+    runs = _content_runs(profile)
+    if not runs:
+        return None
+
+    # Drop trivial specks (under 2% of the heaviest run's mass) so stray noise
+    # never counts as a pose.
+    masses = [sum(profile[left:right]) for left, right in runs]
+    floor = max(masses) * 0.02
+    runs = [run for run, mass in zip(runs, masses) if mass >= floor]
+    if len(runs) < frame_count:
+        return None
+
+    groups = [[left, right] for left, right in runs]
+    while len(groups) > frame_count:
+        # Fuse the adjacent pair separated by the narrowest gap (leftmost on ties).
+        gi = min(range(len(groups) - 1), key=lambda i: groups[i + 1][0] - groups[i][1])
+        groups[gi][1] = groups[gi + 1][1]
+        del groups[gi + 1]
+    return [(left, right) for left, right in groups]
+
+
def extract_strip_frames(
strip,
frame_count: int,
@@ -468,10 +475,15 @@ def extract_strip_frames(
) -> list:
"""Turn one generated row strip into *frame_count* frames.
- Background is keyed out, the expected frame gutters are severed, then the
- strip is sliced into equal columns. Connected components only *validate* that
- the row holds *frame_count* separable poses (``components`` raises, ``auto``
- falls back to slicing the un-severed strip).
+ The background is keyed out and the strip is sliced at its empty chroma gutters
+ (:func:`_frame_x_ranges`) — the plain "find each object, make a frame" cut that
+ works once poses are spaced apart (which generation now enforces). Bridges at
+ the expected boundaries are severed only as a retry when that first cut fails.
+
+ Each frame is cropped at full cell height so tall ears / halos are never
+ clipped; :func:`_drop_side_bleed` trims any faint neighbour sliver. When the
+ poses are touching (fewer gutters than frames) ``components`` raises and
+ ``auto`` falls back to equal-width slots.
*fit* (default) fits+centers each frame into a 192x208 cell — the standalone
contract for callers that don't normalize. Hatching passes ``fit=False`` to
@@ -487,12 +499,29 @@ def extract_strip_frames(
strip = strip.convert("RGBA")
strip = remove_background(strip, chroma_key=chroma_key)
- severed = _sever_expected_gutters(strip, frame_count)
- segmentable = _segmentable(severed, frame_count)
- if method == "components" and not segmentable:
- raise ValueError(f"could not segment {frame_count} sprites from strip")
- frames = _slot_crops(severed if segmentable else strip, frame_count)
+ # Prefer the real gutters as-is: when poses are already spaced (generation
+ # enforces this), slicing the strip untouched keeps each pose's own bounds and
+ # never cuts through an unevenly-placed silhouette. Only fall back to severing
+ # the expected boundaries when gaps alone can't separate the row — i.e. poses
+ # are bridged by a shared shadow/glow/1px line and read as one blob.
+ source = strip
+ ranges = _frame_x_ranges(source, frame_count)
+ if ranges is None:
+ source = _sever_expected_gutters(strip, frame_count)
+ ranges = _frame_x_ranges(source, frame_count)
+
+ if ranges is None:
+ if method == "components":
+ raise ValueError(f"could not segment {frame_count} sprites from strip")
+ frames = _slot_crops(source, frame_count)
+ else:
+ h = source.height
+ pad = max(2, min(16, round((source.width / max(1, frame_count)) * 0.04)))
+ frames = [
+ _drop_side_bleed(source.crop((max(0, left - pad), 0, min(source.width, right + pad), h)))
+ for left, right in ranges
+ ]
return [_fit_to_cell(f) for f in frames] if fit else frames
@@ -535,15 +564,22 @@ def normalize_cells(frames_by_state: dict[str, list], *, pad: int = _NORMALIZE_P
1. **Cross-correlate** each frame's column profile against the per-state
*median* profile to find the integer shift that locks the **body** in
place — robust to limbs/cape because the body dominates the profile.
- 2. **Union-crop** the registered frames through one shared window and apply
- **one shared scale** + bottom-anchor, so size and baseline are uniform and
- intra-state vertical motion (a jump's lift) is preserved.
+ 2. **Union-crop** through one shared state window, then scale every state by a
+ single global factor keyed to its median pose height, so the character is
+ the same on-screen size in every row while a jump's lift still fits.
"""
from PIL import Image
blank = lambda: Image.new("RGBA", (CELL_WIDTH, CELL_HEIGHT), (0, 0, 0, 0))
+ med = lambda vs: sorted(vs)[len(vs) // 2] # robust center; ignores a limb/cape outlier
out: dict[str, list] = {}
+ prepared: dict[str, tuple[list, tuple[int, int, int, int], tuple[int, int]]] = {}
+ # Fill the cell — real petdex pets sit ~pad from the edges; the K cap below
+ # keeps a tall pose (a jump's lift) from clipping.
+ target_w = CELL_WIDTH - pad
+ target_h = CELL_HEIGHT - pad
+
for state, frames in frames_by_state.items():
rgba = [f.convert("RGBA") for f in frames]
if not any(f.getbbox() for f in rgba):
@@ -572,14 +608,34 @@ def normalize_cells(frames_by_state: dict[str, list], *, pad: int = _NORMALIZE_P
shifted.alpha_composite(f, (margin + _best_shift(ref, prof, window), 0))
aligned.append(shifted)
- # Shared window + scale over the registered set; bottom-anchored, centered.
+ # Shared window over the registered set; scale is resolved against a
+ # common apparent-character target below.
boxes = [b for b in (a.getbbox() for a in aligned) if b]
left = min(b[0] for b in boxes)
top = min(b[1] for b in boxes)
right = max(b[2] for b in boxes)
bottom = max(b[3] for b in boxes)
+ prepared[state] = (
+ aligned,
+ (left, top, right, bottom),
+ (med([b[2] - b[0] for b in boxes]), med([b[3] - b[1] for b in boxes])),
+ )
+
+ if not prepared:
+ return out
+
+ # Uniform apparent size: scale each state by K / pose_h, so a row the model
+ # drew small renders as big as one it drew large. K is the one global cap that
+ # keeps the tallest/widest motion envelope (a jump's lift) inside the cell —
+ # for a still row union ≈ pose so its term ≈ target_h (full fill).
+ K = target_h
+ for (_aligned, (left, top, right, bottom), (_pose_w, pose_h)) in prepared.values():
uw, uh = right - left, bottom - top
- scale = min((CELL_WIDTH - pad) / uw, (CELL_HEIGHT - pad) / uh)
+ K = min(K, target_h * pose_h / max(1, uh), target_w * pose_h / max(1, uw))
+
+ for state, (aligned, (left, top, right, bottom), (_pose_w, pose_h)) in prepared.items():
+ uw, uh = right - left, bottom - top
+ scale = K / max(1, pose_h)
sw, sh = max(1, round(uw * scale)), max(1, round(uh * scale))
px, py = round((CELL_WIDTH - sw) / 2), round((CELL_HEIGHT - pad // 2) - sh)
@@ -587,7 +643,8 @@ def normalize_cells(frames_by_state: dict[str, list], *, pad: int = _NORMALIZE_P
for a in aligned:
crop = a.crop((left, top, right, bottom))
if crop.size != (sw, sh):
- crop = crop.resize((sw, sh), Image.Resampling.LANCZOS)
+ # NEAREST keeps the pixel-art edges crisp; LANCZOS blurred them.
+ crop = crop.resize((sw, sh), Image.Resampling.NEAREST)
cell = blank()
cell.alpha_composite(crop, (px, py))
cells.append(cell)
diff --git a/agent/pet/generate/imagegen.py b/agent/pet/generate/imagegen.py
index 1e01592e4bf..00390d1ca8b 100644
--- a/agent/pet/generate/imagegen.py
+++ b/agent/pet/generate/imagegen.py
@@ -26,6 +26,16 @@ logger = logging.getLogger(__name__)
# qualify.
_REF_CAPABLE = ("openai", "openai-codex", "krea", "openrouter", "nous")
+# Display metadata per reference-capable provider: a friendly label plus a
+# one-line speed/quality note, surfaced in the desktop pet-gen picker so users
+# can trade speed for fidelity. Rows are (provider name, label, note).
+_PROVIDER_META: dict[str, dict[str, str]] = {
+    name: {"label": label, "note": note}
+    for name, label, note in (
+        ("nous", "Nous Portal", "Fast, balanced quality"),
+        ("openrouter", "OpenRouter", "Fastest — Gemini Flash Image"),
+        ("openai", "OpenAI", "Highest fidelity, slower"),
+        ("openai-codex", "OpenAI (Codex)", "Highest fidelity, slower"),
+        ("krea", "Krea", "Stylized, style-reference grounding"),
+    )
+}
+
class GenerationError(RuntimeError):
"""Raised on any image-generation failure (no provider, API error, IO)."""
@@ -49,16 +59,25 @@ def _discover() -> None:
logger.debug("image-gen plugin discovery failed: %s", exc)
-def resolve_provider(*, require_references: bool = True) -> SpriteProvider:
+def resolve_provider(*, require_references: bool = True, prefer: str | None = None) -> SpriteProvider:
"""Pick the image provider to use for sprite work.
- Preference: the configured provider when it's reference-capable, else the
- first available reference-capable provider. With *require_references* off we
- fall back to any available provider (used for prompt-only base drafts).
+ Preference: an explicit *prefer* choice (the desktop pet-gen picker) when it's
+ reference-capable and configured, then the configured/active provider when
+ it's reference-capable, else the first available reference-capable provider.
+ With *require_references* off we fall back to any available provider (used for
+ prompt-only base drafts).
"""
_discover()
from agent.image_gen_registry import get_active_provider, get_provider
+ # An explicit user pick wins when it's reference-capable and has credentials;
+ # otherwise we ignore it and fall through to the normal resolution.
+ if prefer:
+ chosen = get_provider(prefer)
+ if prefer in _REF_CAPABLE and chosen is not None and chosen.is_available():
+ return SpriteProvider(name=prefer, provider=chosen, supports_references=True)
+
# Configured / active provider first.
active = None
try:
@@ -83,11 +102,44 @@ def resolve_provider(*, require_references: bool = True) -> SpriteProvider:
raise GenerationError(
"Pet generation needs an image backend that supports reference images. "
- "Open `hermes tools` → Image Generation and configure OpenRouter, Nous "
- "Portal, or OpenAI (gpt-image-2) with an API key."
+ "Open `hermes tools` → Image Generation and configure Nous Portal, "
+ "OpenRouter, or OpenAI (gpt-image-2) with an API key."
)
+def list_sprite_providers() -> list[dict]:
+    """List the reference-capable providers the user can pick for pet generation.
+
+    Walks ``_REF_CAPABLE`` in order and keeps every provider that is registered
+    and reports ``is_available()``. Each entry is a dict with the keys
+    ``name``, ``label``, ``note`` and ``default``; label and note come from
+    ``_PROVIDER_META`` (falling back to the raw name and an empty note).
+    ``default`` is true for the provider :func:`resolve_provider` picks when no
+    explicit preference is given; if that resolution raises
+    :class:`GenerationError`, no entry is marked default.
+
+    Returns an empty list when no reference-capable provider is available.
+    """
+    _discover()
+    from agent.image_gen_registry import get_provider
+
+    try:
+        fallback = resolve_provider(require_references=True)
+    except GenerationError:
+        default_name = ""
+    else:
+        default_name = fallback.name
+
+    choices: list[dict] = []
+    for name in _REF_CAPABLE:
+        candidate = get_provider(name)
+        if candidate is not None and candidate.is_available():
+            meta = _PROVIDER_META.get(name, {})
+            choices.append(
+                dict(
+                    name=name,
+                    label=meta.get("label", name),
+                    note=meta.get("note", ""),
+                    default=(name == default_name),
+                )
+            )
+    return choices
+
+
def _save_local(image_ref: str, *, prefix: str) -> Path:
"""Return a local path for *image_ref*, downloading it if it's a URL."""
if image_ref.startswith(("http://", "https://")):
@@ -116,10 +168,15 @@ def generate(
reference_images: list[Path] | None = None,
provider: SpriteProvider | None = None,
prefix: str = "pet_gen",
+ aspect_ratio: str = "square",
) -> list[Path]:
- """Generate *n* square sprite images and return their local paths.
+ """Generate *n* sprite images and return their local paths.
*reference_images* grounds the output on a base image (required for rows).
+ *aspect_ratio* picks the canvas: ``"square"`` for single-character base
+ drafts, ``"landscape"`` for multi-frame row strips (the wider 1536px canvas
+ gives every frame real horizontal room so winged poses don't have to be
+ shrunk to avoid touching their neighbors).
We *ask* for a transparent background, but fall back to an opaque generation
(cleaned up downstream by the chroma-key pass) on models that reject the
flag. Raises :class:`GenerationError` if nothing usable comes back.
@@ -134,7 +191,7 @@ def generate(
refs = [str(p) for p in (reference_images or [])]
def _run(extra: dict) -> tuple[Path | None, str]:
- kwargs: dict = {"aspect_ratio": "square", **extra}
+ kwargs: dict = {"aspect_ratio": aspect_ratio, **extra}
if refs:
# Providers disagree on the ref kwarg name: our OpenRouter/Nous
# backends read ``reference_images``, OpenAI's gpt-image-2 reads
diff --git a/agent/pet/generate/orchestrate.py b/agent/pet/generate/orchestrate.py
index 238c490a22a..f160046ebf9 100644
--- a/agent/pet/generate/orchestrate.py
+++ b/agent/pet/generate/orchestrate.py
@@ -35,6 +35,10 @@ ProgressFn = Callable[[str, str], None]
# back-to-back and routinely blow past the client's RPC timeout. Capped so we
# don't hammer the provider's rate limit (one cold call can still be slow).
_MAX_PARALLEL_GENERATIONS = 4
+# How many times to (re)generate a single row before accepting a best-effort
+# slice. Early attempts demand clean per-pose gutters; the last is lenient so a
+# stubborn row still yields frames instead of dropping out entirely.
+_ROW_GEN_ATTEMPTS = 2
_MIN_FILLED_STATES = 6
_REQUIRED_STATES = frozenset({"idle", "running-right", "waving"})
@@ -80,6 +84,7 @@ def generate_base_drafts(
*,
n: int = 4,
style: str = "auto",
+ reference_images: list[Path] | None = None,
provider: SpriteProvider | None = None,
on_draft: Callable[[int, Path], None] | None = None,
is_cancelled: Callable[[], bool] | None = None,
@@ -96,7 +101,10 @@ def generate_base_drafts(
drafts and cancel any queued work (already-in-flight provider calls can't be
hard-killed, but their results are dropped).
"""
- sprite = provider or imagegen.resolve_provider(require_references=False)
+ # A user reference image (e.g. their own pet) grounds every draft, so it
+ # needs a reference-capable provider — same requirement as the row passes.
+ refs = reference_images or None
+ sprite = provider or imagegen.resolve_provider(require_references=bool(refs))
cancelled = is_cancelled or (lambda: False)
# Each draft is its own one-shot generation, run concurrently so the user
@@ -104,25 +112,26 @@ def generate_base_drafts(
# Each gets a distinct variation nudge so the options aren't near-duplicates.
logger.info("pet generate: drafting %d base looks for %r (style=%s)", n, concept, style)
- def _one(index: int) -> tuple[int, Path | None]:
+ def _one(index: int) -> tuple[int, Path | None, str | None]:
if cancelled():
- return index, None
+ return index, None, None
t0 = time.monotonic()
variation = prompts.BASE_VARIATIONS[index % len(prompts.BASE_VARIATIONS)]
prompt = prompts.build_base_prompt(concept, style=style, variation=variation)
try:
- out = imagegen.generate(prompt, n=1, provider=sprite, prefix="pet_base")
+ out = imagegen.generate(prompt, n=1, reference_images=refs, provider=sprite, prefix="pet_base")
except Exception as exc: # noqa: BLE001 - tolerate a single failed draft
logger.warning("pet generate: draft %d failed after %.1fs: %s", index, time.monotonic() - t0, exc)
- return index, None
+ return index, None, str(exc)
if not out:
logger.warning("pet generate: draft %d produced no image", index)
- return index, None
+ return index, None, "the image provider returned no image"
logger.info("pet generate: draft %d ready in %.1fs", index, time.monotonic() - t0)
- return index, _harden_transparency(out[0])
+ return index, _harden_transparency(out[0]), None
workers = max(1, min(n, _MAX_PARALLEL_GENERATIONS))
results: dict[int, Path] = {}
+ errors: list[str] = []
with ThreadPoolExecutor(max_workers=workers) as pool:
futures = [pool.submit(_one, i) for i in range(n)]
# as_completed runs in *this* (the caller's) thread, so on_draft — and any
@@ -134,8 +143,10 @@ def generate_base_drafts(
for pending in futures:
pending.cancel()
break
- index, path = fut.result()
+ index, path, err = fut.result()
if path is None:
+ if err:
+ errors.append(err)
continue
results[index] = path
if on_draft is not None:
@@ -146,10 +157,42 @@ def generate_base_drafts(
drafts = [results[i] for i in sorted(results)]
if not drafts and not cancelled():
- raise GenerationError("image generation produced no usable drafts")
+ # Surface *why* — every draft failed for a reason (a content-policy refusal
+ # on a name like "minion", a provider/auth error, …); the most common one
+ # is the representative cause. Far more useful than "no usable drafts".
+ raise GenerationError(_drafts_failed_reason(errors))
return drafts
+def _drafts_failed_reason(errors: list[str]) -> str:
+    """Pick the message to raise when a draft round produced nothing.
+
+    With no recorded *errors* a generic sentence is returned. Otherwise the
+    most frequent error string (first seen wins a tie) is passed through
+    :func:`_humanize_image_error`.
+    """
+    from collections import Counter
+
+    if errors:
+        tally = Counter(errors)
+        # max() over the counter's keys walks them in insertion order, so the
+        # earliest error wins among equally frequent ones.
+        dominant = max(tally, key=tally.__getitem__)
+        return _humanize_image_error(dominant)
+    return "image generation produced no usable drafts"
+
+
+def _humanize_image_error(error: str) -> str:
+    """Turn a raw provider error into a friendly, actionable sentence.
+
+    Classification is a case-insensitive keyword match, checked in this order:
+    moderation/safety refusals, then authentication problems, then rate
+    limiting. Anything else is reduced to its first non-blank line, stripped
+    and capped at 200 characters. A blank *error* yields a generic sentence
+    rather than raising or returning an empty message.
+    """
+    import re
+
+    low = error.lower()
+    if any(s in low for s in ("moderation_blocked", "safety system", "content policy", "content_policy")):
+        return (
+            "The image provider blocked this prompt — its safety filter rejects "
+            "trademarked characters and real people. Try an original description."
+        )
+    # Status codes must not be part of a longer digit run, so numbers embedded
+    # in request ids or sizes (e.g. "req_94290x") are not read as HTTP 401/429.
+    if any(s in low for s in ("api key", "unauthorized", "auth")) or re.search(r"(?<!\d)401(?!\d)", low):
+        return "The image provider rejected the request — check your API key in Settings → Providers."
+    if "rate limit" in low or re.search(r"(?<!\d)429(?!\d)", low):
+        return "The image provider is rate-limiting — wait a moment and try again."
+    # Otherwise the first line with content, trimmed of the noisy provider
+    # envelope; skipping blank lines avoids an IndexError on "" and an empty
+    # message when the error starts with a newline.
+    for line in error.splitlines():
+        line = line.strip()
+        if line:
+            return line[:200]
+    return "The image provider returned an unspecified error."
+
+
def hatch_pet(
*,
base_image: str | Path,
@@ -194,25 +237,48 @@ def hatch_pet(
if cancelled():
return state, None
t0 = time.monotonic()
- try:
- strips = imagegen.generate(
- prompts.build_row_prompt(state, count, label, style=style),
- n=1,
- reference_images=[base],
- provider=sprite,
- prefix=f"pet_row_{state}",
- )
- # One image call per row (the expensive part). ``auto`` validates by
- # connected components with an equal-slot fallback; raw (fit=False) so
- # normalize_cells registers the whole pet at once. We deliberately do
- # NOT re-generate a ragged row — the registration pass salvages it far
- # cheaper than another image-model round-trip.
- frames = atlas.extract_strip_frames(strips[0], count, method="auto", fit=False)
- logger.info("pet hatch %r: row %r ready in %.1fs", slug, state, time.monotonic() - t0)
- return state, frames
- except Exception as exc: # noqa: BLE001 - one bad row is tolerated (idle guaranteed)
- logger.warning("pet hatch %r: row %r failed after %.1fs: %s", slug, state, time.monotonic() - t0, exc)
- return state, None
+ last_exc: Exception | None = None
+ # Self-healing: a model occasionally returns a row whose poses are touching
+ # (no clean gutters), which slices badly. We retry such rolls; only the
+ # final attempt falls back to lenient ``auto`` slicing so a stubborn row
+ # still yields *something* rather than dropping the whole row.
+ for attempt in range(_ROW_GEN_ATTEMPTS):
+ if cancelled():
+ return state, None
+ strict = attempt < _ROW_GEN_ATTEMPTS - 1
+ try:
+ strips = imagegen.generate(
+ prompts.build_row_prompt(state, count, label, style=style),
+ n=1,
+ reference_images=[base],
+ provider=sprite,
+ prefix=f"pet_row_{state}",
+ # Wider canvas → each frame gets real horizontal room, so winged
+ # poses keep a full, healthy size and still leave clean gutters.
+ aspect_ratio="landscape",
+ )
+ # ``components`` requires clean per-pose gutters (raises otherwise),
+ # so a touching roll is rejected and regenerated; the last attempt
+ # uses ``auto`` (equal-slot fallback, never raises). Raw (fit=False)
+ # so normalize_cells registers the whole pet at once.
+ method = "components" if strict else "auto"
+ frames = atlas.extract_strip_frames(strips[0], count, method=method, fit=False)
+ logger.info(
+ "pet hatch %r: row %r ready in %.1fs (attempt %d)",
+ slug, state, time.monotonic() - t0, attempt + 1,
+ )
+ return state, frames
+ except Exception as exc: # noqa: BLE001 - retried; one bad row is tolerated
+ last_exc = exc
+ logger.warning(
+ "pet hatch %r: row %r attempt %d/%d failed: %s",
+ slug, state, attempt + 1, _ROW_GEN_ATTEMPTS, exc,
+ )
+ logger.warning(
+ "pet hatch %r: row %r gave up after %.1fs: %s",
+ slug, state, time.monotonic() - t0, last_exc,
+ )
+ return state, None
# running-left is derived by mirroring running-right (guaranteed-consistent
# and one fewer generation), so we don't generate it directly.
diff --git a/agent/pet/generate/prompts.py b/agent/pet/generate/prompts.py
index c6afbc28313..eab72e593f3 100644
--- a/agent/pet/generate/prompts.py
+++ b/agent/pet/generate/prompts.py
@@ -76,6 +76,29 @@ def style_hint(style: str | None) -> str:
return _STYLE_HINTS.get((style or "auto").strip().lower(), "")
+# Pixel width assumed for a row strip: rows are generated on the wider
+# landscape canvas (see imagegen.generate / orchestrate). The value is used
+# here only to quote concrete pixel numbers in the row prompt.
+_ASSUMED_STRIP_WIDTH = 1536
+
+
+def _spacing_spec(frame_count: int) -> tuple[int, int]:
+    """Return ``(pose width px, gap px)`` to cite for a row of *frame_count* poses.
+
+    The assumed strip width is divided into equal cells (at least one). The
+    pose width is 70% of a cell and the gap is 30% of a cell, never less than
+    48px. The intent is proportional containment: each pose keeps its ENTIRE
+    silhouette (wings/tail/halo included) inside its own cell, which leaves a
+    real gutter without shrinking the pet.
+    """
+    cell_w = _ASSUMED_STRIP_WIDTH / (frame_count if frame_count > 1 else 1)
+    gap_px = round(cell_w * 0.3)
+    if gap_px < 48:
+        gap_px = 48
+    return round(cell_w * 0.7), gap_px
+
+
# Per-draft nudges so the 4 base options are actually distinct — gpt-image returns
# near-duplicates for a single prompt. We vary the *look* (palette, build,
# expression, accents), NOT the pose, so the chosen base still grounds clean,
@@ -118,14 +141,24 @@ def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str |
"""
action = STATE_ACTIONS.get(state, "a simple idle pose")
concept = (concept or "the mascot").strip()
+ pose_px, gap_px = _spacing_spec(frame_count)
return (
f"Using the attached reference image as the exact same character "
f"(same species, face, colors, markings, proportions, and props), "
- f"draw a single horizontal strip of {frame_count} animation frames showing {action}. "
- f"The {frame_count} poses must be evenly spaced left to right, each fully separated "
- "by clear empty chroma-key gutters; silhouettes must NEVER touch, overlap, "
- "share a shadow, share a ground line, share motion trails, or merge into "
- "one connected shape. "
+ f"draw a single WIDE horizontal strip of {frame_count} animation frames showing {action}. "
+ f"LAYOUT: split the wide strip into {frame_count} equal vertical cells, one "
+ "pose centered in each cell. "
+ f"SPACING (critical): draw each pose at a consistent, healthy, clearly "
+ f"visible size (roughly {pose_px}px wide on a {_ASSUMED_STRIP_WIDTH}px "
+ f"strip) — do NOT shrink it tiny — but keep its ENTIRE silhouette "
+ f"(wings, tail, halo, horns, cape, every appendage) fully INSIDE its own "
+ f"cell. Leave at least {gap_px}px of empty chroma-key background between "
+ f"neighboring silhouettes at their closest point (wingtip to wingtip), and "
+ f"the same empty margin before the first pose and after the last. If a wing, "
+ f"cape, or tail would reach into a neighbor, FOLD or angle it inward rather "
+ f"than letting it cross the gap. Silhouettes must NEVER touch, overlap, "
+ f"share a shadow, share a ground line, share motion trails, or merge into "
+ f"one connected shape. "
# Registration: a clean sprite sheet keeps the character locked in place
# so only the action moves — this is what stops the loop sliding/pulsing.
"REGISTRATION (critical): the character is the SAME height and SAME width "
diff --git a/apps/desktop/src/app/pet-generate/components/draft-grid.tsx b/apps/desktop/src/app/pet-generate/components/draft-grid.tsx
new file mode 100644
index 00000000000..abef61f027f
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/draft-grid.tsx
@@ -0,0 +1,89 @@
+import { PixelEggSprite } from '@/components/pet/pixel-egg-sprite'
+import { Button } from '@/components/ui/button'
+import { useI18n } from '@/i18n'
+import { PawPrint } from '@/lib/icons'
+import { selectableCardClass } from '@/lib/selectable-card'
+import { cn } from '@/lib/utils'
+
+const VARIANT_COUNT = 4
+
+// Props for DraftGrid.
+interface DraftGridProps {
+ // Drafts received so far; `index` is the variant slot a draft fills and the id compared against `selected`.
+ drafts: { index: number; dataUri: string }[]
+ // While true the grid is padded to VARIANT_COUNT slots, with `null` standing in for drafts that have not arrived.
+ generating: boolean
+ // Gates the block rendered below the grid once drafts exist.
+ hasDrafts: boolean
+ onCancel: () => void
+ onHatch: () => void
+ // NOTE(review): presumably called with a draft's `index` when its card is picked — the call site is not visible here.
+ onSelect: (index: number) => void
+ // `index` of the currently selected draft, or null when nothing is selected.
+ selected: number | null
+}
+
+export function DraftGrid({ drafts, generating, hasDrafts, onCancel, onHatch, onSelect, selected }: DraftGridProps) {
+ const { t } = useI18n()
+ const copy = t.commandCenter.generatePet
+
+ const slots = generating
+ ? Array.from({ length: VARIANT_COUNT }, (_, i) => drafts.find(draft => draft.index === i) ?? null)
+ : drafts
+
+ return (
+
+ {slots.map((draft, i) => {
+ // A streamed draft is selectable immediately — even mid-generation —
+ // so the user can commit to one without waiting for the rest.
+ const isSelected = draft != null && selected === draft.index
+
+ return (
+
+ )
+ })}
+
+
+ {/* Same abort/go-back text link in both states (sits right under the grid);
+ once drafts land, the full-width Hatch drops in below it. */}
+
+ {hasDrafts && (
+
+ )}
+
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/components/empty-hint.tsx b/apps/desktop/src/app/pet-generate/components/empty-hint.tsx
new file mode 100644
index 00000000000..99b9822ea82
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/empty-hint.tsx
@@ -0,0 +1,27 @@
+import { Button } from '@/components/ui/button'
+
+interface EmptyHintProps {
+ onExample: (prompt: string) => void
+}
+
+// Creative seed prompts — specifics make better pets (petdex's own advice).
+// Short chips that wrap into a tight, centered cluster (capped width → 2 rows).
+const EXAMPLE_PROMPTS = ['bubble-tea otter', 'sock elf', 'pixel dragon', 'office cat', 'neon axolotl', 'moss golem']
+
+export function EmptyHint({ onExample }: EmptyHintProps) {
+ return (
+
+ {EXAMPLE_PROMPTS.map(example => (
+
+ ))}
+
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx b/apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx
new file mode 100644
index 00000000000..d3161d2a771
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx
@@ -0,0 +1,52 @@
+import { Button } from '@/components/ui/button'
+import { ExternalLink } from '@/lib/external-link'
+import { PawPrint, Settings2 } from '@/lib/icons'
+
+interface GenerateUnavailableProps {
+ onSetup: () => void
+}
+
+// Shown when no reference-capable image backend is configured: generation is
+// impossible, so we replace the prompt entirely with a friendly path to set one
+// up (in-app) plus where to grab a key.
+export function GenerateUnavailable({ onSetup }: GenerateUnavailableProps) {
+ return (
+
+
+
+
+
+
Add an image backend to generate
+
+ Hatching a custom pet needs a provider that can ground on a reference image.
+
+
+
+
+ Grab a key from
+
+ Nous Portal
+
+ ·
+
+ OpenRouter
+
+ ·
+
+ OpenAI
+
+
+
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/components/hatch-preview.tsx b/apps/desktop/src/app/pet-generate/components/hatch-preview.tsx
new file mode 100644
index 00000000000..8adb6c3f9f2
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/hatch-preview.tsx
@@ -0,0 +1,137 @@
+import { useEffect, useState } from 'react'
+
+import { PetSprite } from '@/components/pet/pet-sprite'
+import { PetStarShower } from '@/components/pet/pet-star-shower'
+import { PixelEggSprite } from '@/components/pet/pixel-egg-sprite'
+import { Alert, AlertDescription } from '@/components/ui/alert'
+import { Button } from '@/components/ui/button'
+import { Input } from '@/components/ui/input'
+import { useI18n } from '@/i18n'
+import { triggerHaptic } from '@/lib/haptics'
+import { Loader2, PawPrint, RefreshCw } from '@/lib/icons'
+import { type PetInfo } from '@/store/pet'
+
+import { frameCountForRow } from '../lib/frame-count'
+
+const PREVIEW_SCALE = 0.7
+const PREVIEW_STATE_MS = 1400
+
+const PREVIEW_ROWS = ['idle', 'waving', 'running-right', 'running-left', 'running', 'review', 'jumping', 'failed', 'waiting']
+
+interface HatchPreviewProps {
+ pet: PetInfo
+ adopting: boolean
+ error: string | null
+ onAdopt: (name: string) => void
+ onDiscard: () => void
+}
+
+export function HatchPreview({ pet, adopting, error, onAdopt, onDiscard }: HatchPreviewProps) {
+ const { t } = useI18n()
+ const copy = t.commandCenter.generatePet
+ // Empty so the "Name your pet" placeholder shows; blank adopt keeps the
+ // provisional name from the prompt.
+ const [name, setName] = useState('')
+ // Play the egg's crack/hatch frames once before swapping in the live pet.
+ const [revealed, setRevealed] = useState(false)
+ // Right after the egg cracks the pet plays its "yay" jump a couple times, then
+ // hands off to the normal state-cycling preview.
+ const [celebrating, setCelebrating] = useState(false)
+ const [stateIndex, setStateIndex] = useState(0)
+ const previewRows = (pet.stateRows?.length ? pet.stateRows : PREVIEW_ROWS).filter(row => frameCountForRow(pet, row) > 0)
+ const rows = previewRows.length > 0 ? previewRows : ['idle']
+ const activeRow = rows[stateIndex % rows.length] ?? 'idle'
+ const canJump = frameCountForRow(pet, 'jumping') > 0
+ const rowOverride = celebrating && canJump ? 'jumping' : activeRow
+
+ useEffect(() => {
+ const id = setInterval(() => setStateIndex(i => (i + 1) % rows.length), PREVIEW_STATE_MS)
+
+ return () => clearInterval(id)
+ }, [rows.length])
+
+ // On reveal: celebrate (jump) ~2 loops, then drop into the cycling preview.
+ useEffect(() => {
+ if (!revealed) {
+ return
+ }
+
+ setCelebrating(true)
+
+ const id = setTimeout(() => {
+ setCelebrating(false)
+ setStateIndex(0)
+ }, 2 * (pet.loopMs ?? 1100))
+
+ return () => clearTimeout(id)
+ }, [revealed, pet.loopMs])
+
+ useEffect(() => {
+ setStateIndex(0)
+ setName('')
+ setRevealed(false)
+ setCelebrating(false)
+ }, [pet.slug])
+
+ const previewInfo: PetInfo = { ...pet, scale: PREVIEW_SCALE }
+
+ return (
+
+ {/* Fills the (now narrow) dialog so the pet frame is the screen width. */}
+
+ {revealed ? (
+ <>
+
+
+
+
+
+
+
+ >
+ ) : (
+ // The egg cracks open, then we swap in the live pet.
+ {
+ setRevealed(true)
+ triggerHaptic('crisp')
+ }}
+ size={150}
+ />
+ )}
+
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/components/hatching-view.tsx b/apps/desktop/src/app/pet-generate/components/hatching-view.tsx
new file mode 100644
index 00000000000..8e347741d6b
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/hatching-view.tsx
@@ -0,0 +1,24 @@
+import { PetEggHatch } from '@/components/pet/pet-egg-hatch'
+import { useI18n } from '@/i18n'
+import { cancelHatch, type PetHatchStage } from '@/store/pet-generate'
+
+interface HatchingViewProps {
+ stage: PetHatchStage | null
+}
+
+// The hatch progress screen — a beating egg with a phase-tracking subtitle
+// (per-row → composing → saving).
+export function HatchingView({ stage }: HatchingViewProps) {
+ const { t } = useI18n()
+ const copy = t.commandCenter.generatePet
+
+ // No stage reported yet → the generic hatching subtitle. Otherwise the 'row'
+ // phase formats state/done/total (missing fields default to ''/0/0), 'compose'
+ // uses the composing copy, and every other phase falls through to "saving".
+ const subtitle = stage
+ ? stage.phase === 'row'
+ ? copy.hatchRow(stage.state ?? '', stage.done ?? 0, stage.total ?? 0)
+ : stage.phase === 'compose'
+ ? copy.hatchComposing
+ : copy.hatchSaving
+ : copy.hatchingSub
+
+ return
+}
diff --git a/apps/desktop/src/app/pet-generate/components/provider-picker.tsx b/apps/desktop/src/app/pet-generate/components/provider-picker.tsx
new file mode 100644
index 00000000000..bd40a30ba31
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/provider-picker.tsx
@@ -0,0 +1,53 @@
+import { useStore } from '@nanostores/react'
+
+import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from '@/components/ui/dropdown-menu'
+import { Check, ChevronDown } from '@/lib/icons'
+import { $petGenProvider, $petGenProviders, setPetGenProvider } from '@/store/pet-generate'
+
+// Image-backend picker for pet generation — the composer's model-pill pattern:
+// a quiet trigger + a dropdown of options, each with a one-line speed/quality
+// note. Hidden unless there are 2+ reference-capable backends (nothing to pick).
+export function ProviderPicker() {
+ const providers = useStore($petGenProviders)
+ const picked = useStore($petGenProvider)
+
+ if (providers.length < 2) {
+ return null
+ }
+
+ const fallback = providers.find(p => p.default) ?? providers[0]
+ const current = providers.find(p => p.name === picked) ?? fallback
+
+ return (
+
+
+ {/* Plain text affordance (matches "Add a reference"), not a padded pill. */}
+
+
+ {/* The picker lives inside the pet-gen Dialog (z-130) and portals to body,
+ so lift its menu above the dialog or it opens behind it. */}
+
+ {providers.map(provider => (
+ setPetGenProvider(provider.default ? '' : provider.name)}
+ >
+
+ {provider.label}
+ {provider.name === current?.name && }
+
+ {provider.note && {provider.note}}
+
+ ))}
+
+
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/components/reference-chip.tsx b/apps/desktop/src/app/pet-generate/components/reference-chip.tsx
new file mode 100644
index 00000000000..266658a9dab
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/components/reference-chip.tsx
@@ -0,0 +1,48 @@
+import { useState } from 'react'
+
+import { ImageLightbox } from '@/components/chat/zoomable-image'
+import { useImageDownload } from '@/hooks/use-image-download'
+import { useI18n } from '@/i18n'
+import { X } from '@/lib/icons'
+
+interface ReferenceChipProps {
+ name: string
+ onRemove: () => void
+ src: string
+}
+
+// The reference photo as an attachment chip: filename + thumbnail that opens
+// the shared image viewer (lightbox), with a remove affordance.
+export function ReferenceChip({ name, onRemove, src }: ReferenceChipProps) {
+ const { t } = useI18n()
+ const { download, saving } = useImageDownload(src)
+ const [viewing, setViewing] = useState(false)
+
+ return (
+
+
+
+ {name || 'Reference'}
+
+
+
+
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/lib/frame-count.ts b/apps/desktop/src/app/pet-generate/lib/frame-count.ts
new file mode 100644
index 00000000000..97a49a8cd6b
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/lib/frame-count.ts
@@ -0,0 +1,26 @@
+import { type PetInfo } from '@/store/pet'
+
+// Sprite row → the PetInfo frame-count key it resolves to. Directional runs
+// (`running-right`, `running-left`) and aliases (`waving`, `jumping`, `running`)
+// collapse onto their base state; rows missing from this table have no mapping.
+const ROW_TO_FRAME_KEY: Record<string, string> = {
+  idle: 'idle',
+  wave: 'wave',
+  waving: 'wave',
+  jump: 'jump',
+  jumping: 'jump',
+  run: 'run',
+  running: 'run',
+  'running-right': 'run',
+  'running-left': 'run',
+  failed: 'failed',
+  review: 'review',
+  waiting: 'waiting'
+}
+
+// Resolve how many frames a sprite row really has. Sources are consulted in
+// priority order — the concrete per-row count, the per-state count keyed by the
+// row name, the per-state count of the row's mapped base state, then the
+// sheet-wide default — and the first one that is neither null nor undefined
+// wins. Returns 0 when none of them is set.
+export function frameCountForRow(pet: PetInfo, row: string): number {
+  const baseState = ROW_TO_FRAME_KEY[row]
+
+  const candidates = [
+    pet.framesByRow?.[row],
+    pet.framesByState?.[row],
+    baseState ? pet.framesByState?.[baseState] : undefined,
+    pet.framesPerState
+  ]
+
+  for (const count of candidates) {
+    if (count !== undefined && count !== null) {
+      return count
+    }
+  }
+
+  return 0
+}
diff --git a/apps/desktop/src/app/pet-generate/lib/read-reference-image.ts b/apps/desktop/src/app/pet-generate/lib/read-reference-image.ts
new file mode 100644
index 00000000000..06c480e95ed
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/lib/read-reference-image.ts
@@ -0,0 +1,49 @@
+const DEFAULT_MAX_INPUT_BYTES = 16 * 1024 * 1024
+
+// Decode `url` into an image element. Resolves with the element once the
+// browser has loaded it; rejects with an 'unreadable image' error when the
+// load fails.
+function loadImage(url: string): Promise<HTMLImageElement> {
+  return new Promise((resolve, reject) => {
+    const img = new Image()
+
+    img.onload = () => resolve(img)
+    img.onerror = () => reject(new Error('unreadable image'))
+    // Assign `src` last so both handlers are attached before loading starts.
+    img.src = url
+  })
+}
+
+// Read an image file as a downscaled PNG data URL. We decode from an object URL
+// (not readAsDataURL) so large files don't inflate into giant base64 strings
+// before we scale them down for generation.
+//
+// `max` caps the longest side in pixels (the image is never upscaled) and
+// `maxInputBytes` caps the accepted file size. Rejects with:
+//   - 'reference image too large' when the file exceeds `maxInputBytes`
+//   - 'unreadable image' when the file cannot be decoded or has no pixels
+//   - 'could not create canvas context' when a 2D context is unavailable
+export async function readReferenceImage(
+  file: File,
+  max = 1024,
+  maxInputBytes = DEFAULT_MAX_INPUT_BYTES
+): Promise<string> {
+  if (file.size > maxInputBytes) {
+    throw new Error('reference image too large')
+  }
+
+  const objectUrl = URL.createObjectURL(file)
+
+  try {
+    const img = await loadImage(objectUrl)
+    const longestSide = Math.max(img.width, img.height)
+
+    // An image with no intrinsic size would otherwise be drawn onto a 1×1
+    // canvas and returned as a blank reference; treat it as unreadable instead.
+    if (longestSide <= 0) {
+      throw new Error('unreadable image')
+    }
+
+    const scale = Math.min(1, max / longestSide)
+    // Clamp to 1px so an extreme aspect ratio can't round a side down to zero.
+    const width = Math.max(1, Math.round(img.width * scale))
+    const height = Math.max(1, Math.round(img.height * scale))
+
+    const canvas = document.createElement('canvas')
+    canvas.width = width
+    canvas.height = height
+
+    const ctx = canvas.getContext('2d')
+
+    if (!ctx) {
+      throw new Error('could not create canvas context')
+    }
+
+    ctx.drawImage(img, 0, 0, width, height)
+
+    return canvas.toDataURL('image/png')
+  } finally {
+    // Always release the blob URL, including on decode or canvas failure.
+    URL.revokeObjectURL(objectUrl)
+  }
+}
diff --git a/apps/desktop/src/app/pet-generate/pet-generate-content.tsx b/apps/desktop/src/app/pet-generate/pet-generate-content.tsx
new file mode 100644
index 00000000000..2c6f2a815de
--- /dev/null
+++ b/apps/desktop/src/app/pet-generate/pet-generate-content.tsx
@@ -0,0 +1,291 @@
+import { useStore } from '@nanostores/react'
+import { useEffect, useRef } from 'react'
+import { useNavigate } from 'react-router-dom'
+
+import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request'
+import { SETTINGS_ROUTE } from '@/app/routes'
+import { Alert, AlertDescription } from '@/components/ui/alert'
+import { DialogHeader, DialogTitle } from '@/components/ui/dialog'
+import { GenerateButton } from '@/components/ui/generate-button'
+import { Input } from '@/components/ui/input'
+import { useI18n } from '@/i18n'
+import { triggerHaptic } from '@/lib/haptics'
+import { Egg, ImageIcon } from '@/lib/icons'
+import { cn } from '@/lib/utils'
+import {
+ $petGenAvailable,
+ $petGenDrafts,
+ $petGenError,
+ $petGenInput,
+ $petGenPreview,
+ $petGenRefImage,
+ $petGenRefName,
+ $petGenSelected,
+ $petGenStage,
+ $petGenStatus,
+ adoptHatched,
+ cancelGenerate,
+ checkPetGenAvailable,
+ cleanPetName,
+ closePetGenerate,
+ discardDrafts,
+ discardHatched,
+ generateDrafts,
+ hatchSelected
+} from '@/store/pet-generate'
+
+import { DraftGrid } from './components/draft-grid'
+import { EmptyHint } from './components/empty-hint'
+import { GenerateUnavailable } from './components/generate-unavailable'
+import { HatchPreview } from './components/hatch-preview'
+import { HatchingView } from './components/hatching-view'
+import { ProviderPicker } from './components/provider-picker'
+import { ReferenceChip } from './components/reference-chip'
+import { readReferenceImage } from './lib/read-reference-image'
+
+// The generate → hatch → adopt controller. A thin view over the `pet-generate`
+// store; the store owns the steps and persists inputs across close/reopen.
+export function PetGenerateContent() {
+ const { t } = useI18n()
+ const copy = t.commandCenter.generatePet
+ const { requestGateway } = useGatewayRequest()
+ const navigate = useNavigate()
+
+ const status = useStore($petGenStatus)
+ const error = useStore($petGenError)
+ const available = useStore($petGenAvailable)
+ // `null` = not yet probed → stay optimistic (show the prompt); only the
+ // confirmed-no-backend case swaps in the setup card.
+ const unavailable = available === false
+ const drafts = useStore($petGenDrafts)
+ const selected = useStore($petGenSelected)
+ const preview = useStore($petGenPreview)
+ const stage = useStore($petGenStage)
+
+ // Inputs live in atoms so they survive a close/reopen (and background runs).
+ const prompt = useStore($petGenInput)
+ const refImage = useStore($petGenRefImage)
+ const refName = useStore($petGenRefName)
+ const fileRef = useRef(null)
+
+ // Probe backend availability on open — and again whenever the content
+ // remounts (e.g. after returning from the providers settings), so adding a
+ // key flips the setup card to the prompt with no manual refresh.
+ useEffect(() => {
+ void checkPetGenAvailable(requestGateway)
+ }, [requestGateway])
+
+ const busy = status === 'generating' || status === 'hatching'
+ const hasDrafts = drafts.length > 0
+ const generating = status === 'generating'
+
+ // The idle "describe a pet" state — egg + suggestions get generous, equidistant
+ // breathing room (gap-4) from the prompt; the working states stay compact.
+ const isEmptyState =
+ !hasDrafts &&
+ !generating &&
+ status !== 'hatching' &&
+ status !== 'preview' &&
+ status !== 'adopting' &&
+ status !== 'stale'
+
+ const generate = () => {
+ if ((prompt.trim() || refImage) && !busy) {
+ void generateDrafts(requestGateway, { prompt: prompt.trim(), referenceImage: refImage ?? undefined })
+ }
+ }
+
+ const clearReference = () => {
+ $petGenRefImage.set(null)
+ $petGenRefName.set('')
+ }
+
+ const pickReference = (file: File | undefined) => {
+ if (!file) {
+ return
+ }
+
+ const mapReferenceError = (reason: unknown): string => {
+ const message = reason instanceof Error ? reason.message.toLowerCase() : ''
+
+ return message.includes('too large') ? copy.referenceImageTooLarge : copy.referenceImageInvalid
+ }
+
+ void readReferenceImage(file)
+ .then(dataUrl => {
+ $petGenRefImage.set(dataUrl)
+ $petGenRefName.set(file.name)
+ // Clear picker-only errors once the reference is valid again.
+
+ if ($petGenStatus.get() === 'error' && $petGenDrafts.get().length === 0) {
+ $petGenStatus.set('idle')
+ $petGenError.set(null)
+ }
+ })
+ .catch(reason => {
+ $petGenRefImage.set(null)
+ $petGenRefName.set('')
+ $petGenError.set(mapReferenceError(reason))
+
+ if (!busy) {
+ $petGenStatus.set('error')
+ }
+ })
+ }
+
+ // One-click an example prompt straight into a draft round.
+ const runExample = (example: string) => {
+ $petGenInput.set(example)
+ void generateDrafts(requestGateway, { prompt: example })
+ }
+
+ // Hatch the selected draft. The user can pick one before the rest stream in —
+ // if so, abort the remaining generations first (keeping the drafts we have).
+ // The prompt is grounding text, not a label; the user names it on reveal.
+ const hatch = () => {
+ if (selected === null) {
+ return
+ }
+
+ if (generating) {
+ cancelGenerate()
+ }
+
+ void hatchSelected(requestGateway, { name: cleanPetName(prompt), prompt: prompt.trim() })
+ }
+
+ const adopt = (finalName: string) => {
+ void adoptHatched(requestGateway, finalName).then(out => {
+ if (out.ok) {
+ triggerHaptic('crisp')
+ closePetGenerate()
+ }
+ })
+ }
+
+ // The header title tracks the phase instead of sticking on "Generate a pet".
+ const headerTitle =
+ status === 'hatching' ? copy.spawning : status === 'preview' || status === 'adopting' ? copy.hatched : copy.title
+
+ // Send the user to set up a key without closing — the overlay yields to the
+ // settings route (useRouteOverlayActive) and reappears + re-checks on return.
+ const setupImageGen = () => navigate(`${SETTINGS_ROUTE}?tab=providers`)
+
+ // Prompt input only belongs on the describe/draft screens (and never when
+ // there's no backend to generate with).
+ const showPrompt = !unavailable && status !== 'hatching' && status !== 'preview' && status !== 'adopting'
+
+ return (
+ <>
+ {unavailable ? (
+ {copy.title}
+ ) : (
+
+ {headerTitle}
+
+ )}
+
+
+ {/* Concept prompt with the inline sparkle generate/stop affordance (the
+ same primitive as the commit-message + project-idea fields). */}
+ {showPrompt && (
+
+
+ {/* Optional reference photo — make a pet from the user's own image.
+ Styled like the chat composer's attachment pill. */}
+ {
+ pickReference(event.target.files?.[0])
+ event.target.value = ''
+ }}
+ ref={fileRef}
+ type="file"
+ />
+
+ )}
+
+ {/* Hatch failed but the drafts are still here — show why above the grid so
+ the user can re-pick and retry without losing their options. */}
+ {status === 'error' && hasDrafts && (
+
+ {error || copy.genericError}
+
+ )}
+
+ {unavailable ? (
+
+ ) : status === 'stale' ? (
+
+ {copy.staleBackend}
+
+ ) : status === 'hatching' ? (
+
+ ) : (status === 'preview' || status === 'adopting') && preview ? (
+ void discardHatched(requestGateway)}
+ pet={preview}
+ />
+ ) : !hasDrafts && !generating ? (
+ // Doubles as the error-empty state — the failure reason rides the
+ // dialog's footer banner, so here we just offer the retry sparks.
+
+ ) : (
+ $petGenSelected.set(index)}
+ selected={selected}
+ />
+ )}
+
+ >
+ )
+}
diff --git a/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx b/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx
index 954dac23bfd..cd262e142c6 100644
--- a/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx
+++ b/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx
@@ -6,92 +6,37 @@
* breathe: a device-framed header, its own concept prompt, a roomy draft grid
* that streams in live, and the egg-hatch + reveal flow. It's a thin view over
* the `pet-generate` store; the store owns the generate → hatch → adopt steps.
+ *
+ * This file is just the dialog shell + sizing; the flow lives in
+ * `PetGenerateContent`, and each screen is its own atomic component under
+ * `./components`.
*/
import { useStore } from '@nanostores/react'
-import { useEffect, useState } from 'react'
-import { useNavigate } from 'react-router-dom'
-import { SETTINGS_ROUTE } from '@/app/routes'
import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request'
import { useRouteOverlayActive } from '@/app/hooks/use-route-overlay-active'
-import { PetEggHatch } from '@/components/pet/pet-egg-hatch'
-import { PetStarShower } from '@/components/pet/pet-star-shower'
-import { PetSprite } from '@/components/pet/pet-sprite'
-import { PixelEggSprite } from '@/components/pet/pixel-egg-sprite'
-import { Alert, AlertDescription } from '@/components/ui/alert'
-import { Button } from '@/components/ui/button'
-import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog'
-import { GenerateButton } from '@/components/ui/generate-button'
-import { Input } from '@/components/ui/input'
+import { Dialog, DialogContent } from '@/components/ui/dialog'
import { useI18n } from '@/i18n'
-import { ExternalLink } from '@/lib/external-link'
-import { triggerHaptic } from '@/lib/haptics'
-import { Egg, Loader2, PawPrint, RefreshCw, Settings2 } from '@/lib/icons'
-import { selectableCardClass } from '@/lib/selectable-card'
import { cn } from '@/lib/utils'
-import { type PetInfo } from '@/store/pet'
import {
- $petGenAvailable,
$petGenDrafts,
$petGenerateOpen,
$petGenError,
- $petGenPreview,
- $petGenSelected,
- $petGenStage,
$petGenStatus,
- adoptHatched,
- cancelGenerate,
- cancelHatch,
- checkPetGenAvailable,
- cleanPetName,
- cleanupPetGen,
- closePetGenerate,
- discardHatched,
- generateDrafts,
- hatchSelected
+ cleanupPetGenOnClose,
+ closePetGenerate
} from '@/store/pet-generate'
-const VARIANT_COUNT = 4
-const PREVIEW_SCALE = 0.7
-const PREVIEW_ROWS = [
- 'idle',
- 'waving',
- 'running-right',
- 'running-left',
- 'running',
- 'review',
- 'jumping',
- 'failed',
- 'waiting'
-]
-const PREVIEW_STATE_MS = 1400
-
-const ROW_TO_FRAME_KEY: Record = {
- idle: 'idle',
- wave: 'wave',
- waving: 'wave',
- jump: 'jump',
- jumping: 'jump',
- run: 'run',
- running: 'run',
- 'running-right': 'run',
- 'running-left': 'run',
- failed: 'failed',
- review: 'review',
- waiting: 'waiting'
-}
-
-function frameCountForRow(pet: PetInfo, row: string): number {
- const byState = pet.framesByState
- const mapped = ROW_TO_FRAME_KEY[row]
- return byState?.[row] ?? (mapped ? byState?.[mapped] : undefined) ?? pet.framesPerState ?? 0
-}
+import { PetGenerateContent } from './pet-generate-content'
export function PetGenerateOverlay() {
+ const { t } = useI18n()
+ const { requestGateway } = useGatewayRequest()
const open = useStore($petGenerateOpen)
const status = useStore($petGenStatus)
- const { requestGateway } = useGatewayRequest()
+ const error = useStore($petGenError)
+ const drafts = useStore($petGenDrafts)
// Yield the screen to a full-screen route overlay (e.g. /settings while the
// user adds an image-gen key) without tearing down — the store keeps us open,
@@ -102,449 +47,39 @@ export function PetGenerateOverlay() {
const handleOpenChange = (next: boolean) => {
if (!next) {
- // Deletes a hatched-but-unadopted preview pet so it doesn't linger, then
- // resets all generation state.
- cleanupPetGen(requestGateway)
+ cleanupPetGenOnClose(requestGateway)
+ // Never interrupt in-flight work. Generating/hatching continues in the
+ // background; only an unadopted finished preview is discarded on close.
closePetGenerate()
}
}
// The draft screen needs room for the 2×2 grid; the single-pet screens
// (hatch egg, reveal) shrink to the pet's frame so it isn't lost in a wide box.
+ // `fitContent` lets the dialog size to content; the `min-w` floors each phase.
const single = status === 'hatching' || status === 'preview' || status === 'adopting'
+ const copy = t.commandCenter.generatePet
+
+ // The footer banner narrates the dialog's async state: the failure reason on a
+ // dead-end error, else the "you can close this, we'll notify you" reassurance
+ // while a generate/hatch runs in the background.
+ const working = status === 'generating' || status === 'hatching'
+ const errored = status === 'error' && drafts.length === 0
+ const banner = errored ? error || copy.genericError : working ? copy.backgroundHint : undefined
return (
)
}
-
-function PetGenerateContent() {
- const { t } = useI18n()
- const copy = t.commandCenter.generatePet
- const { requestGateway } = useGatewayRequest()
- const navigate = useNavigate()
-
- const status = useStore($petGenStatus)
- const error = useStore($petGenError)
- const available = useStore($petGenAvailable)
- // `null` = not yet probed → stay optimistic (show the prompt); only the
- // confirmed-no-backend case swaps in the setup card.
- const unavailable = available === false
- const drafts = useStore($petGenDrafts)
- const selected = useStore($petGenSelected)
- const preview = useStore($petGenPreview)
- const stage = useStore($petGenStage)
-
- const [prompt, setPrompt] = useState('')
-
- // Probe backend availability on open — and again whenever the content
- // remounts (e.g. after returning from the providers settings), so adding a
- // key flips the setup card to the prompt with no manual refresh.
- useEffect(() => {
- void checkPetGenAvailable(requestGateway)
- }, [requestGateway])
-
- const busy = status === 'generating' || status === 'hatching'
- const hasDrafts = drafts.length > 0
- const generating = status === 'generating'
- // The idle "describe a pet" state — egg + suggestions get generous, equidistant
- // breathing room (gap-7.5) from the prompt; the working states stay compact.
- const isEmptyState =
- !hasDrafts &&
- !generating &&
- status !== 'hatching' &&
- status !== 'preview' &&
- status !== 'adopting' &&
- status !== 'stale'
-
- const close = () => {
- cleanupPetGen(requestGateway)
- closePetGenerate()
- }
-
- const generate = () => {
- if (prompt.trim() && !busy) {
- void generateDrafts(requestGateway, { prompt: prompt.trim() })
- }
- }
-
- // One-click an example prompt straight into a draft round.
- const runExample = (example: string) => {
- setPrompt(example)
- void generateDrafts(requestGateway, { prompt: example })
- }
-
- // Hatch with a clean default name derived from the prompt (the prompt itself
- // is grounding text, not a label); the user names it on the reveal screen.
- const hatch = () => {
- if (prompt.trim()) {
- void hatchSelected(requestGateway, { name: cleanPetName(prompt), prompt: prompt.trim() })
- }
- }
-
- const adopt = (finalName: string) => {
- void adoptHatched(requestGateway, finalName).then(out => {
- if (out.ok) {
- triggerHaptic('crisp')
- close()
- }
- })
- }
-
- // The header title tracks the phase instead of sticking on "Generate a pet".
- const headerTitle =
- status === 'hatching' ? copy.spawning : status === 'preview' || status === 'adopting' ? copy.hatched : copy.title
- // Send the user to set up a key without closing — the overlay yields to the
- // settings route (useRouteOverlayActive) and reappears + re-checks on return.
- const setupImageGen = () => navigate(`${SETTINGS_ROUTE}?tab=providers`)
-
- // Prompt input only belongs on the describe/draft screens (and never when
- // there's no backend to generate with).
- const showPrompt = !unavailable && status !== 'hatching' && status !== 'preview' && status !== 'adopting'
-
- return (
- <>
- {unavailable ? (
- {copy.title}
- ) : (
-
- {headerTitle}
-
- )}
-
-
- {/* Concept prompt with the inline sparkle generate/stop affordance (the
- same primitive as the commit-message + project-idea fields). */}
- {showPrompt && (
-
- >
- )
-}
-
-// Creative seed prompts — specifics make better pets (petdex's own advice).
-// Doubling as guidance and a one-click way to see the flow.
-const EXAMPLE_PROMPTS = ['a bubble-tea otter', 'a tiny sock elf', 'a pixel dragon', 'a grumpy office cat', 'a neon axolotl']
-
-// Shown when no reference-capable image backend is configured: generation is
-// impossible, so we replace the prompt entirely with a friendly path to set one
-// up (in-app) plus where to grab a key.
-function GenerateUnavailable({ onSetup }: { onSetup: () => void }) {
- return (
-
-
-
-
-
-
Add an image backend to generate
-
- Hatching a custom pet needs a provider that can ground on a reference image.
-
-
-
-
- Grab a key from
-
- Nous Portal
-
- ·
-
- OpenRouter
-
- ·
-
- OpenAI
-
-