From 1fe013ee16f19f6390f2efce39397447e7ec2f67 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 24 Jun 2026 19:08:06 -0500 Subject: [PATCH] feat(pets): polish generate flow and reduce hatch CPU pressure Ship the final pet-generation UX polish (provider picker behavior, step-2 cancel flow, banner integration, and visual consistency) and make saturated-chroma background removal C-op driven so hatch processing no longer hammers the machine during long runs. --- agent/pet/generate/atlas.py | 251 +++++---- agent/pet/generate/imagegen.py | 73 ++- agent/pet/generate/orchestrate.py | 122 +++- agent/pet/generate/prompts.py | 43 +- .../pet-generate/components/draft-grid.tsx | 89 +++ .../pet-generate/components/empty-hint.tsx | 27 + .../components/generate-unavailable.tsx | 52 ++ .../pet-generate/components/hatch-preview.tsx | 137 +++++ .../pet-generate/components/hatching-view.tsx | 24 + .../components/provider-picker.tsx | 53 ++ .../components/reference-chip.tsx | 48 ++ .../src/app/pet-generate/lib/frame-count.ts | 26 + .../pet-generate/lib/read-reference-image.ts | 49 ++ .../app/pet-generate/pet-generate-content.tsx | 291 ++++++++++ .../app/pet-generate/pet-generate-overlay.tsx | 525 +----------------- .../app/session/hooks/use-prompt-actions.ts | 13 + .../src/components/pet/floating-pet.tsx | 65 ++- .../src/components/pet/pet-egg-hatch.tsx | 8 +- .../desktop/src/components/pet/pet-sprite.tsx | 19 +- apps/desktop/src/components/ui/dialog.tsx | 99 +++- apps/desktop/src/i18n/en.ts | 12 +- apps/desktop/src/i18n/ja.ts | 12 +- apps/desktop/src/i18n/types.ts | 4 + apps/desktop/src/i18n/zh-hant.ts | 12 +- apps/desktop/src/i18n/zh.ts | 12 +- .../desktop/src/lib/desktop-slash-commands.ts | 2 + apps/desktop/src/store/pet-generate.ts | 139 ++++- apps/desktop/src/store/pet.ts | 6 + apps/desktop/src/styles.css | 21 +- cli.py | 3 + hermes_cli/cli_commands_mixin.py | 68 +++ hermes_cli/commands.py | 2 + tests/agent/test_pet_generate.py | 90 ++- 
tests/tui_gateway/test_pet_generate_rpc.py | 71 ++- tui_gateway/server.py | 274 +++++++-- 35 files changed, 2013 insertions(+), 729 deletions(-) create mode 100644 apps/desktop/src/app/pet-generate/components/draft-grid.tsx create mode 100644 apps/desktop/src/app/pet-generate/components/empty-hint.tsx create mode 100644 apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx create mode 100644 apps/desktop/src/app/pet-generate/components/hatch-preview.tsx create mode 100644 apps/desktop/src/app/pet-generate/components/hatching-view.tsx create mode 100644 apps/desktop/src/app/pet-generate/components/provider-picker.tsx create mode 100644 apps/desktop/src/app/pet-generate/components/reference-chip.tsx create mode 100644 apps/desktop/src/app/pet-generate/lib/frame-count.ts create mode 100644 apps/desktop/src/app/pet-generate/lib/read-reference-image.ts create mode 100644 apps/desktop/src/app/pet-generate/pet-generate-content.tsx diff --git a/agent/pet/generate/atlas.py b/agent/pet/generate/atlas.py index 8559ddb530d..2d316110e73 100644 --- a/agent/pet/generate/atlas.py +++ b/agent/pet/generate/atlas.py @@ -141,6 +141,8 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, """ from collections import deque + from PIL import Image, ImageChops + rgba = image.convert("RGBA") if _has_transparency(rgba): return _repair_internal_alpha_holes(rgba) @@ -153,7 +155,21 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, r, g, b, a = px[x, y] return a > _ALPHA_FLOOR and _color_distance(r, g, b, key) <= threshold + # Fast path for strongly-saturated chroma keys (our normal sprite prompts use + # hot magenta): remove all near-key opaque pixels with C-level channel ops. + # This clears both border-connected backdrop and enclosed triangular pockets + # between connected limbs/capes, without a Python flood over ~1.5M pixels. 
+ if max(key) - min(key) >= 120: + near = _near_key_mask(rgba, key) # L mask, 255 where near key + opaque = rgba.getchannel("A").point(lambda a: 255 if a > _ALPHA_FLOOR else 0) + remove_mask = ImageChops.darker(near, opaque) + return Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, remove_mask) + visited = bytearray(w * h) + # Mark removals in a flat mask and apply them in one C composite at the end — + # writing `px[x, y] = (0,0,0,0)` per pixel was ~3M PixelAccess calls (84% of + # the whole pipeline) and pegged a core in pure Python, stalling the gateway. + remove = bytearray(w * h) queue: deque[tuple[int, int]] = deque() # Seed from every border pixel that looks like background. @@ -181,7 +197,7 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, while queue: x, y = queue.popleft() - px[x, y] = (0, 0, 0, 0) + remove[y * w + x] = 1 for nx, ny in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)): if 0 <= nx < w and 0 <= ny < h: idx = ny * w + nx @@ -189,7 +205,11 @@ def remove_background(image, *, chroma_key: tuple[int, int, int] | None = None, visited[idx] = 1 if _is_bg(nx, ny): queue.append((nx, ny)) - return rgba + + # One C-level composite instead of millions of per-pixel writes: paint the + # flooded pixels to (0,0,0,0) wherever the mask is set. + mask = Image.frombytes("L", (w, h), bytes(remove)).point(lambda v: 255 if v else 0) + return Image.composite(Image.new("RGBA", rgba.size, (0, 0, 0, 0)), rgba, mask) def _repair_internal_alpha_holes(image): @@ -298,9 +318,13 @@ def _fit_to_cell(image): max_h = CELL_HEIGHT - _CELL_PAD scale = min(max_w / sprite.width, max_h / sprite.height, 1.0) if scale != 1.0: + # NEAREST, not LANCZOS: the generated "pixel art" has hard edges, and any + # interpolating resample anti-aliases them into a blurry, washed-out + # sprite once the renderer upscales the cell. Crisp blocky downscale reads + # as real pixel art. 
sprite = sprite.resize( (max(1, round(sprite.width * scale)), max(1, round(sprite.height * scale))), - Image.Resampling.LANCZOS, + Image.Resampling.NEAREST, ) left = (CELL_WIDTH - sprite.width) // 2 top = (CELL_HEIGHT - sprite.height) // 2 @@ -324,23 +348,13 @@ def _drop_side_bleed(image): w, h = rgba.size profile = _column_profile(rgba) # mean alpha per column (fast C resize) - segments: list[tuple[int, int, int]] = [] # (left, right, mass) - start = mass = 0 - started = False - for x, v in enumerate(profile + [0]): - if v > 2: - if not started: - start, mass, started = x, 0, True - mass += v - elif started: - segments.append((start, x, mass)) - started = False - - if len(segments) < 2: + runs = _content_runs(profile) + if len(runs) < 2: return rgba - keep_mass = max(m for _, _, m in segments) * _SIDE_LOBE_RATIO - keep = [(l, r) for l, r, m in segments if m >= keep_mass] - if len(keep) == len(segments): + masses = [sum(profile[l:r]) for l, r in runs] + keep_mass = max(masses) * _SIDE_LOBE_RATIO + keep = [run for run, m in zip(runs, masses) if m >= keep_mass] + if len(keep) == len(runs): return rgba # Zero every column band that isn't a kept segment (box paste, not per-pixel). 
@@ -355,53 +369,6 @@ def _drop_side_bleed(image): return rgba -def _connected_components(image) -> list[dict]: - """Flood-fill the alpha mask into connected blobs (4-connectivity).""" - alpha = image.getchannel("A") - w, h = image.size - data = alpha.tobytes() - visited = bytearray(w * h) - out: list[dict] = [] - - for start, a in enumerate(data): - if a <= _ALPHA_FLOOR or visited[start]: - continue - stack = [start] - visited[start] = 1 - pixels: list[int] = [] - min_x = w - min_y = h - max_x = 0 - max_y = 0 - while stack: - cur = stack.pop() - pixels.append(cur) - x = cur % w - y = cur // w - min_x = min(min_x, x) - min_y = min(min_y, y) - max_x = max(max_x, x) - max_y = max(max_y, y) - for nb, ok in ( - (cur - 1, x > 0), - (cur + 1, x + 1 < w), - (cur - w, y > 0), - (cur + w, y + 1 < h), - ): - if ok and not visited[nb] and data[nb] > _ALPHA_FLOOR: - visited[nb] = 1 - stack.append(nb) - out.append( - { - "pixels": pixels, - "area": len(pixels), - "bbox": (min_x, min_y, max_x + 1, max_y + 1), - "center_x": (min_x + max_x + 1) / 2, - } - ) - return out - - def _sever_expected_gutters(strip, frame_count: int): """Cut thin vertical gutters at expected frame boundaries before labeling. @@ -418,7 +385,7 @@ def _sever_expected_gutters(strip, frame_count: int): out = strip.copy() px = out.load() slot = out.width / frame_count - half = max(2, min(8, round(slot * 0.02))) + half = max(3, min(18, round(slot * 0.06))) for i in range(1, frame_count): x = round(i * slot) left = max(0, x - half) @@ -430,21 +397,6 @@ def _sever_expected_gutters(strip, frame_count: int): return out -def _segmentable(strip, frame_count: int) -> bool: - """True if the (gutter-severed) strip yields ≥ *frame_count* distinct blobs. - - Used only as a quality gate: a row that can't show this many separable poses - is a bad generation (caller retries / falls back), never silently sliced into - merged frames. 
- """ - components = _connected_components(strip) - if not components: - return False - largest = max(c["area"] for c in components) - seed_threshold = max(120, largest * 0.20) - return sum(1 for c in components if c["area"] >= seed_threshold) >= frame_count - - def _slot_crops(strip, frame_count: int) -> list: """Slice *strip* into *frame_count* uniform columns (one coordinate space). @@ -458,6 +410,61 @@ def _slot_crops(strip, frame_count: int) -> list: return [_drop_side_bleed(strip.crop((i * w0, 0, i * w0 + w0, h))) for i in range(frame_count)] +def _content_runs(profile: list[int], *, threshold: int = 2) -> list[tuple[int, int]]: + """Contiguous column spans whose alpha mass exceeds *threshold*. + + A column-projection of the alpha mask: empty (background) columns separate + one pose from the next, so the runs ARE the candidate frames. + """ + runs: list[tuple[int, int]] = [] + start: int | None = None + for x, v in enumerate(list(profile) + [0]): + if v > threshold: + if start is None: + start = x + elif start is not None: + runs.append((start, x)) + start = None + return runs + + +def _frame_x_ranges(strip, frame_count: int) -> list[tuple[int, int]] | None: + """Per-frame ``(left, right)`` column ranges from the row's empty gutters. + + The standard sprite-sheet slice — once poses are separated by real gaps + (which generation now enforces), splitting is just "find the empty columns": + + * spans == frames → one span per frame. + * spans > frames → merge across the smallest gaps. A detached halo/ear sits + a tiny gap from its body, while the inter-pose gutter is the big gap that + survives — so over-segmentation (and any over-eager gutter sever) repairs + itself by collapsing only the small internal gaps. + * spans < frames → poses are touching; not separable by gutters (the caller + raises for ``components`` or falls back to even slots for ``auto``). + + Ranges span content only; the caller crops full cell height, so tall ears / + halos are never cut. 
+ """ + profile = _column_profile(strip) + runs = _content_runs(profile) + if not runs: + return None + + # Drop trivial specks so stray noise never counts as a pose. + masses = [sum(profile[l:r]) for l, r in runs] + floor = max(masses) * 0.02 + runs = [run for run, m in zip(runs, masses) if m >= floor] + if len(runs) < frame_count: + return None + + groups = [[l, r] for l, r in runs] + while len(groups) > frame_count: + gi = min(range(len(groups) - 1), key=lambda i: groups[i + 1][0] - groups[i][1]) + groups[gi][1] = groups[gi + 1][1] + del groups[gi + 1] + return [(l, r) for l, r in groups] + + def extract_strip_frames( strip, frame_count: int, @@ -468,10 +475,15 @@ def extract_strip_frames( ) -> list: """Turn one generated row strip into *frame_count* frames. - Background is keyed out, the expected frame gutters are severed, then the - strip is sliced into equal columns. Connected components only *validate* that - the row holds *frame_count* separable poses (``components`` raises, ``auto`` - falls back to slicing the un-severed strip). + The background is keyed out, thin connecting bridges at the expected + boundaries are severed, then the strip is sliced at its empty chroma gutters + (:func:`_frame_x_ranges`) — the plain "find each object, make a frame" cut + that works once poses are spaced apart (which generation now enforces). + + Each frame is cropped at full cell height so tall ears / halos are never + clipped; :func:`_drop_side_bleed` trims any faint neighbour sliver. When the + poses are touching (fewer gutters than frames) ``components`` raises and + ``auto`` falls back to equal-width slots. *fit* (default) fits+centers each frame into a 192x208 cell — the standalone contract for callers that don't normalize. 
Hatching passes ``fit=False`` to @@ -487,12 +499,29 @@ def extract_strip_frames( strip = strip.convert("RGBA") strip = remove_background(strip, chroma_key=chroma_key) - severed = _sever_expected_gutters(strip, frame_count) - segmentable = _segmentable(severed, frame_count) - if method == "components" and not segmentable: - raise ValueError(f"could not segment {frame_count} sprites from strip") - frames = _slot_crops(severed if segmentable else strip, frame_count) + # Prefer the real gutters as-is: when poses are already spaced (generation + # enforces this), slicing the strip untouched keeps each pose's own bounds and + # never cuts through an unevenly-placed silhouette. Only fall back to severing + # the expected boundaries when gaps alone can't separate the row — i.e. poses + # are bridged by a shared shadow/glow/1px line and read as one blob. + source = strip + ranges = _frame_x_ranges(source, frame_count) + if ranges is None: + source = _sever_expected_gutters(strip, frame_count) + ranges = _frame_x_ranges(source, frame_count) + + if ranges is None: + if method == "components": + raise ValueError(f"could not segment {frame_count} sprites from strip") + frames = _slot_crops(source, frame_count) + else: + h = source.height + pad = max(2, min(16, round((source.width / max(1, frame_count)) * 0.04))) + frames = [ + _drop_side_bleed(source.crop((max(0, left - pad), 0, min(source.width, right + pad), h))) + for left, right in ranges + ] return [_fit_to_cell(f) for f in frames] if fit else frames @@ -535,15 +564,22 @@ def normalize_cells(frames_by_state: dict[str, list], *, pad: int = _NORMALIZE_P 1. **Cross-correlate** each frame's column profile against the per-state *median* profile to find the integer shift that locks the **body** in place — robust to limbs/cape because the body dominates the profile. - 2. 
**Union-crop** the registered frames through one shared window and apply - **one shared scale** + bottom-anchor, so size and baseline are uniform and - intra-state vertical motion (a jump's lift) is preserved. + 2. **Union-crop** through one shared state window, then scale every state by a + single global factor keyed to its median pose height, so the character is + the same on-screen size in every row while a jump's lift still fits. """ from PIL import Image blank = lambda: Image.new("RGBA", (CELL_WIDTH, CELL_HEIGHT), (0, 0, 0, 0)) + med = lambda vs: sorted(vs)[len(vs) // 2] # robust center; ignores a limb/cape outlier out: dict[str, list] = {} + prepared: dict[str, tuple[list, tuple[int, int, int, int], tuple[int, int]]] = {} + # Fill the cell — real petdex pets sit ~pad from the edges; the K cap below + # keeps a tall pose (a jump's lift) from clipping. + target_w = CELL_WIDTH - pad + target_h = CELL_HEIGHT - pad + for state, frames in frames_by_state.items(): rgba = [f.convert("RGBA") for f in frames] if not any(f.getbbox() for f in rgba): @@ -572,14 +608,34 @@ def normalize_cells(frames_by_state: dict[str, list], *, pad: int = _NORMALIZE_P shifted.alpha_composite(f, (margin + _best_shift(ref, prof, window), 0)) aligned.append(shifted) - # Shared window + scale over the registered set; bottom-anchored, centered. + # Shared window over the registered set; scale is resolved against a + # common apparent-character target below. boxes = [b for b in (a.getbbox() for a in aligned) if b] left = min(b[0] for b in boxes) top = min(b[1] for b in boxes) right = max(b[2] for b in boxes) bottom = max(b[3] for b in boxes) + prepared[state] = ( + aligned, + (left, top, right, bottom), + (med([b[2] - b[0] for b in boxes]), med([b[3] - b[1] for b in boxes])), + ) + + if not prepared: + return out + + # Uniform apparent size: scale each state by K / pose_h, so a row the model + # drew small renders as big as one it drew large. 
K is the one global cap that + # keeps the tallest/widest motion envelope (a jump's lift) inside the cell — + # for a still row union ≈ pose so its term ≈ target_h (full fill). + K = target_h + for (_aligned, (left, top, right, bottom), (_pose_w, pose_h)) in prepared.values(): uw, uh = right - left, bottom - top - scale = min((CELL_WIDTH - pad) / uw, (CELL_HEIGHT - pad) / uh) + K = min(K, target_h * pose_h / max(1, uh), target_w * pose_h / max(1, uw)) + + for state, (aligned, (left, top, right, bottom), (_pose_w, pose_h)) in prepared.items(): + uw, uh = right - left, bottom - top + scale = K / max(1, pose_h) sw, sh = max(1, round(uw * scale)), max(1, round(uh * scale)) px, py = round((CELL_WIDTH - sw) / 2), round((CELL_HEIGHT - pad // 2) - sh) @@ -587,7 +643,8 @@ def normalize_cells(frames_by_state: dict[str, list], *, pad: int = _NORMALIZE_P for a in aligned: crop = a.crop((left, top, right, bottom)) if crop.size != (sw, sh): - crop = crop.resize((sw, sh), Image.Resampling.LANCZOS) + # NEAREST keeps the pixel-art edges crisp; LANCZOS blurred them. + crop = crop.resize((sw, sh), Image.Resampling.NEAREST) cell = blank() cell.alpha_composite(crop, (px, py)) cells.append(cell) diff --git a/agent/pet/generate/imagegen.py b/agent/pet/generate/imagegen.py index 1e01592e4bf..00390d1ca8b 100644 --- a/agent/pet/generate/imagegen.py +++ b/agent/pet/generate/imagegen.py @@ -26,6 +26,16 @@ logger = logging.getLogger(__name__) # qualify. _REF_CAPABLE = ("openai", "openai-codex", "krea", "openrouter", "nous") +# Friendly label + one-line speed/quality note per reference-capable provider, +# surfaced in the desktop pet-gen picker so users can trade speed for fidelity. 
+_PROVIDER_META: dict[str, dict[str, str]] = { + "nous": {"label": "Nous Portal", "note": "Fast, balanced quality"}, + "openrouter": {"label": "OpenRouter", "note": "Fastest — Gemini Flash Image"}, + "openai": {"label": "OpenAI", "note": "Highest fidelity, slower"}, + "openai-codex": {"label": "OpenAI (Codex)", "note": "Highest fidelity, slower"}, + "krea": {"label": "Krea", "note": "Stylized, style-reference grounding"}, +} + class GenerationError(RuntimeError): """Raised on any image-generation failure (no provider, API error, IO).""" @@ -49,16 +59,25 @@ def _discover() -> None: logger.debug("image-gen plugin discovery failed: %s", exc) -def resolve_provider(*, require_references: bool = True) -> SpriteProvider: +def resolve_provider(*, require_references: bool = True, prefer: str | None = None) -> SpriteProvider: """Pick the image provider to use for sprite work. - Preference: the configured provider when it's reference-capable, else the - first available reference-capable provider. With *require_references* off we - fall back to any available provider (used for prompt-only base drafts). + Preference: an explicit *prefer* choice (the desktop pet-gen picker) when it's + reference-capable and configured, then the configured/active provider when + it's reference-capable, else the first available reference-capable provider. + With *require_references* off we fall back to any available provider (used for + prompt-only base drafts). """ _discover() from agent.image_gen_registry import get_active_provider, get_provider + # An explicit user pick wins when it's reference-capable and has credentials; + # otherwise we ignore it and fall through to the normal resolution. + if prefer: + chosen = get_provider(prefer) + if prefer in _REF_CAPABLE and chosen is not None and chosen.is_available(): + return SpriteProvider(name=prefer, provider=chosen, supports_references=True) + # Configured / active provider first. 
active = None try: @@ -83,11 +102,44 @@ def resolve_provider(*, require_references: bool = True) -> SpriteProvider: raise GenerationError( "Pet generation needs an image backend that supports reference images. " - "Open `hermes tools` → Image Generation and configure OpenRouter, Nous " - "Portal, or OpenAI (gpt-image-2) with an API key." + "Open `hermes tools` → Image Generation and configure Nous Portal, " + "OpenRouter, or OpenAI (gpt-image-2) with an API key." ) +def list_sprite_providers() -> list[dict]: + """The reference-capable providers available to pick for pet generation. + + Returns ``[{name, label, note, default}]`` for every ref-capable provider the + user actually has credentials for, marking the one :func:`resolve_provider` + would choose with no explicit preference. Empty when none is configured (the + picker hides itself). Best-effort: discovery hiccups yield an empty list. + """ + _discover() + from agent.image_gen_registry import get_provider + + try: + default_name = resolve_provider(require_references=True).name + except GenerationError: + default_name = "" + + out: list[dict] = [] + for name in _REF_CAPABLE: + provider = get_provider(name) + if provider is None or not provider.is_available(): + continue + meta = _PROVIDER_META.get(name, {}) + out.append( + { + "name": name, + "label": meta.get("label", name), + "note": meta.get("note", ""), + "default": name == default_name, + } + ) + return out + + def _save_local(image_ref: str, *, prefix: str) -> Path: """Return a local path for *image_ref*, downloading it if it's a URL.""" if image_ref.startswith(("http://", "https://")): @@ -116,10 +168,15 @@ def generate( reference_images: list[Path] | None = None, provider: SpriteProvider | None = None, prefix: str = "pet_gen", + aspect_ratio: str = "square", ) -> list[Path]: - """Generate *n* square sprite images and return their local paths. + """Generate *n* sprite images and return their local paths. 
*reference_images* grounds the output on a base image (required for rows). + *aspect_ratio* picks the canvas: ``"square"`` for single-character base + drafts, ``"landscape"`` for multi-frame row strips (the wider 1536px canvas + gives every frame real horizontal room so winged poses don't have to be + shrunk to avoid touching their neighbors). We *ask* for a transparent background, but fall back to an opaque generation (cleaned up downstream by the chroma-key pass) on models that reject the flag. Raises :class:`GenerationError` if nothing usable comes back. @@ -134,7 +191,7 @@ def generate( refs = [str(p) for p in (reference_images or [])] def _run(extra: dict) -> tuple[Path | None, str]: - kwargs: dict = {"aspect_ratio": "square", **extra} + kwargs: dict = {"aspect_ratio": aspect_ratio, **extra} if refs: # Providers disagree on the ref kwarg name: our OpenRouter/Nous # backends read ``reference_images``, OpenAI's gpt-image-2 reads diff --git a/agent/pet/generate/orchestrate.py b/agent/pet/generate/orchestrate.py index 238c490a22a..f160046ebf9 100644 --- a/agent/pet/generate/orchestrate.py +++ b/agent/pet/generate/orchestrate.py @@ -35,6 +35,10 @@ ProgressFn = Callable[[str, str], None] # back-to-back and routinely blow past the client's RPC timeout. Capped so we # don't hammer the provider's rate limit (one cold call can still be slow). _MAX_PARALLEL_GENERATIONS = 4 +# How many times to (re)generate a single row before accepting a best-effort +# slice. Early attempts demand clean per-pose gutters; the last is lenient so a +# stubborn row still yields frames instead of dropping out entirely. 
+_ROW_GEN_ATTEMPTS = 2 _MIN_FILLED_STATES = 6 _REQUIRED_STATES = frozenset({"idle", "running-right", "waving"}) @@ -80,6 +84,7 @@ def generate_base_drafts( *, n: int = 4, style: str = "auto", + reference_images: list[Path] | None = None, provider: SpriteProvider | None = None, on_draft: Callable[[int, Path], None] | None = None, is_cancelled: Callable[[], bool] | None = None, @@ -96,7 +101,10 @@ def generate_base_drafts( drafts and cancel any queued work (already-in-flight provider calls can't be hard-killed, but their results are dropped). """ - sprite = provider or imagegen.resolve_provider(require_references=False) + # A user reference image (e.g. their own pet) grounds every draft, so it + # needs a reference-capable provider — same requirement as the row passes. + refs = reference_images or None + sprite = provider or imagegen.resolve_provider(require_references=bool(refs)) cancelled = is_cancelled or (lambda: False) # Each draft is its own one-shot generation, run concurrently so the user @@ -104,25 +112,26 @@ def generate_base_drafts( # Each gets a distinct variation nudge so the options aren't near-duplicates. 
logger.info("pet generate: drafting %d base looks for %r (style=%s)", n, concept, style) - def _one(index: int) -> tuple[int, Path | None]: + def _one(index: int) -> tuple[int, Path | None, str | None]: if cancelled(): - return index, None + return index, None, None t0 = time.monotonic() variation = prompts.BASE_VARIATIONS[index % len(prompts.BASE_VARIATIONS)] prompt = prompts.build_base_prompt(concept, style=style, variation=variation) try: - out = imagegen.generate(prompt, n=1, provider=sprite, prefix="pet_base") + out = imagegen.generate(prompt, n=1, reference_images=refs, provider=sprite, prefix="pet_base") except Exception as exc: # noqa: BLE001 - tolerate a single failed draft logger.warning("pet generate: draft %d failed after %.1fs: %s", index, time.monotonic() - t0, exc) - return index, None + return index, None, str(exc) if not out: logger.warning("pet generate: draft %d produced no image", index) - return index, None + return index, None, "the image provider returned no image" logger.info("pet generate: draft %d ready in %.1fs", index, time.monotonic() - t0) - return index, _harden_transparency(out[0]) + return index, _harden_transparency(out[0]), None workers = max(1, min(n, _MAX_PARALLEL_GENERATIONS)) results: dict[int, Path] = {} + errors: list[str] = [] with ThreadPoolExecutor(max_workers=workers) as pool: futures = [pool.submit(_one, i) for i in range(n)] # as_completed runs in *this* (the caller's) thread, so on_draft — and any @@ -134,8 +143,10 @@ def generate_base_drafts( for pending in futures: pending.cancel() break - index, path = fut.result() + index, path, err = fut.result() if path is None: + if err: + errors.append(err) continue results[index] = path if on_draft is not None: @@ -146,10 +157,42 @@ def generate_base_drafts( drafts = [results[i] for i in sorted(results)] if not drafts and not cancelled(): - raise GenerationError("image generation produced no usable drafts") + # Surface *why* — every draft failed for a reason (a content-policy 
refusal + # on a name like "minion", a provider/auth error, …); the most common one + # is the representative cause. Far more useful than "no usable drafts". + raise GenerationError(_drafts_failed_reason(errors)) return drafts +def _drafts_failed_reason(errors: list[str]) -> str: + """The representative reason a draft round produced nothing, humanized.""" + if not errors: + return "image generation produced no usable drafts" + from collections import Counter + + return _humanize_image_error(Counter(errors).most_common(1)[0][0]) + + +def _humanize_image_error(error: str) -> str: + """Turn a raw provider error into a friendly, actionable sentence. + + The big one is moderation: image models refuse trademarked characters and + real people (e.g. "minion"), which reads as an opaque 400 otherwise. + """ + low = error.lower() + if any(s in low for s in ("moderation_blocked", "safety system", "content policy", "content_policy")): + return ( + "The image provider blocked this prompt — its safety filter rejects " + "trademarked characters and real people. Try an original description." + ) + if any(s in low for s in ("api key", "unauthorized", "401", "auth")): + return "The image provider rejected the request — check your API key in Settings → Providers." + if "rate limit" in low or "429" in low: + return "The image provider is rate-limiting — wait a moment and try again." + # Otherwise the first line, trimmed of the noisy provider envelope. + return error.splitlines()[0].strip()[:200] + + def hatch_pet( *, base_image: str | Path, @@ -194,25 +237,48 @@ def hatch_pet( if cancelled(): return state, None t0 = time.monotonic() - try: - strips = imagegen.generate( - prompts.build_row_prompt(state, count, label, style=style), - n=1, - reference_images=[base], - provider=sprite, - prefix=f"pet_row_{state}", - ) - # One image call per row (the expensive part). 
``auto`` validates by - # connected components with an equal-slot fallback; raw (fit=False) so - # normalize_cells registers the whole pet at once. We deliberately do - # NOT re-generate a ragged row — the registration pass salvages it far - # cheaper than another image-model round-trip. - frames = atlas.extract_strip_frames(strips[0], count, method="auto", fit=False) - logger.info("pet hatch %r: row %r ready in %.1fs", slug, state, time.monotonic() - t0) - return state, frames - except Exception as exc: # noqa: BLE001 - one bad row is tolerated (idle guaranteed) - logger.warning("pet hatch %r: row %r failed after %.1fs: %s", slug, state, time.monotonic() - t0, exc) - return state, None + last_exc: Exception | None = None + # Self-healing: a model occasionally returns a row whose poses are touching + # (no clean gutters), which slices badly. We retry such rolls; only the + # final attempt falls back to lenient ``auto`` slicing so a stubborn row + # still yields *something* rather than dropping the whole row. + for attempt in range(_ROW_GEN_ATTEMPTS): + if cancelled(): + return state, None + strict = attempt < _ROW_GEN_ATTEMPTS - 1 + try: + strips = imagegen.generate( + prompts.build_row_prompt(state, count, label, style=style), + n=1, + reference_images=[base], + provider=sprite, + prefix=f"pet_row_{state}", + # Wider canvas → each frame gets real horizontal room, so winged + # poses keep a full, healthy size and still leave clean gutters. + aspect_ratio="landscape", + ) + # ``components`` requires clean per-pose gutters (raises otherwise), + # so a touching roll is rejected and regenerated; the last attempt + # uses ``auto`` (equal-slot fallback, never raises). Raw (fit=False) + # so normalize_cells registers the whole pet at once. 
+ method = "components" if strict else "auto" + frames = atlas.extract_strip_frames(strips[0], count, method=method, fit=False) + logger.info( + "pet hatch %r: row %r ready in %.1fs (attempt %d)", + slug, state, time.monotonic() - t0, attempt + 1, + ) + return state, frames + except Exception as exc: # noqa: BLE001 - retried; one bad row is tolerated + last_exc = exc + logger.warning( + "pet hatch %r: row %r attempt %d/%d failed: %s", + slug, state, attempt + 1, _ROW_GEN_ATTEMPTS, exc, + ) + logger.warning( + "pet hatch %r: row %r gave up after %.1fs: %s", + slug, state, time.monotonic() - t0, last_exc, + ) + return state, None # running-left is derived by mirroring running-right (guaranteed-consistent # and one fewer generation), so we don't generate it directly. diff --git a/agent/pet/generate/prompts.py b/agent/pet/generate/prompts.py index c6afbc28313..eab72e593f3 100644 --- a/agent/pet/generate/prompts.py +++ b/agent/pet/generate/prompts.py @@ -76,6 +76,29 @@ def style_hint(style: str | None) -> str: return _STYLE_HINTS.get((style or "auto").strip().lower(), "") +# Row strips are generated on the wider landscape canvas (see imagegen.generate / +# orchestrate). The extra width is what lets each pose stay a healthy size AND +# leave a real gutter — used here only to cite concrete pixel numbers. +_ASSUMED_STRIP_WIDTH = 1536 + + +def _spacing_spec(frame_count: int) -> tuple[int, int]: + """(per-pose width px, gap px) for a row of *frame_count* poses. + + Pixel counts alone don't hold — the model fills each slot edge-to-edge with + the full wingspan, so neighbors touch even when bodies are spaced. The lever + that works is proportional containment on a wide canvas: give each pose its + own equal cell and keep the ENTIRE silhouette (wings/tail/halo included) + inside it. On the 1536px landscape strip ~70% occupancy still leaves a + generous gutter, so the pet stays a normal, good-looking size — no shrinking. 
+ """ + slots = max(1, frame_count) + slot_w = _ASSUMED_STRIP_WIDTH / slots + pose_px = round(slot_w * 0.7) + gap_px = max(48, round(slot_w * 0.3)) + return pose_px, gap_px + + # Per-draft nudges so the 4 base options are actually distinct — gpt-image returns # near-duplicates for a single prompt. We vary the *look* (palette, build, # expression, accents), NOT the pose, so the chosen base still grounds clean, @@ -118,14 +141,24 @@ def build_row_prompt(state: str, frame_count: int, concept: str, *, style: str | """ action = STATE_ACTIONS.get(state, "a simple idle pose") concept = (concept or "the mascot").strip() + pose_px, gap_px = _spacing_spec(frame_count) return ( f"Using the attached reference image as the exact same character " f"(same species, face, colors, markings, proportions, and props), " - f"draw a single horizontal strip of {frame_count} animation frames showing {action}. " - f"The {frame_count} poses must be evenly spaced left to right, each fully separated " - "by clear empty chroma-key gutters; silhouettes must NEVER touch, overlap, " - "share a shadow, share a ground line, share motion trails, or merge into " - "one connected shape. " + f"draw a single WIDE horizontal strip of {frame_count} animation frames showing {action}. " + f"LAYOUT: split the wide strip into {frame_count} equal vertical cells, one " + "pose centered in each cell. " + f"SPACING (critical): draw each pose at a consistent, healthy, clearly " + f"visible size (roughly {pose_px}px wide on a {_ASSUMED_STRIP_WIDTH}px " + f"strip) — do NOT shrink it tiny — but keep its ENTIRE silhouette " + f"(wings, tail, halo, horns, cape, every appendage) fully INSIDE its own " + f"cell. Leave at least {gap_px}px of empty chroma-key background between " + f"neighboring silhouettes at their closest point (wingtip to wingtip), and " + f"the same empty margin before the first pose and after the last. 
If a wing, " + f"cape, or tail would reach into a neighbor, FOLD or angle it inward rather " + f"than letting it cross the gap. Silhouettes must NEVER touch, overlap, " + f"share a shadow, share a ground line, share motion trails, or merge into " + f"one connected shape. " # Registration: a clean sprite sheet keeps the character locked in place # so only the action moves — this is what stops the loop sliding/pulsing. "REGISTRATION (critical): the character is the SAME height and SAME width " diff --git a/apps/desktop/src/app/pet-generate/components/draft-grid.tsx b/apps/desktop/src/app/pet-generate/components/draft-grid.tsx new file mode 100644 index 00000000000..abef61f027f --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/draft-grid.tsx @@ -0,0 +1,89 @@ +import { PixelEggSprite } from '@/components/pet/pixel-egg-sprite' +import { Button } from '@/components/ui/button' +import { useI18n } from '@/i18n' +import { PawPrint } from '@/lib/icons' +import { selectableCardClass } from '@/lib/selectable-card' +import { cn } from '@/lib/utils' + +const VARIANT_COUNT = 4 + +interface DraftGridProps { + drafts: { index: number; dataUri: string }[] + generating: boolean + hasDrafts: boolean + onCancel: () => void + onHatch: () => void + onSelect: (index: number) => void + selected: number | null +} + +export function DraftGrid({ drafts, generating, hasDrafts, onCancel, onHatch, onSelect, selected }: DraftGridProps) { + const { t } = useI18n() + const copy = t.commandCenter.generatePet + + const slots = generating + ? Array.from({ length: VARIANT_COUNT }, (_, i) => drafts.find(draft => draft.index === i) ?? null) + : drafts + + return ( +
+
+ + {copy.generating} + + + {Math.min(drafts.length, VARIANT_COUNT)}/{VARIANT_COUNT} + +
+ +
+ {slots.map((draft, i) => { + // A streamed draft is selectable immediately — even mid-generation — + // so the user can commit to one without waiting for the rest. + const isSelected = draft != null && selected === draft.index + + return ( + + ) + })} +
+ + {/* Same abort/go-back text link in both states (sits right under the grid); + once drafts land, the full-width Hatch drops in below it. */} + + {hasDrafts && ( + + )} +
+ ) +} diff --git a/apps/desktop/src/app/pet-generate/components/empty-hint.tsx b/apps/desktop/src/app/pet-generate/components/empty-hint.tsx new file mode 100644 index 00000000000..99b9822ea82 --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/empty-hint.tsx @@ -0,0 +1,27 @@ +import { Button } from '@/components/ui/button' + +interface EmptyHintProps { + onExample: (prompt: string) => void +} + +// Creative seed prompts — specifics make better pets (petdex's own advice). +// Short chips that wrap into a tight, centered cluster (capped width → 2 rows). +const EXAMPLE_PROMPTS = ['bubble-tea otter', 'sock elf', 'pixel dragon', 'office cat', 'neon axolotl', 'moss golem'] + +export function EmptyHint({ onExample }: EmptyHintProps) { + return ( +
+ {EXAMPLE_PROMPTS.map(example => ( + + ))} +
+ ) +} diff --git a/apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx b/apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx new file mode 100644 index 00000000000..d3161d2a771 --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/generate-unavailable.tsx @@ -0,0 +1,52 @@ +import { Button } from '@/components/ui/button' +import { ExternalLink } from '@/lib/external-link' +import { PawPrint, Settings2 } from '@/lib/icons' + +interface GenerateUnavailableProps { + onSetup: () => void +} + +// Shown when no reference-capable image backend is configured: generation is +// impossible, so we replace the prompt entirely with a friendly path to set one +// up (in-app) plus where to grab a key. +export function GenerateUnavailable({ onSetup }: GenerateUnavailableProps) { + return ( +
+ + + +
+

Add an image backend to generate

+

+ Hatching a custom pet needs a provider that can ground on a reference image. +

+
+ +

+ Grab a key from + + Nous Portal + + · + + OpenRouter + + · + + OpenAI + +

+
+ ) +} diff --git a/apps/desktop/src/app/pet-generate/components/hatch-preview.tsx b/apps/desktop/src/app/pet-generate/components/hatch-preview.tsx new file mode 100644 index 00000000000..8adb6c3f9f2 --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/hatch-preview.tsx @@ -0,0 +1,137 @@ +import { useEffect, useState } from 'react' + +import { PetSprite } from '@/components/pet/pet-sprite' +import { PetStarShower } from '@/components/pet/pet-star-shower' +import { PixelEggSprite } from '@/components/pet/pixel-egg-sprite' +import { Alert, AlertDescription } from '@/components/ui/alert' +import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { useI18n } from '@/i18n' +import { triggerHaptic } from '@/lib/haptics' +import { Loader2, PawPrint, RefreshCw } from '@/lib/icons' +import { type PetInfo } from '@/store/pet' + +import { frameCountForRow } from '../lib/frame-count' + +const PREVIEW_SCALE = 0.7 +const PREVIEW_STATE_MS = 1400 + +const PREVIEW_ROWS = ['idle', 'waving', 'running-right', 'running-left', 'running', 'review', 'jumping', 'failed', 'waiting'] + +interface HatchPreviewProps { + pet: PetInfo + adopting: boolean + error: string | null + onAdopt: (name: string) => void + onDiscard: () => void +} + +export function HatchPreview({ pet, adopting, error, onAdopt, onDiscard }: HatchPreviewProps) { + const { t } = useI18n() + const copy = t.commandCenter.generatePet + // Empty so the "Name your pet" placeholder shows; blank adopt keeps the + // provisional name from the prompt. + const [name, setName] = useState('') + // Play the egg's crack/hatch frames once before swapping in the live pet. + const [revealed, setRevealed] = useState(false) + // Right after the egg cracks the pet plays its "yay" jump a couple times, then + // hands off to the normal state-cycling preview. 
+ const [celebrating, setCelebrating] = useState(false) + const [stateIndex, setStateIndex] = useState(0) + const previewRows = (pet.stateRows?.length ? pet.stateRows : PREVIEW_ROWS).filter(row => frameCountForRow(pet, row) > 0) + const rows = previewRows.length > 0 ? previewRows : ['idle'] + const activeRow = rows[stateIndex % rows.length] ?? 'idle' + const canJump = frameCountForRow(pet, 'jumping') > 0 + const rowOverride = celebrating && canJump ? 'jumping' : activeRow + + useEffect(() => { + const id = setInterval(() => setStateIndex(i => (i + 1) % rows.length), PREVIEW_STATE_MS) + + return () => clearInterval(id) + }, [rows.length]) + + // On reveal: celebrate (jump) ~2 loops, then drop into the cycling preview. + useEffect(() => { + if (!revealed) { + return + } + + setCelebrating(true) + + const id = setTimeout(() => { + setCelebrating(false) + setStateIndex(0) + }, 2 * (pet.loopMs ?? 1100)) + + return () => clearTimeout(id) + }, [revealed, pet.loopMs]) + + useEffect(() => { + setStateIndex(0) + setName('') + setRevealed(false) + setCelebrating(false) + }, [pet.slug]) + + const previewInfo: PetInfo = { ...pet, scale: PREVIEW_SCALE } + + return ( +
+ {/* Fills the (now narrow) dialog so the pet frame is the screen width. */} +
+ {revealed ? ( + <> +
+ +
+ +
+
+ + + ) : ( + // The egg cracks open, then we swap in the live pet. + { + setRevealed(true) + triggerHaptic('crisp') + }} + size={150} + /> + )} +
+ + setName(event.target.value)} + onKeyDown={event => { + if (event.key === 'Enter') { + event.preventDefault() + onAdopt(name) + } + }} + placeholder={copy.namePlaceholder} + value={name} + /> + + {error && ( + + {error} + + )} + +
+ + +
+
+ ) +} diff --git a/apps/desktop/src/app/pet-generate/components/hatching-view.tsx b/apps/desktop/src/app/pet-generate/components/hatching-view.tsx new file mode 100644 index 00000000000..8e347741d6b --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/hatching-view.tsx @@ -0,0 +1,24 @@ +import { PetEggHatch } from '@/components/pet/pet-egg-hatch' +import { useI18n } from '@/i18n' +import { cancelHatch, type PetHatchStage } from '@/store/pet-generate' + +interface HatchingViewProps { + stage: PetHatchStage | null +} + +// The hatch progress screen — a beating egg with a phase-tracking subtitle +// (per-row → composing → saving). +export function HatchingView({ stage }: HatchingViewProps) { + const { t } = useI18n() + const copy = t.commandCenter.generatePet + + const subtitle = stage + ? stage.phase === 'row' + ? copy.hatchRow(stage.state ?? '', stage.done ?? 0, stage.total ?? 0) + : stage.phase === 'compose' + ? copy.hatchComposing + : copy.hatchSaving + : copy.hatchingSub + + return +} diff --git a/apps/desktop/src/app/pet-generate/components/provider-picker.tsx b/apps/desktop/src/app/pet-generate/components/provider-picker.tsx new file mode 100644 index 00000000000..bd40a30ba31 --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/provider-picker.tsx @@ -0,0 +1,53 @@ +import { useStore } from '@nanostores/react' + +import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from '@/components/ui/dropdown-menu' +import { Check, ChevronDown } from '@/lib/icons' +import { $petGenProvider, $petGenProviders, setPetGenProvider } from '@/store/pet-generate' + +// Image-backend picker for pet generation — the composer's model-pill pattern: +// a quiet trigger + a dropdown of options, each with a one-line speed/quality +// note. Hidden unless there are 2+ reference-capable backends (nothing to pick). 
+export function ProviderPicker() { + const providers = useStore($petGenProviders) + const picked = useStore($petGenProvider) + + if (providers.length < 2) { + return null + } + + const fallback = providers.find(p => p.default) ?? providers[0] + const current = providers.find(p => p.name === picked) ?? fallback + + return ( + + + {/* Plain text affordance (matches "Add a reference"), not a padded pill. */} + + + {/* The picker lives inside the pet-gen Dialog (z-130) and portals to body, + so lift its menu above the dialog or it opens behind it. */} + + {providers.map(provider => ( + setPetGenProvider(provider.default ? '' : provider.name)} + > + + {provider.label} + {provider.name === current?.name && } + + {provider.note && {provider.note}} + + ))} + + + ) +} diff --git a/apps/desktop/src/app/pet-generate/components/reference-chip.tsx b/apps/desktop/src/app/pet-generate/components/reference-chip.tsx new file mode 100644 index 00000000000..266658a9dab --- /dev/null +++ b/apps/desktop/src/app/pet-generate/components/reference-chip.tsx @@ -0,0 +1,48 @@ +import { useState } from 'react' + +import { ImageLightbox } from '@/components/chat/zoomable-image' +import { useImageDownload } from '@/hooks/use-image-download' +import { useI18n } from '@/i18n' +import { X } from '@/lib/icons' + +interface ReferenceChipProps { + name: string + onRemove: () => void + src: string +} + +// The reference photo as an attachment chip: filename + thumbnail that opens +// the shared image viewer (lightbox), with a remove affordance. +export function ReferenceChip({ name, onRemove, src }: ReferenceChipProps) { + const { t } = useI18n() + const { download, saving } = useImageDownload(src) + const [viewing, setViewing] = useState(false) + + return ( +
+ + + {name || 'Reference'} + + + +
+ ) +} diff --git a/apps/desktop/src/app/pet-generate/lib/frame-count.ts b/apps/desktop/src/app/pet-generate/lib/frame-count.ts new file mode 100644 index 00000000000..97a49a8cd6b --- /dev/null +++ b/apps/desktop/src/app/pet-generate/lib/frame-count.ts @@ -0,0 +1,26 @@ +import { type PetInfo } from '@/store/pet' + +// Sprite row → the PetInfo frame-count key it resolves to (directional walks and +// aliases collapse onto their base state). +const ROW_TO_FRAME_KEY: Record = { + idle: 'idle', + wave: 'wave', + waving: 'wave', + jump: 'jump', + jumping: 'jump', + run: 'run', + running: 'run', + 'running-right': 'run', + 'running-left': 'run', + failed: 'failed', + review: 'review', + waiting: 'waiting' +} + +// Real frame count for a row, preferring the concrete per-row count, then the +// per-state count, then the mapped base state, then the sheet-wide default. +export function frameCountForRow(pet: PetInfo, row: string): number { + const mapped = ROW_TO_FRAME_KEY[row] + + return pet.framesByRow?.[row] ?? pet.framesByState?.[row] ?? (mapped ? pet.framesByState?.[mapped] : undefined) ?? pet.framesPerState ?? 0 +} diff --git a/apps/desktop/src/app/pet-generate/lib/read-reference-image.ts b/apps/desktop/src/app/pet-generate/lib/read-reference-image.ts new file mode 100644 index 00000000000..06c480e95ed --- /dev/null +++ b/apps/desktop/src/app/pet-generate/lib/read-reference-image.ts @@ -0,0 +1,49 @@ +const DEFAULT_MAX_INPUT_BYTES = 16 * 1024 * 1024 + +function loadImage(url: string): Promise { + const img = new Image() + + return new Promise((resolve, reject) => { + img.onload = () => resolve(img) + img.onerror = () => reject(new Error('unreadable image')) + img.src = url + }) +} + +// Read an image file as a downscaled PNG data URL. We decode from an object URL +// (not readAsDataURL) so large files don't inflate into giant base64 strings +// before we scale them down for generation. 
+export async function readReferenceImage( + file: File, + max = 1024, + maxInputBytes = DEFAULT_MAX_INPUT_BYTES +): Promise { + if (file.size > maxInputBytes) { + throw new Error('reference image too large') + } + + const objectUrl = URL.createObjectURL(file) + + try { + const img = await loadImage(objectUrl) + const scale = Math.min(1, max / Math.max(img.width, img.height)) + const width = Math.max(1, Math.round(img.width * scale)) + const height = Math.max(1, Math.round(img.height * scale)) + + const canvas = document.createElement('canvas') + canvas.width = width + canvas.height = height + + const ctx = canvas.getContext('2d') + + if (!ctx) { + throw new Error('could not create canvas context') + } + + ctx.drawImage(img, 0, 0, width, height) + + return canvas.toDataURL('image/png') + } finally { + URL.revokeObjectURL(objectUrl) + } +} diff --git a/apps/desktop/src/app/pet-generate/pet-generate-content.tsx b/apps/desktop/src/app/pet-generate/pet-generate-content.tsx new file mode 100644 index 00000000000..2c6f2a815de --- /dev/null +++ b/apps/desktop/src/app/pet-generate/pet-generate-content.tsx @@ -0,0 +1,291 @@ +import { useStore } from '@nanostores/react' +import { useEffect, useRef } from 'react' +import { useNavigate } from 'react-router-dom' + +import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request' +import { SETTINGS_ROUTE } from '@/app/routes' +import { Alert, AlertDescription } from '@/components/ui/alert' +import { DialogHeader, DialogTitle } from '@/components/ui/dialog' +import { GenerateButton } from '@/components/ui/generate-button' +import { Input } from '@/components/ui/input' +import { useI18n } from '@/i18n' +import { triggerHaptic } from '@/lib/haptics' +import { Egg, ImageIcon } from '@/lib/icons' +import { cn } from '@/lib/utils' +import { + $petGenAvailable, + $petGenDrafts, + $petGenError, + $petGenInput, + $petGenPreview, + $petGenRefImage, + $petGenRefName, + $petGenSelected, + $petGenStage, + $petGenStatus, + 
adoptHatched, + cancelGenerate, + checkPetGenAvailable, + cleanPetName, + closePetGenerate, + discardDrafts, + discardHatched, + generateDrafts, + hatchSelected +} from '@/store/pet-generate' + +import { DraftGrid } from './components/draft-grid' +import { EmptyHint } from './components/empty-hint' +import { GenerateUnavailable } from './components/generate-unavailable' +import { HatchPreview } from './components/hatch-preview' +import { HatchingView } from './components/hatching-view' +import { ProviderPicker } from './components/provider-picker' +import { ReferenceChip } from './components/reference-chip' +import { readReferenceImage } from './lib/read-reference-image' + +// The generate → hatch → adopt controller. A thin view over the `pet-generate` +// store; the store owns the steps and persists inputs across close/reopen. +export function PetGenerateContent() { + const { t } = useI18n() + const copy = t.commandCenter.generatePet + const { requestGateway } = useGatewayRequest() + const navigate = useNavigate() + + const status = useStore($petGenStatus) + const error = useStore($petGenError) + const available = useStore($petGenAvailable) + // `null` = not yet probed → stay optimistic (show the prompt); only the + // confirmed-no-backend case swaps in the setup card. + const unavailable = available === false + const drafts = useStore($petGenDrafts) + const selected = useStore($petGenSelected) + const preview = useStore($petGenPreview) + const stage = useStore($petGenStage) + + // Inputs live in atoms so they survive a close/reopen (and background runs). + const prompt = useStore($petGenInput) + const refImage = useStore($petGenRefImage) + const refName = useStore($petGenRefName) + const fileRef = useRef(null) + + // Probe backend availability on open — and again whenever the content + // remounts (e.g. after returning from the providers settings), so adding a + // key flips the setup card to the prompt with no manual refresh. 
+ useEffect(() => { + void checkPetGenAvailable(requestGateway) + }, [requestGateway]) + + const busy = status === 'generating' || status === 'hatching' + const hasDrafts = drafts.length > 0 + const generating = status === 'generating' + + // The idle "describe a pet" state — egg + suggestions get generous, equidistant + // breathing room (gap-4) from the prompt; the working states stay compact. + const isEmptyState = + !hasDrafts && + !generating && + status !== 'hatching' && + status !== 'preview' && + status !== 'adopting' && + status !== 'stale' + + const generate = () => { + if ((prompt.trim() || refImage) && !busy) { + void generateDrafts(requestGateway, { prompt: prompt.trim(), referenceImage: refImage ?? undefined }) + } + } + + const clearReference = () => { + $petGenRefImage.set(null) + $petGenRefName.set('') + } + + const pickReference = (file: File | undefined) => { + if (!file) { + return + } + + const mapReferenceError = (reason: unknown): string => { + const message = reason instanceof Error ? reason.message.toLowerCase() : '' + + return message.includes('too large') ? copy.referenceImageTooLarge : copy.referenceImageInvalid + } + + void readReferenceImage(file) + .then(dataUrl => { + $petGenRefImage.set(dataUrl) + $petGenRefName.set(file.name) + // Clear picker-only errors once the reference is valid again. + + if ($petGenStatus.get() === 'error' && $petGenDrafts.get().length === 0) { + $petGenStatus.set('idle') + $petGenError.set(null) + } + }) + .catch(reason => { + $petGenRefImage.set(null) + $petGenRefName.set('') + $petGenError.set(mapReferenceError(reason)) + + if (!busy) { + $petGenStatus.set('error') + } + }) + } + + // One-click an example prompt straight into a draft round. + const runExample = (example: string) => { + $petGenInput.set(example) + void generateDrafts(requestGateway, { prompt: example }) + } + + // Hatch the selected draft. 
The user can pick one before the rest stream in — + // if so, abort the remaining generations first (keeping the drafts we have). + // The prompt is grounding text, not a label; the user names it on reveal. + const hatch = () => { + if (selected === null) { + return + } + + if (generating) { + cancelGenerate() + } + + void hatchSelected(requestGateway, { name: cleanPetName(prompt), prompt: prompt.trim() }) + } + + const adopt = (finalName: string) => { + void adoptHatched(requestGateway, finalName).then(out => { + if (out.ok) { + triggerHaptic('crisp') + closePetGenerate() + } + }) + } + + // The header title tracks the phase instead of sticking on "Generate a pet". + const headerTitle = + status === 'hatching' ? copy.spawning : status === 'preview' || status === 'adopting' ? copy.hatched : copy.title + + // Send the user to set up a key without closing — the overlay yields to the + // settings route (useRouteOverlayActive) and reappears + re-checks on return. + const setupImageGen = () => navigate(`${SETTINGS_ROUTE}?tab=providers`) + + // Prompt input only belongs on the describe/draft screens (and never when + // there's no backend to generate with). + const showPrompt = !unavailable && status !== 'hatching' && status !== 'preview' && status !== 'adopting' + + return ( + <> + {unavailable ? ( + {copy.title} + ) : ( + + {headerTitle} + + )} + +
+ {/* Concept prompt with the inline sparkle generate/stop affordance (the + same primitive as the commit-message + project-idea fields). */} + {showPrompt && ( +
+
+ $petGenInput.set(event.target.value)} + onKeyDown={event => { + if (event.key === 'Enter') { + event.preventDefault() + generate() + } + }} + placeholder={copy.placeholder} + value={prompt} + /> + +
+ +
+ + {refImage ? ( + + ) : ( + + )} +
+ + {/* Optional reference photo — make a pet from the user's own image. + Styled like the chat composer's attachment pill. */} + { + pickReference(event.target.files?.[0]) + event.target.value = '' + }} + ref={fileRef} + type="file" + /> +
+ )} + + {/* Hatch failed but the drafts are still here — show why above the grid so + the user can re-pick and retry without losing their options. */} + {status === 'error' && hasDrafts && ( + + {error || copy.genericError} + + )} + + {unavailable ? ( + + ) : status === 'stale' ? ( + + {copy.staleBackend} + + ) : status === 'hatching' ? ( + + ) : (status === 'preview' || status === 'adopting') && preview ? ( + void discardHatched(requestGateway)} + pet={preview} + /> + ) : !hasDrafts && !generating ? ( + // Doubles as the error-empty state — the failure reason rides the + // dialog's footer banner, so here we just offer the retry sparks. + + ) : ( + $petGenSelected.set(index)} + selected={selected} + /> + )} +
+ + ) +} diff --git a/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx b/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx index 954dac23bfd..cd262e142c6 100644 --- a/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx +++ b/apps/desktop/src/app/pet-generate/pet-generate-overlay.tsx @@ -6,92 +6,37 @@ * breathe: a device-framed header, its own concept prompt, a roomy draft grid * that streams in live, and the egg-hatch + reveal flow. It's a thin view over * the `pet-generate` store; the store owns the generate → hatch → adopt steps. + * + * This file is just the dialog shell + sizing; the flow lives in + * `PetGenerateContent`, and each screen is its own atomic component under + * `./components`. */ import { useStore } from '@nanostores/react' -import { useEffect, useState } from 'react' -import { useNavigate } from 'react-router-dom' -import { SETTINGS_ROUTE } from '@/app/routes' import { useGatewayRequest } from '@/app/gateway/hooks/use-gateway-request' import { useRouteOverlayActive } from '@/app/hooks/use-route-overlay-active' -import { PetEggHatch } from '@/components/pet/pet-egg-hatch' -import { PetStarShower } from '@/components/pet/pet-star-shower' -import { PetSprite } from '@/components/pet/pet-sprite' -import { PixelEggSprite } from '@/components/pet/pixel-egg-sprite' -import { Alert, AlertDescription } from '@/components/ui/alert' -import { Button } from '@/components/ui/button' -import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog' -import { GenerateButton } from '@/components/ui/generate-button' -import { Input } from '@/components/ui/input' +import { Dialog, DialogContent } from '@/components/ui/dialog' import { useI18n } from '@/i18n' -import { ExternalLink } from '@/lib/external-link' -import { triggerHaptic } from '@/lib/haptics' -import { Egg, Loader2, PawPrint, RefreshCw, Settings2 } from '@/lib/icons' -import { selectableCardClass } from '@/lib/selectable-card' import { cn } from 
'@/lib/utils' -import { type PetInfo } from '@/store/pet' import { - $petGenAvailable, $petGenDrafts, $petGenerateOpen, $petGenError, - $petGenPreview, - $petGenSelected, - $petGenStage, $petGenStatus, - adoptHatched, - cancelGenerate, - cancelHatch, - checkPetGenAvailable, - cleanPetName, - cleanupPetGen, - closePetGenerate, - discardHatched, - generateDrafts, - hatchSelected + cleanupPetGenOnClose, + closePetGenerate } from '@/store/pet-generate' -const VARIANT_COUNT = 4 -const PREVIEW_SCALE = 0.7 -const PREVIEW_ROWS = [ - 'idle', - 'waving', - 'running-right', - 'running-left', - 'running', - 'review', - 'jumping', - 'failed', - 'waiting' -] -const PREVIEW_STATE_MS = 1400 - -const ROW_TO_FRAME_KEY: Record = { - idle: 'idle', - wave: 'wave', - waving: 'wave', - jump: 'jump', - jumping: 'jump', - run: 'run', - running: 'run', - 'running-right': 'run', - 'running-left': 'run', - failed: 'failed', - review: 'review', - waiting: 'waiting' -} - -function frameCountForRow(pet: PetInfo, row: string): number { - const byState = pet.framesByState - const mapped = ROW_TO_FRAME_KEY[row] - return byState?.[row] ?? (mapped ? byState?.[mapped] : undefined) ?? pet.framesPerState ?? 0 -} +import { PetGenerateContent } from './pet-generate-content' export function PetGenerateOverlay() { + const { t } = useI18n() + const { requestGateway } = useGatewayRequest() const open = useStore($petGenerateOpen) const status = useStore($petGenStatus) - const { requestGateway } = useGatewayRequest() + const error = useStore($petGenError) + const drafts = useStore($petGenDrafts) // Yield the screen to a full-screen route overlay (e.g. /settings while the // user adds an image-gen key) without tearing down — the store keeps us open, @@ -102,449 +47,39 @@ export function PetGenerateOverlay() { const handleOpenChange = (next: boolean) => { if (!next) { - // Deletes a hatched-but-unadopted preview pet so it doesn't linger, then - // resets all generation state. 
- cleanupPetGen(requestGateway) + cleanupPetGenOnClose(requestGateway) + // Never interrupt in-flight work. Generating/hatching continues in the + // background; only an unadopted finished preview is discarded on close. closePetGenerate() } } // The draft screen needs room for the 2×2 grid; the single-pet screens // (hatch egg, reveal) shrink to the pet's frame so it isn't lost in a wide box. + // `fitContent` lets the dialog size to content; the `min-w` floors each phase. const single = status === 'hatching' || status === 'preview' || status === 'adopting' + const copy = t.commandCenter.generatePet + + // The footer banner narrates the dialog's async state: the failure reason on a + // dead-end error, else the "you can close this, we'll notify you" reassurance + // while a generate/hatch runs in the background. + const working = status === 'generating' || status === 'hatching' + const errored = status === 'error' && drafts.length === 0 + const banner = errored ? error || copy.genericError : working ? copy.backgroundHint : undefined return ( {open && } ) } - -function PetGenerateContent() { - const { t } = useI18n() - const copy = t.commandCenter.generatePet - const { requestGateway } = useGatewayRequest() - const navigate = useNavigate() - - const status = useStore($petGenStatus) - const error = useStore($petGenError) - const available = useStore($petGenAvailable) - // `null` = not yet probed → stay optimistic (show the prompt); only the - // confirmed-no-backend case swaps in the setup card. - const unavailable = available === false - const drafts = useStore($petGenDrafts) - const selected = useStore($petGenSelected) - const preview = useStore($petGenPreview) - const stage = useStore($petGenStage) - - const [prompt, setPrompt] = useState('') - - // Probe backend availability on open — and again whenever the content - // remounts (e.g. after returning from the providers settings), so adding a - // key flips the setup card to the prompt with no manual refresh. 
- useEffect(() => { - void checkPetGenAvailable(requestGateway) - }, [requestGateway]) - - const busy = status === 'generating' || status === 'hatching' - const hasDrafts = drafts.length > 0 - const generating = status === 'generating' - // The idle "describe a pet" state — egg + suggestions get generous, equidistant - // breathing room (gap-7.5) from the prompt; the working states stay compact. - const isEmptyState = - !hasDrafts && - !generating && - status !== 'hatching' && - status !== 'preview' && - status !== 'adopting' && - status !== 'stale' - - const close = () => { - cleanupPetGen(requestGateway) - closePetGenerate() - } - - const generate = () => { - if (prompt.trim() && !busy) { - void generateDrafts(requestGateway, { prompt: prompt.trim() }) - } - } - - // One-click an example prompt straight into a draft round. - const runExample = (example: string) => { - setPrompt(example) - void generateDrafts(requestGateway, { prompt: example }) - } - - // Hatch with a clean default name derived from the prompt (the prompt itself - // is grounding text, not a label); the user names it on the reveal screen. - const hatch = () => { - if (prompt.trim()) { - void hatchSelected(requestGateway, { name: cleanPetName(prompt), prompt: prompt.trim() }) - } - } - - const adopt = (finalName: string) => { - void adoptHatched(requestGateway, finalName).then(out => { - if (out.ok) { - triggerHaptic('crisp') - close() - } - }) - } - - // The header title tracks the phase instead of sticking on "Generate a pet". - const headerTitle = - status === 'hatching' ? copy.spawning : status === 'preview' || status === 'adopting' ? copy.hatched : copy.title - // Send the user to set up a key without closing — the overlay yields to the - // settings route (useRouteOverlayActive) and reappears + re-checks on return. 
- const setupImageGen = () => navigate(`${SETTINGS_ROUTE}?tab=providers`) - - // Prompt input only belongs on the describe/draft screens (and never when - // there's no backend to generate with). - const showPrompt = !unavailable && status !== 'hatching' && status !== 'preview' && status !== 'adopting' - - return ( - <> - {unavailable ? ( - {copy.title} - ) : ( - - {headerTitle} - - )} - -
- {/* Concept prompt with the inline sparkle generate/stop affordance (the - same primitive as the commit-message + project-idea fields). */} - {showPrompt && ( -
- setPrompt(event.target.value)} - onKeyDown={event => { - if (event.key === 'Enter') { - event.preventDefault() - generate() - } - }} - placeholder={copy.placeholder} - value={prompt} - /> - -
- )} - - {error && !unavailable && status !== 'preview' && status !== 'adopting' && ( - - {error} - - )} - - {unavailable ? ( - - ) : status === 'stale' ? ( - - {copy.staleBackend} - - ) : status === 'hatching' ? ( - - ) : (status === 'preview' || status === 'adopting') && preview ? ( - void discardHatched(requestGateway)} - pet={preview} - /> - ) : !hasDrafts && !generating ? ( - - ) : ( - $petGenSelected.set(index)} - selected={selected} - /> - )} -
- - ) -} - -// Creative seed prompts — specifics make better pets (petdex's own advice). -// Doubling as guidance and a one-click way to see the flow. -const EXAMPLE_PROMPTS = ['a bubble-tea otter', 'a tiny sock elf', 'a pixel dragon', 'a grumpy office cat', 'a neon axolotl'] - -// Shown when no reference-capable image backend is configured: generation is -// impossible, so we replace the prompt entirely with a friendly path to set one -// up (in-app) plus where to grab a key. -function GenerateUnavailable({ onSetup }: { onSetup: () => void }) { - return ( -
- - - -
-

Add an image backend to generate

-

- Hatching a custom pet needs a provider that can ground on a reference image. -

-
- -

- Grab a key from - - Nous Portal - - · - - OpenRouter - - · - - OpenAI - -

-
- ) -} - -function EmptyHint({ onExample }: { onExample: (prompt: string) => void }) { - return ( -
-

Need a spark?

-
- {EXAMPLE_PROMPTS.map(example => ( - - ))} -
-
- ) -} - -function HatchingView({ stage }: { stage: { phase: string; state?: string; done?: number; total?: number } | null }) { - const { t } = useI18n() - const copy = t.commandCenter.generatePet - - const subtitle = stage - ? stage.phase === 'row' - ? copy.hatchRow(stage.state ?? '', stage.done ?? 0, stage.total ?? 0) - : stage.phase === 'compose' - ? copy.hatchComposing - : copy.hatchSaving - : copy.hatchingSub - - return -} - -interface DraftGridProps { - busy: boolean - drafts: { index: number; dataUri: string }[] - generating: boolean - hasDrafts: boolean - onHatch: () => void - onSelect: (index: number) => void - selected: number | null -} - -function DraftGrid({ busy, drafts, generating, hasDrafts, onHatch, onSelect, selected }: DraftGridProps) { - const { t } = useI18n() - const copy = t.commandCenter.generatePet - - const slots = generating - ? Array.from({ length: VARIANT_COUNT }, (_, i) => drafts.find(draft => draft.index === i) ?? null) - : drafts - - return ( -
- {generating && ( -
- {copy.generating} - - {drafts.length}/{VARIANT_COUNT} - -
- )} - -
- {slots.map((draft, i) => { - const isSelected = !generating && draft != null && selected === draft.index - - return ( - - ) - })} -
- - {hasDrafts && ( - - )} -
- ) -} - -interface HatchPreviewProps { - pet: PetInfo - adopting: boolean - error: string | null - onAdopt: (name: string) => void - onDiscard: () => void -} - -function HatchPreview({ pet, adopting, error, onAdopt, onDiscard }: HatchPreviewProps) { - const { t } = useI18n() - const copy = t.commandCenter.generatePet - // Empty so the "Name your pet" placeholder shows; blank adopt keeps the - // provisional name from the prompt. - const [name, setName] = useState('') - // Play the egg's crack/hatch frames once before swapping in the live pet. - const [revealed, setRevealed] = useState(false) - // Right after the egg cracks the pet plays its "yay" jump a couple times, then - // hands off to the normal state-cycling preview. - const [celebrating, setCelebrating] = useState(false) - const [stateIndex, setStateIndex] = useState(0) - const previewRows = (pet.stateRows?.length ? pet.stateRows : PREVIEW_ROWS).filter(row => frameCountForRow(pet, row) > 0) - const rows = previewRows.length > 0 ? previewRows : ['idle'] - const activeRow = rows[stateIndex % rows.length] ?? 'idle' - const canJump = frameCountForRow(pet, 'jumping') > 0 - const rowOverride = celebrating && canJump ? 'jumping' : activeRow - - useEffect(() => { - const id = setInterval(() => setStateIndex(i => (i + 1) % rows.length), PREVIEW_STATE_MS) - return () => clearInterval(id) - }, [rows.length]) - - // On reveal: celebrate (jump) ~2 loops, then drop into the cycling preview. - useEffect(() => { - if (!revealed) { - return - } - setCelebrating(true) - const id = setTimeout(() => { - setCelebrating(false) - setStateIndex(0) - }, 2 * (pet.loopMs ?? 1100)) - return () => clearTimeout(id) - }, [revealed, pet.loopMs]) - - useEffect(() => { - setStateIndex(0) - setName('') - setRevealed(false) - setCelebrating(false) - }, [pet.slug]) - - const previewInfo: PetInfo = { ...pet, scale: PREVIEW_SCALE } - - return ( -
- {/* Fills the (now narrow) dialog so the pet frame is the screen width. */} -
- {revealed ? ( - <> -
- -
- - - ) : ( - // The egg cracks open, then we swap in the live pet. - { - setRevealed(true) - triggerHaptic('crisp') - }} - size={150} - /> - )} -
- - setName(event.target.value)} - onKeyDown={event => { - if (event.key === 'Enter') { - event.preventDefault() - onAdopt(name) - } - }} - placeholder={copy.namePlaceholder} - value={name} - /> - - {error && ( - - {error} - - )} - -
- - -
-
- ) -} - diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts index 92d5a540351..863854a738b 100644 --- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts @@ -42,6 +42,7 @@ import { clearPreviewArtifacts } from '@/store/preview-status' import { clearNotifications, notify, notifyError } from '@/store/notifications' import { requestDesktopOnboarding } from '@/store/onboarding' import { setPetScale } from '@/store/pet-gallery' +import { $petGenInput, openPetGenerate } from '@/store/pet-generate' import { $activeGatewayProfile, $newChatProfile, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile' import { $busy, @@ -1178,6 +1179,18 @@ export function usePromptActions({ renderSlashOutput(`error: ${err instanceof Error ? err.message : String(err)}`) } }, + // /hatch opens the pet generator overlay (the desktop's rich, multi-step + // generate→pick→hatch→adopt flow). A typed description seeds the prompt + // so `/hatch a cyber fox` lands on the composer step prefilled. 
+ hatch: async ({ arg }) => { + const concept = arg.trim() + + if (concept) { + $petGenInput.set(concept) + } + + openPetGenerate() + }, pet: async ctx => { const [sub = '', rawValue = ''] = ctx.arg.trim().split(/\s+/) const lower = sub.toLowerCase() diff --git a/apps/desktop/src/components/pet/floating-pet.tsx b/apps/desktop/src/components/pet/floating-pet.tsx index d69c35ab6b6..2ceba5ac0c8 100644 --- a/apps/desktop/src/components/pet/floating-pet.tsx +++ b/apps/desktop/src/components/pet/floating-pet.tsx @@ -22,6 +22,25 @@ interface Point { y: number } +interface PetInfoMeta { + enabled: boolean + slug?: string + displayName?: string + scale?: number + spritesheetRevision?: string +} + +function samePetRevision(info: PetInfo, meta: PetInfoMeta): boolean { + return ( + info.enabled && + Boolean(info.spritesheetBase64) && + info.slug === meta.slug && + info.displayName === meta.displayName && + info.scale === meta.scale && + info.spritesheetRevision === meta.spritesheetRevision + ) +} + function clampToViewport({ x, y }: Point): Point { const maxX = Math.max(0, (window.innerWidth || 800) - 80) const maxY = Math.max(0, (window.innerHeight || 600) - 80) @@ -63,12 +82,15 @@ function loadPosition(): Point { * Adopting a pet is fully in-app: type `/pet boba` in the composer. That * writes `display.pet.*` from the slash worker, so we keep polling `pet.info` * while no pet is active and the mascot pops in within a few seconds — no - * reload, no CLI. Once a pet is live we stop polling. + * reload, no CLI. Once a pet is live we still refresh more slowly so generated + * pets rewritten on disk (or renamed/rebuilt by the hatch flow) repaint without + * restarting the app. * * Promotion to a separate frameless OS-level window is a follow-up — the * sprite + state logic here is reused as-is, only the host changes. 
*/ const PET_POLL_MS = 3000 +const PET_ACTIVE_REFRESH_MS = 15000 export function FloatingPet() { const { requestGateway } = useGatewayRequest() @@ -93,11 +115,12 @@ export function FloatingPet() { // state is only committed on release. const dragRef = useRef<{ dx: number; dy: number; x: number; y: number } | null>(null) - // Fetch pet.info on connect, then keep polling while no pet is active so an - // in-app `/pet ` shows up live. Stops polling once a pet is enabled. + // Fetch pet.info on connect. Poll quickly while inactive so an in-app + // `/pet ` appears, then slowly while active so regenerated spritesheets + // and row-count metadata replace the cached base64 payload. const active = info.enabled && Boolean(info.spritesheetBase64) useEffect(() => { - if (gatewayState !== 'open' || active) { + if (gatewayState !== 'open') { return } @@ -105,9 +128,39 @@ export function FloatingPet() { const pull = async () => { try { + if (active) { + try { + const meta = await requestGateway('pet.info.meta', { profile: petProfile() }) + if (cancelled || !meta) { + return + } + if (!meta.enabled) { + setPetInfo({ enabled: false }) + return + } + if (samePetRevision($petInfo.get(), meta)) { + return + } + } catch { + // Older gateways may not have pet.info.meta yet; fall back to pet.info. + } + } + const next = await requestGateway('pet.info', { profile: petProfile() }) if (!cancelled && next) { + const current = $petInfo.get() + if ( + next.enabled && + current.enabled && + current.slug === next.slug && + current.displayName === next.displayName && + current.scale === next.scale && + current.spritesheetRevision && + current.spritesheetRevision === next.spritesheetRevision + ) { + return + } setPetInfo(next) } } catch { @@ -116,10 +169,12 @@ export function FloatingPet() { } void pull() - const timer = window.setInterval(() => void pull(), PET_POLL_MS) + const timer = window.setInterval(() => void pull(), active ? 
PET_ACTIVE_REFRESH_MS : PET_POLL_MS) + window.addEventListener('focus', pull) return () => { cancelled = true + window.removeEventListener('focus', pull) window.clearInterval(timer) } }, [gatewayState, active, requestGateway]) diff --git a/apps/desktop/src/components/pet/pet-egg-hatch.tsx b/apps/desktop/src/components/pet/pet-egg-hatch.tsx index a677d84b13c..f542a5a0488 100644 --- a/apps/desktop/src/components/pet/pet-egg-hatch.tsx +++ b/apps/desktop/src/components/pet/pet-egg-hatch.tsx @@ -44,14 +44,16 @@ export function PetProgress({ done, total }: { done?: number; total?: number }) export function PetEggHatch({ subtitle, onCancel, cancelLabel }: PetEggHatchProps) { return ( -
+
- + {/* The egg sprite has transparent canvas below the art, so pull the + shadow up ~a fifth of its size to sit at the egg's base. */} +
{subtitle && ( -

+

{subtitle}

)} diff --git a/apps/desktop/src/components/pet/pet-sprite.tsx b/apps/desktop/src/components/pet/pet-sprite.tsx index ed9e4fbfcdc..455f6a956aa 100644 --- a/apps/desktop/src/components/pet/pet-sprite.tsx +++ b/apps/desktop/src/components/pet/pet-sprite.tsx @@ -91,6 +91,7 @@ function PetSpriteImpl({ info, zoom = 1, stateOverride, rowOverride }: PetSprite const frameH = info.frameH ?? DEFAULT_FRAME_H const frames = info.framesPerState ?? DEFAULT_FRAMES const framesByState = info.framesByState + const framesByRow = info.framesByRow const loopMs = info.loopMs ?? DEFAULT_LOOP_MS const scale = (info.scale ?? DEFAULT_SCALE) * zoom const rows = info.stateRows ?? DEFAULT_STATE_ROWS @@ -134,6 +135,8 @@ function PetSpriteImpl({ info, zoom = 1, stateOverride, rowOverride }: PetSprite let lastStep = performance.now() let drawnFrame = -1 let drawnRow = -1 + let activeRow = -1 + let activeCount = -1 const rowIndexForState = (s: PetState): number => { for (const key of STATE_ALIASES[s] ?? [s]) { @@ -161,13 +164,25 @@ function PetSpriteImpl({ info, zoom = 1, stateOverride, rowOverride }: PetSprite const resolveRow = (rowName: string): { row: number; count: number } => { const row = rows.indexOf(rowName) const state = ROW_TO_STATE[rowName] - const count = Math.max(1, framesByState?.[rowName] ?? (state ? framesByState?.[state] : 0) ?? frames) + const count = Math.max( + 1, + framesByRow?.[rowName] ?? framesByState?.[rowName] ?? (state ? framesByState?.[state] : 0) ?? frames + ) return { row: row >= 0 ? row : rowIndexForState(state ?? 'idle'), count } } const render = (now: number) => { const forcedRow = rowOverrideRef.current const { row, count } = forcedRow ? resolveRow(forcedRow) : resolve(overrideRef.current ?? 
stateRef.current) + + if (row !== activeRow || count !== activeCount) { + activeRow = row + activeCount = count + frame = 0 + lastStep = now + drawnFrame = -1 + } + // Per-state step keeps every state's loop ~loopMs even when frame counts // differ; counts vary per row so derive the cadence here, not once. const stepMs = loopMs / count @@ -201,7 +216,7 @@ function PetSpriteImpl({ info, zoom = 1, stateOverride, rowOverride }: PetSprite cancelAnimationFrame(raf) unsubState() } - }, [image, frameW, frameH, frames, framesByState, loopMs, drawW, drawH, rows]) + }, [image, frameW, frameH, frames, framesByState, framesByRow, loopMs, drawW, drawH, rows]) return ( = { + error: 'bg-destructive/12 text-destructive', + warn: 'bg-primary/12 text-primary', + info: 'bg-[color-mix(in_srgb,var(--ui-chat-bubble-background),white_30%)] text-[color-mix(in_srgb,var(--ui-chat-bubble-background),black_60%)] dark:bg-[color-mix(in_srgb,var(--ui-chat-bubble-background),black_20%)] dark:text-[color-mix(in_srgb,var(--ui-chat-bubble-background),white_60%)]' +} + function DialogContent({ className, children, showCloseButton = true, + fitContent = false, + banner, + bannerTone = 'error', ...props }: React.ComponentProps & { showCloseButton?: boolean + // Size the dialog to its content (capped at the viewport) instead of the + // default fixed `max-w-lg`. For content that has no intrinsic width (grids, + // full-width inputs) pair it with a `min-w-*` in `className`. + fitContent?: boolean + // A dialog-level notice rendered as a banner flush to the bottom edge (tinted, + // inherited bottom radius) so it reads as part of the dialog, not a floating + // alert. Falsy → no banner. Tone picks the colour. + banner?: React.ReactNode + bannerTone?: DialogBannerTone }) { const { t } = useI18n() + const widthClass = fitContent ? 'w-auto max-w-[92vw]' : 'w-full max-w-lg' + + const closeButton = showCloseButton ? 
( + + + + ) : null + + // With a banner, the border can't live on the scroll/clip box (it would draw a + // line around the banner too). The white body keeps its own bottom radius and + // sits over the tinted footer; the outer shell only clips the banner to the + // dialog's rounded bottom edge. + if (banner) { + return ( + + + + {/* Scroll lives on an inner box so this shell keeps a painted bottom radius. */} +
+
{children}
+
+
+ {banner} +
+ {closeButton} +
+
+ ) + } + return ( @@ -53,26 +135,15 @@ function DialogContent({ // Cap height at 85vh and let long content scroll inside the dialog // instead of overflowing off-screen (long cron titles, tool detail // dumps, etc.). Individual dialogs can still override via className. - 'fixed left-1/2 top-1/2 z-[130] pointer-events-auto grid max-h-[85vh] w-full max-w-lg -translate-x-1/2 -translate-y-1/2 gap-3 overflow-y-auto rounded-xl border border-(--stroke-nous) bg-(--ui-chat-bubble-background) p-4 text-[length:var(--conversation-text-font-size)] text-foreground shadow-nous duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95', + 'fixed left-1/2 top-1/2 z-[130] pointer-events-auto grid max-h-[85vh] -translate-x-1/2 -translate-y-1/2 gap-3 overflow-y-auto rounded-xl border border-(--stroke-nous) bg-(--ui-chat-bubble-background) p-4 text-[length:var(--conversation-text-font-size)] text-foreground shadow-nous duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95', + widthClass, className )} data-slot="dialog-content" {...props} > {children} - {showCloseButton && ( - - - - )} + {closeButton} ) diff --git a/apps/desktop/src/i18n/en.ts b/apps/desktop/src/i18n/en.ts index fab233cd7ff..b1da6d1aae5 100644 --- a/apps/desktop/src/i18n/en.ts +++ b/apps/desktop/src/i18n/en.ts @@ -788,13 +788,17 @@ export const en: Translations = { hatch: 'Hatch', spawning: 'Spawning…', hatching: 'Hatching your pet…', - hatchingSub: 'Bringing every frame to life — this takes a moment.', + hatchingSub: 'Bringing it to life…', hatched: 'It hatched!', - hatchRow: (state, done, total) => `Drawing ${state}… ${done}/${total}`, - hatchComposing: 'Composing the spritesheet…', - hatchSaving: 'Saving your pet…', + hatchRow: (_state, done, 
total) => `Sketching frame ${done} of ${total}…`, + hatchComposing: 'Piecing it together…', + hatchSaving: 'Almost there…', namePlaceholder: 'Name your pet', staleBackend: 'Update Hermes to generate pets.', + backgroundHint: 'You can close this — Hermes will notify you when it’s done.', + genericError: 'Generation failed — try again or pick a suggestion.', + referenceImageTooLarge: 'Reference image is too large. Use one under 16 MB.', + referenceImageInvalid: 'Could not read that reference image. Try a PNG, JPG, WebP, or GIF.', adopt: 'Adopt', startOver: 'Start over' }, diff --git a/apps/desktop/src/i18n/ja.ts b/apps/desktop/src/i18n/ja.ts index e1c748c5ee6..6e56e941de4 100644 --- a/apps/desktop/src/i18n/ja.ts +++ b/apps/desktop/src/i18n/ja.ts @@ -908,13 +908,17 @@ export const ja = defineLocale({ hatch: '孵化', spawning: 'スポーン中…', hatching: 'ペットを孵化しています…', - hatchingSub: 'すべてのフレームに命を吹き込んでいます。少々お待ちください。', + hatchingSub: '命を吹き込んでいます…', hatched: '孵化しました!', - hatchRow: (state, done, total) => `${state} を描画中… ${done}/${total}`, - hatchComposing: 'スプライトシートを合成中…', - hatchSaving: 'ペットを保存中…', + hatchRow: (_state, done, total) => `フレームを描画中… ${done}/${total}`, + hatchComposing: 'まとめています…', + hatchSaving: 'もうすぐです…', namePlaceholder: 'ペットに名前を付ける', staleBackend: 'ペットを生成するには Hermes を更新してください。', + backgroundHint: 'このウィンドウは閉じても大丈夫です。完了したら Hermes が通知します。', + genericError: '生成に失敗しました。もう一度試すか、候補を選んでください。', + referenceImageTooLarge: '参照画像が大きすぎます。16 MB 未満の画像を使ってください。', + referenceImageInvalid: '参照画像を読み込めませんでした。PNG/JPG/WebP/GIF を試してください。', adopt: '迎え入れる', startOver: 'やり直す' }, diff --git a/apps/desktop/src/i18n/types.ts b/apps/desktop/src/i18n/types.ts index 9d1e213b97d..badafd549fd 100644 --- a/apps/desktop/src/i18n/types.ts +++ b/apps/desktop/src/i18n/types.ts @@ -670,6 +670,10 @@ export interface Translations { hatchSaving: string namePlaceholder: string staleBackend: string + backgroundHint: string + genericError: string + referenceImageTooLarge: string + referenceImageInvalid: string 
adopt: string startOver: string } diff --git a/apps/desktop/src/i18n/zh-hant.ts b/apps/desktop/src/i18n/zh-hant.ts index eb6e2ff7ead..92a00637baf 100644 --- a/apps/desktop/src/i18n/zh-hant.ts +++ b/apps/desktop/src/i18n/zh-hant.ts @@ -878,13 +878,17 @@ export const zhHant = defineLocale({ hatch: '孵化', spawning: '召喚中……', hatching: '正在孵化你的寵物……', - hatchingSub: '正在為每一格注入生命——請稍候。', + hatchingSub: '正在注入生命……', hatched: '孵化成功!', - hatchRow: (state, done, total) => `正在繪製 ${state}…… ${done}/${total}`, - hatchComposing: '正在合成精靈表……', - hatchSaving: '正在儲存你的寵物……', + hatchRow: (_state, done, total) => `正在繪製畫面…… ${done}/${total}`, + hatchComposing: '正在拼合……', + hatchSaving: '快好了……', namePlaceholder: '為寵物命名', staleBackend: '請更新 Hermes 以生成寵物。', + backgroundHint: '你可以關閉此視窗——完成後 Hermes 會通知你。', + genericError: '生成失敗——請重試或選一個建議。', + referenceImageTooLarge: '參考圖片過大。請使用小於 16 MB 的圖片。', + referenceImageInvalid: '無法讀取該參考圖片。請嘗試 PNG、JPG、WebP 或 GIF。', adopt: '領養', startOver: '重新開始' }, diff --git a/apps/desktop/src/i18n/zh.ts b/apps/desktop/src/i18n/zh.ts index effbaf328f8..83781ce0eba 100644 --- a/apps/desktop/src/i18n/zh.ts +++ b/apps/desktop/src/i18n/zh.ts @@ -975,13 +975,17 @@ export const zh: Translations = { hatch: '孵化', spawning: '召唤中……', hatching: '正在孵化你的宠物……', - hatchingSub: '正在为每一帧注入生命——请稍候。', + hatchingSub: '正在注入生命……', hatched: '孵化成功!', - hatchRow: (state, done, total) => `正在绘制 ${state}…… ${done}/${total}`, - hatchComposing: '正在合成精灵表……', - hatchSaving: '正在保存你的宠物……', + hatchRow: (_state, done, total) => `正在绘制画面…… ${done}/${total}`, + hatchComposing: '正在拼合……', + hatchSaving: '马上就好……', namePlaceholder: '给宠物起个名字', staleBackend: '请更新 Hermes 以生成宠物。', + backgroundHint: '你可以关闭此窗口——完成后 Hermes 会通知你。', + genericError: '生成失败——请重试或选择一个建议。', + referenceImageTooLarge: '参考图过大。请使用小于 16 MB 的图片。', + referenceImageInvalid: '无法读取该参考图。请尝试 PNG、JPG、WebP 或 GIF。', adopt: '领养', startOver: '重新开始' }, diff --git a/apps/desktop/src/lib/desktop-slash-commands.ts b/apps/desktop/src/lib/desktop-slash-commands.ts index 
5f2b51f8d9a..5cc11e00424 100644 --- a/apps/desktop/src/lib/desktop-slash-commands.ts +++ b/apps/desktop/src/lib/desktop-slash-commands.ts @@ -32,6 +32,7 @@ export type DesktopActionId = | 'branch' | 'browser' | 'handoff' + | 'hatch' | 'help' | 'new' | 'pet' @@ -130,6 +131,7 @@ const DESKTOP_COMMAND_SPECS: readonly DesktopCommandSpec[] = [ { name: '/goal', description: 'Manage the standing goal for this session', surface: exec() }, { name: '/personality', description: 'Switch personality for this session', surface: exec(), args: true }, { name: '/pet', description: 'Toggle or adopt a petdex mascot (/pet, /pet list, /pet boba)', surface: action('pet'), args: true }, + { name: '/hatch', description: 'Generate a new pet (opens the pet generator)', aliases: ['/generate-pet'], surface: action('hatch') }, { name: '/queue', description: 'Queue a prompt for the next turn', aliases: ['/q'], surface: exec() }, { name: '/retry', description: 'Retry the last user message', surface: exec() }, { name: '/rollback', description: 'List or restore filesystem checkpoints', surface: exec() }, diff --git a/apps/desktop/src/store/pet-generate.ts b/apps/desktop/src/store/pet-generate.ts index bfcd7117d50..b47c858bd76 100644 --- a/apps/desktop/src/store/pet-generate.ts +++ b/apps/desktop/src/store/pet-generate.ts @@ -1,8 +1,12 @@ import { atom } from 'nanostores' +import { persistString, storedString } from '@/lib/storage' import { $gateway } from '@/store/gateway' +import { dispatchNativeNotification } from '@/store/native-notifications' +import { notify } from '@/store/notifications' import { type PetInfo } from '@/store/pet' -import { type GatewayRequest, applyAdoptedPet } from '@/store/pet-gallery' +import { applyAdoptedPet, type GatewayRequest } from '@/store/pet-gallery' +import { $activeSessionId } from '@/store/session' /** * Feature store for the "generate a pet" flow (Cmd-K → Pets → Generate). 
@@ -57,8 +61,10 @@ export function cleanPetName(prompt: string): string { .replace(/[^\p{L}\p{N}\s-]/gu, ' ') .split(/\s+/) .filter(Boolean) + const meaningful = words.filter(w => !NAME_STOPWORDS.has(w.toLowerCase())) const picked = (meaningful.length ? meaningful : words).slice(0, 3) + const name = picked .map(w => w.charAt(0).toUpperCase() + w.slice(1)) .join(' ') @@ -101,11 +107,42 @@ export const $petGenError = atom(null) // re-probes on open and on return from settings. export const $petGenAvailable = atom(null) +/** A reference-capable image backend the user can pick for generation. */ +export interface PetGenProvider { + name: string + label: string + /** One-line speed/quality tradeoff note. */ + note: string + /** Whether this is the backend's default pick (no override needed). */ + default: boolean +} + +const PROVIDER_KEY = 'hermes.desktop.petgen.provider' + +/** Reference-capable providers available to pick (from `pet.generate.status`). */ +export const $petGenProviders = atom([]) +/** The picked provider name; `''` means "use the backend default". Persisted. */ +export const $petGenProvider = atom(storedString(PROVIDER_KEY) ?? '') + +/** Set (and persist) the pet-gen provider override. `''` clears it. */ +export function setPetGenProvider(name: string): void { + $petGenProvider.set(name) + persistString(PROVIDER_KEY, name || null) +} + /** Probe whether generation is possible (a reference-capable backend exists). */ export async function checkPetGenAvailable(request: GatewayRequest): Promise { try { - const res = await request<{ available: boolean }>('pet.generate.status') + const res = await request<{ available: boolean; providers?: PetGenProvider[] }>('pet.generate.status') $petGenAvailable.set(Boolean(res?.available)) + const providers = res?.providers ?? [] + $petGenProviders.set(providers) + // Drop a stale pick if that backend is no longer configured. 
+ const picked = $petGenProvider.get() + + if (picked && !providers.some(p => p.name === picked)) { + setPetGenProvider('') + } } catch { // Unknown (old backend / transient) — don't gate the UI on a failed probe. $petGenAvailable.set(true) @@ -116,14 +153,20 @@ export async function checkPetGenAvailable(request: GatewayRequest): Promise(null) /** Prompt that produced the current draft token; hatch uses this for consistency. */ export const $petGenPrompt = atom('') @@ -132,13 +175,20 @@ export const $petGenSelected = atom(null) /** The hatched-but-unadopted pet: its renderer payload, played in the preview. */ export const $petGenPreview = atom(null) +// Live composer inputs live in atoms (not component state) so closing the +// overlay mid-flow — or letting it run in the background — and reopening (or +// clicking the "done" notification) restores exactly what you had. +export const $petGenInput = atom('') +export const $petGenRefImage = atom(null) +export const $petGenRefName = atom('') + function isMissingMethod(error: unknown): boolean { const message = error instanceof Error ? error.message : String(error) return /method not found|-32601|unknown method|no such method/i.test(message) } -/** Clear all generation state (on close, or before a fresh run). */ +/** Clear all generation state (before a fresh run). */ export function resetPetGen(): void { $petGenStatus.set('idle') $petGenStage.set(null) @@ -148,26 +198,44 @@ export function resetPetGen(): void { $petGenDrafts.set([]) $petGenSelected.set(null) $petGenPreview.set(null) + $petGenInput.set('') + $petGenRefImage.set(null) + $petGenRefName.set('') } /** - * Reset on palette close, deleting an unadopted preview pet first so a hatched- - * but-never-adopted creature doesn't linger in the gallery. Fire-and-forget. + * Close-time cleanup: if a pet is already hatched but not adopted, discard it so + * abandoned previews do not accumulate on disk. 
In-flight generate/hatch runs + * are intentionally left alone (background-resumable). */ -export function cleanupPetGen(request: GatewayRequest): void { +export function cleanupPetGenOnClose(request: GatewayRequest): void { + const status = $petGenStatus.get() const preview = $petGenPreview.get() - if ($petGenStatus.get() === 'preview' && preview?.slug) { + if ((status === 'preview' || status === 'adopting') && preview?.slug) { void request('pet.remove', { slug: preview.slug }).catch(() => {}) + resetPetGen() + } +} + +// A finished background run (overlay closed) nudges the user back: an in-app +// toast with a View action always, plus an OS notification when enabled and the +// app is in the background. Clicking either reopens the overlay to its state. +function notifyPetGenDone(title: string, message: string, kind: 'error' | 'success'): void { + if ($petGenerateOpen.get()) { + return } - resetPetGen() + notify({ kind, title, message, action: { label: 'View', onClick: openPetGenerate } }) + dispatchNativeNotification({ kind: 'backgroundDone', title, body: message, sessionId: $activeSessionId.get() }) } interface GenerateOptions { prompt: string style?: string count?: number + /** Optional data-URL reference image — every draft is grounded on it. */ + referenceImage?: string } // A Stop (or a fresh round) must invalidate the in-flight call. This primitive @@ -185,6 +253,7 @@ interface Run { function cancelableRun(): Run { let id = 0 let cancel: (() => void) | null = null + return { begin: () => (id += 1), isCurrent: n => n === id, @@ -216,11 +285,14 @@ export function cancelGenerate(): void { $petGenError.set(null) const drafts = $petGenDrafts.get() + if (drafts.length > 0) { if ($petGenSelected.get() === null) { $petGenSelected.set(drafts[0]?.index ?? 0) } + $petGenStatus.set('ready') + return } @@ -230,6 +302,19 @@ export function cancelGenerate(): void { $petGenToken.set(null) } +/** + * Abandon the current drafts and return to the prompt (step 1). 
Stops any + * in-flight generation; keeps the prompt text so the user can tweak + retry. + */ +export function discardDrafts(): void { + gen.stop() + $petGenDrafts.set([]) + $petGenSelected.set(null) + $petGenToken.set(null) + $petGenError.set(null) + $petGenStatus.set('idle') +} + const hatch = cancelableRun() // A Stop invalidates the in-flight hatch and drops back to the draft picker (the @@ -245,8 +330,10 @@ export function cancelHatch(): void { /** Generate (or retry) a fresh set of base-look drafts for `prompt`. */ export async function generateDrafts(request: GatewayRequest, options: GenerateOptions): Promise { const prompt = options.prompt.trim() + const referenceImage = options.referenceImage - if (!prompt) { + // Need *something* to ground on: a description or a reference image. + if (!prompt && !referenceImage) { return false } @@ -255,6 +342,7 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO gen.arm(() => { controller.abort() const token = $petGenToken.get() + if (token) { void request('pet.cancel', { token }).catch(() => {}) } @@ -262,6 +350,7 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO // Starting a fresh generation round supersedes any unadopted preview pet. const preview = $petGenPreview.get() + if (preview?.slug) { await request('pet.remove', { slug: preview.slug }).catch(() => {}) } @@ -284,6 +373,7 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO if (gen.isCurrent(runId) && $petGenStatus.get() === 'generating') { $petGenToken.set(draft.token) } + return } @@ -302,6 +392,7 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO } const current = $petGenDrafts.get() + if (current.some(d => d.index === draft.index)) { return } @@ -317,7 +408,9 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO { prompt, style: options.style ?? 'auto', - count: options.count ?? 
4 + count: options.count ?? 4, + ...(referenceImage ? { referenceImage } : {}), + ...($petGenProvider.get() ? { provider: $petGenProvider.get() } : {}) }, GENERATE_TIMEOUT_MS, controller.signal @@ -333,10 +426,12 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO } $petGenToken.set(result.token) - $petGenPrompt.set(prompt) + // Keep a concept for the hatch row prompts even on an image-only generate. + $petGenPrompt.set(prompt || 'a custom pet') $petGenDrafts.set(result.drafts) $petGenSelected.set(result.drafts[0]?.index ?? 0) $petGenStatus.set('ready') + notifyPetGenDone('Pet drafts ready', 'Your pet looks finished — pick one to hatch.', 'success') return true } catch (e) { @@ -349,6 +444,7 @@ export async function generateDrafts(request: GatewayRequest, options: GenerateO } else { $petGenStatus.set('error') $petGenError.set(e instanceof Error ? e.message : 'Could not generate pet drafts.') + notifyPetGenDone('Pet generation failed', 'Reopen to try again.', 'error') } return false @@ -381,11 +477,15 @@ export async function hatchSelected(request: GatewayRequest, options: HatchOptio return false } + // Hatch cancellation rides its own token (not the draft token): hatching + // mid-generation leaves pet.generate releasing that token, which would race + // the arm. The draft token still locates the staged image server-side. 
+ const cancelToken = crypto.randomUUID() const hatchRunId = hatch.begin() const controller = new AbortController() hatch.arm(() => { controller.abort() - void request('pet.cancel', { token }).catch(() => {}) + void request('pet.cancel', { token: cancelToken }).catch(() => {}) }) $petGenStatus.set('hatching') @@ -399,6 +499,7 @@ export async function hatchSelected(request: GatewayRequest, options: HatchOptio .get() ?.on<{ event: string; state?: string; done?: string; total?: string }>('pet.hatch.progress', event => { const p = event.payload + if (!p || !hatch.isCurrent(hatchRunId) || $petGenStatus.get() !== 'hatching') { return } @@ -422,11 +523,13 @@ export async function hatchSelected(request: GatewayRequest, options: HatchOptio 'pet.hatch', { token, + cancelToken, index, name, description: options.description ?? '', prompt: concept, - style: options.style ?? 'auto' + style: options.style ?? 'auto', + ...($petGenProvider.get() ? { provider: $petGenProvider.get() } : {}) }, HATCH_TIMEOUT_MS, controller.signal @@ -437,6 +540,7 @@ export async function hatchSelected(request: GatewayRequest, options: HatchOptio if (result?.slug) { void request('pet.remove', { slug: result.slug }).catch(() => {}) } + return false } @@ -446,6 +550,7 @@ export async function hatchSelected(request: GatewayRequest, options: HatchOptio $petGenPreview.set({ ...result.pet, enabled: true }) $petGenStatus.set('preview') + notifyPetGenDone('Your pet hatched', 'Reopen to name and adopt it.', 'success') return true } catch (e) { @@ -455,10 +560,12 @@ export async function hatchSelected(request: GatewayRequest, options: HatchOptio $petGenStatus.set('error') $petGenError.set(e instanceof Error ? 
e.message : 'Could not hatch the pet.') + notifyPetGenDone('Hatching failed', 'Reopen to try again.', 'error') return false } finally { offProgress() + if (hatch.isCurrent(hatchRunId)) { $petGenStage.set(null) hatch.disarmIf(hatchRunId) @@ -494,11 +601,13 @@ export async function adoptHatched(request: GatewayRequest, name?: string): Prom // rename failure shouldn't block adopting under the provisional slug. const finalName = name?.trim() let adoptSlug = preview.slug + if (finalName && finalName !== preview.displayName) { const renamed = await request<{ ok: boolean; slug: string }>('pet.rename', { slug: preview.slug, name: finalName }).catch(() => null) + if (renamed?.slug) { adoptSlug = renamed.slug } diff --git a/apps/desktop/src/store/pet.ts b/apps/desktop/src/store/pet.ts index e4863f45712..f62ee25745d 100644 --- a/apps/desktop/src/store/pet.ts +++ b/apps/desktop/src/store/pet.ts @@ -20,6 +20,9 @@ export interface PetInfo { displayName?: string mime?: string spritesheetBase64?: string + // Stable sheet revision (`mtime_ns:size`) from the gateway; lets the desktop + // skip full sprite payload refreshes when the active pet hasn't changed. + spritesheetRevision?: string frameW?: number frameH?: number framesPerState?: number @@ -27,6 +30,9 @@ export interface PetInfo { // canvas step only frames that exist instead of a fixed framesPerState, which // would animate into the transparent padding of ragged sheets (blank flash). framesByState?: Record + // Concrete Codex row counts (e.g. running-right may have 8 frames even though + // the Hermes "run" activity state uses the in-place running row). 
+ framesByRow?: Record loopMs?: number scale?: number stateRows?: string[] diff --git a/apps/desktop/src/styles.css b/apps/desktop/src/styles.css index bd584237eea..f4026ba5e29 100644 --- a/apps/desktop/src/styles.css +++ b/apps/desktop/src/styles.css @@ -1491,16 +1491,35 @@ canvas { width: 4.5rem; height: 0.8rem; border-radius: 50%; - background: radial-gradient(circle, color-mix(in srgb, #000 32%, transparent) 0%, transparent 72%); + /* Lighter on light backgrounds (~20% less ink); dark mode keeps it grounded. */ + background: radial-gradient(circle, color-mix(in srgb, #000 var(--pet-egg-shadow-ink, 26%), transparent) 0%, transparent 72%); animation: pet-egg-shadow 2.4s ease-in-out infinite; } +.dark .pet-egg-shadow { + --pet-egg-shadow-ink: 32%; +} + /* Contact shadow sized for the compact incubator egg (roughly its footprint). */ .pet-egg-shadow--sm { width: 3rem; height: 0.6rem; } +/* Contact shadow under the revealed pet — mirrors the floating mascot's in-app + shadow: an ellipse at the feet, ~55% of the sprite width, sitting behind it. */ +.pet-contact-shadow { + position: absolute; + bottom: -0.15rem; + left: 50%; + width: 55%; + aspect-ratio: 100 / 28; + transform: translateX(-50%); + background: radial-gradient(ellipse at center, color-mix(in srgb, #000 42%, transparent) 0%, transparent 70%); + pointer-events: none; + z-index: 0; +} + /* Hatch wiggle for the pixel egg (rocks around its base). 
*/ .pet-wobble { transform-origin: 50% 85%; diff --git a/cli.py b/cli.py index 63d6fb71153..289d5ec7b12 100644 --- a/cli.py +++ b/cli.py @@ -8194,6 +8194,9 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin): self._handle_personality_command(cmd_original) elif canonical == "pet": self._handle_pet_command(cmd_original) + + elif canonical == "hatch": + self._handle_hatch_command(cmd_original) elif canonical == "retry": retry_msg = self.retry_last() if retry_msg and hasattr(self, '_pending_input'): diff --git a/hermes_cli/cli_commands_mixin.py b/hermes_cli/cli_commands_mixin.py index 45e6bdbe79d..eefce82461a 100644 --- a/hermes_cli/cli_commands_mixin.py +++ b/hermes_cli/cli_commands_mixin.py @@ -1051,6 +1051,74 @@ class CLICommandsMixin: _set_active(arg) print(f"(^_^)b {pet.display_name} is out — it'll pop in shortly.") + def _handle_hatch_command(self, cmd: str): + """Generate ("hatch") a brand-new petdex pet from a description. + + ``/hatch <description>`` runs the full pet pipeline in-process: a base + look, then one grounded animation row per state, sliced + normalized into + a spritesheet, then adopted as the active mascot. Progress streams inline + (it's ~a minute of image-model calls). In the desktop app this command + opens the richer generate overlay instead; here we run it directly. + """ + from agent.pet import store + from agent.pet.generate import orchestrate + from agent.pet.generate.imagegen import GenerationError + from hermes_cli.pets import _set_active + + parts = cmd.split(maxsplit=1) + concept = parts[1].strip() if len(parts) > 1 else "" + + if not concept: + try: + concept = input("(o_o) Describe your pet: ").strip() + except (EOFError, KeyboardInterrupt): + print() + return + + if not concept: + print("(o_o) Usage: /hatch (e.g. /hatch a tiny cyber fox)") + return + + # A short, friendly display name from the first few words of the concept. 
+ display_name = " ".join(w.capitalize() for w in concept.split()[:3])[:28].strip() or "Pet" + slug = store.slugify(display_name) or store.slugify(concept) or "pet" + + print(f"(o_o) Designing '{concept}'… (a minute of image-model calls)") + try: + drafts = orchestrate.generate_base_drafts(concept, n=1) + except GenerationError as exc: + print(f"(x_x) Couldn't generate a base look: {exc}") + return + + if not drafts: + print("(x_x) No base draft came back — try again.") + return + + def _progress(event: str, detail: str) -> None: + if event == "row": + # detail is "<state>:<done>:<total>"; show the state name. + state = detail.split(":", 1)[0] + print(f" ┊ drawing {state}…") + elif event == "compose": + print(" ┊ composing spritesheet…") + elif event == "save": + print(" ┊ saving…") + + try: + result = orchestrate.hatch_pet( + base_image=drafts[0], + slug=slug, + display_name=display_name, + concept=concept, + on_progress=_progress, + ) + except GenerationError as exc: + print(f"(x_x) Hatch failed: {exc}") + return + + _set_active(result.slug) + print(f"(^_^)b {result.display_name} hatched and adopted — it'll pop in shortly!") + def _handle_cron_command(self, cmd: str): """Handle the /cron command to manage scheduled tasks.""" from cli import get_job diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index c7ce5566e40..63f316bde16 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -181,6 +181,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ "Tools & Skills"), CommandDef("pet", "Toggle or adopt a petdex mascot (/pet, /pet list, /pet )", "Tools & Skills", cli_only=True, args_hint="[toggle|list|scale |]", subcommands=("toggle", "list", "scale", "off")), + CommandDef("hatch", "Generate a new petdex pet from a description", + "Tools & Skills", cli_only=True, aliases=("generate-pet",), args_hint="[description]"), CommandDef("learn", "Learn a reusable skill from anything you describe (dirs, URLs, this chat, notes)", "Tools & Skills", args_hint=""), CommandDef("cron", 
"Manage scheduled tasks", "Tools & Skills", diff --git a/tests/agent/test_pet_generate.py b/tests/agent/test_pet_generate.py index 1a3ad424bb8..82bd3f15de5 100644 --- a/tests/agent/test_pet_generate.py +++ b/tests/agent/test_pet_generate.py @@ -106,6 +106,23 @@ def test_extract_strip_frames_drops_small_side_lobes_from_adjacent_frames(): assert right_edge_mass == 0 +def test_extract_strip_frames_uses_real_gutters_when_spacing_is_uneven(): + # gpt-image often returns a square chroma strip whose poses are separated but + # not laid out on exact equal-width slots. Equal slot slicing would include + # the next pose's wing/cape in frame 0; gutter-derived crops keep it out. + img = Image.new("RGBA", (600, 208), (0, 0, 0, 0)) + draw = ImageDraw.Draw(img) + draw.rectangle((40, 58, 140, 178), fill=(80, 120, 220, 255)) + draw.rectangle((182, 58, 282, 178), fill=(220, 120, 80, 255)) + draw.rectangle((430, 58, 530, 178), fill=(80, 220, 120, 255)) + + frames = atlas.extract_strip_frames(img, 3, method="auto", fit=False) + + assert len(frames) == 3 + assert frames[0].getbbox()[2] <= 120 + assert frames[1].getbbox()[0] <= 16 + + def test_extract_strip_frames_slot_fallback_when_unsegmentable(): # A single connected smear can't be split into 5 components → slot fallback. img = Image.new("RGBA", (200 * 5, 208), (0, 0, 0, 0)) @@ -181,6 +198,27 @@ def test_single_frame_fits_cell(): assert frame.getchannel("A").getextrema()[1] > 0 +def test_normalize_cells_uses_consistent_pose_scale_for_motion_rows(): + # A jump row needs a taller union crop than idle, but the pet itself should + # not shrink just because the motion envelope is taller. 
+ idle = Image.new("RGBA", (160, 180), (0, 0, 0, 0)) + jump_low = Image.new("RGBA", (160, 180), (0, 0, 0, 0)) + jump_high = Image.new("RGBA", (160, 180), (0, 0, 0, 0)) + ImageDraw.Draw(idle).rectangle((50, 80, 110, 160), fill=(80, 120, 220, 255)) + ImageDraw.Draw(jump_low).rectangle((50, 80, 110, 160), fill=(220, 120, 80, 255)) + ImageDraw.Draw(jump_high).rectangle((50, 60, 110, 140), fill=(220, 120, 80, 255)) + + normalized = atlas.normalize_cells({"idle": [idle], "jumping": [jump_low, jump_high]}) + idle_box = normalized["idle"][0].getbbox() + jump_box = normalized["jumping"][0].getbbox() + + assert idle_box is not None + assert jump_box is not None + idle_h = idle_box[3] - idle_box[1] + jump_h = jump_box[3] - jump_box[1] + assert abs(idle_h - jump_h) <= 8 + + # ───────────────────────── store register / adopt ───────────────────────── @@ -252,7 +290,7 @@ def test_generate_base_drafts_returns_n(monkeypatch, tmp_path): calls = {"n": 0} - def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet"): + def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet", aspect_ratio="square"): paths = [] for i in range(n): calls["n"] += 1 @@ -272,7 +310,7 @@ def test_generate_base_drafts_hardens_opaque_background(monkeypatch, tmp_path): """A provider that ignores background=transparent still yields a cutout.""" from agent.pet.generate import imagegen, orchestrate - def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet"): + def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet", aspect_ratio="square"): # Solid-green backdrop with a blob — i.e. the provider painted a backdrop. 
p = tmp_path / f"{prefix}_opaque.png" _strip(1, transparent=False, bg=(0, 255, 0, 255)).save(p) @@ -300,7 +338,7 @@ def test_hatch_pet_end_to_end(monkeypatch, tmp_path): base = tmp_path / "base.png" _strip(1).save(base) - def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet"): + def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet", aspect_ratio="square"): # Return a synthetic row strip; frame count is inferable from the spec. state = prefix.replace("pet_row_", "") count = atlas_mod.FRAME_COUNTS.get(state, 6) @@ -337,7 +375,7 @@ def test_hatch_pet_idle_fallback_when_row_fails(monkeypatch, tmp_path): base = tmp_path / "base.png" _strip(1).save(base) - def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet"): + def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet", aspect_ratio="square"): if prefix == "pet_row_idle": raise GenerationError("boom") state = prefix.replace("pet_row_", "") @@ -361,7 +399,7 @@ def test_hatch_pet_rejects_missing_required_animation_rows(monkeypatch, tmp_path base = tmp_path / "base.png" _strip(1).save(base) - def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet"): + def fake_generate(prompt, *, n=1, reference_images=None, provider=None, prefix="pet", aspect_ratio="square"): if prefix == "pet_row_running-right": raise GenerationError("bad row") state = prefix.replace("pet_row_", "") @@ -388,6 +426,48 @@ def test_resolve_provider_errors_without_backend(monkeypatch): imagegen.resolve_provider(require_references=True) +class _FakeImgProvider: + def __init__(self, name, available=True): + self.name = name + self._available = available + + def is_available(self): + return self._available + + +def test_resolve_provider_honors_available_preference(monkeypatch): + """An explicit, configured, ref-capable preference wins over the active one.""" + from agent.pet.generate import imagegen + + registry = 
{"openai": _FakeImgProvider("openai"), "openrouter": _FakeImgProvider("openrouter")} + monkeypatch.setattr(imagegen, "_discover", lambda: None) + monkeypatch.setattr("agent.image_gen_registry.get_active_provider", lambda: registry["openai"]) + monkeypatch.setattr("agent.image_gen_registry.get_provider", lambda name: registry.get(name)) + + assert imagegen.resolve_provider(prefer="openrouter").name == "openrouter" + # An unavailable / unknown preference is ignored — fall back to the active one. + registry["openrouter"]._available = False + assert imagegen.resolve_provider(prefer="openrouter").name == "openai" + assert imagegen.resolve_provider(prefer="not-a-provider").name == "openai" + + +def test_list_sprite_providers_marks_default(monkeypatch): + """Lists only available ref-capable backends, flagging the default pick.""" + from agent.pet.generate import imagegen + + registry = {"openai": _FakeImgProvider("openai"), "nous": _FakeImgProvider("nous")} + monkeypatch.setattr(imagegen, "_discover", lambda: None) + monkeypatch.setattr("agent.image_gen_registry.get_active_provider", lambda: registry["openai"]) + monkeypatch.setattr("agent.image_gen_registry.get_provider", lambda name: registry.get(name)) + + listed = imagegen.list_sprite_providers() + names = {p["name"] for p in listed} + assert names == {"openai", "nous"} + # Every entry carries display metadata, and exactly one is the default. 
+ assert all(p["label"] and "note" in p for p in listed) + assert [p["name"] for p in listed if p["default"]] == ["openai"] + + def test_generate_retries_without_transparent_background(monkeypatch, tmp_path): """A model that rejects background=transparent still produces images.""" from agent.pet.generate import imagegen diff --git a/tests/tui_gateway/test_pet_generate_rpc.py b/tests/tui_gateway/test_pet_generate_rpc.py index 99d65b3d85a..98dd494bd52 100644 --- a/tests/tui_gateway/test_pet_generate_rpc.py +++ b/tests/tui_gateway/test_pet_generate_rpc.py @@ -23,10 +23,32 @@ def test_pet_generate_requires_prompt(): assert "error" in resp +def test_pet_generate_rejects_invalid_reference_image(): + resp = server._methods["pet.generate"]( + "r_invalid_ref", + {"referenceImage": "data:image/svg+xml;base64,PHN2Zy8+"}, + ) + assert "error" in resp + assert "unsupported reference image type" in resp["error"]["message"] + + +def test_pet_generate_rejects_oversized_reference_image(monkeypatch): + import base64 + + monkeypatch.setattr(server, "_PET_REFERENCE_MAX_BYTES", 8) + payload = base64.b64encode(b"0123456789").decode("ascii") + resp = server._methods["pet.generate"]( + "r_big_ref", + {"referenceImage": f"data:image/png;base64,{payload}"}, + ) + assert "error" in resp + assert "too large" in resp["error"]["message"].lower() + + def test_pet_generate_returns_token_and_previews(monkeypatch, tmp_path): import agent.pet.generate as gen - def fake_drafts(prompt, *, n=4, style="auto", on_draft=None, is_cancelled=None): + def fake_drafts(prompt, *, n=4, style="auto", reference_images=None, provider=None, on_draft=None, is_cancelled=None): paths = [] for i in range(n): p = tmp_path / f"d{i}.png" @@ -66,7 +88,7 @@ def test_pet_generate_cancel_stops_run(monkeypatch, tmp_path): monkeypatch.setattr(server, "_emit", cap_emit) - def fake_drafts(prompt, *, n=4, style="auto", on_draft=None, is_cancelled=None): + def fake_drafts(prompt, *, n=4, style="auto", reference_images=None, 
provider=None, on_draft=None, is_cancelled=None): # Simulate a Stop landing mid-run: the cooperative flag must read True. server._pet_cancel_request(seen["token"]) assert is_cancelled() is True @@ -93,7 +115,7 @@ def test_pet_hatch_expired_draft(): def _fake_drafts_factory(tmp_path): - def fake_drafts(prompt, *, n=4, style="auto", on_draft=None, is_cancelled=None): + def fake_drafts(prompt, *, n=4, style="auto", reference_images=None, provider=None, on_draft=None, is_cancelled=None): paths = [] for i in range(n): p = tmp_path / f"d{i}.png" @@ -178,3 +200,46 @@ def test_pet_hatch_then_adopt_activates(monkeypatch, tmp_path): adopt = server._methods["pet.select"]("r3", {"slug": hatched["slug"]})["result"] assert adopt["ok"] assert activated["slug"] == "my-fox" + + +def test_pet_sprite_payload_includes_concrete_row_counts(): + from agent.pet import constants, store + + cols, rows = 8, 9 + sheet = Image.new("RGBA", (constants.FRAME_W * cols, constants.FRAME_H * rows), (0, 0, 0, 0)) + # Current Codex rows can have more/fewer frames than Hermes' generic + # FRAMES_PER_STATE. The desktop preview needs the concrete row count. 
+ real = {0: 6, 1: 8, 3: 4, 4: 5, 7: 6} + for row, count in real.items(): + for col in range(count): + block = Image.new("RGBA", (constants.FRAME_W, constants.FRAME_H), (80, 120, 220, 255)) + sheet.paste(block, (col * constants.FRAME_W, row * constants.FRAME_H)) + + pet = store.register_local_pet(sheet, slug="row-counts", display_name="Row Counts") + payload = server._pet_sprite_payload(pet, scale=0.7) + + assert payload["framesByRow"]["running-right"] == 8 + assert payload["framesByRow"]["waving"] == 4 + assert payload["framesByRow"]["jumping"] == 5 + assert payload["framesByState"]["run"] == 6 + + +def test_pet_info_meta_avoids_full_payload(monkeypatch): + import hermes_cli.config as cli_config + from agent.pet import constants, store + + sheet = Image.new("RGBA", (constants.FRAME_W * 8, constants.FRAME_H * 9), (80, 120, 220, 255)) + pet = store.register_local_pet(sheet, slug="meta-pet", display_name="Meta Pet") + monkeypatch.setattr( + cli_config, + "load_config", + lambda: {"display": {"pet": {"enabled": True, "slug": pet.slug, "scale": 0.7}}}, + ) + + resp = server._methods["pet.info.meta"]("r_meta", {}) + result = resp["result"] + assert result["enabled"] is True + assert result["slug"] == pet.slug + assert result["displayName"] == "Meta Pet" + assert result["scale"] == 0.7 + assert ":" in result["spritesheetRevision"] diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 750a6840270..726a143d87e 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -5579,6 +5579,75 @@ def _pet_frame_counts(spritesheet) -> dict: return {} +_pet_payload_cache_lock = threading.Lock() +_pet_payload_cache: dict[tuple, dict] = {} + + +def _pet_sheet_revision(spritesheet) -> str: + """Stable revision id for one spritesheet file.""" + try: + stat = spritesheet.stat() + return f"{stat.st_mtime_ns}:{stat.st_size}" + except Exception: # noqa: BLE001 - cosmetic, never break the surface + return "0:0" + + +def _pet_payload_cache_key(pet, *, scale: float) -> tuple | 
None: + """Cache key for the expensive sprite payload build.""" + try: + stat = pet.spritesheet.stat() + except Exception: # noqa: BLE001 + return None + return ( + str(pet.spritesheet), + stat.st_mtime_ns, + stat.st_size, + pet.slug, + pet.display_name, + round(scale, 4), + ) + + +def _clone_pet_payload(payload: dict) -> dict: + """Shallow-clone cached payloads so callers can't mutate shared state.""" + out = dict(payload) + if isinstance(payload.get("framesByState"), dict): + out["framesByState"] = dict(payload["framesByState"]) + if isinstance(payload.get("framesByRow"), dict): + out["framesByRow"] = dict(payload["framesByRow"]) + if isinstance(payload.get("stateRows"), list): + out["stateRows"] = list(payload["stateRows"]) + return out + + +def _pet_row_frame_counts(spritesheet) -> dict: + """Real frame count per concrete spritesheet row name.""" + try: + from PIL import Image + + from agent.pet import constants, render + + with Image.open(spritesheet) as opened: + image = opened.convert("RGBA") + cols = max(1, image.width // constants.FRAME_W) + row_count = max(1, image.height // constants.FRAME_H) + rows = constants.state_rows_for_grid(row_count) + out: dict[str, int] = {} + for row_idx, name in enumerate(rows[:row_count]): + top = row_idx * constants.FRAME_H + count = 0 + for col in range(cols): + left = col * constants.FRAME_W + frame = image.crop((left, top, left + constants.FRAME_W, top + constants.FRAME_H)) + if render._frame_is_blank(frame): + break + count += 1 + out[name] = count + return out + except Exception: # noqa: BLE001 - cosmetic, never break the surface + return {} + + def _pet_config_scale() -> float: """Configured ``display.pet.scale`` (or the engine default), never raises.""" from agent.pet import constants @@ -5604,22 +5673,57 @@ def _pet_sprite_payload(pet, *, scale: float) -> dict: from agent.pet import constants + cache_key = _pet_payload_cache_key(pet, scale=scale) + if cache_key is not None: + with _pet_payload_cache_lock: + cached = 
_pet_payload_cache.get(cache_key) + if cached is not None: + return _clone_pet_payload(cached) + raw = pet.spritesheet.read_bytes() suffix = pet.spritesheet.suffix.lower() mime = "image/png" if suffix == ".png" else "image/webp" - return { + payload = { "slug": pet.slug, "displayName": pet.display_name, "mime": mime, "spritesheetBase64": base64.standard_b64encode(raw).decode("ascii"), + "spritesheetRevision": _pet_sheet_revision(pet.spritesheet), "frameW": constants.FRAME_W, "frameH": constants.FRAME_H, "framesPerState": constants.FRAMES_PER_STATE, "framesByState": _pet_frame_counts(pet.spritesheet), + "framesByRow": _pet_row_frame_counts(pet.spritesheet), "loopMs": constants.LOOP_MS, "scale": scale, "stateRows": _pet_state_rows(pet.spritesheet), } + if cache_key is not None: + with _pet_payload_cache_lock: + _pet_payload_cache[cache_key] = payload + while len(_pet_payload_cache) > 8: + _pet_payload_cache.pop(next(iter(_pet_payload_cache))) + return _clone_pet_payload(payload) + + +def _pet_active_selection(): + """Resolve configured active pet + scale from config.""" + from agent.pet import constants, store + + try: + from hermes_cli.config import load_config + + cfg = load_config() + display = cfg.get("display", {}) if isinstance(cfg.get("display"), dict) else {} + pet_cfg = display.get("pet", {}) if isinstance(display.get("pet"), dict) else {} + except Exception: + pet_cfg = {} + + enabled = bool(pet_cfg.get("enabled")) + configured_slug = str(pet_cfg.get("slug", "") or "") + pet = store.resolve_active_pet(configured_slug) if enabled else None + scale = float(pet_cfg.get("scale", constants.DEFAULT_SCALE) or constants.DEFAULT_SCALE) + return enabled, pet, scale def _pet_state_rows(spritesheet) -> list[str]: @@ -5658,31 +5762,40 @@ def _(rid, params: dict) -> dict: on any error rather than erroring the surface. 
""" try: - from agent.pet import constants, store - - try: - from hermes_cli.config import load_config - - cfg = load_config() - display = cfg.get("display", {}) if isinstance(cfg.get("display"), dict) else {} - pet_cfg = display.get("pet", {}) if isinstance(display.get("pet"), dict) else {} - except Exception: - pet_cfg = {} - - enabled = bool(pet_cfg.get("enabled")) - configured_slug = str(pet_cfg.get("slug", "") or "") - pet = store.resolve_active_pet(configured_slug) if enabled else None + enabled, pet, scale = _pet_active_selection() if not enabled or pet is None or not pet.exists: return _ok(rid, {"enabled": False}) - scale = float(pet_cfg.get("scale", constants.DEFAULT_SCALE) or constants.DEFAULT_SCALE) return _ok(rid, {"enabled": True, **_pet_sprite_payload(pet, scale=scale)}) except Exception as exc: # noqa: BLE001 - cosmetic, never break the surface logger.debug("pet.info failed: %s", exc) return _ok(rid, {"enabled": False}) +@method("pet.info.meta") +@_profile_scoped +def _(rid, params: dict) -> dict: + """Cheap active-pet metadata used to avoid full payload refreshes.""" + try: + enabled, pet, scale = _pet_active_selection() + if not enabled or pet is None or not pet.exists: + return _ok(rid, {"enabled": False}) + return _ok( + rid, + { + "enabled": True, + "slug": pet.slug, + "displayName": pet.display_name, + "scale": scale, + "spritesheetRevision": _pet_sheet_revision(pet.spritesheet), + }, + ) + except Exception as exc: # noqa: BLE001 - cosmetic, never break the surface + logger.debug("pet.info.meta failed: %s", exc) + return _ok(rid, {"enabled": False}) + + @method("pet.cells") @_profile_scoped def _(rid, params: dict) -> dict: @@ -6107,6 +6220,53 @@ def _pet_png_data_uri(path, *, max_px: int = 160) -> str: # hatch_pet poll between provider calls to skip work they haven't started. 
_pet_cancel_lock = threading.Lock() _pet_cancelled: set[str] = set() +_PET_REFERENCE_MIME_EXT = { + "png": "png", + "jpeg": "jpg", + "jpg": "jpg", + "webp": "webp", + "gif": "gif", +} +try: + _PET_REFERENCE_MAX_BYTES = max( + 1, + int(os.environ.get("HERMES_PET_REFERENCE_MAX_BYTES") or str(16 * 1024 * 1024)), + ) +except (TypeError, ValueError): + _PET_REFERENCE_MAX_BYTES = 16 * 1024 * 1024 + + +def _pet_reference_images_from_data_url(ref_raw: str, stage) -> list: + """Decode + validate a reference-image data URL into the stage dir.""" + import base64 + import binascii + import re as _re + + match = _re.match(r"^data:image/([a-zA-Z0-9.+-]+);base64,(.*)$", ref_raw, _re.DOTALL) + if not match: + raise ValueError("invalid reference image format") + + mime = match.group(1).lower() + ext = _PET_REFERENCE_MIME_EXT.get(mime) + if ext is None: + raise ValueError("unsupported reference image type") + + payload = "".join(match.group(2).split()) + approx = (len(payload) * 3) // 4 + if approx > _PET_REFERENCE_MAX_BYTES: + raise ValueError("reference image too large") + + try: + raw = base64.b64decode(payload, validate=True) + except (binascii.Error, ValueError) as exc: + raise ValueError("invalid reference image data") from exc + + if len(raw) > _PET_REFERENCE_MAX_BYTES: + raise ValueError("reference image too large") + + ref_path = stage / f"reference.{ext}" + ref_path.write_bytes(raw) + return [ref_path] def _pet_cancel_arm(token: str) -> None: @@ -6148,34 +6308,46 @@ def _(rid, params: dict) -> dict: def _(rid, params: dict) -> dict: """Whether pet generation is possible right now. - True only when a reference-capable image backend (OpenRouter / Nous Portal / + True only when a reference-capable image backend (Nous Portal / OpenRouter / OpenAI gpt-image) is configured — the desktop checks this on open so it can offer setup instead of a dead prompt. Cheap (config + plugin discovery). 
""" try: - from agent.pet.generate.imagegen import GenerationError, resolve_provider + from agent.pet.generate.imagegen import ( + GenerationError, + list_sprite_providers, + resolve_provider, + ) try: resolve_provider(require_references=True) - return _ok(rid, {"available": True}) + available = True except GenerationError: - return _ok(rid, {"available": False}) + available = False + try: + providers = list_sprite_providers() + except Exception as exc: # noqa: BLE001 - picker is best-effort + logger.debug("pet provider list failed: %s", exc) + providers = [] + return _ok(rid, {"available": available, "providers": providers}) except Exception as exc: # noqa: BLE001 - never break the surface logger.debug("pet.generate.status failed: %s", exc) - return _ok(rid, {"available": False}) + return _ok(rid, {"available": False, "providers": []}) @method("pet.generate") def _(rid, params: dict) -> dict: """Generate candidate base looks for a new pet (the draft/variant step). - Params: ``prompt`` (required), ``count`` (default 4), ``style`` (default - ``auto``). Returns ``{ok, token, drafts:[{index, dataUri}]}`` — the token - keys the staged base images for a later ``pet.hatch``. Retry == call again - (fresh token). Heavy (network): runs on the worker pool. + Params: ``prompt`` (required unless ``referenceImage`` is given), ``count`` + (default 4), ``style`` (default ``auto``), ``referenceImage`` (optional data + URL — a user photo/reference every draft is grounded on, e.g. to make *their* + pet). Returns ``{ok, token, drafts:[{index, dataUri}]}`` — the token keys the + staged base images for a later ``pet.hatch``. Heavy (network): worker pool. 
""" prompt = str(params.get("prompt") or "").strip() - if not prompt: + ref_raw = str(params.get("referenceImage") or "").strip() + if not prompt and not ref_raw: return _err(rid, 4004, "missing prompt") try: count = max(1, min(4, int(params.get("count") or 4))) @@ -6188,7 +6360,7 @@ def _(rid, params: dict) -> dict: import uuid from agent.pet.generate import generate_base_drafts - from agent.pet.generate.imagegen import GenerationError + from agent.pet.generate.imagegen import GenerationError, resolve_provider root = _pet_gen_root() _pet_gen_sweep(root) @@ -6199,6 +6371,27 @@ def _(rid, params: dict) -> dict: _pet_cancel_arm(token) stage = root / token stage.mkdir(parents=True, exist_ok=True) + + reference_images = None + if ref_raw: + try: + reference_images = _pet_reference_images_from_data_url(ref_raw, stage) + except ValueError as exc: + _pet_cancel_release(token) + return _err(rid, 4004, str(exc)) + + # Optional desktop picker override: resolve the chosen provider up front so + # a bad/uncredentialed pick fails fast instead of mid-fan-out. + provider_name = str(params.get("provider") or "").strip() + sprite = None + if provider_name: + try: + sprite = resolve_provider(require_references=bool(reference_images), prefer=provider_name) + except GenerationError as exc: + _pet_cancel_release(token) + return _err(rid, 5031, str(exc)) + + concept = prompt or "a pet based on the reference image" out: list[dict] = [] # Hand the token to the client up front (token-only init event) so a Stop @@ -6230,9 +6423,11 @@ def _(rid, params: dict) -> dict: try: generate_base_drafts( - prompt, + concept, n=count, style=style, + reference_images=reference_images, + provider=sprite, on_draft=_on_draft, is_cancelled=lambda: _pet_is_cancelled(token), ) @@ -6268,6 +6463,11 @@ def _(rid, params: dict) -> dict: ``pet`` is the renderer payload. Heavy (network + raster): worker pool. 
""" token = str(params.get("token") or "").strip() + # Hatch cancellation rides its own key, not the generation token: hatching a + # draft mid-generation means pet.generate is still releasing `token`, which + # would otherwise wipe the arm we set here. Falls back to `token` for clients + # that don't send one. + cancel_token = str(params.get("cancelToken") or "").strip() or token index = params.get("index", 0) name = str(params.get("name") or "").strip() if not token: @@ -6282,13 +6482,22 @@ def _(rid, params: dict) -> dict: try: from agent.pet import store from agent.pet.generate import hatch_pet - from agent.pet.generate.imagegen import GenerationError + from agent.pet.generate.imagegen import GenerationError, resolve_provider base = _pet_gen_root() / token / f"draft-{index}.png" if not base.is_file(): return _err(rid, 4004, "draft expired — generate again") - _pet_cancel_arm(token) + # Optional desktop picker override (rows always need reference grounding). + provider_name = str(params.get("provider") or "").strip() + sprite = None + if provider_name: + try: + sprite = resolve_provider(require_references=True, prefer=provider_name) + except GenerationError as exc: + return _err(rid, 5031, str(exc)) + + _pet_cancel_arm(cancel_token) slug = store.unique_slug(name) def _on_progress(event: str, detail: str) -> None: @@ -6312,13 +6521,14 @@ def _(rid, params: dict) -> dict: description=str(params.get("description") or ""), concept=str(params.get("prompt") or name), style=str(params.get("style") or "auto").strip() or "auto", + provider=sprite, on_progress=_on_progress, - is_cancelled=lambda: _pet_is_cancelled(token), + is_cancelled=lambda: _pet_is_cancelled(cancel_token), ) except GenerationError as exc: return _err(rid, 5031, str(exc)) finally: - _pet_cancel_release(token) + _pet_cancel_release(cancel_token) pet = store.load_pet(result.slug) payload = _pet_sprite_payload(pet, scale=_pet_config_scale()) if pet else {}