diff --git a/skills/creative/baoyu-comic/PORT_NOTES.md b/skills/creative/baoyu-comic/PORT_NOTES.md index 24b21db873..142a08d1ee 100644 --- a/skills/creative/baoyu-comic/PORT_NOTES.md +++ b/skills/creative/baoyu-comic/PORT_NOTES.md @@ -12,10 +12,10 @@ Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56. | Trigger | Slash commands / CLI flags | Natural language skill matching | | User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra | | User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) | -| Image generation | baoyu-imagine (Bun/TypeScript) | `image_generate` tool | +| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` — **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory | +| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed — the PDF merge step is out of scope for this port; pages are delivered as PNGs only | | Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only | | File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) | -| Runtime abstraction | `${BUN_X}` resolution | Direct `bun` invocation for `scripts/merge-to-pdf.ts` | ### Structural removals @@ -23,17 +23,32 @@ Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56. - `first-time-setup.md` — blocking first-time setup flow for EXTEND.md - `preferences-schema.md` — EXTEND.md YAML schema - `watermark-guide.md` — watermark config (tied to EXTEND.md) +- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs. +- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8. 
- **Workflow Step 1.1** — "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2. - **Generic "User Input Tools" and "Image Generation Tools" preambles** — SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly. +### Image generation strategy changes + +`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured: + +- **Character sheet** is still generated, but it is an **agent-facing** reference used when writing each page's prompt text. `image_generate` never sees it as a visual input. +- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`. +- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency. +- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<url>" -o <target>.png`) and verified before the workflow advances. + ### SKILL.md reductions - CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions. - Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references. -- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues. 
+- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed. - `auto-selection.md`: priority order dropped the EXTEND.md tier. - `analysis-framework.md`: language-priority comment updated (user option → conversation → source). +### File naming convention + +Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention. + ### What was preserved verbatim - All 6 art-style definitions (`references/art-styles/`) @@ -41,7 +56,6 @@ Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56. - All 7 layout definitions (`references/layouts/`) - Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md` - Preset bodies (only the first few intro lines adapted; special rules unchanged) -- `scripts/merge-to-pdf.ts` (Bun-compatible on Linux/macOS) - Author, version, homepage attribution ## Syncing with upstream @@ -59,3 +73,5 @@ diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skill ``` Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations. + +If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`. 
diff --git a/skills/creative/baoyu-comic/SKILL.md b/skills/creative/baoyu-comic/SKILL.md index e6ec91d6fd..9c950cbac0 100644 --- a/skills/creative/baoyu-comic/SKILL.md +++ b/skills/creative/baoyu-comic/SKILL.md @@ -22,10 +22,10 @@ Trigger this skill when the user asks to create a knowledge/educational comic, b ## Reference Images -Users may supply reference images to guide art style, palette, scene composition, or subject. This is **separate from** the auto-generated character sheet (Step 7.1) — both can coexist: user refs guide the look, the character sheet anchors recurring character identity. +Hermes' `image_generate` tool is **prompt-only** — it accepts a text prompt and an aspect ratio, and returns an image URL. It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt: **Intake**: Accept file paths when the user provides them (or pastes images in conversation). -- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output +- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance - Pasted image with no path → ask the user for the path via `clarify`, or extract style traits verbally as a text fallback - No reference → skip this section @@ -33,9 +33,9 @@ Users may supply reference images to guide art style, palette, scene composition | Usage | Effect | |-------|--------| -| `direct` | Pass the file to `image_generate` as a reference image on every page (or selected pages) | | `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body | | `palette` | Extract hex colors and append to every page's prompt body | +| `scene` | Extract scene composition or subject notes and append to the relevant page(s) | **Record in each page's prompt frontmatter** when refs exist: @@ -43,14 +43,11 @@ Users may supply reference images to guide art style, palette, scene composition 
references: - ref_id: 01 filename: 01-ref-scene.png - usage: direct + usage: style + traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds" ``` -**At generation time**: -- Verify each referenced file exists on disk -- If `usage: direct` AND `image_generate` accepts multiple reference images → pass both the character sheet (Step 7.2) and the user refs; compress images first per Step 7.1's guidance to avoid payload failures -- If only one ref slot is available → prefer the character sheet for pages with recurring characters; embed user-ref traits in the prompt body instead -- For `style`/`palette` usage → embed extracted traits in every page's prompt text +Character consistency is still driven by the **character sheet workflow** (Step 7.1–7.2) below, which relies on detailed text descriptions rather than direct image references. ## Options @@ -63,7 +60,7 @@ references: | Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement | | Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio | | Language | auto (default), zh, en, ja, etc. | Output language | -| Refs | File paths | Reference images applied to every page for style / palette / scene guidance. See [Reference Images](#reference-images) above. | +| Refs | File paths | Reference images used for style / palette / scene trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. | ### Partial Workflow Options @@ -94,14 +91,6 @@ Details: [references/partial-workflows.md](references/partial-workflows.md) - **Compatibility matrix** and **content-signal → preset** table live in [references/auto-selection.md](references/auto-selection.md). Read it before recommending combinations in Step 2. 
-## Scripts - -| Script | Purpose | -|--------|---------| -| `scripts/merge-to-pdf.ts` | Merge comic pages into PDF (runs with `bun`) | - -Resolve `{baseDir}` as this SKILL.md's directory; script path is `{baseDir}/scripts/merge-to-pdf.ts`. - ## File Structure Output directory: `comic/{topic-slug}/` @@ -111,14 +100,14 @@ Output directory: `comic/{topic-slug}/` **Contents**: | File | Description | |------|-------------| -| `source-{slug}.{ext}` | Source files | +| `source-{slug}.md` | Saved source content (kebab-case slug matches the output directory) | | `analysis.md` | Content analysis | | `storyboard.md` | Storyboard with panel breakdown | | `characters/characters.md` | Character definitions | -| `characters/characters.png` | Character reference sheet | +| `characters/characters.png` | Character reference sheet (downloaded from `image_generate`) | | `prompts/NN-{cover\|page}-[slug].md` | Generation prompts | -| `NN-{cover\|page}-[slug].png` | Generated images | -| `{topic-slug}.pdf` | Final merged PDF | +| `NN-{cover\|page}-[slug].png` | Generated images (downloaded from `image_generate`) | +| `refs/NN-ref-{slug}.{ext}` | User-supplied reference images (optional, for provenance) | ## Language Handling @@ -151,22 +140,21 @@ Comic Progress: - [ ] Step 6: Review prompts (conditional) - [ ] Step 7: Generate images - [ ] 7.1 Generate character sheet (if needed) → characters/characters.png - - [ ] 7.2 Generate pages (with character ref if sheet exists) -- [ ] Step 8: Merge to PDF -- [ ] Step 9: Completion report + - [ ] 7.2 Generate pages (with character descriptions embedded in prompt) +- [ ] Step 8: Completion report ``` ### Flow ``` -Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Storyboard → [Review?] → Prompts → [Review?] → Images → PDF → Complete +Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Storyboard → [Review?] → Prompts → [Review?] 
→ Images → Complete ``` ### Step Summary | Step | Action | Key Output | |------|--------|------------| -| 1.1 | Analyze content | `analysis.md` | +| 1.1 | Analyze content | `analysis.md`, `source-{slug}.md` | | 1.2 | Check existing directory | Handle conflicts | | 2 | Confirm style, focus, audience, reviews | User preferences | | 3 | Generate storyboard + characters | `storyboard.md`, `characters/` | @@ -174,9 +162,8 @@ Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Story | 5 | Generate prompts | `prompts/*.md` | | 6 | Review prompts (if requested) | User approval | | 7.1 | Generate character sheet (if needed) | `characters/characters.png` | -| 7.2 | Generate pages (with character ref if available) | `*.png` files | -| 8 | Merge to PDF | `{slug}.pdf` | -| 9 | Completion report | Summary | +| 7.2 | Generate pages | `*.png` files | +| 8 | Completion report | Summary | ### User Questions @@ -184,11 +171,11 @@ Use the `clarify` tool to confirm options. Since `clarify` handles one question ### Step 7: Image Generation -Use Hermes' built-in `image_generate` tool for all image rendering. +Use Hermes' built-in `image_generate` tool for all image rendering. Its schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`); it **returns a URL**, not a local file. Every generated page or character sheet must therefore be downloaded to the output directory. **Prompt file requirement (hard)**: write each image's full, final prompt to a standalone file under `prompts/` (naming: `NN-{type}-[slug].md`) BEFORE calling `image_generate`. The prompt file is the reproducibility record. -**Aspect ratio mapping** — `image_generate` supports `landscape`, `portrait`, and `square`. 
Map as follows: +**Aspect ratio mapping** — the storyboard's `aspect_ratio` field maps to `image_generate`'s format as follows: | Storyboard ratio | `image_generate` format | |------------------|-------------------------| @@ -196,19 +183,21 @@ Use Hermes' built-in `image_generate` tool for all image rendering. | `4:3`, `16:9`, `3:2` | `landscape` | | `1:1` | `square` | -**7.1 Character sheet** — generate it (to `characters/characters.png`, aspect `landscape`) when the comic is multi-page with recurring characters. Skip for simple presets (e.g., four-panel minimalist) or single-page comics. Compress to JPEG before using as a reference (`sips -s format jpeg -s formatOptions 80 …` on macOS, `pngquant --quality=65-80 …` on Linux) to avoid payload failures. The prompt file at `characters/characters.md` must exist before invoking `image_generate`. +**Download step** — after every `image_generate` call: +1. Read the URL from the tool result +2. Fetch the image bytes (e.g., `curl -fsSL "<url>" -o <target>.png`) +3. Verify the file exists and is non-empty before proceeding to the next page -**7.2 Pages** — each page's prompt MUST already be at `prompts/NN-{cover|page}-[slug].md` before invoking `image_generate`. Strategy depends on the character sheet: +**7.1 Character sheet** — generate it (to `characters/characters.png`, aspect `landscape`) when the comic is multi-page with recurring characters. Skip for simple presets (e.g., four-panel minimalist) or single-page comics. The prompt file at `characters/characters.md` must exist before invoking `image_generate`. After download, the character sheet is consumed **for the agent's own reference** when writing each page's prompt text — Hermes' `image_generate` cannot accept it as a visual input. 
-| Character sheet | `image_generate` reference support | Strategy | -|-----------------|------------------------------------|----------| -| Exists | Supported | Pass sheet as reference image on every page | -| Exists | Not supported | Prepend character descriptions to every prompt file | -| Skipped | — | All descriptions inline in prompt | +**7.2 Pages** — each page's prompt MUST already be at `prompts/NN-{cover|page}-[slug].md` before invoking `image_generate`. Because `image_generate` is prompt-only, character consistency is enforced by **embedding character descriptions in every prompt**: -**Backup rule**: existing `prompts/…md` and `…png` files → rename with `-backup-YYYYMMDD-HHMMSS` suffix (use `write_file` / standard shell rename) before regenerating. Aspect ratio from storyboard (default `3:4`; preset may override). +| Character sheet | Strategy | +|-----------------|----------| +| Exists | Prepend relevant character descriptions (from `characters/characters.md`) to every page prompt | +| Skipped | Prompt file already contains all descriptions inline | -**Reference failure recovery**: compress sheet → retry → still fails → drop the reference and embed character descriptions in the prompt text. +**Backup rule**: existing `prompts/…md` and `…png` files → rename with `-backup-YYYYMMDD-HHMMSS` suffix before regenerating. Full step-by-step workflow (analysis, storyboard, review gates, regeneration variants): [references/workflow.md](references/workflow.md). 
@@ -235,18 +224,18 @@ Full step-by-step workflow (analysis, storyboard, review gates, regeneration var | Action | Steps | |--------|-------| -| **Edit** | **Update prompt file FIRST** → regenerate image → regenerate PDF | -| **Add** | Create prompt at position → generate with character ref → renumber subsequent → update storyboard → regenerate PDF | -| **Delete** | Remove files → renumber subsequent → update storyboard → regenerate PDF | +| **Edit** | **Update prompt file FIRST** → regenerate image → download new PNG | +| **Add** | Create prompt at position → generate with character descriptions embedded → renumber subsequent → update storyboard | +| **Delete** | Remove files → renumber subsequent → update storyboard | **IMPORTANT**: When updating pages, ALWAYS update the prompt file (`prompts/NN-{cover|page}-[slug].md`) FIRST before regenerating. This ensures changes are documented and reproducible. ## Pitfalls - Image generation: 10-30 seconds per page; auto-retry once on failure +- **Always download** the URL returned by `image_generate` to a local PNG — downstream tooling (and the user's review) expects files in the output directory, not ephemeral URLs - Use stylized alternatives for sensitive public figures - **Step 2 confirmation required** - do not skip - **Steps 4/6 conditional** - only if user requested in Step 2 -- **Step 7.1 character sheet** - recommended for multi-page comics, optional for simple presets -- **Step 7.2 character reference** - pass sheet as reference if it exists; compress/convert on failure; fall back to prompt-only +- **Step 7.1 character sheet** - recommended for multi-page comics, optional for simple presets. 
It is an **agent-facing** reference used to write consistent page prompts; `image_generate` does not accept it as a visual input - **Strip secrets** — scan source content for API keys, tokens, or credentials before writing any output file diff --git a/skills/creative/baoyu-comic/references/partial-workflows.md b/skills/creative/baoyu-comic/references/partial-workflows.md index 816aa61e09..749b5ac7b2 100644 --- a/skills/creative/baoyu-comic/references/partial-workflows.md +++ b/skills/creative/baoyu-comic/references/partial-workflows.md @@ -8,8 +8,8 @@ Options to run specific parts of the workflow. Trigger these via natural languag |--------|----------------|--------| | Storyboard only | 1-3 | `storyboard.md` + `characters/` | | Prompts only | 1-5 | + `prompts/*.md` | -| Images only | 7-9 | + images + PDF | -| Regenerate N | 7 (partial) | Specific page(s) + PDF | +| Images only | 7-8 | + images | +| Regenerate N | 7 (partial) | Specific page(s) | --- @@ -60,7 +60,7 @@ Generate images from existing prompts (starts at Step 7). **User cue**: "generate images from existing prompts", "run the images now" (pointing at an existing `comic/topic-slug/` directory). -**Workflow**: Skip to Step 7, then 8-9 +**Workflow**: Skip to Step 7, then 8 **Prerequisites** (must exist in directory): - `prompts/` directory with page prompt files @@ -70,7 +70,6 @@ Generate images from existing prompts (starts at Step 7). **Output**: - `characters/characters.png` (if not exists) - `NN-{cover|page}-[slug].png` images -- `{topic-slug}.pdf` **Use case**: Re-generate images after editing prompts. Useful for: - Recovering from failed image generation @@ -88,15 +87,14 @@ Regenerate specific pages only. **Workflow**: 1. Read existing prompts for specified pages 2. Regenerate images only for those pages via `image_generate` -3. Regenerate PDF +3. 
Download each returned URL and overwrite the existing PNG **Prerequisites** (must exist): - `prompts/NN-{cover|page}-[slug].md` for specified pages -- `characters/characters.png` (for reference, if it was used originally) +- `characters/characters.md` (for agent-side consistency checks, if it was used originally) **Output**: - Regenerated `NN-{cover|page}-[slug].png` for specified pages -- Updated `{topic-slug}.pdf` **Use case**: Fix specific pages without regenerating entire comic. Useful for: - Fixing a single problematic page diff --git a/skills/creative/baoyu-comic/references/workflow.md b/skills/creative/baoyu-comic/references/workflow.md index a2de541c96..e8c811beb5 100644 --- a/skills/creative/baoyu-comic/references/workflow.md +++ b/skills/creative/baoyu-comic/references/workflow.md @@ -19,14 +19,13 @@ Comic Progress: - [ ] Step 7: Generate images - [ ] 7.1 Character sheet (if needed) - [ ] 7.2 Generate pages -- [ ] Step 8: Merge to PDF -- [ ] Step 9: Completion report +- [ ] Step 8: Completion report ``` ## Flow Diagram ``` -Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Storyboard → [Review Outline?] → Prompts → [Review Prompts?] → Images → PDF → Complete +Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Storyboard → [Review Outline?] → Prompts → [Review Prompts?] → Images → Complete ``` --- @@ -40,8 +39,8 @@ Read source content, save it if needed, and perform deep analysis. **Actions**: 1. 
**Save source content** (if not already a file): - If user provides a file path: use as-is - - If user pastes content: save to `source.md` in target directory using `write_file` - - **Backup rule**: If `source.md` exists, rename to `source-backup-YYYYMMDD-HHMMSS.md` + - If user pastes content: save to `source-{slug}.md` in the target directory using `write_file`, where `{slug}` is the kebab-case topic slug used for the output directory + - **Backup rule**: If `source-{slug}.md` already exists, rename it to `source-{slug}-backup-YYYYMMDD-HHMMSS.md` before writing 2. Read source content 3. **Deep analysis** following `analysis-framework.md`: - Target audience identification @@ -246,7 +245,7 @@ Create image generation prompts for all pages. **For each page (cover + pages)**: 1. Create prompt following art style + tone guidelines -2. Include character visual descriptions for consistency +2. **Embed character descriptions** inline (copy relevant traits from `characters/characters.md`) — `image_generate` is prompt-only, so the prompt text is the sole vehicle for character consistency 3. Save to `prompts/NN-{cover|page}-[slug].md` using `write_file` - **Backup rule**: If prompt file exists, rename to `prompts/NN-{cover|page}-[slug]-backup-YYYYMMDD-HHMMSS.md` @@ -257,8 +256,9 @@ Create image generation prompts for all pages. ## Visual Style Art: [art style] | Tone: [tone] | Layout: [layout type] -## Character Reference -[Character descriptions from characters/characters.md] +## Character Reference (embedded inline — maintain exact traits below) +- [Character A]: [detailed visual traits from characters/characters.md] +- [Character B]: [detailed visual traits from characters/characters.md] ## Panel Breakdown [From storyboard.md - panel descriptions, actions, dialogue] @@ -306,9 +306,9 @@ options: ## Step 7: Generate Images -With confirmed prompts from Step 5/6, use the `image_generate` tool for all image rendering. 
+With confirmed prompts from Step 5/6, use the `image_generate` tool. The tool accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`) and **returns a URL** — it does not accept reference images and does not write local files. Every invocation must be followed by a download step. -**Aspect ratio mapping** — `image_generate` supports `landscape`, `portrait`, and `square`: +**Aspect ratio mapping** — map the storyboard's `aspect_ratio` to the tool's enum: | Storyboard ratio | `image_generate` format | |------------------|-------------------------| @@ -316,6 +316,12 @@ With confirmed prompts from Step 5/6, use the `image_generate` tool for all imag | `4:3`, `16:9`, `3:2` | `landscape` | | `1:1` | `square` | +**Download procedure** (run after every successful `image_generate` call): + +1. Extract the `url` field from the tool result +2. Fetch it to disk, e.g. `curl -fsSL "<url>" -o comic/{slug}/<target>.png` +3. Verify the file is non-empty (`test -s comic/{slug}/<target>.png`); on failure, retry the generation once + ### 7.1 Generate Character Reference Sheet (conditional) Character sheet is recommended for multi-page comics with recurring characters, but **NOT required** for all presets. @@ -331,96 +337,56 @@ Character sheet is recommended for multi-page comics with recurring characters, **When generating**: 1. Use Reference Sheet Prompt from `characters/characters.md` 2. **Backup rule**: If `characters/characters.png` exists, rename to `characters/characters-backup-YYYYMMDD-HHMMSS.png` -3. Call `image_generate` with `landscape` format → save to `characters/characters.png` -4. **Compress** to reduce payload size when used as a reference: - - macOS: `sips -s format jpeg -s formatOptions 80 characters.png --out characters-compressed.jpg` - - Linux: `pngquant --quality=65-80 characters.png -o characters-compressed.png` +3. Call `image_generate` with `landscape` format +4. 
Download the returned URL → save to `characters/characters.png` + +**Important**: the downloaded sheet is for the **agent's own reference** when writing each page's prompt text below. `image_generate` cannot accept it as a visual input. ### 7.2 Generate Comic Pages **Before generating any page**: 1. Confirm each prompt file exists at `prompts/NN-{cover|page}-[slug].md` -2. Check whether `image_generate` accepts a reference image in the current runtime -3. Determine if character sheet exists -4. Choose the appropriate strategy below +2. Confirm that each prompt has character descriptions embedded inline (see Step 5). `image_generate` is prompt-only, so the prompt text is the sole consistency mechanism. -**Page Generation Strategy**: +**Page Generation Strategy** (embed everything in the prompt text): -| Character Sheet | `image_generate` reference support | Strategy | -|-----------------|------------------------------------|----------| -| Exists | Supported | **A**: Pass character sheet as reference with every page | -| Exists | Not supported | **B**: Embed character descriptions in every prompt | -| Skipped | — | **C**: Prompt file contains all descriptions inline | +| Character sheet | Strategy | +|-----------------|----------| +| Exists | Use it as an agent-side reference when composing each prompt; embed the key traits inline in the prompt text | +| Skipped | Prompt file already contains all descriptions inline | -**Strategy A: Pass reference image** - -- For every page, read `prompts/NN-{type}-[slug].md` as the prompt input -- Save output to `NN-{type}-[slug].png` -- Use aspect ratio from storyboard (mapped to `landscape`/`portrait`/`square`) -- Pass `characters/characters.png` (or compressed version) as the reference image - -**Reference failure recovery**: -If generation fails when passing the reference: -1. 
**Compress/convert** reference image: - - `sips -s format jpeg -s formatOptions 70 characters.png --out characters-compressed.jpg` - - Or reduce resolution: `sips -Z 1024 characters.png --out characters-small.png` -2. **Retry** with compressed/converted image -3. **If still fails**: Fall back to **Strategy C** — generate WITHOUT reference, with character descriptions embedded in prompt text - -**Strategy B: Embedding character descriptions in prompt** - -When reference images are not supported, create combined prompt files: +**Example embedded prompt** (`prompts/01-page-xxx.md`): ```markdown -# prompts/01-page-xxx.md (with embedded character reference) +# Page 01: [Title] -## Character Reference (maintain consistency) -[Copy relevant sections from characters/characters.md here] -- 大雄:Japanese boy, round glasses, yellow shirt, navy shorts... -- 哆啦 A 梦:Round blue robot cat, white belly, red nose, golden bell... +## Character Reference (embedded inline — maintain consistency) +- 大雄:Japanese boy, round glasses, yellow shirt, navy shorts, worried expression... +- 哆啦 A 梦:Round blue robot cat, white belly, red nose, golden bell, 4D pocket... ## Page Content -[Original page prompt here] +[Original page prompt body — panels, dialogue, visual metaphors] ``` -**Strategy C: Prompt-only (no character sheet)** - -When character sheet was skipped or the reference failed: -- Prompt file already contains all character descriptions inline -- No reference image needed -- Rely on detailed text descriptions for character consistency - **For each page (cover + pages)**: 1. Read prompt from `prompts/NN-{cover|page}-[slug].md` 2. **Backup rule**: If image file exists, rename to `NN-{cover|page}-[slug]-backup-YYYYMMDD-HHMMSS.png` -3. Generate image via `image_generate` using Strategy A, B, or C -4. Save to `NN-{cover|page}-[slug].png` +3. Call `image_generate` with the prompt text and mapped aspect ratio +4. Download the returned URL → save to `NN-{cover|page}-[slug].png` 5. 
Report progress after each generation: "Generated X/N: [page title]" --- -## Step 8: Merge to PDF - -After all images generated: - -```bash -bun {baseDir}/scripts/merge-to-pdf.ts -``` - -Where `{baseDir}` is this skill's directory. Creates `{topic-slug}.pdf` with all pages as full-page images. - ---- - -## Step 9: Completion Report +## Step 8: Completion Report ``` Comic Complete! Title: [title] | Art: [art] | Tone: [tone] | Pages: [count] | Aspect: [ratio] | Language: [lang] Location: [path] +✓ source-{slug}.md (if content was pasted) ✓ analysis.md ✓ characters.png (if generated) ✓ 00-cover-[slug].png ... NN-page-[slug].png -✓ {topic-slug}.pdf ``` --- @@ -429,9 +395,9 @@ Location: [path] | Action | Steps | |--------|-------| -| **Edit** | Update prompt → Regenerate image → Regenerate PDF | -| **Add** | Create prompt at position → Generate image → Renumber subsequent (NN+1) → Update storyboard → Regenerate PDF | -| **Delete** | Remove files → Renumber subsequent (NN-1) → Update storyboard → Regenerate PDF | +| **Edit** | Update prompt → Regenerate image → Download new PNG | +| **Add** | Create prompt at position → Generate image → Download PNG → Renumber subsequent (NN+1) → Update storyboard | +| **Delete** | Remove files → Renumber subsequent (NN-1) → Update storyboard | **File naming**: `NN-{cover|page}-[slug].png` (e.g., `03-page-enigma-machine.png`) - Slugs: kebab-case, unique, derived from content diff --git a/skills/creative/baoyu-comic/scripts/merge-to-pdf.ts b/skills/creative/baoyu-comic/scripts/merge-to-pdf.ts deleted file mode 100644 index bdd29c60c8..0000000000 --- a/skills/creative/baoyu-comic/scripts/merge-to-pdf.ts +++ /dev/null @@ -1,116 +0,0 @@ -import { existsSync, readdirSync, readFileSync } from "fs"; -import { join, basename } from "path"; -import { PDFDocument } from "pdf-lib"; - -interface PageInfo { - filename: string; - path: string; - index: number; - promptPath?: string; -} - -function parseArgs(): { dir: string; output?: string } { - 
const args = process.argv.slice(2); - let dir = ""; - let output: string | undefined; - - for (let i = 0; i < args.length; i++) { - if (args[i] === "--output" || args[i] === "-o") { - output = args[++i]; - } else if (!args[i].startsWith("-")) { - dir = args[i]; - } - } - - if (!dir) { - console.error("Usage: bun merge-to-pdf.ts [--output filename.pdf]"); - process.exit(1); - } - - return { dir, output }; -} - -function findComicPages(dir: string): PageInfo[] { - if (!existsSync(dir)) { - console.error(`Directory not found: ${dir}`); - process.exit(1); - } - - const files = readdirSync(dir); - const pagePattern = /^(\d+)-(cover|page)(-[\w-]+)?\.(png|jpg|jpeg)$/i; - const promptsDir = join(dir, "prompts"); - const hasPrompts = existsSync(promptsDir); - - const pages: PageInfo[] = files - .filter((f) => pagePattern.test(f)) - .map((f) => { - const match = f.match(pagePattern); - const baseName = f.replace(/\.(png|jpg|jpeg)$/i, ""); - const promptPath = hasPrompts ? join(promptsDir, `${baseName}.md`) : undefined; - - return { - filename: f, - path: join(dir, f), - index: parseInt(match![1], 10), - promptPath: promptPath && existsSync(promptPath) ? promptPath : undefined, - }; - }) - .sort((a, b) => a.index - b.index); - - if (pages.length === 0) { - console.error(`No comic pages found in: ${dir}`); - console.error("Expected format: 00-cover-slug.png, 01-page-slug.png, etc."); - process.exit(1); - } - - return pages; -} - -async function createPdf(pages: PageInfo[], outputPath: string) { - const pdfDoc = await PDFDocument.create(); - pdfDoc.setAuthor("baoyu-comic"); - pdfDoc.setSubject("Generated Comic"); - - for (const page of pages) { - const imageData = readFileSync(page.path); - const ext = page.filename.toLowerCase(); - const image = ext.endsWith(".png") - ? 
await pdfDoc.embedPng(imageData) - : await pdfDoc.embedJpg(imageData); - - const { width, height } = image; - const pdfPage = pdfDoc.addPage([width, height]); - - pdfPage.drawImage(image, { - x: 0, - y: 0, - width, - height, - }); - - console.log(`Added: ${page.filename}${page.promptPath ? " (prompt available)" : ""}`); - } - - const pdfBytes = await pdfDoc.save(); - await Bun.write(outputPath, pdfBytes); - - console.log(`\nCreated: ${outputPath}`); - console.log(`Total pages: ${pages.length}`); -} - -async function main() { - const { dir, output } = parseArgs(); - const pages = findComicPages(dir); - - const dirName = basename(dir) === "comic" ? basename(join(dir, "..")) : basename(dir); - const outputPath = output || join(dir, `${dirName}.pdf`); - - console.log(`Found ${pages.length} pages in: ${dir}\n`); - - await createPdf(pages, outputPath); -} - -main().catch((err) => { - console.error("Error:", err.message); - process.exit(1); -});