diff --git a/website/docs/developer-guide/image-gen-provider-plugin.md b/website/docs/developer-guide/image-gen-provider-plugin.md new file mode 100644 index 0000000000..e356e58228 --- /dev/null +++ b/website/docs/developer-guide/image-gen-provider-plugin.md @@ -0,0 +1,288 @@ +--- +sidebar_position: 11 +title: "Image Generation Provider Plugins" +description: "How to build an image-generation backend plugin for Hermes Agent" +--- + +# Building an Image Generation Provider Plugin + +Image-gen provider plugins register a backend that services every `image_generate` tool call — DALL·E, gpt-image, Grok, Flux, Imagen, Stable Diffusion, fal, Replicate, a local ComfyUI rig, anything. Built-in providers (OpenAI, OpenAI-Codex, xAI) all ship as plugins. You can add a new one, or override a bundled one, by dropping a directory into `plugins/image_gen/<name>/`. + +:::tip +Image-gen is one of several **backend plugins** Hermes supports. The others (with more specialized ABCs) are [Memory Provider Plugins](/docs/developer-guide/memory-provider-plugin), [Context Engine Plugins](/docs/developer-guide/context-engine-plugin), and [Model Provider Plugins](/docs/developer-guide/model-provider-plugin). General tool/hook/CLI plugins live in [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin). +::: + +## How discovery works + +Hermes scans for image-gen backends in three places: + +1. **Bundled** — `<repo>/plugins/image_gen/<name>/` (auto-loaded with `kind: backend`, always available) +2. **User** — `~/.hermes/plugins/image_gen/<name>/` (opt-in via `plugins.enabled`) +3. **Pip** — packages declaring a `hermes_agent.plugins` entry point + +Each plugin's `register(ctx)` function calls `ctx.register_image_gen_provider(...)` — that puts it into the registry in `agent/image_gen_registry.py`. The active provider is picked by `image_gen.provider` in `config.yaml`; `hermes tools` walks users through selection. + +The `image_generate` tool wrapper asks the registry for the active provider and dispatches there. 
If no provider is registered, the tool surfaces a helpful error pointing at `hermes tools`. + +## Directory structure + +``` +plugins/image_gen/my-backend/ +├── __init__.py # ImageGenProvider subclass + register() +└── plugin.yaml # Manifest with kind: backend +``` + +A bundled plugin is complete at this point. User plugins at `~/.hermes/plugins/image_gen/<name>/` need to be added to `plugins.enabled` in `config.yaml` (or run `hermes plugins enable <name>`). + +## The ImageGenProvider ABC + +Subclass `agent.image_gen_provider.ImageGenProvider`. The only required members are the `name` property and the `generate()` method — everything else has sane defaults: + +```python +# plugins/image_gen/my-backend/__init__.py +from typing import Any, Dict, List, Optional +import os + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + + +class MyBackendImageGenProvider(ImageGenProvider): + @property + def name(self) -> str: + # Stable id used in image_gen.provider config. Lowercase, no spaces. + return "my-backend" + + @property + def display_name(self) -> str: + # Human label shown in `hermes tools`. Defaults to name.title() if omitted. + return "My Backend" + + def is_available(self) -> bool: + # Return False if credentials or deps are missing. + # The tool's availability gate calls this before dispatch. + if not os.environ.get("MY_BACKEND_API_KEY"): + return False + try: + import my_backend_sdk # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + # Catalog shown in `hermes tools` model picker. 
+ return [ + { + "id": "my-model-fast", + "display": "My Model (Fast)", + "speed": "~5s", + "strengths": "Quick iteration", + "price": "$0.01/image", + }, + { + "id": "my-model-hq", + "display": "My Model (HQ)", + "speed": "~30s", + "strengths": "Highest fidelity", + "price": "$0.04/image", + }, + ] + + def default_model(self) -> Optional[str]: + return "my-model-fast" + + def get_setup_schema(self) -> Dict[str, Any]: + # Metadata for the `hermes tools` picker — keys to prompt for at setup. + return { + "name": "My Backend", + "badge": "paid", # optional; shown as a short tag in the picker + "tag": "One-line description shown under the name", + "env_vars": [ + { + "key": "MY_BACKEND_API_KEY", + "prompt": "My Backend API key", + "url": "https://my-backend.example.com/api-keys", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + prompt = (prompt or "").strip() + aspect_ratio = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required", + error_type="invalid_input", + provider=self.name, + prompt="", + aspect_ratio=aspect_ratio, + ) + + # Model selection precedence: env var → config → default. The helper + # _resolve_model() in the built-in openai plugin is a good reference. 
+ model_id = kwargs.get("model") or self.default_model() or "my-model-fast" + + try: + import my_backend_sdk + client = my_backend_sdk.Client(api_key=os.environ["MY_BACKEND_API_KEY"]) + result = client.generate( + prompt=prompt, + model=model_id, + aspect_ratio=aspect_ratio, + ) + + # Two shapes supported: + # - URL string: return it as `image` + # - base64 data: save under $HERMES_HOME/cache/images/ via save_b64_image() + if result.get("image_b64"): + path = save_b64_image( + result["image_b64"], + prefix=self.name, + extension="png", + ) + image = str(path) + else: + image = result["image_url"] + + return success_response( + image=image, + model=model_id, + prompt=prompt, + aspect_ratio=aspect_ratio, + provider=self.name, + ) + except Exception as exc: + return error_response( + error=str(exc), + error_type=type(exc).__name__, + provider=self.name, + model=model_id, + prompt=prompt, + aspect_ratio=aspect_ratio, + ) + + +def register(ctx) -> None: + """Plugin entry point — called once at load time.""" + ctx.register_image_gen_provider(MyBackendImageGenProvider()) +``` + +## plugin.yaml + +```yaml +name: my-backend +version: 1.0.0 +description: My image backend — text-to-image via My Backend SDK +author: Your Name +kind: backend +requires_env: + - MY_BACKEND_API_KEY +``` + +`kind: backend` is what routes the plugin to the image-gen registration path. `requires_env` is prompted during `hermes plugins install`. + +## ABC reference + +Full contract in `agent/image_gen_provider.py`. 
The methods you'll typically override: + +| Member | Required | Default | Purpose | +|---|---|---|---| +| `name` | ✅ | — | Stable id used in `image_gen.provider` config | +| `display_name` | — | `name.title()` | Label shown in `hermes tools` | +| `is_available()` | — | `True` | Gate for missing creds/deps | +| `list_models()` | — | `[]` | Catalog for `hermes tools` model picker | +| `default_model()` | — | first from `list_models()` | Fallback when no model is configured | +| `get_setup_schema()` | — | minimal | Picker metadata + env-var prompts | +| `generate(prompt, aspect_ratio, **kwargs)` | ✅ | — | The call | + +## Response format + +`generate()` must return a dict built via `success_response()` or `error_response()`. Both live in `agent/image_gen_provider.py`. + +**Success:** +```python +success_response( + image=<url-or-absolute-path>, + model=<model-id>, + prompt=<prompt>, + aspect_ratio="landscape" | "square" | "portrait", + provider=<provider-name>, + extra={...}, # optional backend-specific fields +) +``` + +**Error:** +```python +error_response( + error="human-readable message", + error_type="provider_error" | "invalid_input" | "<exception-class-name>", + provider=<provider-name>, + model=<model-id>, + prompt=<prompt>, + aspect_ratio=<resolved-aspect-ratio>, +) +``` + +The tool wrapper JSON-serializes the dict and hands it to the LLM. Errors are surfaced as the tool result; the LLM decides how to explain them to the user. + +## Handling base64 vs URL output + +Some backends return image URLs (fal, Replicate); others return base64 payloads (OpenAI gpt-image-2). For the base64 case, use `save_b64_image()` — it writes to `$HERMES_HOME/cache/images/<prefix>_<timestamp>_<uuid>.<ext>` and returns the absolute `Path`. Pass that path (as `str`) as `image=` in `success_response()`. Gateway delivery (Telegram photo bubble, Discord attachment) recognizes both URLs and absolute paths. + +## User overrides + +Drop a user plugin at `~/.hermes/plugins/image_gen/<name>/` with the same `name` property as a bundled one and enable it via `hermes plugins enable <name>` — the registry is last-writer-wins, so your version replaces the built-in. 
Useful for pointing an `openai` plugin at a private proxy, or swapping in a custom model catalog. + +## Testing + +```bash +export HERMES_HOME=/tmp/hermes-imggen-test +mkdir -p $HERMES_HOME/plugins/image_gen/my-backend +# …copy __init__.py + plugin.yaml into that dir… + +export MY_BACKEND_API_KEY=your-test-key +hermes plugins enable my-backend + +# Pick it as the active provider +echo "image_gen:" >> $HERMES_HOME/config.yaml +echo " provider: my-backend" >> $HERMES_HOME/config.yaml + +# Exercise it +hermes -z "Generate an image of a corgi in a spacesuit" +``` + +Or interactively: `hermes tools` → "Image Generation" → select `my-backend` → enter API key if prompted. + +## Reference implementations + +- **`plugins/image_gen/openai/__init__.py`** — gpt-image-2 at low/medium/high tiers as three virtual model IDs sharing one API model with different `quality` params. Good example of tiered models under a single backend + config.yaml precedence chain. +- **`plugins/image_gen/xai/__init__.py`** — Grok Imagine via xAI. Different shape (URL output, simpler catalog). +- **`plugins/image_gen/openai-codex/__init__.py`** — Codex-style Responses API variant reusing the OpenAI SDK with a different routing base URL. + +## Distribute via pip + +```toml +# pyproject.toml +[project.entry-points."hermes_agent.plugins"] +my-backend-imggen = "my_backend_imggen_package" +``` + +`my_backend_imggen_package` must expose a top-level `register` function. See [Distribute via pip](/docs/guides/build-a-hermes-plugin#distribute-via-pip) in the general plugin guide for the full setup. 
+ +## Related pages + +- [Image Generation](/docs/user-guide/features/image-generation) — user-facing feature documentation +- [Plugins overview](/docs/user-guide/features/plugins) — all plugin types at a glance +- [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin) — general tools/hooks/slash commands guide diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index a005035d5c..881d0a4cc3 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -19,13 +19,13 @@ Hermes has several distinct pluggable interfaces — some use Python `register_* | A **gateway channel** (Discord/Telegram/IRC/Teams/etc.) | [Adding Platform Adapters](/docs/developer-guide/adding-platform-adapters) | | A **memory backend** (Honcho/Mem0/Supermemory/etc.) | [Memory Provider Plugins](/docs/developer-guide/memory-provider-plugin) | | A **context-compression engine** | [Context Engine Plugins](/docs/developer-guide/context-engine-plugin) | -| An **image-generation backend** | See bundled examples in `plugins/image_gen/openai/` and `plugins/image_gen/xai/` | +| An **image-generation backend** | [Image Generation Provider Plugins](/docs/developer-guide/image-gen-provider-plugin) | | A **TTS backend** (any CLI — Piper, VoxCPM, Kokoro, voice cloning, …) | [TTS custom command providers](/docs/user-guide/features/tts#custom-command-providers) — config-driven, no Python needed | | An **STT backend** (custom whisper / ASR CLI) | [Voice Message Transcription](/docs/user-guide/features/tts#voice-message-transcription-stt) — set `HERMES_LOCAL_STT_COMMAND` to a shell template | | **External tools via MCP** (filesystem, GitHub, Linear, any MCP server) | [MCP](/docs/user-guide/features/mcp) — declare `mcp_servers.` in `config.yaml` | | **Gateway event hooks** (fire on startup, session events, commands) | [Event Hooks](/docs/user-guide/features/hooks#gateway-event-hooks) — drop `HOOK.yaml` + 
`handler.py` into `~/.hermes/hooks//` | | **Shell hooks** (run a shell command on events) | [Shell Hooks](/docs/user-guide/features/hooks#shell-hooks) — declare under `hooks:` in `config.yaml` | -| **Additional skill sources** (custom GitHub repos, private skill indexes) | [Skills](/docs/user-guide/features/skills) — `hermes skills tap add ` | +| **Additional skill sources** (custom GitHub repos, private skill indexes) | [Skills](/docs/user-guide/features/skills) — `hermes skills tap add ` · [Publishing a tap](/docs/user-guide/features/skills#publishing-a-custom-skill-tap) | | A first-class **core** inference provider (not a plugin) | [Adding Providers](/docs/developer-guide/adding-providers) | See the full [Pluggable interfaces table](/docs/user-guide/features/plugins#pluggable-interfaces--where-to-go-for-each) for a consolidated view of every extension surface including config-driven (TTS, STT, MCP, shell hooks) and drop-in directory (gateway hooks) styles. @@ -854,6 +854,8 @@ version: 1.0.0 description: Custom image generation backend ``` +**Full guide:** [Image Generation Provider Plugins](/docs/developer-guide/image-gen-provider-plugin) — full `ImageGenProvider` ABC, `list_models()` / `get_setup_schema()` metadata, `success_response()`/`error_response()` helpers, base64 vs URL output, user overrides, pip distribution. + **Reference examples:** `plugins/image_gen/openai/` (DALL-E / GPT-Image via OpenAI SDK), `plugins/image_gen/openai-codex/`, `plugins/image_gen/xai/` (Grok image gen). 
## Non-Python extension surfaces @@ -921,7 +923,7 @@ Supports all the same events as Python plugin hooks (`pre_tool_call`, `post_tool ### Skill sources — add a custom skill registry -If you maintain a private GitHub repo of skills (or want to pull from a community index beyond the built-in sources), add it as a **tap**: +If you maintain a GitHub repo of skills (or want to pull from a community index beyond the built-in sources), add it as a **tap**: ```bash hermes skills tap add myorg/skills-repo @@ -929,7 +931,9 @@ hermes skills search my-workflow --source myorg/skills-repo hermes skills install myorg/skills-repo/my-workflow ``` -**Full guide:** [Skills Hub](/docs/user-guide/features/skills#skills-hub). +Publishing your own tap is just a GitHub repo with `skills/<skill-name>/SKILL.md` directories — no server or registry signup needed. + +**Full guides:** [Skills Hub](/docs/user-guide/features/skills#skills-hub) · [Publishing a custom tap](/docs/user-guide/features/skills#publishing-a-custom-skill-tap) (repo layout, minimal example, non-default paths, trust levels). ### TTS / STT via command templates diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index bd49b02bf6..5c4628a88e 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -108,7 +108,7 @@ Every `ctx.*` API below is available inside a plugin's `register(ctx)` function. 
| Gate on env vars | `requires_env: [API_KEY]` in plugin.yaml — prompted during `hermes plugins install` | | Distribute via pip | `[project.entry-points."hermes_agent.plugins"]` | | Register a gateway platform (Discord, Telegram, IRC, …) | `ctx.register_platform(name, label, adapter_factory, check_fn, ...)` — see [Adding Platform Adapters](/docs/developer-guide/adding-platform-adapters) | -| Register an image-generation backend | `ctx.register_image_gen_provider(provider)` — see `plugins/image_gen/openai/` for an example | +| Register an image-generation backend | `ctx.register_image_gen_provider(provider)` — see [Image Generation Provider Plugins](/docs/developer-guide/image-gen-provider-plugin) | | Register a context-compression engine | `ctx.register_context_engine(engine)` — see [Context Engine Plugins](/docs/developer-guide/context-engine-plugin) | | Register a memory backend | Subclass `MemoryProvider` in `plugins/memory//__init__.py` — see [Memory Provider Plugins](/docs/developer-guide/memory-provider-plugin) (uses a separate discovery system) | | Register an inference backend (LLM provider) | `register_provider(ProviderProfile(...))` in `plugins/model-providers//__init__.py` — see [Model Provider Plugins](/docs/developer-guide/model-provider-plugin) (uses a separate discovery system) | @@ -228,11 +228,11 @@ The table above shows the four plugin categories, but within "General plugins" t | A **gateway channel** (Discord / Telegram / IRC / Teams / etc.) 
| Platform plugin — `ctx.register_platform()` in `plugins/platforms//` | [Adding Platform Adapters](/docs/developer-guide/adding-platform-adapters) | | A **memory backend** (Honcho, Mem0, Supermemory, …) | Memory plugin — subclass `MemoryProvider` in `plugins/memory//` | [Memory Provider Plugins](/docs/developer-guide/memory-provider-plugin) | | A **context-compression strategy** | Context-engine plugin — `ctx.register_context_engine()` | [Context Engine Plugins](/docs/developer-guide/context-engine-plugin) | -| An **image-generation backend** (DALL·E, SDXL, …) | Backend plugin — `ctx.register_image_gen_provider()` | See bundled examples in `plugins/image_gen/openai/` and `plugins/image_gen/xai/` | +| An **image-generation backend** (DALL·E, SDXL, …) | Backend plugin — `ctx.register_image_gen_provider()` | [Image Generation Provider Plugins](/docs/developer-guide/image-gen-provider-plugin) | | A **TTS backend** (any CLI — Piper, VoxCPM, Kokoro, xtts, voice-cloning scripts, …) | Config-driven — declare under `tts.providers.` with `type: command` in `config.yaml` | [TTS setup](/docs/user-guide/features/tts#custom-command-providers) | | An **STT backend** (custom whisper binary, local ASR CLI) | Config-driven — set `HERMES_LOCAL_STT_COMMAND` env var to a shell template | [Voice Message Transcription (STT)](/docs/user-guide/features/tts#voice-message-transcription-stt) | | **External tools via MCP** (filesystem, GitHub, Linear, Notion, any MCP server) | Config-driven — declare `mcp_servers.` with `command:` / `url:` in `config.yaml`. Hermes auto-discovers the server's tools and registers them alongside built-ins. 
| [MCP](/docs/user-guide/features/mcp) | -| **Additional skill sources** (custom GitHub repos, private skill indexes) | CLI — `hermes skills tap add <owner/repo>` | [Skills Hub](/docs/user-guide/features/skills#skills-hub) | +| **Additional skill sources** (custom GitHub repos, private skill indexes) | CLI — `hermes skills tap add <owner/repo>` | [Skills Hub](/docs/user-guide/features/skills#skills-hub) · [Publishing a custom tap](/docs/user-guide/features/skills#publishing-a-custom-skill-tap) | | **Gateway event hooks** (fire on `gateway:startup`, `session:start`, `agent:end`, `command:*`) | Drop `HOOK.yaml` + `handler.py` into `~/.hermes/hooks/<hook-name>/` | [Event Hooks](/docs/user-guide/features/hooks#gateway-event-hooks) | | **Shell hooks** (run a shell command on events — notifications, audit logs, desktop alerts) | Config-driven — declare under `hooks:` in `config.yaml` | [Shell Hooks](/docs/user-guide/features/hooks#shell-hooks) | diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md index f0c1b34fd4..9499e15d80 100644 --- a/website/docs/user-guide/features/skills.md +++ b/website/docs/user-guide/features/skills.md @@ -464,6 +464,119 @@ This uses the stored source identifier plus the current upstream bundle content Skills hub operations use the GitHub API, which has a rate limit of 60 requests/hour for unauthenticated users. If you see rate-limit errors during install or search, set `GITHUB_TOKEN` in your `.env` file to increase the limit to 5,000 requests/hour. The error message includes an actionable hint when this happens. ::: +### Publishing a custom skill tap + +If you want to share a curated set of skills — for your team, your org, or publicly — you can publish them as a **tap**: a GitHub repository other Hermes users add with `hermes skills tap add <owner/repo>`. No server, no registry sign-up, no release pipeline. Just a directory of `SKILL.md` files. 
+ +#### Repo layout + +A tap is any GitHub repo (public or private — private needs `GITHUB_TOKEN`) laid out like this: + +``` +owner/repo +├── skills/ # default path; configurable per-tap +│ ├── my-workflow/ +│ │ ├── SKILL.md # required +│ │ ├── references/ # optional supporting files +│ │ ├── templates/ +│ │ └── scripts/ +│ ├── another-skill/ +│ │ └── SKILL.md +│ └── third-skill/ +│ └── SKILL.md +└── README.md # optional but helpful +``` + +Rules: +- Each skill lives in its own directory under the tap's root path (default `skills/`). +- The directory name becomes the skill's install slug. +- Each skill directory must contain a `SKILL.md` with standard [SKILL.md frontmatter](#skillmd-format) (`name`, `description`, plus optional `metadata.hermes.tags`, `version`, `author`, `platforms`, `metadata.hermes.config`). +- Subdirectories like `references/`, `templates/`, `scripts/`, `assets/` are downloaded alongside `SKILL.md` at install time. +- Skills whose directory name starts with `.` or `_` are ignored. + +Hermes discovers skills by listing every subdirectory of the tap path and probing each for `SKILL.md`. + +#### Minimal tap example + +``` +my-org/hermes-skills +└── skills/ + └── deploy-runbook/ + └── SKILL.md +``` + +`skills/deploy-runbook/SKILL.md`: + +```markdown +--- +name: deploy-runbook +description: Our deployment runbook — services, rollback, Slack channels +version: 1.0.0 +author: My Org Platform Team +metadata: + hermes: + tags: [deployment, runbook, internal] +--- + +# Deploy Runbook + +Step 1: ... 
+``` + +After pushing that to GitHub, any Hermes user can subscribe and install: + +```bash +hermes skills tap add my-org/hermes-skills +hermes skills search deploy +hermes skills install my-org/hermes-skills/deploy-runbook +``` + +#### Non-default paths + +If your skills don't live under `skills/` (common when you're adding a `skills/` subtree to an existing project), edit the tap entry in `~/.hermes/.hub/taps.json`: + +```json +{ + "taps": [ + {"repo": "my-org/platform-docs", "path": "internal/skills/"} + ] +} +``` + +The `hermes skills tap add` CLI defaults new taps to `path: "skills/"`; edit the file directly if you need a different path. `hermes skills tap list` shows the effective path per tap. + +#### Installing individual skills directly (without adding a tap) + +Users can also install a single skill from any public GitHub repo without adding the whole repo as a tap: + +```bash +hermes skills install owner/repo/skills/my-workflow +``` + +Useful when you want to share one skill without asking the user to subscribe to your whole registry. + +#### Trust levels for taps + +New taps are assigned `community` trust by default. Skills installed from them run through the standard security scan and show the third-party warning panel on first install. If your org or a widely-trusted source should get higher trust, add its repo to `TRUSTED_REPOS` in `tools/skills_hub.py` (requires a Hermes core PR). + +#### Tap management + +```bash +hermes skills tap list # show all configured taps +hermes skills tap add myorg/skills-repo # add (default path: skills/) +hermes skills tap remove myorg/skills-repo # remove +``` + +Inside a running session: + +``` +/skills tap list +/skills tap add myorg/skills-repo +/skills tap remove myorg/skills-repo +``` + +Taps are stored in `~/.hermes/.hub/taps.json` (created on demand). + ## Bundled skill updates (`hermes skills reset`) Hermes ships with a set of bundled skills in `skills/` inside the repo. 
On install and on every `hermes update`, a sync pass copies those into `~/.hermes/skills/` and records a manifest at `~/.hermes/skills/.bundled_manifest` mapping each skill name to the content hash at the time it was synced (the **origin hash**). diff --git a/website/sidebars.ts b/website/sidebars.ts index 611bdbf554..04c7506598 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -211,6 +211,7 @@ const sidebars: SidebarsConfig = { 'developer-guide/memory-provider-plugin', 'developer-guide/context-engine-plugin', 'developer-guide/model-provider-plugin', + 'developer-guide/image-gen-provider-plugin', 'developer-guide/creating-skills', 'developer-guide/extending-the-cli', ],